hexapdf 0.15.3 → 0.15.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/parser.rb +19 -7
- data/lib/hexapdf/tokenizer.rb +15 -6
- data/lib/hexapdf/type/annotation.rb +7 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +21 -2
- data/test/hexapdf/test_parser.rb +19 -2
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_annotation.rb +5 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1385aca5e91916034a5494142b4c88e51de46d2d13b79ddaed9494c74808793a
|
4
|
+
data.tar.gz: 4fee33d3c96e74c00565ac6211901f39c0242cd2e0926f0760be7bfb18fe7f12
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3fa1454ec6821500c1f94981ad17efcbf36f125a29870a62ad0d626fe65cd35bb7ef6426021daba3b2554dcbd20f1ce6efc4d93c1d4d8b5303d6063eb27804fb
|
7
|
+
data.tar.gz: 8f2c3de849fed113c6f4fe7494312a202a872f7364052b584a38352315a4a358f135beea8dd951c29d2dbd3b842c4eefe3892ed3d9bb3c24e6875cdbb0c59123
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,40 @@
|
|
1
|
+
## 0.15.7 - 2021-07-17
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Infinite loop while parsing PDF array due to missing closing bracket
|
6
|
+
* Handling of invalid files with missing or corrupted trailer dictionary
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.6 - 2021-07-16
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
|
13
|
+
* Handling of indirect objects with invalid values which are now treated as null
|
14
|
+
objects
|
15
|
+
|
16
|
+
|
17
|
+
## 0.15.5 - 2021-07-06
|
18
|
+
|
19
|
+
### Changed
|
20
|
+
|
21
|
+
* Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
|
22
|
+
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
* Handling of invalid cross-reference stream entries that end with the sequence
|
27
|
+
`\r\r`
|
28
|
+
|
29
|
+
|
30
|
+
## 0.15.4 - 2021-05-27
|
31
|
+
|
32
|
+
### Fixed
|
33
|
+
|
34
|
+
* [HexaPDF::Type::Annotation#appearance] to handle cases where there is
|
35
|
+
no valid appearance stream
|
36
|
+
|
37
|
+
|
1
38
|
## 0.15.3 - 2021-05-01
|
2
39
|
|
3
40
|
### Fixed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -50,7 +50,7 @@ module HexaPDF
|
|
50
50
|
module Extensions #:nodoc:
|
51
51
|
def help_banner #:nodoc:
|
52
52
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
53
|
-
"Copyright (c) 2014-
|
53
|
+
"Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
54
54
|
"#{format(usage, indent: 7)}\n\n"
|
55
55
|
end
|
56
56
|
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -263,9 +266,9 @@ module HexaPDF
|
|
263
266
|
|
264
267
|
@tokenizer.skip_whitespace
|
265
268
|
start.upto(start + number_of_entries - 1) do |oid|
|
266
|
-
pos, gen, type = @tokenizer.next_xref_entry do |
|
269
|
+
pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
|
267
270
|
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
268
|
-
force: !
|
271
|
+
force: !recoverable)
|
269
272
|
end
|
270
273
|
if xref.entry?(oid)
|
271
274
|
next
|
@@ -444,6 +447,15 @@ module HexaPDF
|
|
444
447
|
|
445
448
|
if !trailer || trailer.empty?
|
446
449
|
_, trailer = load_revision(startxref_offset) rescue nil
|
450
|
+
unless trailer
|
451
|
+
xref.each do |_oid, _gen, xref_entry|
|
452
|
+
obj, * = parse_indirect_object(xref_entry.pos) rescue nil
|
453
|
+
if obj.kind_of?(Hash) && obj[:Type] == :Catalog
|
454
|
+
trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
|
455
|
+
break
|
456
|
+
end
|
457
|
+
end
|
458
|
+
end
|
447
459
|
unless trailer
|
448
460
|
@in_reconstruct_revision = false
|
449
461
|
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -55,6 +55,9 @@ module HexaPDF
|
|
55
55
|
|
56
56
|
# This object is returned when there are no more tokens to read.
|
57
57
|
NO_MORE_TOKENS = ::Object.new
|
58
|
+
def NO_MORE_TOKENS.to_s
|
59
|
+
"EOS - no more tokens"
|
60
|
+
end
|
58
61
|
|
59
62
|
# Characters defined as whitespace.
|
60
63
|
#
|
@@ -225,13 +228,14 @@ module HexaPDF
|
|
225
228
|
# Reads the cross-reference subsection entry at the current position and advances the scan
|
226
229
|
# pointer.
|
227
230
|
#
|
228
|
-
# If a
|
231
|
+
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
232
|
+
# problem is recoverable.
|
229
233
|
#
|
230
234
|
# See: PDF1.7 7.5.4
|
231
|
-
def next_xref_entry #:yield:
|
235
|
+
def next_xref_entry #:yield: recoverable
|
232
236
|
prepare_string_scanner(20)
|
233
|
-
|
234
|
-
yield(@ss
|
237
|
+
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
238
|
+
yield(@ss[4])
|
235
239
|
end
|
236
240
|
[@ss[1].to_i, @ss[2].to_i, @ss[3]]
|
237
241
|
end
|
@@ -383,7 +387,11 @@ module HexaPDF
|
|
383
387
|
result = []
|
384
388
|
while true
|
385
389
|
obj = next_object(allow_end_array_token: true)
|
386
|
-
|
390
|
+
if obj.equal?(TOKEN_ARRAY_END)
|
391
|
+
break
|
392
|
+
elsif obj.equal?(NO_MORE_TOKENS)
|
393
|
+
raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
|
394
|
+
end
|
387
395
|
result << obj
|
388
396
|
end
|
389
397
|
result
|
@@ -402,7 +410,8 @@ module HexaPDF
|
|
402
410
|
key = next_token
|
403
411
|
break if key.equal?(TOKEN_DICT_END)
|
404
412
|
unless key.kind_of?(Symbol)
|
405
|
-
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects
|
413
|
+
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
|
414
|
+
"found '#{key}'", pos: pos)
|
406
415
|
end
|
407
416
|
|
408
417
|
val = next_object
|
data/lib/hexapdf/type/annotation.rb
CHANGED
@@ -138,8 +138,13 @@ module HexaPDF
|
|
138
138
|
if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
|
139
139
|
entry = entry[self[:AS]]
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
return unless entry.kind_of?(HexaPDF::Stream)
|
142
|
+
|
143
|
+
if entry.type == :XObject && entry[:Subtype] == :Form
|
144
|
+
entry
|
145
|
+
elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
|
146
|
+
(entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
|
147
|
+
document.wrap(entry, type: :XObject, subtype: :Form)
|
143
148
|
end
|
144
149
|
end
|
145
150
|
alias appearance? appearance
|
data/lib/hexapdf/version.rb
CHANGED
data/test/hexapdf/common_tokenizer_tests.rb
CHANGED
@@ -161,6 +161,21 @@ module CommonTokenizerTests
|
|
161
161
|
assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
162
162
|
end
|
163
163
|
|
164
|
+
it "next_object: fails for an array without closing bracket, encountering EOS" do
|
165
|
+
create_tokenizer("[1 2")
|
166
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
167
|
+
assert_match(/Unclosed array found/, exception.message)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
|
171
|
+
create_tokenizer("<</Name 5")
|
172
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
173
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
174
|
+
create_tokenizer("<</Name 5 /Other")
|
175
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
176
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
177
|
+
end
|
178
|
+
|
164
179
|
it "returns the correct position on operations" do
|
165
180
|
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
181
|
@tokenizer.next_token
|
@@ -210,8 +225,12 @@ module CommonTokenizerTests
|
|
210
225
|
|
211
226
|
it "next_xref_entry: fails on invalidly formatted entries" do
|
212
227
|
create_tokenizer("0000000001 00001 g \n")
|
213
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
228
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
|
214
229
|
create_tokenizer("0000000001 00001 n\n")
|
215
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
230
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
231
|
+
create_tokenizer("0000000001 00001 n\r")
|
232
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
233
|
+
create_tokenizer("0000000001 00001 n\r\r")
|
234
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
235
|
end
|
217
236
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
110
116
|
it "recovers from an invalid stream length value" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
|
|
185
191
|
it "fails for numbers followed by endobj without space" do
|
186
192
|
create_parser("1 0 obj 749endobj")
|
187
193
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
194
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "fails for invalid values" do
|
198
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
199
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
200
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
201
|
end
|
190
202
|
|
191
203
|
it "fails if the stream length value is invalid" do
|
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
|
|
607
619
|
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
608
620
|
end
|
609
621
|
|
610
|
-
it "
|
622
|
+
it "constructs a trailer with a /Root entry if no valid trailer was found" do
|
623
|
+
create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
|
624
|
+
assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
|
625
|
+
end
|
626
|
+
|
627
|
+
it "fails if no valid trailer is found and couldn't be constructed" do
|
611
628
|
create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
|
612
629
|
assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
|
613
630
|
end
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.
|
43
|
+
<</Producer(HexaPDF version 0.15.7)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.
|
75
|
+
<</Producer(HexaPDF version 0.15.7)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
data/test/hexapdf/type/test_annotation.rb
CHANGED
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
|
|
51
51
|
|
52
52
|
stream = @doc.wrap({}, stream: '')
|
53
53
|
@annot[:AP][:N] = stream
|
54
|
+
assert_nil(@annot.appearance)
|
55
|
+
|
56
|
+
stream[:BBox] = [1, 2, 3, 4]
|
54
57
|
appearance = @annot.appearance
|
55
58
|
assert_same(stream.data, appearance.data)
|
56
59
|
assert_equal(:Form, appearance[:Subtype])
|
57
60
|
|
58
|
-
@annot[:AP][:N] = {X:
|
61
|
+
@annot[:AP][:N] = {X: {}}
|
59
62
|
assert_nil(@annot.appearance)
|
60
63
|
|
61
64
|
@annot[:AS] = :X
|
65
|
+
@annot[:AP][:N][:X] = stream
|
62
66
|
assert_same(stream.data, @annot.appearance.data)
|
63
67
|
|
64
68
|
@annot[:AP][:D] = {X: stream}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|