hexapdf 0.15.3 → 0.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/parser.rb +19 -7
- data/lib/hexapdf/tokenizer.rb +15 -6
- data/lib/hexapdf/type/annotation.rb +7 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +21 -2
- data/test/hexapdf/test_parser.rb +19 -2
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_annotation.rb +5 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1385aca5e91916034a5494142b4c88e51de46d2d13b79ddaed9494c74808793a
|
4
|
+
data.tar.gz: 4fee33d3c96e74c00565ac6211901f39c0242cd2e0926f0760be7bfb18fe7f12
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3fa1454ec6821500c1f94981ad17efcbf36f125a29870a62ad0d626fe65cd35bb7ef6426021daba3b2554dcbd20f1ce6efc4d93c1d4d8b5303d6063eb27804fb
|
7
|
+
data.tar.gz: 8f2c3de849fed113c6f4fe7494312a202a872f7364052b584a38352315a4a358f135beea8dd951c29d2dbd3b842c4eefe3892ed3d9bb3c24e6875cdbb0c59123
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,40 @@
|
|
1
|
+
## 0.15.7 - 2021-07-17
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Infinite loop while parsing PDF array due to missing closing bracket
|
6
|
+
* Handling of invalid files with missing or corrupted trailer dictionary
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.6 - 2021-07-16
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
|
13
|
+
* Handling of indirect objects with invalid values, which are now treated as null
|
14
|
+
objects
|
15
|
+
|
16
|
+
|
17
|
+
## 0.15.5 - 2021-07-06
|
18
|
+
|
19
|
+
### Changed
|
20
|
+
|
21
|
+
* Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
|
22
|
+
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
* Handling of invalid cross-reference stream entries that end with the sequence
|
27
|
+
`\r\r`
|
28
|
+
|
29
|
+
|
30
|
+
## 0.15.4 - 2021-05-27
|
31
|
+
|
32
|
+
### Fixed
|
33
|
+
|
34
|
+
* [HexaPDF::Type::Annotation#appearance] to handle cases where there is
|
35
|
+
no valid appearance stream
|
36
|
+
|
37
|
+
|
1
38
|
## 0.15.3 - 2021-05-01
|
2
39
|
|
3
40
|
### Fixed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -50,7 +50,7 @@ module HexaPDF
|
|
50
50
|
module Extensions #:nodoc:
|
51
51
|
def help_banner #:nodoc:
|
52
52
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
53
|
-
"Copyright (c) 2014-
|
53
|
+
"Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
54
54
|
"#{format(usage, indent: 7)}\n\n"
|
55
55
|
end
|
56
56
|
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -263,9 +266,9 @@ module HexaPDF
|
|
263
266
|
|
264
267
|
@tokenizer.skip_whitespace
|
265
268
|
start.upto(start + number_of_entries - 1) do |oid|
|
266
|
-
pos, gen, type = @tokenizer.next_xref_entry do |
|
269
|
+
pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
|
267
270
|
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
268
|
-
force: !
|
271
|
+
force: !recoverable)
|
269
272
|
end
|
270
273
|
if xref.entry?(oid)
|
271
274
|
next
|
@@ -444,6 +447,15 @@ module HexaPDF
|
|
444
447
|
|
445
448
|
if !trailer || trailer.empty?
|
446
449
|
_, trailer = load_revision(startxref_offset) rescue nil
|
450
|
+
unless trailer
|
451
|
+
xref.each do |_oid, _gen, xref_entry|
|
452
|
+
obj, * = parse_indirect_object(xref_entry.pos) rescue nil
|
453
|
+
if obj.kind_of?(Hash) && obj[:Type] == :Catalog
|
454
|
+
trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
|
455
|
+
break
|
456
|
+
end
|
457
|
+
end
|
458
|
+
end
|
447
459
|
unless trailer
|
448
460
|
@in_reconstruct_revision = false
|
449
461
|
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -55,6 +55,9 @@ module HexaPDF
|
|
55
55
|
|
56
56
|
# This object is returned when there are no more tokens to read.
|
57
57
|
NO_MORE_TOKENS = ::Object.new
|
58
|
+
def NO_MORE_TOKENS.to_s
|
59
|
+
"EOS - no more tokens"
|
60
|
+
end
|
58
61
|
|
59
62
|
# Characters defined as whitespace.
|
60
63
|
#
|
@@ -225,13 +228,14 @@ module HexaPDF
|
|
225
228
|
# Reads the cross-reference subsection entry at the current position and advances the scan
|
226
229
|
# pointer.
|
227
230
|
#
|
228
|
-
# If a
|
231
|
+
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
232
|
+
# problem is recoverable.
|
229
233
|
#
|
230
234
|
# See: PDF1.7 7.5.4
|
231
|
-
def next_xref_entry #:yield:
|
235
|
+
def next_xref_entry #:yield: recoverable
|
232
236
|
prepare_string_scanner(20)
|
233
|
-
|
234
|
-
yield(@ss
|
237
|
+
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
238
|
+
yield(@ss[4])
|
235
239
|
end
|
236
240
|
[@ss[1].to_i, @ss[2].to_i, @ss[3]]
|
237
241
|
end
|
@@ -383,7 +387,11 @@ module HexaPDF
|
|
383
387
|
result = []
|
384
388
|
while true
|
385
389
|
obj = next_object(allow_end_array_token: true)
|
386
|
-
|
390
|
+
if obj.equal?(TOKEN_ARRAY_END)
|
391
|
+
break
|
392
|
+
elsif obj.equal?(NO_MORE_TOKENS)
|
393
|
+
raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
|
394
|
+
end
|
387
395
|
result << obj
|
388
396
|
end
|
389
397
|
result
|
@@ -402,7 +410,8 @@ module HexaPDF
|
|
402
410
|
key = next_token
|
403
411
|
break if key.equal?(TOKEN_DICT_END)
|
404
412
|
unless key.kind_of?(Symbol)
|
405
|
-
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects
|
413
|
+
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
|
414
|
+
"found '#{key}'", pos: pos)
|
406
415
|
end
|
407
416
|
|
408
417
|
val = next_object
|
@@ -138,8 +138,13 @@ module HexaPDF
|
|
138
138
|
if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
|
139
139
|
entry = entry[self[:AS]]
|
140
140
|
end
|
141
|
-
|
142
|
-
|
141
|
+
return unless entry.kind_of?(HexaPDF::Stream)
|
142
|
+
|
143
|
+
if entry.type == :XObject && entry[:Subtype] == :Form
|
144
|
+
entry
|
145
|
+
elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
|
146
|
+
(entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
|
147
|
+
document.wrap(entry, type: :XObject, subtype: :Form)
|
143
148
|
end
|
144
149
|
end
|
145
150
|
alias appearance? appearance
|
data/lib/hexapdf/version.rb
CHANGED
@@ -161,6 +161,21 @@ module CommonTokenizerTests
|
|
161
161
|
assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
162
162
|
end
|
163
163
|
|
164
|
+
it "next_object: fails for an array without closing bracket, encountering EOS" do
|
165
|
+
create_tokenizer("[1 2")
|
166
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
167
|
+
assert_match(/Unclosed array found/, exception.message)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
|
171
|
+
create_tokenizer("<</Name 5")
|
172
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
173
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
174
|
+
create_tokenizer("<</Name 5 /Other")
|
175
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
176
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
177
|
+
end
|
178
|
+
|
164
179
|
it "returns the correct position on operations" do
|
165
180
|
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
181
|
@tokenizer.next_token
|
@@ -210,8 +225,12 @@ module CommonTokenizerTests
|
|
210
225
|
|
211
226
|
it "next_xref_entry: fails on invalidly formatted entries" do
|
212
227
|
create_tokenizer("0000000001 00001 g \n")
|
213
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
228
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
|
214
229
|
create_tokenizer("0000000001 00001 n\n")
|
215
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
230
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
231
|
+
create_tokenizer("0000000001 00001 n\r")
|
232
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
233
|
+
create_tokenizer("0000000001 00001 n\r\r")
|
234
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
235
|
end
|
217
236
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
110
116
|
it "recovers from an invalid stream length value" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
|
|
185
191
|
it "fails for numbers followed by endobj without space" do
|
186
192
|
create_parser("1 0 obj 749endobj")
|
187
193
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
194
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "fails for invalid values" do
|
198
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
199
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
200
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
201
|
end
|
190
202
|
|
191
203
|
it "fails if the stream length value is invalid" do
|
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
|
|
607
619
|
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
608
620
|
end
|
609
621
|
|
610
|
-
it "
|
622
|
+
it "constructs a trailer with a /Root entry if no valid trailer was found" do
|
623
|
+
create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
|
624
|
+
assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
|
625
|
+
end
|
626
|
+
|
627
|
+
it "fails if no valid trailer is found and couldn't be constructed" do
|
611
628
|
create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
|
612
629
|
assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
|
613
630
|
end
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.
|
43
|
+
<</Producer(HexaPDF version 0.15.7)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.
|
75
|
+
<</Producer(HexaPDF version 0.15.7)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
|
|
51
51
|
|
52
52
|
stream = @doc.wrap({}, stream: '')
|
53
53
|
@annot[:AP][:N] = stream
|
54
|
+
assert_nil(@annot.appearance)
|
55
|
+
|
56
|
+
stream[:BBox] = [1, 2, 3, 4]
|
54
57
|
appearance = @annot.appearance
|
55
58
|
assert_same(stream.data, appearance.data)
|
56
59
|
assert_equal(:Form, appearance[:Subtype])
|
57
60
|
|
58
|
-
@annot[:AP][:N] = {X:
|
61
|
+
@annot[:AP][:N] = {X: {}}
|
59
62
|
assert_nil(@annot.appearance)
|
60
63
|
|
61
64
|
@annot[:AS] = :X
|
65
|
+
@annot[:AP][:N][:X] = stream
|
62
66
|
assert_same(stream.data, @annot.appearance.data)
|
63
67
|
|
64
68
|
@annot[:AP][:D] = {X: stream}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|