hexapdf 0.15.5 → 0.15.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/parser.rb +18 -6
- data/lib/hexapdf/tokenizer.rb +10 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +15 -0
- data/test/hexapdf/test_parser.rb +28 -3
- data/test/hexapdf/test_writer.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24d17dfd6c8dc9e3f7014e1ea769dede6f8cea81529bb201a8447f21873d3b25
|
4
|
+
data.tar.gz: 7165a2e11983731ba2597d2e4a824415abc96936d2b58f3fb737a4fed94dcf16
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 244332a4f024c90cf6344b462ed422a5f73b32d3a4d04d0dcdadc6e7ede2cd0724f7a3329fa3aad68c99e47f49a9dff66806d9b8b152b9551eaecd7365c807d4
|
7
|
+
data.tar.gz: 06c4b9fd5ecd8f045a37e85ce4a6539f52b48bb16fb685290bcc2a0210b5f2e66bcfa9b7efb1115c45dd1618de9421c479d7a66d84556d1b9c37b19b1f8b6075
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,34 @@
|
|
1
|
+
## 0.15.9 - 2021-09-04
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Handling of files that contain stream length values that are indirect objects
|
6
|
+
not referring to a number
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.8 - 2021-08-16
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
|
13
|
+
* Regression when using `-v` with the hexapdf command line tool
|
14
|
+
|
15
|
+
|
16
|
+
## 0.15.7 - 2021-07-17
|
17
|
+
|
18
|
+
### Fixed
|
19
|
+
|
20
|
+
* Infinite loop while parsing PDF array due to missing closing bracket
|
21
|
+
* Handling of invalid files with missing or corrupted trailer dictionary
|
22
|
+
|
23
|
+
|
24
|
+
## 0.15.6 - 2021-07-16
|
25
|
+
|
26
|
+
### Fixed
|
27
|
+
|
28
|
+
* Handling of indirect objects with invalid values which are now treated as null
|
29
|
+
objects
|
30
|
+
|
31
|
+
|
1
32
|
## 0.15.5 - 2021-07-06
|
2
33
|
|
3
34
|
### Changed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -119,7 +119,7 @@ module HexaPDF
|
|
119
119
|
# Writes the document to the given file or does nothing if +out_file+ is +nil+.
|
120
120
|
def write_document(doc, out_file, incremental: false)
|
121
121
|
if out_file
|
122
|
-
doc.validate(auto_correct: true) do |
|
122
|
+
doc.validate(auto_correct: true) do |msg, correctable, object|
|
123
123
|
if command_parser.strict && !correctable
|
124
124
|
raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
|
125
125
|
elsif command_parser.verbosity_info?
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -162,7 +165,7 @@ module HexaPDF
|
|
162
165
|
else
|
163
166
|
0
|
164
167
|
end
|
165
|
-
@tokenizer.pos = pos + length
|
168
|
+
@tokenizer.pos = pos + length rescue pos
|
166
169
|
|
167
170
|
tok = @tokenizer.next_token
|
168
171
|
unless tok.kind_of?(Tokenizer::Token) && tok == 'endstream'
|
@@ -444,6 +447,15 @@ module HexaPDF
|
|
444
447
|
|
445
448
|
if !trailer || trailer.empty?
|
446
449
|
_, trailer = load_revision(startxref_offset) rescue nil
|
450
|
+
unless trailer
|
451
|
+
xref.each do |_oid, _gen, xref_entry|
|
452
|
+
obj, * = parse_indirect_object(xref_entry.pos) rescue nil
|
453
|
+
if obj.kind_of?(Hash) && obj[:Type] == :Catalog
|
454
|
+
trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
|
455
|
+
break
|
456
|
+
end
|
457
|
+
end
|
458
|
+
end
|
447
459
|
unless trailer
|
448
460
|
@in_reconstruct_revision = false
|
449
461
|
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -55,6 +55,9 @@ module HexaPDF
|
|
55
55
|
|
56
56
|
# This object is returned when there are no more tokens to read.
|
57
57
|
NO_MORE_TOKENS = ::Object.new
|
58
|
+
def NO_MORE_TOKENS.to_s
|
59
|
+
"EOS - no more tokens"
|
60
|
+
end
|
58
61
|
|
59
62
|
# Characters defined as whitespace.
|
60
63
|
#
|
@@ -384,7 +387,11 @@ module HexaPDF
|
|
384
387
|
result = []
|
385
388
|
while true
|
386
389
|
obj = next_object(allow_end_array_token: true)
|
387
|
-
|
390
|
+
if obj.equal?(TOKEN_ARRAY_END)
|
391
|
+
break
|
392
|
+
elsif obj.equal?(NO_MORE_TOKENS)
|
393
|
+
raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
|
394
|
+
end
|
388
395
|
result << obj
|
389
396
|
end
|
390
397
|
result
|
@@ -403,7 +410,8 @@ module HexaPDF
|
|
403
410
|
key = next_token
|
404
411
|
break if key.equal?(TOKEN_DICT_END)
|
405
412
|
unless key.kind_of?(Symbol)
|
406
|
-
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
|
413
|
+
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
|
414
|
+
"found '#{key}'", pos: pos)
|
407
415
|
end
|
408
416
|
|
409
417
|
val = next_object
|
data/lib/hexapdf/version.rb
CHANGED
data/test/hexapdf/common_tokenizer_tests.rb
CHANGED
@@ -161,6 +161,21 @@ module CommonTokenizerTests
|
|
161
161
|
assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
162
162
|
end
|
163
163
|
|
164
|
+
it "next_object: fails for an array without closing bracket, encountering EOS" do
|
165
|
+
create_tokenizer("[1 2")
|
166
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
167
|
+
assert_match(/Unclosed array found/, exception.message)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
|
171
|
+
create_tokenizer("<</Name 5")
|
172
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
173
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
174
|
+
create_tokenizer("<</Name 5 /Other")
|
175
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
176
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
177
|
+
end
|
178
|
+
|
164
179
|
it "returns the correct position on operations" do
|
165
180
|
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
181
|
@tokenizer.next_token
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,13 +107,27 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
-
it "
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
116
|
+
it "recovers from a stream length value that doesn't reflect the correct length" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
113
119
|
assert_equal(2, obj[:Length])
|
114
120
|
assert_equal('12', TestHelper.collector(stream.fiber))
|
115
121
|
end
|
116
122
|
|
123
|
+
it "recovers from an invalid stream length value" do
|
124
|
+
create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
|
125
|
+
@document.add([5], oid: 2)
|
126
|
+
obj, _, _, stream = @parser.parse_indirect_object
|
127
|
+
assert_equal(2, obj[:Length])
|
128
|
+
assert_equal('12', TestHelper.collector(stream.fiber))
|
129
|
+
end
|
130
|
+
|
117
131
|
it "works even if the keyword endobj is missing or mangled" do
|
118
132
|
create_parser("1 0 obj<</Length 4>>5")
|
119
133
|
object, * = @parser.parse_indirect_object
|
@@ -185,7 +199,13 @@ describe HexaPDF::Parser do
|
|
185
199
|
it "fails for numbers followed by endobj without space" do
|
186
200
|
create_parser("1 0 obj 749endobj")
|
187
201
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
202
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
203
|
+
end
|
204
|
+
|
205
|
+
it "fails for invalid values" do
|
206
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
207
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
208
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
209
|
end
|
190
210
|
|
191
211
|
it "fails if the stream length value is invalid" do
|
@@ -607,7 +627,12 @@ describe HexaPDF::Parser do
|
|
607
627
|
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
608
628
|
end
|
609
629
|
|
610
|
-
it "
|
630
|
+
it "constructs a trailer with a /Root entry if no valid trailer was found" do
|
631
|
+
create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
|
632
|
+
assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
|
633
|
+
end
|
634
|
+
|
635
|
+
it "fails if no valid trailer is found and couldn't be constructed" do
|
611
636
|
create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
|
612
637
|
assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
|
613
638
|
end
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.5)>>
|
43
|
+
<</Producer(HexaPDF version 0.15.9)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.5)>>
|
75
|
+
<</Producer(HexaPDF version 0.15.9)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.5
|
4
|
+
version: 0.15.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-07-06 00:00:00.000000000 Z
|
11
|
+
date: 2021-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|