hexapdf 0.15.5 → 0.15.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/lib/hexapdf/cli/command.rb +1 -1
- data/lib/hexapdf/parser.rb +18 -6
- data/lib/hexapdf/tokenizer.rb +10 -2
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +15 -0
- data/test/hexapdf/test_parser.rb +28 -3
- data/test/hexapdf/test_writer.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 24d17dfd6c8dc9e3f7014e1ea769dede6f8cea81529bb201a8447f21873d3b25
|
|
4
|
+
data.tar.gz: 7165a2e11983731ba2597d2e4a824415abc96936d2b58f3fb737a4fed94dcf16
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 244332a4f024c90cf6344b462ed422a5f73b32d3a4d04d0dcdadc6e7ede2cd0724f7a3329fa3aad68c99e47f49a9dff66806d9b8b152b9551eaecd7365c807d4
|
|
7
|
+
data.tar.gz: 06c4b9fd5ecd8f045a37e85ce4a6539f52b48bb16fb685290bcc2a0210b5f2e66bcfa9b7efb1115c45dd1618de9421c479d7a66d84556d1b9c37b19b1f8b6075
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,34 @@
|
|
|
1
|
+
## 0.15.9 - 2021-09-04
|
|
2
|
+
|
|
3
|
+
### Fixed
|
|
4
|
+
|
|
5
|
+
* Handling of files that contain stream length values that are indirect objects
|
|
6
|
+
not referring to a number
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## 0.15.8 - 2021-08-16
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
* Regression when using `-v` with the hexapdf command line tool
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
## 0.15.7 - 2021-07-17
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
* Infinite loop while parsing PDF array due to missing closing bracket
|
|
21
|
+
* Handling of invalid files with missing or corrupted trailer dictionary
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## 0.15.6 - 2021-07-16
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
* Handling of indirect objects with invalid values which are now treated as null
|
|
29
|
+
objects
|
|
30
|
+
|
|
31
|
+
|
|
1
32
|
## 0.15.5 - 2021-07-06
|
|
2
33
|
|
|
3
34
|
### Changed
|
data/lib/hexapdf/cli/command.rb
CHANGED
|
@@ -119,7 +119,7 @@ module HexaPDF
|
|
|
119
119
|
# Writes the document to the given file or does nothing if +out_file+ is +nil+.
|
|
120
120
|
def write_document(doc, out_file, incremental: false)
|
|
121
121
|
if out_file
|
|
122
|
-
doc.validate(auto_correct: true) do |
|
|
122
|
+
doc.validate(auto_correct: true) do |msg, correctable, object|
|
|
123
123
|
if command_parser.strict && !correctable
|
|
124
124
|
raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
|
|
125
125
|
elsif command_parser.verbosity_info?
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -125,11 +125,14 @@ module HexaPDF
|
|
|
125
125
|
begin
|
|
126
126
|
object = @tokenizer.next_object
|
|
127
127
|
rescue MalformedPDFError
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
|
131
|
+
object = tok.to_i
|
|
132
|
+
@tokenizer.pos -= 6
|
|
133
|
+
else
|
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
|
135
|
+
end
|
|
133
136
|
end
|
|
134
137
|
end
|
|
135
138
|
|
|
@@ -162,7 +165,7 @@ module HexaPDF
|
|
|
162
165
|
else
|
|
163
166
|
0
|
|
164
167
|
end
|
|
165
|
-
@tokenizer.pos = pos + length
|
|
168
|
+
@tokenizer.pos = pos + length rescue pos
|
|
166
169
|
|
|
167
170
|
tok = @tokenizer.next_token
|
|
168
171
|
unless tok.kind_of?(Tokenizer::Token) && tok == 'endstream'
|
|
@@ -444,6 +447,15 @@ module HexaPDF
|
|
|
444
447
|
|
|
445
448
|
if !trailer || trailer.empty?
|
|
446
449
|
_, trailer = load_revision(startxref_offset) rescue nil
|
|
450
|
+
unless trailer
|
|
451
|
+
xref.each do |_oid, _gen, xref_entry|
|
|
452
|
+
obj, * = parse_indirect_object(xref_entry.pos) rescue nil
|
|
453
|
+
if obj.kind_of?(Hash) && obj[:Type] == :Catalog
|
|
454
|
+
trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
|
|
455
|
+
break
|
|
456
|
+
end
|
|
457
|
+
end
|
|
458
|
+
end
|
|
447
459
|
unless trailer
|
|
448
460
|
@in_reconstruct_revision = false
|
|
449
461
|
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
|
@@ -55,6 +55,9 @@ module HexaPDF
|
|
|
55
55
|
|
|
56
56
|
# This object is returned when there are no more tokens to read.
|
|
57
57
|
NO_MORE_TOKENS = ::Object.new
|
|
58
|
+
def NO_MORE_TOKENS.to_s
|
|
59
|
+
"EOS - no more tokens"
|
|
60
|
+
end
|
|
58
61
|
|
|
59
62
|
# Characters defined as whitespace.
|
|
60
63
|
#
|
|
@@ -384,7 +387,11 @@ module HexaPDF
|
|
|
384
387
|
result = []
|
|
385
388
|
while true
|
|
386
389
|
obj = next_object(allow_end_array_token: true)
|
|
387
|
-
|
|
390
|
+
if obj.equal?(TOKEN_ARRAY_END)
|
|
391
|
+
break
|
|
392
|
+
elsif obj.equal?(NO_MORE_TOKENS)
|
|
393
|
+
raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
|
|
394
|
+
end
|
|
388
395
|
result << obj
|
|
389
396
|
end
|
|
390
397
|
result
|
|
@@ -403,7 +410,8 @@ module HexaPDF
|
|
|
403
410
|
key = next_token
|
|
404
411
|
break if key.equal?(TOKEN_DICT_END)
|
|
405
412
|
unless key.kind_of?(Symbol)
|
|
406
|
-
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects
|
|
413
|
+
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
|
|
414
|
+
"found '#{key}'", pos: pos)
|
|
407
415
|
end
|
|
408
416
|
|
|
409
417
|
val = next_object
|
data/lib/hexapdf/version.rb
CHANGED
|
data/test/hexapdf/common_tokenizer_tests.rb
CHANGED
|
@@ -161,6 +161,21 @@ module CommonTokenizerTests
|
|
|
161
161
|
assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
|
162
162
|
end
|
|
163
163
|
|
|
164
|
+
it "next_object: fails for an array without closing bracket, encountering EOS" do
|
|
165
|
+
create_tokenizer("[1 2")
|
|
166
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
|
167
|
+
assert_match(/Unclosed array found/, exception.message)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
|
|
171
|
+
create_tokenizer("<</Name 5")
|
|
172
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
|
173
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
|
174
|
+
create_tokenizer("<</Name 5 /Other")
|
|
175
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
|
176
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
|
177
|
+
end
|
|
178
|
+
|
|
164
179
|
it "returns the correct position on operations" do
|
|
165
180
|
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
|
166
181
|
@tokenizer.next_token
|
data/test/hexapdf/test_parser.rb
CHANGED
|
@@ -107,13 +107,27 @@ describe HexaPDF::Parser do
|
|
|
107
107
|
assert_equal(749, object)
|
|
108
108
|
end
|
|
109
109
|
|
|
110
|
-
it "
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
|
112
|
+
object, * = @parser.parse_indirect_object
|
|
113
|
+
assert_nil(object)
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it "recovers from a stream length value that doesn't reflect the correct length" do
|
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
|
113
119
|
assert_equal(2, obj[:Length])
|
|
114
120
|
assert_equal('12', TestHelper.collector(stream.fiber))
|
|
115
121
|
end
|
|
116
122
|
|
|
123
|
+
it "recovers from an invalid stream length value" do
|
|
124
|
+
create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
|
|
125
|
+
@document.add([5], oid: 2)
|
|
126
|
+
obj, _, _, stream = @parser.parse_indirect_object
|
|
127
|
+
assert_equal(2, obj[:Length])
|
|
128
|
+
assert_equal('12', TestHelper.collector(stream.fiber))
|
|
129
|
+
end
|
|
130
|
+
|
|
117
131
|
it "works even if the keyword endobj is missing or mangled" do
|
|
118
132
|
create_parser("1 0 obj<</Length 4>>5")
|
|
119
133
|
object, * = @parser.parse_indirect_object
|
|
@@ -185,7 +199,13 @@ describe HexaPDF::Parser do
|
|
|
185
199
|
it "fails for numbers followed by endobj without space" do
|
|
186
200
|
create_parser("1 0 obj 749endobj")
|
|
187
201
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
188
|
-
assert_match(/
|
|
202
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
it "fails for invalid values" do
|
|
206
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
|
207
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
208
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
|
189
209
|
end
|
|
190
210
|
|
|
191
211
|
it "fails if the stream length value is invalid" do
|
|
@@ -607,7 +627,12 @@ describe HexaPDF::Parser do
|
|
|
607
627
|
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
|
608
628
|
end
|
|
609
629
|
|
|
610
|
-
it "
|
|
630
|
+
it "constructs a trailer with a /Root entry if no valid trailer was found" do
|
|
631
|
+
create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
|
|
632
|
+
assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
|
|
633
|
+
end
|
|
634
|
+
|
|
635
|
+
it "fails if no valid trailer is found and couldn't be constructed" do
|
|
611
636
|
create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
|
|
612
637
|
assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
|
|
613
638
|
end
|
data/test/hexapdf/test_writer.rb
CHANGED
|
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
|
40
40
|
219
|
|
41
41
|
%%EOF
|
|
42
42
|
3 0 obj
|
|
43
|
-
<</Producer(HexaPDF version 0.15.5)>>
|
|
43
|
+
<</Producer(HexaPDF version 0.15.9)>>
|
|
44
44
|
endobj
|
|
45
45
|
xref
|
|
46
46
|
3 1
|
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
|
72
72
|
141
|
|
73
73
|
%%EOF
|
|
74
74
|
6 0 obj
|
|
75
|
-
<</Producer(HexaPDF version 0.15.5)>>
|
|
75
|
+
<</Producer(HexaPDF version 0.15.9)>>
|
|
76
76
|
endobj
|
|
77
77
|
2 0 obj
|
|
78
78
|
<</Length 10>>stream
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: hexapdf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.15.5
|
|
4
|
+
version: 0.15.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Thomas Leitner
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-07-06 00:00:00.000000000 Z
|
|
11
|
+
date: 2021-09-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cmdparse
|