hexapdf 0.15.4 → 0.15.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/lib/hexapdf/cli/command.rb +2 -2
- data/lib/hexapdf/parser.rb +19 -7
- data/lib/hexapdf/tokenizer.rb +15 -6
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +21 -2
- data/test/hexapdf/test_parser.rb +19 -2
- data/test/hexapdf/test_writer.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4953ab56f7c03c62e4f4e2ef1aa51a8a58f98c3d24725eb86dd6bc13419bd2d2
|
4
|
+
data.tar.gz: c4ac38e280f646eecf512481570ddc8670b48c9ac32601f55b24748f4044344b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 529f8f88d9553f300b842838c1f00e8bed3e05adecfe4478f81d41fcb6431fce888f56b76b2747a65a2935cbb76a6792dce2b1f480dcb120634e1932e461c883
|
7
|
+
data.tar.gz: 9e71874d7901145045fab5791ca09b3ec9cc8f9a9243366b0329cb2570c58408d4595ee37e995a22ce2c099127bc6bdc732e5d3562bde834be2fc57aa3f35b8a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
## 0.15.8 - 2021-08-16
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Regression when using `-v` with the hexapdf command line tool
|
6
|
+
|
7
|
+
|
8
|
+
## 0.15.7 - 2021-07-17
|
9
|
+
|
10
|
+
### Fixed
|
11
|
+
|
12
|
+
* Infinite loop while parsing PDF array due to missing closing bracket
|
13
|
+
* Handling of invalid files with missing or corrupted trailer dictionary
|
14
|
+
|
15
|
+
|
16
|
+
## 0.15.6 - 2021-07-16
|
17
|
+
|
18
|
+
### Fixed
|
19
|
+
|
20
|
+
* Handling of indirect objects with invalid values which are now treated as null
|
21
|
+
objects
|
22
|
+
|
23
|
+
|
24
|
+
## 0.15.5 - 2021-07-06
|
25
|
+
|
26
|
+
### Changed
|
27
|
+
|
28
|
+
* Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
|
29
|
+
|
30
|
+
|
31
|
+
### Fixed
|
32
|
+
|
33
|
+
* Handling of invalid cross-reference stream entries that end with the sequence
|
34
|
+
`\r\r`
|
35
|
+
|
36
|
+
|
1
37
|
## 0.15.4 - 2021-05-27
|
2
38
|
|
3
39
|
### Fixed
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -50,7 +50,7 @@ module HexaPDF
|
|
50
50
|
module Extensions #:nodoc:
|
51
51
|
def help_banner #:nodoc:
|
52
52
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
53
|
-
"Copyright (c) 2014-
|
53
|
+
"Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
54
54
|
"#{format(usage, indent: 7)}\n\n"
|
55
55
|
end
|
56
56
|
end
|
@@ -119,7 +119,7 @@ module HexaPDF
|
|
119
119
|
# Writes the document to the given file or does nothing if +out_file+ is +nil+.
|
120
120
|
def write_document(doc, out_file, incremental: false)
|
121
121
|
if out_file
|
122
|
-
doc.validate(auto_correct: true) do |
|
122
|
+
doc.validate(auto_correct: true) do |msg, correctable, object|
|
123
123
|
if command_parser.strict && !correctable
|
124
124
|
raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
|
125
125
|
elsif command_parser.verbosity_info?
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -125,11 +125,14 @@ module HexaPDF
|
|
125
125
|
begin
|
126
126
|
object = @tokenizer.next_object
|
127
127
|
rescue MalformedPDFError
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
128
|
+
if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
|
129
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
130
|
+
maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
|
131
|
+
object = tok.to_i
|
132
|
+
@tokenizer.pos -= 6
|
133
|
+
else
|
134
|
+
maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
|
135
|
+
end
|
133
136
|
end
|
134
137
|
end
|
135
138
|
|
@@ -263,9 +266,9 @@ module HexaPDF
|
|
263
266
|
|
264
267
|
@tokenizer.skip_whitespace
|
265
268
|
start.upto(start + number_of_entries - 1) do |oid|
|
266
|
-
pos, gen, type = @tokenizer.next_xref_entry do |
|
269
|
+
pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
|
267
270
|
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
268
|
-
force: !
|
271
|
+
force: !recoverable)
|
269
272
|
end
|
270
273
|
if xref.entry?(oid)
|
271
274
|
next
|
@@ -444,6 +447,15 @@ module HexaPDF
|
|
444
447
|
|
445
448
|
if !trailer || trailer.empty?
|
446
449
|
_, trailer = load_revision(startxref_offset) rescue nil
|
450
|
+
unless trailer
|
451
|
+
xref.each do |_oid, _gen, xref_entry|
|
452
|
+
obj, * = parse_indirect_object(xref_entry.pos) rescue nil
|
453
|
+
if obj.kind_of?(Hash) && obj[:Type] == :Catalog
|
454
|
+
trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
|
455
|
+
break
|
456
|
+
end
|
457
|
+
end
|
458
|
+
end
|
447
459
|
unless trailer
|
448
460
|
@in_reconstruct_revision = false
|
449
461
|
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -55,6 +55,9 @@ module HexaPDF
|
|
55
55
|
|
56
56
|
# This object is returned when there are no more tokens to read.
|
57
57
|
NO_MORE_TOKENS = ::Object.new
|
58
|
+
def NO_MORE_TOKENS.to_s
|
59
|
+
"EOS - no more tokens"
|
60
|
+
end
|
58
61
|
|
59
62
|
# Characters defined as whitespace.
|
60
63
|
#
|
@@ -225,13 +228,14 @@ module HexaPDF
|
|
225
228
|
# Reads the cross-reference subsection entry at the current position and advances the scan
|
226
229
|
# pointer.
|
227
230
|
#
|
228
|
-
# If a
|
231
|
+
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
232
|
+
# problem is recoverable.
|
229
233
|
#
|
230
234
|
# See: PDF1.7 7.5.4
|
231
|
-
def next_xref_entry #:yield:
|
235
|
+
def next_xref_entry #:yield: recoverable
|
232
236
|
prepare_string_scanner(20)
|
233
|
-
|
234
|
-
yield(@ss
|
237
|
+
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
238
|
+
yield(@ss[4])
|
235
239
|
end
|
236
240
|
[@ss[1].to_i, @ss[2].to_i, @ss[3]]
|
237
241
|
end
|
@@ -383,7 +387,11 @@ module HexaPDF
|
|
383
387
|
result = []
|
384
388
|
while true
|
385
389
|
obj = next_object(allow_end_array_token: true)
|
386
|
-
|
390
|
+
if obj.equal?(TOKEN_ARRAY_END)
|
391
|
+
break
|
392
|
+
elsif obj.equal?(NO_MORE_TOKENS)
|
393
|
+
raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
|
394
|
+
end
|
387
395
|
result << obj
|
388
396
|
end
|
389
397
|
result
|
@@ -402,7 +410,8 @@ module HexaPDF
|
|
402
410
|
key = next_token
|
403
411
|
break if key.equal?(TOKEN_DICT_END)
|
404
412
|
unless key.kind_of?(Symbol)
|
405
|
-
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects
|
413
|
+
raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
|
414
|
+
"found '#{key}'", pos: pos)
|
406
415
|
end
|
407
416
|
|
408
417
|
val = next_object
|
data/lib/hexapdf/version.rb
CHANGED
(diff content not shown)
data/test/hexapdf/common_tokenizer_tests.rb
CHANGED
@@ -161,6 +161,21 @@ module CommonTokenizerTests
|
|
161
161
|
assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
162
162
|
end
|
163
163
|
|
164
|
+
it "next_object: fails for an array without closing bracket, encountering EOS" do
|
165
|
+
create_tokenizer("[1 2")
|
166
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
167
|
+
assert_match(/Unclosed array found/, exception.message)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
|
171
|
+
create_tokenizer("<</Name 5")
|
172
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
173
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
174
|
+
create_tokenizer("<</Name 5 /Other")
|
175
|
+
exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
|
176
|
+
assert_match(/must be PDF name objects.*EOS/, exception.message)
|
177
|
+
end
|
178
|
+
|
164
179
|
it "returns the correct position on operations" do
|
165
180
|
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
181
|
@tokenizer.next_token
|
@@ -210,8 +225,12 @@ module CommonTokenizerTests
|
|
210
225
|
|
211
226
|
it "next_xref_entry: fails on invalidly formatted entries" do
|
212
227
|
create_tokenizer("0000000001 00001 g \n")
|
213
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
228
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
|
214
229
|
create_tokenizer("0000000001 00001 n\n")
|
215
|
-
assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
|
230
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
231
|
+
create_tokenizer("0000000001 00001 n\r")
|
232
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
233
|
+
create_tokenizer("0000000001 00001 n\r\r")
|
234
|
+
assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
|
216
235
|
end
|
217
236
|
end
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
|
|
107
107
|
assert_equal(749, object)
|
108
108
|
end
|
109
109
|
|
110
|
+
it "treats indirect objects with invalid values as null objects" do
|
111
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
112
|
+
object, * = @parser.parse_indirect_object
|
113
|
+
assert_nil(object)
|
114
|
+
end
|
115
|
+
|
110
116
|
it "recovers from an invalid stream length value" do
|
111
117
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
112
118
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
|
|
185
191
|
it "fails for numbers followed by endobj without space" do
|
186
192
|
create_parser("1 0 obj 749endobj")
|
187
193
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
188
|
-
assert_match(/
|
194
|
+
assert_match(/Missing whitespace after number/, exp.message)
|
195
|
+
end
|
196
|
+
|
197
|
+
it "fails for invalid values" do
|
198
|
+
create_parser("1 0 obj <</test ( /other (end)>> endobj")
|
199
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
200
|
+
assert_match(/Invalid value after '1 0 obj'/, exp.message)
|
189
201
|
end
|
190
202
|
|
191
203
|
it "fails if the stream length value is invalid" do
|
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
|
|
607
619
|
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
608
620
|
end
|
609
621
|
|
610
|
-
it "
|
622
|
+
it "constructs a trailer with a /Root entry if no valid trailer was found" do
|
623
|
+
create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
|
624
|
+
assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
|
625
|
+
end
|
626
|
+
|
627
|
+
it "fails if no valid trailer is found and couldn't be constructed" do
|
611
628
|
create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
|
612
629
|
assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
|
613
630
|
end
|
data/test/hexapdf/test_writer.rb
CHANGED
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
|
|
40
40
|
219
|
41
41
|
%%EOF
|
42
42
|
3 0 obj
|
43
|
-
<</Producer(HexaPDF version 0.15.
|
43
|
+
<</Producer(HexaPDF version 0.15.8)>>
|
44
44
|
endobj
|
45
45
|
xref
|
46
46
|
3 1
|
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
|
|
72
72
|
141
|
73
73
|
%%EOF
|
74
74
|
6 0 obj
|
75
|
-
<</Producer(HexaPDF version 0.15.
|
75
|
+
<</Producer(HexaPDF version 0.15.8)>>
|
76
76
|
endobj
|
77
77
|
2 0 obj
|
78
78
|
<</Length 10>>stream
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hexapdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Leitner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cmdparse
|