hexapdf 0.15.5 → 0.15.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6aa5d94e3b69f1d7b2b369d248664c2c4d96d13306c1fbb1cf0960412f129faf
4
- data.tar.gz: f6e8ae5f61de78c41fecace64d0ad5d42f8c46032e6ac6550c01b2fb43109fe8
3
+ metadata.gz: 24d17dfd6c8dc9e3f7014e1ea769dede6f8cea81529bb201a8447f21873d3b25
4
+ data.tar.gz: 7165a2e11983731ba2597d2e4a824415abc96936d2b58f3fb737a4fed94dcf16
5
5
  SHA512:
6
- metadata.gz: 49f3b3aee754308956df811f564d127269e8e1dbfc613bed39e42bd77303855b80fe11c2ade362cef174838b58188470b5604e2d2c930dcf7637e7600410ac15
7
- data.tar.gz: 3a4d5555ae6f012176aebe0a29c36500ad20f385a342c27819f5db3a0e8c5e270c58526c8434b8c8d98c59b963721a51344e93bfc6d51f3eab4fc76cceeb830e
6
+ metadata.gz: 244332a4f024c90cf6344b462ed422a5f73b32d3a4d04d0dcdadc6e7ede2cd0724f7a3329fa3aad68c99e47f49a9dff66806d9b8b152b9551eaecd7365c807d4
7
+ data.tar.gz: 06c4b9fd5ecd8f045a37e85ce4a6539f52b48bb16fb685290bcc2a0210b5f2e66bcfa9b7efb1115c45dd1618de9421c479d7a66d84556d1b9c37b19b1f8b6075
data/CHANGELOG.md CHANGED
@@ -1,3 +1,34 @@
1
+ ## 0.15.9 - 2021-09-04
2
+
3
+ ### Fixed
4
+
5
+ * Handling of files that contain stream length values that are indirect objects
6
+ not referring to a number
7
+
8
+
9
+ ## 0.15.8 - 2021-08-16
10
+
11
+ ### Fixed
12
+
13
+ * Regression when using `-v` with the hexapdf command line tool
14
+
15
+
16
+ ## 0.15.7 - 2021-07-17
17
+
18
+ ### Fixed
19
+
20
+ * Infinite loop while parsing PDF array due to missing closing bracket
21
+ * Handling of invalid files with missing or corrupted trailer dictionary
22
+
23
+
24
+ ## 0.15.6 - 2021-07-16
25
+
26
+ ### Fixed
27
+
28
+ * Handling of indirect objects with invalid values, which are now treated as null
29
+ objects
30
+
31
+
1
32
  ## 0.15.5 - 2021-07-06
2
33
 
3
34
  ### Changed
@@ -119,7 +119,7 @@ module HexaPDF
119
119
  # Writes the document to the given file or does nothing if +out_file+ is +nil+.
120
120
  def write_document(doc, out_file, incremental: false)
121
121
  if out_file
122
- doc.validate(auto_correct: true) do |object, msg, correctable|
122
+ doc.validate(auto_correct: true) do |msg, correctable, object|
123
123
  if command_parser.strict && !correctable
124
124
  raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
125
125
  elsif command_parser.verbosity_info?
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -162,7 +165,7 @@ module HexaPDF
162
165
  else
163
166
  0
164
167
  end
165
- @tokenizer.pos = pos + length
168
+ @tokenizer.pos = pos + length rescue pos
166
169
 
167
170
  tok = @tokenizer.next_token
168
171
  unless tok.kind_of?(Tokenizer::Token) && tok == 'endstream'
@@ -444,6 +447,15 @@ module HexaPDF
444
447
 
445
448
  if !trailer || trailer.empty?
446
449
  _, trailer = load_revision(startxref_offset) rescue nil
450
+ unless trailer
451
+ xref.each do |_oid, _gen, xref_entry|
452
+ obj, * = parse_indirect_object(xref_entry.pos) rescue nil
453
+ if obj.kind_of?(Hash) && obj[:Type] == :Catalog
454
+ trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
455
+ break
456
+ end
457
+ end
458
+ end
447
459
  unless trailer
448
460
  @in_reconstruct_revision = false
449
461
  raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
@@ -55,6 +55,9 @@ module HexaPDF
55
55
 
56
56
  # This object is returned when there are no more tokens to read.
57
57
  NO_MORE_TOKENS = ::Object.new
58
+ def NO_MORE_TOKENS.to_s
59
+ "EOS - no more tokens"
60
+ end
58
61
 
59
62
  # Characters defined as whitespace.
60
63
  #
@@ -384,7 +387,11 @@ module HexaPDF
384
387
  result = []
385
388
  while true
386
389
  obj = next_object(allow_end_array_token: true)
387
- break if obj.equal?(TOKEN_ARRAY_END)
390
+ if obj.equal?(TOKEN_ARRAY_END)
391
+ break
392
+ elsif obj.equal?(NO_MORE_TOKENS)
393
+ raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
394
+ end
388
395
  result << obj
389
396
  end
390
397
  result
@@ -403,7 +410,8 @@ module HexaPDF
403
410
  key = next_token
404
411
  break if key.equal?(TOKEN_DICT_END)
405
412
  unless key.kind_of?(Symbol)
406
- raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
413
+ raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
414
+ "found '#{key}'", pos: pos)
407
415
  end
408
416
 
409
417
  val = next_object
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.5'
40
+ VERSION = '0.15.9'
41
41
 
42
42
  end
@@ -161,6 +161,21 @@ module CommonTokenizerTests
161
161
  assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
162
162
  end
163
163
 
164
+ it "next_object: fails for an array without closing bracket, encountering EOS" do
165
+ create_tokenizer("[1 2")
166
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
167
+ assert_match(/Unclosed array found/, exception.message)
168
+ end
169
+
170
+ it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
171
+ create_tokenizer("<</Name 5")
172
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
173
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
174
+ create_tokenizer("<</Name 5 /Other")
175
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
176
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
177
+ end
178
+
164
179
  it "returns the correct position on operations" do
165
180
  create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
181
  @tokenizer.next_token
@@ -107,13 +107,27 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
- it "recovers from an invalid stream length value" do
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
116
+ it "recovers from a stream length value that doesn't reflect the correct length" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
113
119
  assert_equal(2, obj[:Length])
114
120
  assert_equal('12', TestHelper.collector(stream.fiber))
115
121
  end
116
122
 
123
+ it "recovers from an invalid stream length value" do
124
+ create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
125
+ @document.add([5], oid: 2)
126
+ obj, _, _, stream = @parser.parse_indirect_object
127
+ assert_equal(2, obj[:Length])
128
+ assert_equal('12', TestHelper.collector(stream.fiber))
129
+ end
130
+
117
131
  it "works even if the keyword endobj is missing or mangled" do
118
132
  create_parser("1 0 obj<</Length 4>>5")
119
133
  object, * = @parser.parse_indirect_object
@@ -185,7 +199,13 @@ describe HexaPDF::Parser do
185
199
  it "fails for numbers followed by endobj without space" do
186
200
  create_parser("1 0 obj 749endobj")
187
201
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
202
+ assert_match(/Missing whitespace after number/, exp.message)
203
+ end
204
+
205
+ it "fails for invalid values" do
206
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
207
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
208
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
209
  end
190
210
 
191
211
  it "fails if the stream length value is invalid" do
@@ -607,7 +627,12 @@ describe HexaPDF::Parser do
607
627
  assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
608
628
  end
609
629
 
610
- it "fails if no trailer is found and the trailer specified at the startxref position is not valid" do
630
+ it "constructs a trailer with a /Root entry if no valid trailer was found" do
631
+ create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
632
+ assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
633
+ end
634
+
635
+ it "fails if no valid trailer is found and couldn't be constructed" do
611
636
  create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
612
637
  assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
613
638
  end
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.5)>>
43
+ <</Producer(HexaPDF version 0.15.9)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.5)>>
75
+ <</Producer(HexaPDF version 0.15.9)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.5
4
+ version: 0.15.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-06 00:00:00.000000000 Z
11
+ date: 2021-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse