hexapdf 0.15.5 → 0.15.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6aa5d94e3b69f1d7b2b369d248664c2c4d96d13306c1fbb1cf0960412f129faf
4
- data.tar.gz: f6e8ae5f61de78c41fecace64d0ad5d42f8c46032e6ac6550c01b2fb43109fe8
3
+ metadata.gz: 24d17dfd6c8dc9e3f7014e1ea769dede6f8cea81529bb201a8447f21873d3b25
4
+ data.tar.gz: 7165a2e11983731ba2597d2e4a824415abc96936d2b58f3fb737a4fed94dcf16
5
5
  SHA512:
6
- metadata.gz: 49f3b3aee754308956df811f564d127269e8e1dbfc613bed39e42bd77303855b80fe11c2ade362cef174838b58188470b5604e2d2c930dcf7637e7600410ac15
7
- data.tar.gz: 3a4d5555ae6f012176aebe0a29c36500ad20f385a342c27819f5db3a0e8c5e270c58526c8434b8c8d98c59b963721a51344e93bfc6d51f3eab4fc76cceeb830e
6
+ metadata.gz: 244332a4f024c90cf6344b462ed422a5f73b32d3a4d04d0dcdadc6e7ede2cd0724f7a3329fa3aad68c99e47f49a9dff66806d9b8b152b9551eaecd7365c807d4
7
+ data.tar.gz: 06c4b9fd5ecd8f045a37e85ce4a6539f52b48bb16fb685290bcc2a0210b5f2e66bcfa9b7efb1115c45dd1618de9421c479d7a66d84556d1b9c37b19b1f8b6075
data/CHANGELOG.md CHANGED
@@ -1,3 +1,34 @@
1
+ ## 0.15.9 - 2021-09-04
2
+
3
+ ### Fixed
4
+
5
+ * Handling of files that contain stream length values that are indirect objects
6
+ not referring to a number
7
+
8
+
9
+ ## 0.15.8 - 2021-08-16
10
+
11
+ ### Fixed
12
+
13
+ * Regression when using `-v` with the hexapdf command line tool
14
+
15
+
16
+ ## 0.15.7 - 2021-07-17
17
+
18
+ ### Fixed
19
+
20
+ * Infinite loop while parsing PDF array due to missing closing bracket
21
+ * Handling of invalid files with missing or corrupted trailer dictionary
22
+
23
+
24
+ ## 0.15.6 - 2021-07-16
25
+
26
+ ### Fixed
27
+
28
+ * Handling of indirect objects with invalid values which are now treated as null
29
+ objects
30
+
31
+
1
32
  ## 0.15.5 - 2021-07-06
2
33
 
3
34
  ### Changed
@@ -119,7 +119,7 @@ module HexaPDF
119
119
  # Writes the document to the given file or does nothing if +out_file+ is +nil+.
120
120
  def write_document(doc, out_file, incremental: false)
121
121
  if out_file
122
- doc.validate(auto_correct: true) do |object, msg, correctable|
122
+ doc.validate(auto_correct: true) do |msg, correctable, object|
123
123
  if command_parser.strict && !correctable
124
124
  raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
125
125
  elsif command_parser.verbosity_info?
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -162,7 +165,7 @@ module HexaPDF
162
165
  else
163
166
  0
164
167
  end
165
- @tokenizer.pos = pos + length
168
+ @tokenizer.pos = pos + length rescue pos
166
169
 
167
170
  tok = @tokenizer.next_token
168
171
  unless tok.kind_of?(Tokenizer::Token) && tok == 'endstream'
@@ -444,6 +447,15 @@ module HexaPDF
444
447
 
445
448
  if !trailer || trailer.empty?
446
449
  _, trailer = load_revision(startxref_offset) rescue nil
450
+ unless trailer
451
+ xref.each do |_oid, _gen, xref_entry|
452
+ obj, * = parse_indirect_object(xref_entry.pos) rescue nil
453
+ if obj.kind_of?(Hash) && obj[:Type] == :Catalog
454
+ trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
455
+ break
456
+ end
457
+ end
458
+ end
447
459
  unless trailer
448
460
  @in_reconstruct_revision = false
449
461
  raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
@@ -55,6 +55,9 @@ module HexaPDF
55
55
 
56
56
  # This object is returned when there are no more tokens to read.
57
57
  NO_MORE_TOKENS = ::Object.new
58
+ def NO_MORE_TOKENS.to_s
59
+ "EOS - no more tokens"
60
+ end
58
61
 
59
62
  # Characters defined as whitespace.
60
63
  #
@@ -384,7 +387,11 @@ module HexaPDF
384
387
  result = []
385
388
  while true
386
389
  obj = next_object(allow_end_array_token: true)
387
- break if obj.equal?(TOKEN_ARRAY_END)
390
+ if obj.equal?(TOKEN_ARRAY_END)
391
+ break
392
+ elsif obj.equal?(NO_MORE_TOKENS)
393
+ raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
394
+ end
388
395
  result << obj
389
396
  end
390
397
  result
@@ -403,7 +410,8 @@ module HexaPDF
403
410
  key = next_token
404
411
  break if key.equal?(TOKEN_DICT_END)
405
412
  unless key.kind_of?(Symbol)
406
- raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
413
+ raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
414
+ "found '#{key}'", pos: pos)
407
415
  end
408
416
 
409
417
  val = next_object
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.5'
40
+ VERSION = '0.15.9'
41
41
 
42
42
  end
@@ -161,6 +161,21 @@ module CommonTokenizerTests
161
161
  assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
162
162
  end
163
163
 
164
+ it "next_object: fails for an array without closing bracket, encountering EOS" do
165
+ create_tokenizer("[1 2")
166
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
167
+ assert_match(/Unclosed array found/, exception.message)
168
+ end
169
+
170
+ it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
171
+ create_tokenizer("<</Name 5")
172
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
173
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
174
+ create_tokenizer("<</Name 5 /Other")
175
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
176
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
177
+ end
178
+
164
179
  it "returns the correct position on operations" do
165
180
  create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
181
  @tokenizer.next_token
@@ -107,13 +107,27 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
- it "recovers from an invalid stream length value" do
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
116
+ it "recovers from a stream length value that doesn't reflect the correct length" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
113
119
  assert_equal(2, obj[:Length])
114
120
  assert_equal('12', TestHelper.collector(stream.fiber))
115
121
  end
116
122
 
123
+ it "recovers from an invalid stream length value" do
124
+ create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
125
+ @document.add([5], oid: 2)
126
+ obj, _, _, stream = @parser.parse_indirect_object
127
+ assert_equal(2, obj[:Length])
128
+ assert_equal('12', TestHelper.collector(stream.fiber))
129
+ end
130
+
117
131
  it "works even if the keyword endobj is missing or mangled" do
118
132
  create_parser("1 0 obj<</Length 4>>5")
119
133
  object, * = @parser.parse_indirect_object
@@ -185,7 +199,13 @@ describe HexaPDF::Parser do
185
199
  it "fails for numbers followed by endobj without space" do
186
200
  create_parser("1 0 obj 749endobj")
187
201
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
202
+ assert_match(/Missing whitespace after number/, exp.message)
203
+ end
204
+
205
+ it "fails for invalid values" do
206
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
207
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
208
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
209
  end
190
210
 
191
211
  it "fails if the stream length value is invalid" do
@@ -607,7 +627,12 @@ describe HexaPDF::Parser do
607
627
  assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
608
628
  end
609
629
 
610
- it "fails if no trailer is found and the trailer specified at the startxref position is not valid" do
630
+ it "constructs a trailer with a /Root entry if no valid trailer was found" do
631
+ create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
632
+ assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
633
+ end
634
+
635
+ it "fails if no valid trailer is found and couldn't be constructed" do
611
636
  create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
612
637
  assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
613
638
  end
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.5)>>
43
+ <</Producer(HexaPDF version 0.15.9)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.5)>>
75
+ <</Producer(HexaPDF version 0.15.9)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.5
4
+ version: 0.15.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-06 00:00:00.000000000 Z
11
+ date: 2021-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse