hexapdf 0.15.3 → 0.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 592ea8ae7648df43e92ba50effdf3f8f34163e4acf7fb9567c3b38db46eb598e
4
- data.tar.gz: 6c3b7d32a1499f2e2133fbafdf46b9d3cd4d1df41b9ae308c0c32ea39aefff2d
3
+ metadata.gz: 1385aca5e91916034a5494142b4c88e51de46d2d13b79ddaed9494c74808793a
4
+ data.tar.gz: 4fee33d3c96e74c00565ac6211901f39c0242cd2e0926f0760be7bfb18fe7f12
5
5
  SHA512:
6
- metadata.gz: fdf9edf53c0443d459008634ddbff7cd80fc1422fa558df41db04af0d9eeb512ea050d5b4a10987b824c675203e39bc851d1b2a68d0178f2cd12fada66b31245
7
- data.tar.gz: 8e6a7b91da0ed2b63f7bc6d52c3993553f439edf986253b3508d0510310195c2a6f3721c2cfed735afc3d60dacacedc8207a6fac361bedc354bc6bd779207eac
6
+ metadata.gz: 3fa1454ec6821500c1f94981ad17efcbf36f125a29870a62ad0d626fe65cd35bb7ef6426021daba3b2554dcbd20f1ce6efc4d93c1d4d8b5303d6063eb27804fb
7
+ data.tar.gz: 8f2c3de849fed113c6f4fe7494312a202a872f7364052b584a38352315a4a358f135beea8dd951c29d2dbd3b842c4eefe3892ed3d9bb3c24e6875cdbb0c59123
data/CHANGELOG.md CHANGED
@@ -1,3 +1,40 @@
1
+ ## 0.15.7 - 2021-07-17
2
+
3
+ ### Fixed
4
+
5
+ * Infinite loop while parsing PDF array due to missing closing bracket
6
+ * Handling of invalid files with missing or corrupted trailer dictionary
7
+
8
+
9
+ ## 0.15.6 - 2021-07-16
10
+
11
+ ### Fixed
12
+
13
+ * Handling of indirect objects with invalid values, which are now treated as null
14
+ objects
15
+
16
+
17
+ ## 0.15.5 - 2021-07-06
18
+
19
+ ### Changed
20
+
21
+ * Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
22
+
23
+
24
+ ### Fixed
25
+
26
+ * Handling of invalid cross-reference stream entries that end with the sequence
27
+ `\r\r`
28
+
29
+
30
+ ## 0.15.4 - 2021-05-27
31
+
32
+ ### Fixed
33
+
34
+ * [HexaPDF::Type::Annotation#appearance] to handle cases where there is
35
+ no valid appearance stream
36
+
37
+
1
38
  ## 0.15.3 - 2021-05-01
2
39
 
3
40
  ### Fixed
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  module Extensions #:nodoc:
51
51
  def help_banner #:nodoc:
52
52
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
53
- "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" \
53
+ "Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
54
54
  "#{format(usage, indent: 7)}\n\n"
55
55
  end
56
56
  end
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -263,9 +266,9 @@ module HexaPDF
263
266
 
264
267
  @tokenizer.skip_whitespace
265
268
  start.upto(start + number_of_entries - 1) do |oid|
266
- pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
269
+ pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
267
270
  maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
268
- force: !matched_size)
271
+ force: !recoverable)
269
272
  end
270
273
  if xref.entry?(oid)
271
274
  next
@@ -444,6 +447,15 @@ module HexaPDF
444
447
 
445
448
  if !trailer || trailer.empty?
446
449
  _, trailer = load_revision(startxref_offset) rescue nil
450
+ unless trailer
451
+ xref.each do |_oid, _gen, xref_entry|
452
+ obj, * = parse_indirect_object(xref_entry.pos) rescue nil
453
+ if obj.kind_of?(Hash) && obj[:Type] == :Catalog
454
+ trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
455
+ break
456
+ end
457
+ end
458
+ end
447
459
  unless trailer
448
460
  @in_reconstruct_revision = false
449
461
  raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
@@ -55,6 +55,9 @@ module HexaPDF
55
55
 
56
56
  # This object is returned when there are no more tokens to read.
57
57
  NO_MORE_TOKENS = ::Object.new
58
+ def NO_MORE_TOKENS.to_s
59
+ "EOS - no more tokens"
60
+ end
58
61
 
59
62
  # Characters defined as whitespace.
60
63
  #
@@ -225,13 +228,14 @@ module HexaPDF
225
228
  # Reads the cross-reference subsection entry at the current position and advances the scan
226
229
  # pointer.
227
230
  #
228
- # If a possible problem is detected, yields to caller.
231
+ # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
232
+ # problem is recoverable.
229
233
  #
230
234
  # See: PDF1.7 7.5.4
231
- def next_xref_entry #:yield: matched_size
235
+ def next_xref_entry #:yield: recoverable
232
236
  prepare_string_scanner(20)
233
- unless @ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|\r|\n)/) && @ss.matched_size == 20
234
- yield(@ss.matched_size)
237
+ if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
238
+ yield(@ss[4])
235
239
  end
236
240
  [@ss[1].to_i, @ss[2].to_i, @ss[3]]
237
241
  end
@@ -383,7 +387,11 @@ module HexaPDF
383
387
  result = []
384
388
  while true
385
389
  obj = next_object(allow_end_array_token: true)
386
- break if obj.equal?(TOKEN_ARRAY_END)
390
+ if obj.equal?(TOKEN_ARRAY_END)
391
+ break
392
+ elsif obj.equal?(NO_MORE_TOKENS)
393
+ raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
394
+ end
387
395
  result << obj
388
396
  end
389
397
  result
@@ -402,7 +410,8 @@ module HexaPDF
402
410
  key = next_token
403
411
  break if key.equal?(TOKEN_DICT_END)
404
412
  unless key.kind_of?(Symbol)
405
- raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
413
+ raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
414
+ "found '#{key}'", pos: pos)
406
415
  end
407
416
 
408
417
  val = next_object
@@ -138,8 +138,13 @@ module HexaPDF
138
138
  if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
139
139
  entry = entry[self[:AS]]
140
140
  end
141
- if entry.kind_of?(HexaPDF::Stream)
142
- entry[:Subtype] == :Form ? entry : document.wrap(entry, type: :XObject, subtype: :Form)
141
+ return unless entry.kind_of?(HexaPDF::Stream)
142
+
143
+ if entry.type == :XObject && entry[:Subtype] == :Form
144
+ entry
145
+ elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
146
+ (entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
147
+ document.wrap(entry, type: :XObject, subtype: :Form)
143
148
  end
144
149
  end
145
150
  alias appearance? appearance
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.3'
40
+ VERSION = '0.15.7'
41
41
 
42
42
  end
@@ -161,6 +161,21 @@ module CommonTokenizerTests
161
161
  assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
162
162
  end
163
163
 
164
+ it "next_object: fails for an array without closing bracket, encountering EOS" do
165
+ create_tokenizer("[1 2")
166
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
167
+ assert_match(/Unclosed array found/, exception.message)
168
+ end
169
+
170
+ it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
171
+ create_tokenizer("<</Name 5")
172
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
173
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
174
+ create_tokenizer("<</Name 5 /Other")
175
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
176
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
177
+ end
178
+
164
179
  it "returns the correct position on operations" do
165
180
  create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
181
  @tokenizer.next_token
@@ -210,8 +225,12 @@ module CommonTokenizerTests
210
225
 
211
226
  it "next_xref_entry: fails on invalidly formatted entries" do
212
227
  create_tokenizer("0000000001 00001 g \n")
213
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
228
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
214
229
  create_tokenizer("0000000001 00001 n\n")
215
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
230
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
231
+ create_tokenizer("0000000001 00001 n\r")
232
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
233
+ create_tokenizer("0000000001 00001 n\r\r")
234
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
235
  end
217
236
  end
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
110
116
  it "recovers from an invalid stream length value" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
185
191
  it "fails for numbers followed by endobj without space" do
186
192
  create_parser("1 0 obj 749endobj")
187
193
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
194
+ assert_match(/Missing whitespace after number/, exp.message)
195
+ end
196
+
197
+ it "fails for invalid values" do
198
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
199
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
200
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
201
  end
190
202
 
191
203
  it "fails if the stream length value is invalid" do
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
607
619
  assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
608
620
  end
609
621
 
610
- it "fails if no trailer is found and the trailer specified at the startxref position is not valid" do
622
+ it "constructs a trailer with a /Root entry if no valid trailer was found" do
623
+ create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
624
+ assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
625
+ end
626
+
627
+ it "fails if no valid trailer is found and couldn't be constructed" do
611
628
  create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
612
629
  assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
613
630
  end
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.3)>>
43
+ <</Producer(HexaPDF version 0.15.7)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.3)>>
75
+ <</Producer(HexaPDF version 0.15.7)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
51
51
 
52
52
  stream = @doc.wrap({}, stream: '')
53
53
  @annot[:AP][:N] = stream
54
+ assert_nil(@annot.appearance)
55
+
56
+ stream[:BBox] = [1, 2, 3, 4]
54
57
  appearance = @annot.appearance
55
58
  assert_same(stream.data, appearance.data)
56
59
  assert_equal(:Form, appearance[:Subtype])
57
60
 
58
- @annot[:AP][:N] = {X: stream}
61
+ @annot[:AP][:N] = {X: {}}
59
62
  assert_nil(@annot.appearance)
60
63
 
61
64
  @annot[:AS] = :X
65
+ @annot[:AP][:N][:X] = stream
62
66
  assert_same(stream.data, @annot.appearance.data)
63
67
 
64
68
  @annot[:AP][:D] = {X: stream}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.3
4
+ version: 0.15.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-01 00:00:00.000000000 Z
11
+ date: 2021-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse