hexapdf 0.15.3 → 0.15.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 592ea8ae7648df43e92ba50effdf3f8f34163e4acf7fb9567c3b38db46eb598e
4
- data.tar.gz: 6c3b7d32a1499f2e2133fbafdf46b9d3cd4d1df41b9ae308c0c32ea39aefff2d
3
+ metadata.gz: 1385aca5e91916034a5494142b4c88e51de46d2d13b79ddaed9494c74808793a
4
+ data.tar.gz: 4fee33d3c96e74c00565ac6211901f39c0242cd2e0926f0760be7bfb18fe7f12
5
5
  SHA512:
6
- metadata.gz: fdf9edf53c0443d459008634ddbff7cd80fc1422fa558df41db04af0d9eeb512ea050d5b4a10987b824c675203e39bc851d1b2a68d0178f2cd12fada66b31245
7
- data.tar.gz: 8e6a7b91da0ed2b63f7bc6d52c3993553f439edf986253b3508d0510310195c2a6f3721c2cfed735afc3d60dacacedc8207a6fac361bedc354bc6bd779207eac
6
+ metadata.gz: 3fa1454ec6821500c1f94981ad17efcbf36f125a29870a62ad0d626fe65cd35bb7ef6426021daba3b2554dcbd20f1ce6efc4d93c1d4d8b5303d6063eb27804fb
7
+ data.tar.gz: 8f2c3de849fed113c6f4fe7494312a202a872f7364052b584a38352315a4a358f135beea8dd951c29d2dbd3b842c4eefe3892ed3d9bb3c24e6875cdbb0c59123
data/CHANGELOG.md CHANGED
@@ -1,3 +1,40 @@
1
+ ## 0.15.7 - 2021-07-17
2
+
3
+ ### Fixed
4
+
5
+ * Infinite loop while parsing PDF array due to missing closing bracket
6
+ * Handling of invalid files with missing or corrupted trailer dictionary
7
+
8
+
9
+ ## 0.15.6 - 2021-07-16
10
+
11
+ ### Fixed
12
+
13
+ * Handling of indirect objects with invalid values, which are now treated as null
14
+ objects
15
+
16
+
17
+ ## 0.15.5 - 2021-07-06
18
+
19
+ ### Changed
20
+
21
+ * Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
22
+
23
+
24
+ ### Fixed
25
+
26
+ * Handling of invalid cross-reference stream entries that end with the sequence
27
+ `\r\r`
28
+
29
+
30
+ ## 0.15.4 - 2021-05-27
31
+
32
+ ### Fixed
33
+
34
+ * [HexaPDF::Type::Annotation#appearance] to handle cases where there is
35
+ no valid appearance stream
36
+
37
+
1
38
  ## 0.15.3 - 2021-05-01
2
39
 
3
40
  ### Fixed
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  module Extensions #:nodoc:
51
51
  def help_banner #:nodoc:
52
52
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
53
- "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" \
53
+ "Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
54
54
  "#{format(usage, indent: 7)}\n\n"
55
55
  end
56
56
  end
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -263,9 +266,9 @@ module HexaPDF
263
266
 
264
267
  @tokenizer.skip_whitespace
265
268
  start.upto(start + number_of_entries - 1) do |oid|
266
- pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
269
+ pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
267
270
  maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
268
- force: !matched_size)
271
+ force: !recoverable)
269
272
  end
270
273
  if xref.entry?(oid)
271
274
  next
@@ -444,6 +447,15 @@ module HexaPDF
444
447
 
445
448
  if !trailer || trailer.empty?
446
449
  _, trailer = load_revision(startxref_offset) rescue nil
450
+ unless trailer
451
+ xref.each do |_oid, _gen, xref_entry|
452
+ obj, * = parse_indirect_object(xref_entry.pos) rescue nil
453
+ if obj.kind_of?(Hash) && obj[:Type] == :Catalog
454
+ trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
455
+ break
456
+ end
457
+ end
458
+ end
447
459
  unless trailer
448
460
  @in_reconstruct_revision = false
449
461
  raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
@@ -55,6 +55,9 @@ module HexaPDF
55
55
 
56
56
  # This object is returned when there are no more tokens to read.
57
57
  NO_MORE_TOKENS = ::Object.new
58
+ def NO_MORE_TOKENS.to_s
59
+ "EOS - no more tokens"
60
+ end
58
61
 
59
62
  # Characters defined as whitespace.
60
63
  #
@@ -225,13 +228,14 @@ module HexaPDF
225
228
  # Reads the cross-reference subsection entry at the current position and advances the scan
226
229
  # pointer.
227
230
  #
228
- # If a possible problem is detected, yields to caller.
231
+ # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
232
+ # problem is recoverable.
229
233
  #
230
234
  # See: PDF1.7 7.5.4
231
- def next_xref_entry #:yield: matched_size
235
+ def next_xref_entry #:yield: recoverable
232
236
  prepare_string_scanner(20)
233
- unless @ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|\r|\n)/) && @ss.matched_size == 20
234
- yield(@ss.matched_size)
237
+ if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
238
+ yield(@ss[4])
235
239
  end
236
240
  [@ss[1].to_i, @ss[2].to_i, @ss[3]]
237
241
  end
@@ -383,7 +387,11 @@ module HexaPDF
383
387
  result = []
384
388
  while true
385
389
  obj = next_object(allow_end_array_token: true)
386
- break if obj.equal?(TOKEN_ARRAY_END)
390
+ if obj.equal?(TOKEN_ARRAY_END)
391
+ break
392
+ elsif obj.equal?(NO_MORE_TOKENS)
393
+ raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
394
+ end
387
395
  result << obj
388
396
  end
389
397
  result
@@ -402,7 +410,8 @@ module HexaPDF
402
410
  key = next_token
403
411
  break if key.equal?(TOKEN_DICT_END)
404
412
  unless key.kind_of?(Symbol)
405
- raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
413
+ raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
414
+ "found '#{key}'", pos: pos)
406
415
  end
407
416
 
408
417
  val = next_object
@@ -138,8 +138,13 @@ module HexaPDF
138
138
  if entry.kind_of?(HexaPDF::Dictionary) && !entry.kind_of?(HexaPDF::Stream)
139
139
  entry = entry[self[:AS]]
140
140
  end
141
- if entry.kind_of?(HexaPDF::Stream)
142
- entry[:Subtype] == :Form ? entry : document.wrap(entry, type: :XObject, subtype: :Form)
141
+ return unless entry.kind_of?(HexaPDF::Stream)
142
+
143
+ if entry.type == :XObject && entry[:Subtype] == :Form
144
+ entry
145
+ elsif (entry[:Type].nil? || entry[:Type] == :XObject) &&
146
+ (entry[:Subtype].nil? || entry[:Subtype] == :Form) && entry[:BBox]
147
+ document.wrap(entry, type: :XObject, subtype: :Form)
143
148
  end
144
149
  end
145
150
  alias appearance? appearance
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.3'
40
+ VERSION = '0.15.7'
41
41
 
42
42
  end
@@ -161,6 +161,21 @@ module CommonTokenizerTests
161
161
  assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
162
162
  end
163
163
 
164
+ it "next_object: fails for an array without closing bracket, encountering EOS" do
165
+ create_tokenizer("[1 2")
166
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
167
+ assert_match(/Unclosed array found/, exception.message)
168
+ end
169
+
170
+ it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
171
+ create_tokenizer("<</Name 5")
172
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
173
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
174
+ create_tokenizer("<</Name 5 /Other")
175
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
176
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
177
+ end
178
+
164
179
  it "returns the correct position on operations" do
165
180
  create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
181
  @tokenizer.next_token
@@ -210,8 +225,12 @@ module CommonTokenizerTests
210
225
 
211
226
  it "next_xref_entry: fails on invalidly formatted entries" do
212
227
  create_tokenizer("0000000001 00001 g \n")
213
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
228
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
214
229
  create_tokenizer("0000000001 00001 n\n")
215
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
230
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
231
+ create_tokenizer("0000000001 00001 n\r")
232
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
233
+ create_tokenizer("0000000001 00001 n\r\r")
234
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
235
  end
217
236
  end
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
110
116
  it "recovers from an invalid stream length value" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
185
191
  it "fails for numbers followed by endobj without space" do
186
192
  create_parser("1 0 obj 749endobj")
187
193
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
194
+ assert_match(/Missing whitespace after number/, exp.message)
195
+ end
196
+
197
+ it "fails for invalid values" do
198
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
199
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
200
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
201
  end
190
202
 
191
203
  it "fails if the stream length value is invalid" do
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
607
619
  assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
608
620
  end
609
621
 
610
- it "fails if no trailer is found and the trailer specified at the startxref position is not valid" do
622
+ it "constructs a trailer with a /Root entry if no valid trailer was found" do
623
+ create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
624
+ assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
625
+ end
626
+
627
+ it "fails if no valid trailer is found and couldn't be constructed" do
611
628
  create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
612
629
  assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
613
630
  end
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.3)>>
43
+ <</Producer(HexaPDF version 0.15.7)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.3)>>
75
+ <</Producer(HexaPDF version 0.15.7)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
@@ -51,14 +51,18 @@ describe HexaPDF::Type::Annotation do
51
51
 
52
52
  stream = @doc.wrap({}, stream: '')
53
53
  @annot[:AP][:N] = stream
54
+ assert_nil(@annot.appearance)
55
+
56
+ stream[:BBox] = [1, 2, 3, 4]
54
57
  appearance = @annot.appearance
55
58
  assert_same(stream.data, appearance.data)
56
59
  assert_equal(:Form, appearance[:Subtype])
57
60
 
58
- @annot[:AP][:N] = {X: stream}
61
+ @annot[:AP][:N] = {X: {}}
59
62
  assert_nil(@annot.appearance)
60
63
 
61
64
  @annot[:AS] = :X
65
+ @annot[:AP][:N][:X] = stream
62
66
  assert_same(stream.data, @annot.appearance.data)
63
67
 
64
68
  @annot[:AP][:D] = {X: stream}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.3
4
+ version: 0.15.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-01 00:00:00.000000000 Z
11
+ date: 2021-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse