hexapdf 0.15.4 → 0.15.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6901d1281fa7f0585e2fe02b27985c9b7ff26770015902566c72ea62f5398e10
4
- data.tar.gz: 4b1a82d17d4d1144b47dbb713552bb5e977896b94210a54c7b83871399d393ac
3
+ metadata.gz: 4953ab56f7c03c62e4f4e2ef1aa51a8a58f98c3d24725eb86dd6bc13419bd2d2
4
+ data.tar.gz: c4ac38e280f646eecf512481570ddc8670b48c9ac32601f55b24748f4044344b
5
5
  SHA512:
6
- metadata.gz: 1d4acb6e9f867195e998b3fd900102013a8d4b74c576f3c3243225a515bbcf2be8b62852db95cbcd9213ec06334b174fba4db406dfb8e228083d9a527eb8a5a1
7
- data.tar.gz: 3306a678655f59c35b3349cd247606ca7e45a34e033dd9b9b3bad33c20a3fda95ba07bff88acdb7c5666c23b1ab63d293963e68db7e12371c56106645df7d9bb
6
+ metadata.gz: 529f8f88d9553f300b842838c1f00e8bed3e05adecfe4478f81d41fcb6431fce888f56b76b2747a65a2935cbb76a6792dce2b1f480dcb120634e1932e461c883
7
+ data.tar.gz: 9e71874d7901145045fab5791ca09b3ec9cc8f9a9243366b0329cb2570c58408d4595ee37e995a22ce2c099127bc6bdc732e5d3562bde834be2fc57aa3f35b8a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,39 @@
1
+ ## 0.15.8 - 2021-08-16
2
+
3
+ ### Fixed
4
+
5
+ * Regression when using `-v` with the hexapdf command line tool
6
+
7
+
8
+ ## 0.15.7 - 2021-07-17
9
+
10
+ ### Fixed
11
+
12
+ * Infinite loop while parsing PDF array due to missing closing bracket
13
+ * Handling of invalid files with missing or corrupted trailer dictionary
14
+
15
+
16
+ ## 0.15.6 - 2021-07-16
17
+
18
+ ### Fixed
19
+
20
+ * Handling of indirect objects with invalid values which are now treated as null
21
+ objects
22
+
23
+
24
+ ## 0.15.5 - 2021-07-06
25
+
26
+ ### Changed
27
+
28
+ * Refactored [HexaPDF::Tokenizer#next_xref_entry] and changed yielded value
29
+
30
+
31
+ ### Fixed
32
+
33
+ * Handling of invalid cross-reference stream entries that end with the sequence
34
+ `\r\r`
35
+
36
+
1
37
  ## 0.15.4 - 2021-05-27
2
38
 
3
39
  ### Fixed
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  module Extensions #:nodoc:
51
51
  def help_banner #:nodoc:
52
52
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
53
- "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" \
53
+ "Copyright (c) 2014-2021 Thomas Leitner; licensed under the AGPLv3\n\n" \
54
54
  "#{format(usage, indent: 7)}\n\n"
55
55
  end
56
56
  end
@@ -119,7 +119,7 @@ module HexaPDF
119
119
  # Writes the document to the given file or does nothing if +out_file+ is +nil+.
120
120
  def write_document(doc, out_file, incremental: false)
121
121
  if out_file
122
- doc.validate(auto_correct: true) do |object, msg, correctable|
122
+ doc.validate(auto_correct: true) do |msg, correctable, object|
123
123
  if command_parser.strict && !correctable
124
124
  raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
125
125
  elsif command_parser.verbosity_info?
@@ -125,11 +125,14 @@ module HexaPDF
125
125
  begin
126
126
  object = @tokenizer.next_object
127
127
  rescue MalformedPDFError
128
- # Handle often found invalid indirect object with missing whitespace after number
129
- maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
130
- force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
131
- object = tok.to_i
132
- @tokenizer.pos -= 6
128
+ if tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/
129
+ # Handle often found invalid indirect object with missing whitespace after number
130
+ maybe_raise("Missing whitespace after number'", pos: @tokenizer.pos)
131
+ object = tok.to_i
132
+ @tokenizer.pos -= 6
133
+ else
134
+ maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
135
+ end
133
136
  end
134
137
  end
135
138
 
@@ -263,9 +266,9 @@ module HexaPDF
263
266
 
264
267
  @tokenizer.skip_whitespace
265
268
  start.upto(start + number_of_entries - 1) do |oid|
266
- pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
269
+ pos, gen, type = @tokenizer.next_xref_entry do |recoverable|
267
270
  maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
268
- force: !matched_size)
271
+ force: !recoverable)
269
272
  end
270
273
  if xref.entry?(oid)
271
274
  next
@@ -444,6 +447,15 @@ module HexaPDF
444
447
 
445
448
  if !trailer || trailer.empty?
446
449
  _, trailer = load_revision(startxref_offset) rescue nil
450
+ unless trailer
451
+ xref.each do |_oid, _gen, xref_entry|
452
+ obj, * = parse_indirect_object(xref_entry.pos) rescue nil
453
+ if obj.kind_of?(Hash) && obj[:Type] == :Catalog
454
+ trailer = {Root: HexaPDF::Reference.new(xref_entry.oid, xref_entry.gen)}
455
+ break
456
+ end
457
+ end
458
+ end
447
459
  unless trailer
448
460
  @in_reconstruct_revision = false
449
461
  raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
@@ -55,6 +55,9 @@ module HexaPDF
55
55
 
56
56
  # This object is returned when there are no more tokens to read.
57
57
  NO_MORE_TOKENS = ::Object.new
58
+ def NO_MORE_TOKENS.to_s
59
+ "EOS - no more tokens"
60
+ end
58
61
 
59
62
  # Characters defined as whitespace.
60
63
  #
@@ -225,13 +228,14 @@ module HexaPDF
225
228
  # Reads the cross-reference subsection entry at the current position and advances the scan
226
229
  # pointer.
227
230
  #
228
- # If a possible problem is detected, yields to caller.
231
+ # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
232
+ # problem is recoverable.
229
233
  #
230
234
  # See: PDF1.7 7.5.4
231
- def next_xref_entry #:yield: matched_size
235
+ def next_xref_entry #:yield: recoverable
232
236
  prepare_string_scanner(20)
233
- unless @ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|\r|\n)/) && @ss.matched_size == 20
234
- yield(@ss.matched_size)
237
+ if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
238
+ yield(@ss[4])
235
239
  end
236
240
  [@ss[1].to_i, @ss[2].to_i, @ss[3]]
237
241
  end
@@ -383,7 +387,11 @@ module HexaPDF
383
387
  result = []
384
388
  while true
385
389
  obj = next_object(allow_end_array_token: true)
386
- break if obj.equal?(TOKEN_ARRAY_END)
390
+ if obj.equal?(TOKEN_ARRAY_END)
391
+ break
392
+ elsif obj.equal?(NO_MORE_TOKENS)
393
+ raise HexaPDF::MalformedPDFError.new("Unclosed array found", pos: pos)
394
+ end
387
395
  result << obj
388
396
  end
389
397
  result
@@ -402,7 +410,8 @@ module HexaPDF
402
410
  key = next_token
403
411
  break if key.equal?(TOKEN_DICT_END)
404
412
  unless key.kind_of?(Symbol)
405
- raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects", pos: pos)
413
+ raise HexaPDF::MalformedPDFError.new("Dictionary keys must be PDF name objects, " \
414
+ "found '#{key}'", pos: pos)
406
415
  end
407
416
 
408
417
  val = next_object
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.15.4'
40
+ VERSION = '0.15.8'
41
41
 
42
42
  end
@@ -161,6 +161,21 @@ module CommonTokenizerTests
161
161
  assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
162
162
  end
163
163
 
164
+ it "next_object: fails for an array without closing bracket, encountering EOS" do
165
+ create_tokenizer("[1 2")
166
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
167
+ assert_match(/Unclosed array found/, exception.message)
168
+ end
169
+
170
+ it "next_object: fails for a dictionary without closing bracket, encountering EOS" do
171
+ create_tokenizer("<</Name 5")
172
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
173
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
174
+ create_tokenizer("<</Name 5 /Other")
175
+ exception = assert_raises(HexaPDF::MalformedPDFError) { @tokenizer.next_object }
176
+ assert_match(/must be PDF name objects.*EOS/, exception.message)
177
+ end
178
+
164
179
  it "returns the correct position on operations" do
165
180
  create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
181
  @tokenizer.next_token
@@ -210,8 +225,12 @@ module CommonTokenizerTests
210
225
 
211
226
  it "next_xref_entry: fails on invalidly formatted entries" do
212
227
  create_tokenizer("0000000001 00001 g \n")
213
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
228
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| refute(recoverable); raise } }
214
229
  create_tokenizer("0000000001 00001 n\n")
215
- assert_raises(RuntimeError) { @tokenizer.next_xref_entry { raise } }
230
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
231
+ create_tokenizer("0000000001 00001 n\r")
232
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
233
+ create_tokenizer("0000000001 00001 n\r\r")
234
+ assert_raises(RuntimeError) { @tokenizer.next_xref_entry {|recoverable| assert(recoverable); raise } }
216
235
  end
217
236
  end
@@ -107,6 +107,12 @@ describe HexaPDF::Parser do
107
107
  assert_equal(749, object)
108
108
  end
109
109
 
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
110
116
  it "recovers from an invalid stream length value" do
111
117
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
112
118
  obj, _, _, stream = @parser.parse_indirect_object
@@ -185,7 +191,13 @@ describe HexaPDF::Parser do
185
191
  it "fails for numbers followed by endobj without space" do
186
192
  create_parser("1 0 obj 749endobj")
187
193
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
188
- assert_match(/Invalid object value after 'obj'/, exp.message)
194
+ assert_match(/Missing whitespace after number/, exp.message)
195
+ end
196
+
197
+ it "fails for invalid values" do
198
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
199
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
200
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
189
201
  end
190
202
 
191
203
  it "fails if the stream length value is invalid" do
@@ -607,7 +619,12 @@ describe HexaPDF::Parser do
607
619
  assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
608
620
  end
609
621
 
610
- it "fails if no trailer is found and the trailer specified at the startxref position is not valid" do
622
+ it "constructs a trailer with a /Root entry if no valid trailer was found" do
623
+ create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
624
+ assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
625
+ end
626
+
627
+ it "fails if no valid trailer is found and couldn't be constructed" do
611
628
  create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
612
629
  assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
613
630
  end
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.15.4)>>
43
+ <</Producer(HexaPDF version 0.15.8)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.15.4)>>
75
+ <</Producer(HexaPDF version 0.15.8)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.4
4
+ version: 0.15.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-27 00:00:00.000000000 Z
11
+ date: 2021-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse