hexapdf 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +126 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/content/parser.rb +1 -1
  13. data/lib/hexapdf/dictionary.rb +4 -4
  14. data/lib/hexapdf/dictionary_fields.rb +1 -9
  15. data/lib/hexapdf/document.rb +41 -16
  16. data/lib/hexapdf/document/files.rb +0 -1
  17. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  18. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  19. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  20. data/lib/hexapdf/font/cmap.rb +1 -4
  21. data/lib/hexapdf/font/encoding/base.rb +8 -0
  22. data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
  23. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  24. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  25. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/layout/line.rb +1 -1
  28. data/lib/hexapdf/layout/style.rb +23 -23
  29. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  30. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  31. data/lib/hexapdf/object.rb +52 -25
  32. data/lib/hexapdf/parser.rb +87 -3
  33. data/lib/hexapdf/pdf_array.rb +11 -4
  34. data/lib/hexapdf/revisions.rb +29 -21
  35. data/lib/hexapdf/serializer.rb +1 -1
  36. data/lib/hexapdf/task/optimize.rb +6 -4
  37. data/lib/hexapdf/tokenizer.rb +4 -3
  38. data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
  39. data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
  40. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  41. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  42. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  43. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  44. data/lib/hexapdf/type/actions/uri.rb +3 -2
  45. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  46. data/lib/hexapdf/type/catalog.rb +2 -2
  47. data/lib/hexapdf/type/cid_font.rb +1 -1
  48. data/lib/hexapdf/type/file_specification.rb +1 -1
  49. data/lib/hexapdf/type/font.rb +1 -1
  50. data/lib/hexapdf/type/font_simple.rb +4 -2
  51. data/lib/hexapdf/type/font_true_type.rb +6 -2
  52. data/lib/hexapdf/type/font_type0.rb +4 -4
  53. data/lib/hexapdf/type/form.rb +15 -2
  54. data/lib/hexapdf/type/image.rb +2 -2
  55. data/lib/hexapdf/type/page.rb +37 -13
  56. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  57. data/lib/hexapdf/type/resources.rb +1 -0
  58. data/lib/hexapdf/type/trailer.rb +2 -3
  59. data/lib/hexapdf/utils/object_hash.rb +0 -1
  60. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  61. data/lib/hexapdf/version.rb +1 -1
  62. data/test/hexapdf/common_tokenizer_tests.rb +6 -1
  63. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  64. data/test/hexapdf/content/test_canvas.rb +3 -3
  65. data/test/hexapdf/content/test_color_space.rb +1 -1
  66. data/test/hexapdf/encryption/test_aes.rb +4 -4
  67. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  68. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  69. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  70. data/test/hexapdf/font/encoding/test_base.rb +10 -0
  71. data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
  72. data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
  73. data/test/hexapdf/layout/test_style.rb +1 -1
  74. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  75. data/test/hexapdf/test_configuration.rb +2 -2
  76. data/test/hexapdf/test_dictionary.rb +3 -1
  77. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  78. data/test/hexapdf/test_document.rb +18 -10
  79. data/test/hexapdf/test_object.rb +71 -26
  80. data/test/hexapdf/test_parser.rb +159 -53
  81. data/test/hexapdf/test_pdf_array.rb +8 -1
  82. data/test/hexapdf/test_revisions.rb +35 -0
  83. data/test/hexapdf/test_writer.rb +2 -2
  84. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
  85. data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
  86. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  87. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  88. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  89. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  90. data/test/hexapdf/type/test_font_simple.rb +2 -1
  91. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  92. data/test/hexapdf/type/test_form.rb +26 -1
  93. data/test/hexapdf/type/test_page.rb +45 -7
  94. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  95. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  96. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  97. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  98. data/test/test_helper.rb +2 -0
  99. metadata +6 -11
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
441
441
 
442
442
  describe "validate" do
443
443
  before do
444
- @doc.trailer.validate # to create a valid document
444
+ @doc.validate # to create a valid document
445
445
  end
446
446
 
447
447
  it "validates indirect objects" do
448
- obj = @doc.add({Type: :Catalog})
448
+ obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
449
449
  refute(@doc.validate(auto_correct: false))
450
450
 
451
451
  called = false
452
- assert(@doc.validate {|o| assert_same(obj, o); called = true })
452
+ assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
453
453
  assert(called)
454
454
  end
455
455
 
456
456
  it "validates the trailer object" do
457
457
  @doc.trailer[:ID] = :Symbol
458
- refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
458
+ refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
459
459
  end
460
460
 
461
461
  it "validates only loaded objects" do
@@ -609,16 +609,24 @@ describe HexaPDF::Document do
609
609
 
610
610
  describe "caching interface" do
611
611
  it "allows setting and retrieving values" do
612
- assert_equal(:test, @doc.cache(:a, :b, :test))
613
- assert_equal(:test, @doc.cache(:a, :b, :other))
614
- assert_equal(:other, @doc.cache(:a, :c) { :other })
612
+ assert_equal(:test, @doc.cache(:a, :b, :test) { :notused })
613
+ assert_equal(:test, @doc.cache(:a, :b) { :other })
614
+ assert_equal(:test, @doc.cache(:a, :b))
615
+ assert_nil(@doc.cache(:a, :c, nil))
616
+ assert_nil(@doc.cache(:a, :c) { :other })
617
+ assert_nil(@doc.cache(:a, :c))
615
618
  assert(@doc.cached?(:a, :b))
616
619
  assert(@doc.cached?(:a, :c))
617
620
  end
618
621
 
622
+ it "allows updating a value" do
623
+ @doc.cache(:a, :b) { :test }
624
+ assert_equal(:new, @doc.cache(:a, :b, update: true) { :new })
625
+ end
626
+
619
627
  it "allows clearing cached values" do
620
- @doc.cache(:a, :b, :c)
621
- @doc.cache(:b, :c, :d)
628
+ @doc.cache(:a, :b) { :c }
629
+ @doc.cache(:b, :c) { :d }
622
630
  @doc.clear_cache(:a)
623
631
  refute(@doc.cached?(:a, :b))
624
632
  assert(@doc.cached?(:b, :c))
@@ -626,7 +634,7 @@ describe HexaPDF::Document do
626
634
  refute(@doc.cached?(:a, :c))
627
635
  end
628
636
 
629
- it "fails if no cached value exists and neither a value nor a block is given" do
637
+ it "fails if no cached value exists and no block is given" do
630
638
  assert_raises(LocalJumpError) { @doc.cache(:a, :b) }
631
639
  end
632
640
  end
@@ -3,18 +3,10 @@
3
3
  require 'test_helper'
4
4
  require 'hexapdf/object'
5
5
  require 'hexapdf/reference'
6
+ require 'hexapdf/document'
6
7
 
7
8
  describe HexaPDF::Object do
8
9
  describe "class.deep_copy" do
9
- it "handles not-duplicatable classes" do
10
- assert_equal(5, HexaPDF::Object.deep_copy(5))
11
- assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
12
- assert_nil(HexaPDF::Object.deep_copy(nil))
13
- assert_equal(true, HexaPDF::Object.deep_copy(true))
14
- assert_equal(false, HexaPDF::Object.deep_copy(false))
15
- assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
16
- end
17
-
18
10
  it "handles general, duplicatable classes" do
19
11
  x = "test"
20
12
  assert_equal("test", HexaPDF::Object.deep_copy(x))
@@ -103,30 +95,57 @@ describe HexaPDF::Object do
103
95
  end
104
96
 
105
97
  describe "validate" do
106
- it "invokes perform_validation correctly via #validate" do
107
- obj = HexaPDF::Object.new(5)
108
- invoked = {}
109
- obj.define_singleton_method(:perform_validation) do |&block|
110
- invoked[:method] = true
98
+ before do
99
+ @obj = HexaPDF::Object.new(5)
100
+ end
101
+
102
+ it "invokes perform_validation correctly" do
103
+ invoked = false
104
+ @obj.define_singleton_method(:perform_validation) { invoked = true }
105
+ assert(@obj.validate)
106
+ assert(invoked)
107
+ end
108
+
109
+ it "yields all arguments yieled by perform_validation" do
110
+ invoked = []
111
+ @obj.define_singleton_method(:perform_validation) do |&block|
112
+ block.call("error", true, :object)
113
+ end
114
+ assert(@obj.validate {|*a| invoked << a })
115
+ assert_equal([["error", true, :object]], invoked)
116
+ end
117
+
118
+ it "provides self as third argument if none is yielded by perform_validation" do
119
+ invoked = []
120
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
121
  block.call("error", true)
112
122
  end
113
- assert(obj.validate {|*a| invoked[:block] = a })
114
- assert_equal([:method, :block], invoked.keys)
115
- assert_equal(["error", true], invoked[:block])
123
+ assert(@obj.validate {|*a| invoked << a })
124
+ assert_equal([["error", true, @obj]], invoked)
125
+ end
116
126
 
117
- refute(obj.validate(auto_correct: false))
127
+ it "yields all problems when auto_correct is true" do
128
+ invoked = []
129
+ @obj.define_singleton_method(:perform_validation) do |&block|
130
+ invoked << :before
131
+ block.call("error", false)
132
+ invoked << :after
133
+ block.call("error2", true)
134
+ invoked << :last
135
+ end
136
+ refute(@obj.validate)
137
+ assert_equal([:before, :after, :last], invoked)
118
138
  end
119
139
 
120
- it "stops validating on an uncorrectable problem" do
121
- obj = HexaPDF::Object.new(5)
122
- invoked = {}
123
- obj.define_singleton_method(:perform_validation) do |&block|
124
- invoked[:before] = true
140
+ it "stops at the first uncorrectable problem if auto_correct is false" do
141
+ invoked = []
142
+ @obj.define_singleton_method(:perform_validation) do |&block|
143
+ invoked << :before
125
144
  block.call("error", false)
126
- invoked[:after] = true
145
+ invoked << :after
127
146
  end
128
- refute(obj.validate {|*a| invoked[:block] = a })
129
- refute(invoked.key?(:after))
147
+ refute(@obj.validate(auto_correct: false))
148
+ assert_equal([:before], invoked)
130
149
  end
131
150
  end
132
151
 
@@ -181,6 +200,32 @@ describe HexaPDF::Object do
181
200
  end
182
201
  end
183
202
 
203
+ describe "caching" do
204
+ before do
205
+ @obj = HexaPDF::Object.new({}, document: HexaPDF::Document.new)
206
+ end
207
+
208
+ it "can set and return a cached value" do
209
+ assert_equal(:value, @obj.cache(:data, :value))
210
+ assert_equal(:value, @obj.cache(:data, :other))
211
+ assert_equal(:value, @obj.cache(:block) { :value })
212
+ assert_equal(:other, @obj.cache(:data, :other, update: true))
213
+ end
214
+
215
+ it "can check for the existence of a cached value" do
216
+ refute(@obj.cached?(:data))
217
+ @obj.cache(:data, :value)
218
+ assert(@obj.cached?(:data))
219
+ end
220
+
221
+ it "can clear all cached values" do
222
+ @obj.cache(:data, :value)
223
+ assert(@obj.cached?(:data))
224
+ @obj.clear_cache
225
+ refute(@obj.cached?(:data))
226
+ end
227
+ end
228
+
184
229
  describe "validation" do
185
230
  before do
186
231
  @doc = Object.new
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
132
133
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
134
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
135
  end
136
+
137
+ describe "with strict parsing" do
138
+ before do
139
+ @document.config['parser.on_correctable_error'] = proc { true }
140
+ end
141
+
142
+ it "fails if an empty indirect object is found" do
143
+ create_parser("1 0 obj\nendobj")
144
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
145
+ assert_match(/no indirect object value/i, exp.message)
146
+ end
147
+
148
+ it "fails if keyword stream is followed only by CR without LF" do
149
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
150
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
151
+ assert_match(/not CR alone/, exp.message)
152
+ end
153
+
154
+ it "fails if the stream length value is invalid" do
155
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/invalid stream length/i, exp.message)
158
+ end
159
+
160
+ it "fails if the keyword endobj is mangled" do
161
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/keyword endobj/, exp.message)
164
+ end
165
+
166
+ it "fails if the keyword endobj is missing" do
167
+ create_parser("1 0 obj\n<< >>")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/keyword endobj/, exp.message)
170
+ end
171
+
172
+ it "fails if there is data between 'endstream' and 'endobj'" do
173
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
175
+ assert_match(/keyword endobj/, exp.message)
176
+ end
177
+ end
135
178
  end
136
179
 
137
180
  describe "load_object" do
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
205
248
  end
206
249
 
207
250
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
251
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
252
  assert_equal(5, @parser.startxref_offset)
210
253
  end
211
254
 
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
215
258
  end
216
259
 
217
260
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
261
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
262
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
263
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
264
  assert_equal(5, @parser.startxref_offset)
222
265
  end
223
266
 
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
242
285
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
286
  assert_match(/missing startxref/, exp.message)
244
287
  end
288
+
289
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
290
+ @document.config['parser.on_correctable_error'] = proc { true }
291
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
292
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
293
+ assert_match(/end-of-file marker not found/, exp.message)
294
+ end
245
295
  end
246
296
 
247
297
  describe "file_header_version" do
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
262
312
  end
263
313
 
264
314
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
315
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
316
  assert_equal('1.4', @parser.file_header_version)
267
317
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
318
  end
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
318
368
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
369
  end
320
370
 
371
+ it "fails if a sub section entry is mangled" do
372
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
373
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
374
+ assert_match(/invalid cross-reference entry/i, exp.message)
375
+ end
376
+
321
377
  it "fails if there is no trailer" do
322
378
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
379
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +385,71 @@ describe HexaPDF::Parser do
329
385
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
386
  assert_match(/dictionary/, exp.message)
331
387
  end
388
+
389
+ describe "invalid numbering of main xref section" do
390
+ it "handles the xref if the numbering is off by N" do
391
+ create_parser(" 1 0 obj 1 endobj\n" \
392
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
393
+ section, _trailer = @parser.parse_xref_section_and_trailer(17)
394
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
395
+ end
396
+
397
+ it "fails if the first entry is not the one for oid=0" do
398
+ create_parser(" 1 0 obj 1 endobj\n" \
399
+ "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
400
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
401
+ assert_match(/Main.*invalid numbering/i, exp.message)
402
+
403
+ create_parser(" 1 0 obj 1 endobj\n" \
404
+ "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
405
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
406
+ assert_match(/Main.*invalid numbering/i, exp.message)
407
+ end
408
+
409
+ it "fails if the tested entry position is invalid" do
410
+ create_parser(" 1 0 obj 1 endobj\n" \
411
+ "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
412
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
413
+ assert_match(/Main.*invalid numbering/i, exp.message)
414
+ end
415
+
416
+ it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
417
+ create_parser(" 2 0 obj 1 endobj\n" \
418
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
419
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
420
+ assert_match(/Main.*invalid numbering/i, exp.message)
421
+ end
422
+ end
423
+
424
+ describe "with strict parsing" do
425
+ before do
426
+ @document.config['parser.on_correctable_error'] = proc { true }
427
+ end
428
+
429
+ it "fails if xref type=n with offset=0" do
430
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
431
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
432
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
433
+ end
434
+
435
+ it " fails xref type=n with gen>65535" do
436
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
437
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
438
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
439
+ end
440
+
441
+ it "fails if trailing second whitespace is missing" do
442
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
443
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
444
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
445
+ end
446
+
447
+ it "fails if the main cross-reference section has invalid numbering" do
448
+ create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
449
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
450
+ assert_match(/Main.*invalid numbering/i, exp.message)
451
+ end
452
+ end
332
453
  end
333
454
 
334
455
  describe "load_revision" do
@@ -348,75 +469,60 @@ describe HexaPDF::Parser do
348
469
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
470
  assert_match(/not a cross-reference stream/, exp.message)
350
471
  end
351
- end
352
472
 
353
- describe "with strict parsing enabled" do
354
- before do
473
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
474
  @document.config['parser.on_correctable_error'] = proc { true }
475
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
476
+ "stream\n\x01\x0A\x00\nendstream endobj")
477
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
478
+ assert_match(/entry for itself/, exp.message)
356
479
  end
480
+ end
357
481
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
362
- end
363
-
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
482
+ describe "reconstruct_revision" do
483
+ before do
484
+ @document.config['parser.try_xref_reconstruction'] = true
485
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
368
486
  end
369
487
 
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
488
+ it "serially parses the contents" do
489
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
490
+ assert_equal(6, @parser.load_object(@xref).value)
374
491
  end
375
492
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
493
+ it "ignores parts where the starting line is split across lines" do
494
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
495
+ assert_equal(5, @parser.load_object(@xref).value)
380
496
  end
381
497
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
498
+ it "handles cases where the line contains an invalid string that exceeds the read buffer" do
499
+ create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
500
+ assert_equal(6, @parser.load_object(@xref).value)
386
501
  end
387
502
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
503
+ it "ignores invalid objects" do
504
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
505
+ assert_equal(4, @parser.load_object(@xref).value)
392
506
  end
393
507
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
508
+ it "ignores invalid lines" do
509
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
510
+ assert_equal(6, @parser.load_object(@xref).value)
398
511
  end
399
512
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
513
+ it "uses the last trailer" do
514
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
515
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
516
  end
408
517
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
518
+ it "uses the first trailer in case of a linearized file" do
519
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
520
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
521
  end
414
522
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
523
+ it "fails if no valid trailer is found" do
524
+ create_parser("1 0 obj\n5\nendobj")
525
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
526
  end
421
527
  end
422
528
  end