hexapdf 0.12.3 → 0.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +132 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/dictionary.rb +12 -6
  13. data/lib/hexapdf/dictionary_fields.rb +2 -10
  14. data/lib/hexapdf/document.rb +41 -16
  15. data/lib/hexapdf/document/files.rb +0 -1
  16. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  17. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  18. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  19. data/lib/hexapdf/font/cmap.rb +1 -4
  20. data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
  21. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  22. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  23. data/lib/hexapdf/font/true_type/table/post.rb +15 -10
  24. data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
  25. data/lib/hexapdf/font_loader/from_file.rb +18 -8
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/importer.rb +3 -2
  28. data/lib/hexapdf/layout/line.rb +1 -1
  29. data/lib/hexapdf/layout/style.rb +23 -23
  30. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  31. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  32. data/lib/hexapdf/object.rb +52 -25
  33. data/lib/hexapdf/parser.rb +107 -7
  34. data/lib/hexapdf/pdf_array.rb +15 -5
  35. data/lib/hexapdf/revisions.rb +29 -21
  36. data/lib/hexapdf/serializer.rb +37 -10
  37. data/lib/hexapdf/task/optimize.rb +6 -4
  38. data/lib/hexapdf/tokenizer.rb +22 -0
  39. data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
  40. data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
  41. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  42. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  43. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  44. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  45. data/lib/hexapdf/type/actions/uri.rb +3 -2
  46. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  47. data/lib/hexapdf/type/catalog.rb +2 -2
  48. data/lib/hexapdf/type/cid_font.rb +1 -1
  49. data/lib/hexapdf/type/file_specification.rb +1 -1
  50. data/lib/hexapdf/type/font.rb +1 -1
  51. data/lib/hexapdf/type/font_simple.rb +4 -2
  52. data/lib/hexapdf/type/font_true_type.rb +6 -2
  53. data/lib/hexapdf/type/font_type0.rb +4 -4
  54. data/lib/hexapdf/type/form.rb +6 -2
  55. data/lib/hexapdf/type/image.rb +2 -2
  56. data/lib/hexapdf/type/page.rb +21 -12
  57. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  58. data/lib/hexapdf/type/resources.rb +5 -0
  59. data/lib/hexapdf/type/trailer.rb +2 -3
  60. data/lib/hexapdf/utils/object_hash.rb +0 -1
  61. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  62. data/lib/hexapdf/version.rb +1 -1
  63. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  64. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  65. data/test/hexapdf/content/test_canvas.rb +3 -3
  66. data/test/hexapdf/content/test_color_space.rb +1 -1
  67. data/test/hexapdf/encryption/test_aes.rb +4 -4
  68. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  69. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  70. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  71. data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
  72. data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
  73. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
  74. data/test/hexapdf/font_loader/test_from_file.rb +7 -0
  75. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  76. data/test/hexapdf/test_configuration.rb +2 -2
  77. data/test/hexapdf/test_dictionary.rb +8 -1
  78. data/test/hexapdf/test_dictionary_fields.rb +9 -2
  79. data/test/hexapdf/test_document.rb +18 -10
  80. data/test/hexapdf/test_object.rb +71 -26
  81. data/test/hexapdf/test_parser.rb +205 -51
  82. data/test/hexapdf/test_pdf_array.rb +8 -1
  83. data/test/hexapdf/test_revisions.rb +35 -0
  84. data/test/hexapdf/test_serializer.rb +7 -0
  85. data/test/hexapdf/test_tokenizer.rb +28 -0
  86. data/test/hexapdf/test_writer.rb +2 -2
  87. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
  88. data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
  89. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  90. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  91. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  92. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  93. data/test/hexapdf/type/test_font_simple.rb +2 -1
  94. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  95. data/test/hexapdf/type/test_form.rb +8 -1
  96. data/test/hexapdf/type/test_page.rb +8 -1
  97. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  98. data/test/hexapdf/type/test_resources.rb +6 -0
  99. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  100. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  101. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  102. data/test/test_helper.rb +2 -0
  103. metadata +6 -12
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -87,6 +88,18 @@ describe HexaPDF::Parser do
87
88
  assert_equal('12', TestHelper.collector(stream.fiber))
88
89
  end
89
90
 
91
+ it "handles keyword stream followed by space and CR or LF" do
92
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
93
+ *, stream = @parser.parse_indirect_object
94
+ assert_equal('12', TestHelper.collector(stream.fiber))
95
+ end
96
+
97
+ it "handles invalid indirect object value consisting of number followed by endobj without space" do
98
+ create_parser("1 0 obj 749endobj")
99
+ object, * = @parser.parse_indirect_object
100
+ assert_equal(749, object)
101
+ end
102
+
90
103
  it "recovers from an invalid stream length value" do
91
104
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
92
105
  obj, _, _, stream = @parser.parse_indirect_object
@@ -132,6 +145,60 @@ describe HexaPDF::Parser do
132
145
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
146
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
147
  end
148
+
149
+ describe "with strict parsing" do
150
+ before do
151
+ @document.config['parser.on_correctable_error'] = proc { true }
152
+ end
153
+
154
+ it "fails if an empty indirect object is found" do
155
+ create_parser("1 0 obj\nendobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/no indirect object value/i, exp.message)
158
+ end
159
+
160
+ it "fails if keyword stream is followed only by CR without LF" do
161
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/not CR alone/, exp.message)
164
+ end
165
+
166
+ it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
167
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/must be followed by LF or CR\/LF/, exp.message)
170
+ end
171
+
172
+ it "fails for numbers followed by endobj without space" do
173
+ create_parser("1 0 obj 749endobj")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
175
+ assert_match(/Invalid object value after 'obj'/, exp.message)
176
+ end
177
+
178
+ it "fails if the stream length value is invalid" do
179
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
180
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
181
+ assert_match(/invalid stream length/i, exp.message)
182
+ end
183
+
184
+ it "fails if the keyword endobj is mangled" do
185
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
186
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
187
+ assert_match(/keyword endobj/, exp.message)
188
+ end
189
+
190
+ it "fails if the keyword endobj is missing" do
191
+ create_parser("1 0 obj\n<< >>")
192
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
193
+ assert_match(/keyword endobj/, exp.message)
194
+ end
195
+
196
+ it "fails if there is data between 'endstream' and 'endobj'" do
197
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
198
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
199
+ assert_match(/keyword endobj/, exp.message)
200
+ end
201
+ end
135
202
  end
136
203
 
137
204
  describe "load_object" do
@@ -167,6 +234,23 @@ describe HexaPDF::Parser do
167
234
  assert_equal([1, 2], obj.value)
168
235
  end
169
236
 
237
+ it "handles an invalid indirect object offset of 0" do
238
+ obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
239
+ assert(obj.null?)
240
+ assert_equal(2, obj.oid)
241
+ assert_equal(0, obj.gen)
242
+ end
243
+
244
+ describe "with strict parsing" do
245
+ it "raises an error if an indirect object has an offset of 0" do
246
+ @document.config['parser.on_correctable_error'] = proc { true }
247
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
248
+ @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
249
+ end
250
+ assert_match(/has offset 0/, exp.message)
251
+ end
252
+ end
253
+
170
254
  it "fails if another object is found instead of an object stream" do
171
255
  def (@document).object(_oid)
172
256
  :invalid
@@ -205,7 +289,7 @@ describe HexaPDF::Parser do
205
289
  end
206
290
 
207
291
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
292
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
293
  assert_equal(5, @parser.startxref_offset)
210
294
  end
211
295
 
@@ -215,9 +299,9 @@ describe HexaPDF::Parser do
215
299
  end
216
300
 
217
301
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
302
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
303
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
304
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
305
  assert_equal(5, @parser.startxref_offset)
222
306
  end
223
307
 
@@ -242,6 +326,13 @@ describe HexaPDF::Parser do
242
326
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
327
  assert_match(/missing startxref/, exp.message)
244
328
  end
329
+
330
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
331
+ @document.config['parser.on_correctable_error'] = proc { true }
332
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
333
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
334
+ assert_match(/end-of-file marker not found/, exp.message)
335
+ end
245
336
  end
246
337
 
247
338
  describe "file_header_version" do
@@ -262,7 +353,7 @@ describe HexaPDF::Parser do
262
353
  end
263
354
 
264
355
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
356
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
357
  assert_equal('1.4', @parser.file_header_version)
267
358
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
359
  end
@@ -318,6 +409,12 @@ describe HexaPDF::Parser do
318
409
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
410
  end
320
411
 
412
+ it "fails if a sub section entry is mangled" do
413
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
414
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
415
+ assert_match(/invalid cross-reference entry/i, exp.message)
416
+ end
417
+
321
418
  it "fails if there is no trailer" do
322
419
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
420
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +426,71 @@ describe HexaPDF::Parser do
329
426
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
427
  assert_match(/dictionary/, exp.message)
331
428
  end
429
+
430
+ describe "invalid numbering of main xref section" do
431
+ it "handles the xref if the numbering is off by N" do
432
+ create_parser(" 1 0 obj 1 endobj\n" \
433
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
434
+ section, _trailer = @parser.parse_xref_section_and_trailer(17)
435
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
436
+ end
437
+
438
+ it "fails if the first entry is not the one for oid=0" do
439
+ create_parser(" 1 0 obj 1 endobj\n" \
440
+ "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
441
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
442
+ assert_match(/Main.*invalid numbering/i, exp.message)
443
+
444
+ create_parser(" 1 0 obj 1 endobj\n" \
445
+ "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
446
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
447
+ assert_match(/Main.*invalid numbering/i, exp.message)
448
+ end
449
+
450
+ it "fails if the tested entry position is invalid" do
451
+ create_parser(" 1 0 obj 1 endobj\n" \
452
+ "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
453
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
454
+ assert_match(/Main.*invalid numbering/i, exp.message)
455
+ end
456
+
457
+ it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
458
+ create_parser(" 2 0 obj 1 endobj\n" \
459
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
460
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
461
+ assert_match(/Main.*invalid numbering/i, exp.message)
462
+ end
463
+ end
464
+
465
+ describe "with strict parsing" do
466
+ before do
467
+ @document.config['parser.on_correctable_error'] = proc { true }
468
+ end
469
+
470
+ it "fails if xref type=n with offset=0" do
471
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
472
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
473
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
474
+ end
475
+
476
+ it " fails xref type=n with gen>65535" do
477
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
478
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
479
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
480
+ end
481
+
482
+ it "fails if trailing second whitespace is missing" do
483
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
484
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
485
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
486
+ end
487
+
488
+ it "fails if the main cross-reference section has invalid numbering" do
489
+ create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
490
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
491
+ assert_match(/Main.*invalid numbering/i, exp.message)
492
+ end
493
+ end
332
494
  end
333
495
 
334
496
  describe "load_revision" do
@@ -348,75 +510,67 @@ describe HexaPDF::Parser do
348
510
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
511
  assert_match(/not a cross-reference stream/, exp.message)
350
512
  end
351
- end
352
513
 
353
- describe "with strict parsing enabled" do
354
- before do
514
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
515
  @document.config['parser.on_correctable_error'] = proc { true }
516
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
517
+ "stream\n\x01\x0A\x00\nendstream endobj")
518
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
519
+ assert_match(/entry for itself/, exp.message)
356
520
  end
521
+ end
357
522
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
523
+ describe "reconstruct_revision" do
524
+ before do
525
+ @document.config['parser.try_xref_reconstruction'] = true
526
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
362
527
  end
363
528
 
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
529
+ it "serially parses the contents" do
530
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
531
+ assert_equal(6, @parser.load_object(@xref).value)
368
532
  end
369
533
 
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
534
+ it "ignores parts where the starting line is split across lines" do
535
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
536
+ assert_equal(5, @parser.load_object(@xref).value)
374
537
  end
375
538
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
539
+ it "handles cases where the line contains an invalid string that exceeds the read buffer" do
540
+ create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
541
+ assert_equal(6, @parser.load_object(@xref).value)
380
542
  end
381
543
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
544
+ it "handles pathalogical cases which contain many opened literal strings" do
545
+ time = Time.now
546
+ create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
547
+ assert_equal(6, @parser.load_object(@xref).value)
548
+ assert(Time.now - time < 0.5, "Xref reconstruction takes too long")
386
549
  end
387
550
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
551
+ it "ignores invalid objects" do
552
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
553
+ assert_equal(4, @parser.load_object(@xref).value)
392
554
  end
393
555
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
556
+ it "ignores invalid lines" do
557
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
558
+ assert_equal(6, @parser.load_object(@xref).value)
398
559
  end
399
560
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
561
+ it "uses the last trailer" do
562
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
563
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
564
  end
408
565
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
566
+ it "uses the first trailer in case of a linearized file" do
567
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
568
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
569
  end
414
570
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
571
+ it "fails if no valid trailer is found" do
572
+ create_parser("1 0 obj\n5\nendobj")
573
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
574
  end
421
575
  end
422
576
  end
@@ -107,6 +107,13 @@ describe HexaPDF::PDFArray do
107
107
  assert_equal([1, :data, @array[2]], @array[0, 5])
108
108
  end
109
109
 
110
+ it "allows deleting an object" do
111
+ obj = @array.value[1]
112
+ assert_same(obj, @array.delete(obj))
113
+ ref = HexaPDF::Object.new(:test, oid: 1)
114
+ assert_equal(ref, @array.delete(ref))
115
+ end
116
+
110
117
  describe "slice!" do
111
118
  it "allows deleting a single element" do
112
119
  @array.slice!(2)
@@ -157,6 +164,6 @@ describe HexaPDF::PDFArray do
157
164
  end
158
165
 
159
166
  it "can be converted to a simple array" do
160
- assert_equal(@array.value, @array.to_ary)
167
+ assert_equal([1, :data, "deref", @array[3]], @array.to_ary)
161
168
  end
162
169
  end
@@ -158,4 +158,39 @@ describe HexaPDF::Revisions do
158
158
  doc = HexaPDF::Document.new(io: io)
159
159
  assert_equal(2, doc.revisions.count)
160
160
  end
161
+
162
+ it "uses the reconstructed revision if errors are found when loading from an IO" do
163
+ io = StringIO.new(<<~EOF)
164
+ %PDF-1.7
165
+ 1 0 obj
166
+ 10
167
+ endobj
168
+
169
+ xref
170
+ 0 2
171
+ 0000000000 65535 f
172
+ 0000000009 00000 n
173
+ trailer
174
+ << /Size 5 >>
175
+ startxref
176
+ 28
177
+ %%EOF
178
+
179
+ 2 0 obj
180
+ 300
181
+ endobj
182
+
183
+ xref
184
+ 2 1
185
+ 0000000301 00000 n
186
+ trailer
187
+ << /Size 3 /Prev 100>>
188
+ startxref
189
+ 139
190
+ %%EOF
191
+ EOF
192
+ doc = HexaPDF::Document.new(io: io)
193
+ assert_equal(2, doc.revisions.count)
194
+ assert_same(doc.revisions[0].trailer.value, doc.revisions[1].trailer.value)
195
+ end
161
196
  end
@@ -153,6 +153,13 @@ describe HexaPDF::Serializer do
153
153
  assert_equal("<</Key(value)/Length 6>>stream\nsome\nendstream", io.string)
154
154
  end
155
155
 
156
+ it "doesn't reset the internal recursion flag if the stream is serialized as part of another object" do
157
+ object = HexaPDF::Dictionary.new({}, oid: 5)
158
+ object[:Stream] = @stream
159
+ object[:Self] = object # needs to be the last entry so that :Stream gets serialized first!
160
+ assert_serialized("<</Stream 2 0 R/Self 5 0 R>>", object)
161
+ end
162
+
156
163
  it "fails if a stream without object identifier is serialized" do
157
164
  @stream.oid = 0
158
165
  assert_raises(HexaPDF::Error) { @serializer.serialize(@stream) }
@@ -27,4 +27,32 @@ describe HexaPDF::Tokenizer do
27
27
  5.times {|i| assert_equal(i, @tokenizer.next_token) }
28
28
  end
29
29
  end
30
+
31
+ it "has a special token scanning method for use with xref reconstruction" do
32
+ create_tokenizer(<<-EOF.chomp.gsub(/^ {8}/, ''))
33
+ % Comment
34
+ true
35
+ 123 50
36
+ obj
37
+ (ignored)
38
+ /Ignored
39
+ [/Ignored]
40
+ <</Ignored /Values>>
41
+ EOF
42
+
43
+ scan_to_newline = proc { @tokenizer.scan_until(/(\n|\r\n?)+|\z/) }
44
+
45
+ assert_nil(@tokenizer.next_integer_or_keyword)
46
+ scan_to_newline.call
47
+ assert_equal(true, @tokenizer.next_integer_or_keyword)
48
+ assert_equal(123, @tokenizer.next_integer_or_keyword)
49
+ assert_equal(50, @tokenizer.next_integer_or_keyword)
50
+ assert_equal('obj', @tokenizer.next_integer_or_keyword)
51
+ 4.times do
52
+ assert_nil(@tokenizer.next_integer_or_keyword)
53
+ scan_to_newline.call
54
+ end
55
+ assert_equal(HexaPDF::Tokenizer::NO_MORE_TOKENS, @tokenizer.next_integer_or_keyword)
56
+ end
57
+
30
58
  end