hexapdf 0.12.3 → 0.14.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +132 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/dictionary.rb +12 -6
  13. data/lib/hexapdf/dictionary_fields.rb +2 -10
  14. data/lib/hexapdf/document.rb +41 -16
  15. data/lib/hexapdf/document/files.rb +0 -1
  16. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  17. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  18. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  19. data/lib/hexapdf/font/cmap.rb +1 -4
  20. data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
  21. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  22. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  23. data/lib/hexapdf/font/true_type/table/post.rb +15 -10
  24. data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
  25. data/lib/hexapdf/font_loader/from_file.rb +18 -8
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/importer.rb +3 -2
  28. data/lib/hexapdf/layout/line.rb +1 -1
  29. data/lib/hexapdf/layout/style.rb +23 -23
  30. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  31. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  32. data/lib/hexapdf/object.rb +52 -25
  33. data/lib/hexapdf/parser.rb +107 -7
  34. data/lib/hexapdf/pdf_array.rb +15 -5
  35. data/lib/hexapdf/revisions.rb +29 -21
  36. data/lib/hexapdf/serializer.rb +37 -10
  37. data/lib/hexapdf/task/optimize.rb +6 -4
  38. data/lib/hexapdf/tokenizer.rb +22 -0
  39. data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
  40. data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
  41. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  42. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  43. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  44. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  45. data/lib/hexapdf/type/actions/uri.rb +3 -2
  46. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  47. data/lib/hexapdf/type/catalog.rb +2 -2
  48. data/lib/hexapdf/type/cid_font.rb +1 -1
  49. data/lib/hexapdf/type/file_specification.rb +1 -1
  50. data/lib/hexapdf/type/font.rb +1 -1
  51. data/lib/hexapdf/type/font_simple.rb +4 -2
  52. data/lib/hexapdf/type/font_true_type.rb +6 -2
  53. data/lib/hexapdf/type/font_type0.rb +4 -4
  54. data/lib/hexapdf/type/form.rb +6 -2
  55. data/lib/hexapdf/type/image.rb +2 -2
  56. data/lib/hexapdf/type/page.rb +21 -12
  57. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  58. data/lib/hexapdf/type/resources.rb +5 -0
  59. data/lib/hexapdf/type/trailer.rb +2 -3
  60. data/lib/hexapdf/utils/object_hash.rb +0 -1
  61. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  62. data/lib/hexapdf/version.rb +1 -1
  63. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  64. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  65. data/test/hexapdf/content/test_canvas.rb +3 -3
  66. data/test/hexapdf/content/test_color_space.rb +1 -1
  67. data/test/hexapdf/encryption/test_aes.rb +4 -4
  68. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  69. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  70. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  71. data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
  72. data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
  73. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
  74. data/test/hexapdf/font_loader/test_from_file.rb +7 -0
  75. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  76. data/test/hexapdf/test_configuration.rb +2 -2
  77. data/test/hexapdf/test_dictionary.rb +8 -1
  78. data/test/hexapdf/test_dictionary_fields.rb +9 -2
  79. data/test/hexapdf/test_document.rb +18 -10
  80. data/test/hexapdf/test_object.rb +71 -26
  81. data/test/hexapdf/test_parser.rb +205 -51
  82. data/test/hexapdf/test_pdf_array.rb +8 -1
  83. data/test/hexapdf/test_revisions.rb +35 -0
  84. data/test/hexapdf/test_serializer.rb +7 -0
  85. data/test/hexapdf/test_tokenizer.rb +28 -0
  86. data/test/hexapdf/test_writer.rb +2 -2
  87. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
  88. data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
  89. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  90. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  91. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  92. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  93. data/test/hexapdf/type/test_font_simple.rb +2 -1
  94. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  95. data/test/hexapdf/type/test_form.rb +8 -1
  96. data/test/hexapdf/type/test_page.rb +8 -1
  97. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  98. data/test/hexapdf/type/test_resources.rb +6 -0
  99. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  100. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  101. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  102. data/test/test_helper.rb +2 -0
  103. metadata +6 -12
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -87,6 +88,18 @@ describe HexaPDF::Parser do
87
88
  assert_equal('12', TestHelper.collector(stream.fiber))
88
89
  end
89
90
 
91
+ it "handles keyword stream followed by space and CR or LF" do
92
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
93
+ *, stream = @parser.parse_indirect_object
94
+ assert_equal('12', TestHelper.collector(stream.fiber))
95
+ end
96
+
97
+ it "handles invalid indirect object value consisting of number followed by endobj without space" do
98
+ create_parser("1 0 obj 749endobj")
99
+ object, * = @parser.parse_indirect_object
100
+ assert_equal(749, object)
101
+ end
102
+
90
103
  it "recovers from an invalid stream length value" do
91
104
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
92
105
  obj, _, _, stream = @parser.parse_indirect_object
@@ -132,6 +145,60 @@ describe HexaPDF::Parser do
132
145
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
146
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
147
  end
148
+
149
+ describe "with strict parsing" do
150
+ before do
151
+ @document.config['parser.on_correctable_error'] = proc { true }
152
+ end
153
+
154
+ it "fails if an empty indirect object is found" do
155
+ create_parser("1 0 obj\nendobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/no indirect object value/i, exp.message)
158
+ end
159
+
160
+ it "fails if keyword stream is followed only by CR without LF" do
161
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/not CR alone/, exp.message)
164
+ end
165
+
166
+ it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
167
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/must be followed by LF or CR\/LF/, exp.message)
170
+ end
171
+
172
+ it "fails for numbers followed by endobj without space" do
173
+ create_parser("1 0 obj 749endobj")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
175
+ assert_match(/Invalid object value after 'obj'/, exp.message)
176
+ end
177
+
178
+ it "fails if the stream length value is invalid" do
179
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
180
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
181
+ assert_match(/invalid stream length/i, exp.message)
182
+ end
183
+
184
+ it "fails if the keyword endobj is mangled" do
185
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
186
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
187
+ assert_match(/keyword endobj/, exp.message)
188
+ end
189
+
190
+ it "fails if the keyword endobj is missing" do
191
+ create_parser("1 0 obj\n<< >>")
192
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
193
+ assert_match(/keyword endobj/, exp.message)
194
+ end
195
+
196
+ it "fails if there is data between 'endstream' and 'endobj'" do
197
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
198
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
199
+ assert_match(/keyword endobj/, exp.message)
200
+ end
201
+ end
135
202
  end
136
203
 
137
204
  describe "load_object" do
@@ -167,6 +234,23 @@ describe HexaPDF::Parser do
167
234
  assert_equal([1, 2], obj.value)
168
235
  end
169
236
 
237
+ it "handles an invalid indirect object offset of 0" do
238
+ obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
239
+ assert(obj.null?)
240
+ assert_equal(2, obj.oid)
241
+ assert_equal(0, obj.gen)
242
+ end
243
+
244
+ describe "with strict parsing" do
245
+ it "raises an error if an indirect object has an offset of 0" do
246
+ @document.config['parser.on_correctable_error'] = proc { true }
247
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
248
+ @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
249
+ end
250
+ assert_match(/has offset 0/, exp.message)
251
+ end
252
+ end
253
+
170
254
  it "fails if another object is found instead of an object stream" do
171
255
  def (@document).object(_oid)
172
256
  :invalid
@@ -205,7 +289,7 @@ describe HexaPDF::Parser do
205
289
  end
206
290
 
207
291
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
292
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
293
  assert_equal(5, @parser.startxref_offset)
210
294
  end
211
295
 
@@ -215,9 +299,9 @@ describe HexaPDF::Parser do
215
299
  end
216
300
 
217
301
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
302
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
303
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
304
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
305
  assert_equal(5, @parser.startxref_offset)
222
306
  end
223
307
 
@@ -242,6 +326,13 @@ describe HexaPDF::Parser do
242
326
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
327
  assert_match(/missing startxref/, exp.message)
244
328
  end
329
+
330
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
331
+ @document.config['parser.on_correctable_error'] = proc { true }
332
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
333
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
334
+ assert_match(/end-of-file marker not found/, exp.message)
335
+ end
245
336
  end
246
337
 
247
338
  describe "file_header_version" do
@@ -262,7 +353,7 @@ describe HexaPDF::Parser do
262
353
  end
263
354
 
264
355
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
356
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
357
  assert_equal('1.4', @parser.file_header_version)
267
358
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
359
  end
@@ -318,6 +409,12 @@ describe HexaPDF::Parser do
318
409
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
410
  end
320
411
 
412
+ it "fails if a sub section entry is mangled" do
413
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
414
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
415
+ assert_match(/invalid cross-reference entry/i, exp.message)
416
+ end
417
+
321
418
  it "fails if there is no trailer" do
322
419
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
420
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +426,71 @@ describe HexaPDF::Parser do
329
426
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
427
  assert_match(/dictionary/, exp.message)
331
428
  end
429
+
430
+ describe "invalid numbering of main xref section" do
431
+ it "handles the xref if the numbering is off by N" do
432
+ create_parser(" 1 0 obj 1 endobj\n" \
433
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
434
+ section, _trailer = @parser.parse_xref_section_and_trailer(17)
435
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
436
+ end
437
+
438
+ it "fails if the first entry is not the one for oid=0" do
439
+ create_parser(" 1 0 obj 1 endobj\n" \
440
+ "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
441
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
442
+ assert_match(/Main.*invalid numbering/i, exp.message)
443
+
444
+ create_parser(" 1 0 obj 1 endobj\n" \
445
+ "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
446
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
447
+ assert_match(/Main.*invalid numbering/i, exp.message)
448
+ end
449
+
450
+ it "fails if the tested entry position is invalid" do
451
+ create_parser(" 1 0 obj 1 endobj\n" \
452
+ "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
453
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
454
+ assert_match(/Main.*invalid numbering/i, exp.message)
455
+ end
456
+
457
+ it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
458
+ create_parser(" 2 0 obj 1 endobj\n" \
459
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
460
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
461
+ assert_match(/Main.*invalid numbering/i, exp.message)
462
+ end
463
+ end
464
+
465
+ describe "with strict parsing" do
466
+ before do
467
+ @document.config['parser.on_correctable_error'] = proc { true }
468
+ end
469
+
470
+ it "fails if xref type=n with offset=0" do
471
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
472
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
473
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
474
+ end
475
+
476
+ it " fails xref type=n with gen>65535" do
477
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
478
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
479
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
480
+ end
481
+
482
+ it "fails if trailing second whitespace is missing" do
483
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
484
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
485
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
486
+ end
487
+
488
+ it "fails if the main cross-reference section has invalid numbering" do
489
+ create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
490
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
491
+ assert_match(/Main.*invalid numbering/i, exp.message)
492
+ end
493
+ end
332
494
  end
333
495
 
334
496
  describe "load_revision" do
@@ -348,75 +510,67 @@ describe HexaPDF::Parser do
348
510
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
511
  assert_match(/not a cross-reference stream/, exp.message)
350
512
  end
351
- end
352
513
 
353
- describe "with strict parsing enabled" do
354
- before do
514
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
515
  @document.config['parser.on_correctable_error'] = proc { true }
516
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
517
+ "stream\n\x01\x0A\x00\nendstream endobj")
518
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
519
+ assert_match(/entry for itself/, exp.message)
356
520
  end
521
+ end
357
522
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
523
+ describe "reconstruct_revision" do
524
+ before do
525
+ @document.config['parser.try_xref_reconstruction'] = true
526
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
362
527
  end
363
528
 
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
529
+ it "serially parses the contents" do
530
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
531
+ assert_equal(6, @parser.load_object(@xref).value)
368
532
  end
369
533
 
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
534
+ it "ignores parts where the starting line is split across lines" do
535
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
536
+ assert_equal(5, @parser.load_object(@xref).value)
374
537
  end
375
538
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
539
+ it "handles cases where the line contains an invalid string that exceeds the read buffer" do
540
+ create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
541
+ assert_equal(6, @parser.load_object(@xref).value)
380
542
  end
381
543
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
544
+ it "handles pathalogical cases which contain many opened literal strings" do
545
+ time = Time.now
546
+ create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
547
+ assert_equal(6, @parser.load_object(@xref).value)
548
+ assert(Time.now - time < 0.5, "Xref reconstruction takes too long")
386
549
  end
387
550
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
551
+ it "ignores invalid objects" do
552
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
553
+ assert_equal(4, @parser.load_object(@xref).value)
392
554
  end
393
555
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
556
+ it "ignores invalid lines" do
557
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
558
+ assert_equal(6, @parser.load_object(@xref).value)
398
559
  end
399
560
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
561
+ it "uses the last trailer" do
562
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
563
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
564
  end
408
565
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
566
+ it "uses the first trailer in case of a linearized file" do
567
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
568
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
569
  end
414
570
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
571
+ it "fails if no valid trailer is found" do
572
+ create_parser("1 0 obj\n5\nendobj")
573
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
574
  end
421
575
  end
422
576
  end
@@ -107,6 +107,13 @@ describe HexaPDF::PDFArray do
107
107
  assert_equal([1, :data, @array[2]], @array[0, 5])
108
108
  end
109
109
 
110
+ it "allows deleting an object" do
111
+ obj = @array.value[1]
112
+ assert_same(obj, @array.delete(obj))
113
+ ref = HexaPDF::Object.new(:test, oid: 1)
114
+ assert_equal(ref, @array.delete(ref))
115
+ end
116
+
110
117
  describe "slice!" do
111
118
  it "allows deleting a single element" do
112
119
  @array.slice!(2)
@@ -157,6 +164,6 @@ describe HexaPDF::PDFArray do
157
164
  end
158
165
 
159
166
  it "can be converted to a simple array" do
160
- assert_equal(@array.value, @array.to_ary)
167
+ assert_equal([1, :data, "deref", @array[3]], @array.to_ary)
161
168
  end
162
169
  end
@@ -158,4 +158,39 @@ describe HexaPDF::Revisions do
158
158
  doc = HexaPDF::Document.new(io: io)
159
159
  assert_equal(2, doc.revisions.count)
160
160
  end
161
+
162
+ it "uses the reconstructed revision if errors are found when loading from an IO" do
163
+ io = StringIO.new(<<~EOF)
164
+ %PDF-1.7
165
+ 1 0 obj
166
+ 10
167
+ endobj
168
+
169
+ xref
170
+ 0 2
171
+ 0000000000 65535 f
172
+ 0000000009 00000 n
173
+ trailer
174
+ << /Size 5 >>
175
+ startxref
176
+ 28
177
+ %%EOF
178
+
179
+ 2 0 obj
180
+ 300
181
+ endobj
182
+
183
+ xref
184
+ 2 1
185
+ 0000000301 00000 n
186
+ trailer
187
+ << /Size 3 /Prev 100>>
188
+ startxref
189
+ 139
190
+ %%EOF
191
+ EOF
192
+ doc = HexaPDF::Document.new(io: io)
193
+ assert_equal(2, doc.revisions.count)
194
+ assert_same(doc.revisions[0].trailer.value, doc.revisions[1].trailer.value)
195
+ end
161
196
  end
@@ -153,6 +153,13 @@ describe HexaPDF::Serializer do
153
153
  assert_equal("<</Key(value)/Length 6>>stream\nsome\nendstream", io.string)
154
154
  end
155
155
 
156
+ it "doesn't reset the internal recursion flag if the stream is serialized as part of another object" do
157
+ object = HexaPDF::Dictionary.new({}, oid: 5)
158
+ object[:Stream] = @stream
159
+ object[:Self] = object # needs to be the last entry so that :Stream gets serialized first!
160
+ assert_serialized("<</Stream 2 0 R/Self 5 0 R>>", object)
161
+ end
162
+
156
163
  it "fails if a stream without object identifier is serialized" do
157
164
  @stream.oid = 0
158
165
  assert_raises(HexaPDF::Error) { @serializer.serialize(@stream) }
@@ -27,4 +27,32 @@ describe HexaPDF::Tokenizer do
27
27
  5.times {|i| assert_equal(i, @tokenizer.next_token) }
28
28
  end
29
29
  end
30
+
31
+ it "has a special token scanning method for use with xref reconstruction" do
32
+ create_tokenizer(<<-EOF.chomp.gsub(/^ {8}/, ''))
33
+ % Comment
34
+ true
35
+ 123 50
36
+ obj
37
+ (ignored)
38
+ /Ignored
39
+ [/Ignored]
40
+ <</Ignored /Values>>
41
+ EOF
42
+
43
+ scan_to_newline = proc { @tokenizer.scan_until(/(\n|\r\n?)+|\z/) }
44
+
45
+ assert_nil(@tokenizer.next_integer_or_keyword)
46
+ scan_to_newline.call
47
+ assert_equal(true, @tokenizer.next_integer_or_keyword)
48
+ assert_equal(123, @tokenizer.next_integer_or_keyword)
49
+ assert_equal(50, @tokenizer.next_integer_or_keyword)
50
+ assert_equal('obj', @tokenizer.next_integer_or_keyword)
51
+ 4.times do
52
+ assert_nil(@tokenizer.next_integer_or_keyword)
53
+ scan_to_newline.call
54
+ end
55
+ assert_equal(HexaPDF::Tokenizer::NO_MORE_TOKENS, @tokenizer.next_integer_or_keyword)
56
+ end
57
+
30
58
  end