hexapdf 0.12.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +126 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/content/parser.rb +1 -1
  13. data/lib/hexapdf/dictionary.rb +4 -4
  14. data/lib/hexapdf/dictionary_fields.rb +1 -9
  15. data/lib/hexapdf/document.rb +41 -16
  16. data/lib/hexapdf/document/files.rb +0 -1
  17. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  18. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  19. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  20. data/lib/hexapdf/font/cmap.rb +1 -4
  21. data/lib/hexapdf/font/encoding/base.rb +8 -0
  22. data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
  23. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  24. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  25. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/layout/line.rb +1 -1
  28. data/lib/hexapdf/layout/style.rb +23 -23
  29. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  30. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  31. data/lib/hexapdf/object.rb +52 -25
  32. data/lib/hexapdf/parser.rb +87 -3
  33. data/lib/hexapdf/pdf_array.rb +11 -4
  34. data/lib/hexapdf/revisions.rb +29 -21
  35. data/lib/hexapdf/serializer.rb +1 -1
  36. data/lib/hexapdf/task/optimize.rb +6 -4
  37. data/lib/hexapdf/tokenizer.rb +4 -3
  38. data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
  39. data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
  40. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  41. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  42. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  43. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  44. data/lib/hexapdf/type/actions/uri.rb +3 -2
  45. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  46. data/lib/hexapdf/type/catalog.rb +2 -2
  47. data/lib/hexapdf/type/cid_font.rb +1 -1
  48. data/lib/hexapdf/type/file_specification.rb +1 -1
  49. data/lib/hexapdf/type/font.rb +1 -1
  50. data/lib/hexapdf/type/font_simple.rb +4 -2
  51. data/lib/hexapdf/type/font_true_type.rb +6 -2
  52. data/lib/hexapdf/type/font_type0.rb +4 -4
  53. data/lib/hexapdf/type/form.rb +15 -2
  54. data/lib/hexapdf/type/image.rb +2 -2
  55. data/lib/hexapdf/type/page.rb +37 -13
  56. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  57. data/lib/hexapdf/type/resources.rb +1 -0
  58. data/lib/hexapdf/type/trailer.rb +2 -3
  59. data/lib/hexapdf/utils/object_hash.rb +0 -1
  60. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  61. data/lib/hexapdf/version.rb +1 -1
  62. data/test/hexapdf/common_tokenizer_tests.rb +6 -1
  63. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  64. data/test/hexapdf/content/test_canvas.rb +3 -3
  65. data/test/hexapdf/content/test_color_space.rb +1 -1
  66. data/test/hexapdf/encryption/test_aes.rb +4 -4
  67. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  68. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  69. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  70. data/test/hexapdf/font/encoding/test_base.rb +10 -0
  71. data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
  72. data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
  73. data/test/hexapdf/layout/test_style.rb +1 -1
  74. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  75. data/test/hexapdf/test_configuration.rb +2 -2
  76. data/test/hexapdf/test_dictionary.rb +3 -1
  77. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  78. data/test/hexapdf/test_document.rb +18 -10
  79. data/test/hexapdf/test_object.rb +71 -26
  80. data/test/hexapdf/test_parser.rb +159 -53
  81. data/test/hexapdf/test_pdf_array.rb +8 -1
  82. data/test/hexapdf/test_revisions.rb +35 -0
  83. data/test/hexapdf/test_writer.rb +2 -2
  84. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
  85. data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
  86. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  87. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  88. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  89. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  90. data/test/hexapdf/type/test_font_simple.rb +2 -1
  91. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  92. data/test/hexapdf/type/test_form.rb +26 -1
  93. data/test/hexapdf/type/test_page.rb +45 -7
  94. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  95. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  96. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  97. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  98. data/test/test_helper.rb +2 -0
  99. metadata +6 -11
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
441
441
 
442
442
  describe "validate" do
443
443
  before do
444
- @doc.trailer.validate # to create a valid document
444
+ @doc.validate # to create a valid document
445
445
  end
446
446
 
447
447
  it "validates indirect objects" do
448
- obj = @doc.add({Type: :Catalog})
448
+ obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
449
449
  refute(@doc.validate(auto_correct: false))
450
450
 
451
451
  called = false
452
- assert(@doc.validate {|o| assert_same(obj, o); called = true })
452
+ assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
453
453
  assert(called)
454
454
  end
455
455
 
456
456
  it "validates the trailer object" do
457
457
  @doc.trailer[:ID] = :Symbol
458
- refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
458
+ refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
459
459
  end
460
460
 
461
461
  it "validates only loaded objects" do
@@ -609,16 +609,24 @@ describe HexaPDF::Document do
609
609
 
610
610
  describe "caching interface" do
611
611
  it "allows setting and retrieving values" do
612
- assert_equal(:test, @doc.cache(:a, :b, :test))
613
- assert_equal(:test, @doc.cache(:a, :b, :other))
614
- assert_equal(:other, @doc.cache(:a, :c) { :other })
612
+ assert_equal(:test, @doc.cache(:a, :b, :test) { :notused })
613
+ assert_equal(:test, @doc.cache(:a, :b) { :other })
614
+ assert_equal(:test, @doc.cache(:a, :b))
615
+ assert_nil(@doc.cache(:a, :c, nil))
616
+ assert_nil(@doc.cache(:a, :c) { :other })
617
+ assert_nil(@doc.cache(:a, :c))
615
618
  assert(@doc.cached?(:a, :b))
616
619
  assert(@doc.cached?(:a, :c))
617
620
  end
618
621
 
622
+ it "allows updating a value" do
623
+ @doc.cache(:a, :b) { :test }
624
+ assert_equal(:new, @doc.cache(:a, :b, update: true) { :new })
625
+ end
626
+
619
627
  it "allows clearing cached values" do
620
- @doc.cache(:a, :b, :c)
621
- @doc.cache(:b, :c, :d)
628
+ @doc.cache(:a, :b) { :c }
629
+ @doc.cache(:b, :c) { :d }
622
630
  @doc.clear_cache(:a)
623
631
  refute(@doc.cached?(:a, :b))
624
632
  assert(@doc.cached?(:b, :c))
@@ -626,7 +634,7 @@ describe HexaPDF::Document do
626
634
  refute(@doc.cached?(:a, :c))
627
635
  end
628
636
 
629
- it "fails if no cached value exists and neither a value nor a block is given" do
637
+ it "fails if no cached value exists and no block is given" do
630
638
  assert_raises(LocalJumpError) { @doc.cache(:a, :b) }
631
639
  end
632
640
  end
@@ -3,18 +3,10 @@
3
3
  require 'test_helper'
4
4
  require 'hexapdf/object'
5
5
  require 'hexapdf/reference'
6
+ require 'hexapdf/document'
6
7
 
7
8
  describe HexaPDF::Object do
8
9
  describe "class.deep_copy" do
9
- it "handles not-duplicatable classes" do
10
- assert_equal(5, HexaPDF::Object.deep_copy(5))
11
- assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
12
- assert_nil(HexaPDF::Object.deep_copy(nil))
13
- assert_equal(true, HexaPDF::Object.deep_copy(true))
14
- assert_equal(false, HexaPDF::Object.deep_copy(false))
15
- assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
16
- end
17
-
18
10
  it "handles general, duplicatable classes" do
19
11
  x = "test"
20
12
  assert_equal("test", HexaPDF::Object.deep_copy(x))
@@ -103,30 +95,57 @@ describe HexaPDF::Object do
103
95
  end
104
96
 
105
97
  describe "validate" do
106
- it "invokes perform_validation correctly via #validate" do
107
- obj = HexaPDF::Object.new(5)
108
- invoked = {}
109
- obj.define_singleton_method(:perform_validation) do |&block|
110
- invoked[:method] = true
98
+ before do
99
+ @obj = HexaPDF::Object.new(5)
100
+ end
101
+
102
+ it "invokes perform_validation correctly" do
103
+ invoked = false
104
+ @obj.define_singleton_method(:perform_validation) { invoked = true }
105
+ assert(@obj.validate)
106
+ assert(invoked)
107
+ end
108
+
109
+ it "yields all arguments yielded by perform_validation" do
110
+ invoked = []
111
+ @obj.define_singleton_method(:perform_validation) do |&block|
112
+ block.call("error", true, :object)
113
+ end
114
+ assert(@obj.validate {|*a| invoked << a })
115
+ assert_equal([["error", true, :object]], invoked)
116
+ end
117
+
118
+ it "provides self as third argument if none is yielded by perform_validation" do
119
+ invoked = []
120
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
121
  block.call("error", true)
112
122
  end
113
- assert(obj.validate {|*a| invoked[:block] = a })
114
- assert_equal([:method, :block], invoked.keys)
115
- assert_equal(["error", true], invoked[:block])
123
+ assert(@obj.validate {|*a| invoked << a })
124
+ assert_equal([["error", true, @obj]], invoked)
125
+ end
116
126
 
117
- refute(obj.validate(auto_correct: false))
127
+ it "yields all problems when auto_correct is true" do
128
+ invoked = []
129
+ @obj.define_singleton_method(:perform_validation) do |&block|
130
+ invoked << :before
131
+ block.call("error", false)
132
+ invoked << :after
133
+ block.call("error2", true)
134
+ invoked << :last
135
+ end
136
+ refute(@obj.validate)
137
+ assert_equal([:before, :after, :last], invoked)
118
138
  end
119
139
 
120
- it "stops validating on an uncorrectable problem" do
121
- obj = HexaPDF::Object.new(5)
122
- invoked = {}
123
- obj.define_singleton_method(:perform_validation) do |&block|
124
- invoked[:before] = true
140
+ it "stops at the first uncorrectable problem if auto_correct is false" do
141
+ invoked = []
142
+ @obj.define_singleton_method(:perform_validation) do |&block|
143
+ invoked << :before
125
144
  block.call("error", false)
126
- invoked[:after] = true
145
+ invoked << :after
127
146
  end
128
- refute(obj.validate {|*a| invoked[:block] = a })
129
- refute(invoked.key?(:after))
147
+ refute(@obj.validate(auto_correct: false))
148
+ assert_equal([:before], invoked)
130
149
  end
131
150
  end
132
151
 
@@ -181,6 +200,32 @@ describe HexaPDF::Object do
181
200
  end
182
201
  end
183
202
 
203
+ describe "caching" do
204
+ before do
205
+ @obj = HexaPDF::Object.new({}, document: HexaPDF::Document.new)
206
+ end
207
+
208
+ it "can set and return a cached value" do
209
+ assert_equal(:value, @obj.cache(:data, :value))
210
+ assert_equal(:value, @obj.cache(:data, :other))
211
+ assert_equal(:value, @obj.cache(:block) { :value })
212
+ assert_equal(:other, @obj.cache(:data, :other, update: true))
213
+ end
214
+
215
+ it "can check for the existence of a cached value" do
216
+ refute(@obj.cached?(:data))
217
+ @obj.cache(:data, :value)
218
+ assert(@obj.cached?(:data))
219
+ end
220
+
221
+ it "can clear all cached values" do
222
+ @obj.cache(:data, :value)
223
+ assert(@obj.cached?(:data))
224
+ @obj.clear_cache
225
+ refute(@obj.cached?(:data))
226
+ end
227
+ end
228
+
184
229
  describe "validation" do
185
230
  before do
186
231
  @doc = Object.new
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
132
133
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
134
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
135
  end
136
+
137
+ describe "with strict parsing" do
138
+ before do
139
+ @document.config['parser.on_correctable_error'] = proc { true }
140
+ end
141
+
142
+ it "fails if an empty indirect object is found" do
143
+ create_parser("1 0 obj\nendobj")
144
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
145
+ assert_match(/no indirect object value/i, exp.message)
146
+ end
147
+
148
+ it "fails if keyword stream is followed only by CR without LF" do
149
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
150
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
151
+ assert_match(/not CR alone/, exp.message)
152
+ end
153
+
154
+ it "fails if the stream length value is invalid" do
155
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/invalid stream length/i, exp.message)
158
+ end
159
+
160
+ it "fails if the keyword endobj is mangled" do
161
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/keyword endobj/, exp.message)
164
+ end
165
+
166
+ it "fails if the keyword endobj is missing" do
167
+ create_parser("1 0 obj\n<< >>")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/keyword endobj/, exp.message)
170
+ end
171
+
172
+ it "fails if there is data between 'endstream' and 'endobj'" do
173
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
175
+ assert_match(/keyword endobj/, exp.message)
176
+ end
177
+ end
135
178
  end
136
179
 
137
180
  describe "load_object" do
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
205
248
  end
206
249
 
207
250
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
251
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
252
  assert_equal(5, @parser.startxref_offset)
210
253
  end
211
254
 
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
215
258
  end
216
259
 
217
260
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
261
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
262
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
263
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
264
  assert_equal(5, @parser.startxref_offset)
222
265
  end
223
266
 
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
242
285
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
286
  assert_match(/missing startxref/, exp.message)
244
287
  end
288
+
289
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
290
+ @document.config['parser.on_correctable_error'] = proc { true }
291
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
292
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
293
+ assert_match(/end-of-file marker not found/, exp.message)
294
+ end
245
295
  end
246
296
 
247
297
  describe "file_header_version" do
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
262
312
  end
263
313
 
264
314
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
315
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
316
  assert_equal('1.4', @parser.file_header_version)
267
317
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
318
  end
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
318
368
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
369
  end
320
370
 
371
+ it "fails if a sub section entry is mangled" do
372
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
373
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
374
+ assert_match(/invalid cross-reference entry/i, exp.message)
375
+ end
376
+
321
377
  it "fails if there is no trailer" do
322
378
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
379
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +385,71 @@ describe HexaPDF::Parser do
329
385
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
386
  assert_match(/dictionary/, exp.message)
331
387
  end
388
+
389
+ describe "invalid numbering of main xref section" do
390
+ it "handles the xref if the numbering is off by N" do
391
+ create_parser(" 1 0 obj 1 endobj\n" \
392
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
393
+ section, _trailer = @parser.parse_xref_section_and_trailer(17)
394
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
395
+ end
396
+
397
+ it "fails if the first entry is not the one for oid=0" do
398
+ create_parser(" 1 0 obj 1 endobj\n" \
399
+ "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
400
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
401
+ assert_match(/Main.*invalid numbering/i, exp.message)
402
+
403
+ create_parser(" 1 0 obj 1 endobj\n" \
404
+ "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
405
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
406
+ assert_match(/Main.*invalid numbering/i, exp.message)
407
+ end
408
+
409
+ it "fails if the tested entry position is invalid" do
410
+ create_parser(" 1 0 obj 1 endobj\n" \
411
+ "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
412
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
413
+ assert_match(/Main.*invalid numbering/i, exp.message)
414
+ end
415
+
416
+ it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
417
+ create_parser(" 2 0 obj 1 endobj\n" \
418
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
419
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
420
+ assert_match(/Main.*invalid numbering/i, exp.message)
421
+ end
422
+ end
423
+
424
+ describe "with strict parsing" do
425
+ before do
426
+ @document.config['parser.on_correctable_error'] = proc { true }
427
+ end
428
+
429
+ it "fails if xref type=n with offset=0" do
430
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
431
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
432
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
433
+ end
434
+
435
+ it "fails if xref type=n with gen>65535" do
436
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
437
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
438
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
439
+ end
440
+
441
+ it "fails if trailing second whitespace is missing" do
442
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
443
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
444
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
445
+ end
446
+
447
+ it "fails if the main cross-reference section has invalid numbering" do
448
+ create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
449
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
450
+ assert_match(/Main.*invalid numbering/i, exp.message)
451
+ end
452
+ end
332
453
  end
333
454
 
334
455
  describe "load_revision" do
@@ -348,75 +469,60 @@ describe HexaPDF::Parser do
348
469
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
470
  assert_match(/not a cross-reference stream/, exp.message)
350
471
  end
351
- end
352
472
 
353
- describe "with strict parsing enabled" do
354
- before do
473
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
474
  @document.config['parser.on_correctable_error'] = proc { true }
475
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
476
+ "stream\n\x01\x0A\x00\nendstream endobj")
477
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
478
+ assert_match(/entry for itself/, exp.message)
356
479
  end
480
+ end
357
481
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
362
- end
363
-
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
482
+ describe "reconstruct_revision" do
483
+ before do
484
+ @document.config['parser.try_xref_reconstruction'] = true
485
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
368
486
  end
369
487
 
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
488
+ it "serially parses the contents" do
489
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
490
+ assert_equal(6, @parser.load_object(@xref).value)
374
491
  end
375
492
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
493
+ it "ignores parts where the starting line is split across lines" do
494
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
495
+ assert_equal(5, @parser.load_object(@xref).value)
380
496
  end
381
497
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
498
+ it "handles cases where the line contains an invalid string that exceeds the read buffer" do
499
+ create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
500
+ assert_equal(6, @parser.load_object(@xref).value)
386
501
  end
387
502
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
503
+ it "ignores invalid objects" do
504
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
505
+ assert_equal(4, @parser.load_object(@xref).value)
392
506
  end
393
507
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
508
+ it "ignores invalid lines" do
509
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
510
+ assert_equal(6, @parser.load_object(@xref).value)
398
511
  end
399
512
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
513
+ it "uses the last trailer" do
514
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
515
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
516
  end
408
517
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
518
+ it "uses the first trailer in case of a linearized file" do
519
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
520
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
521
  end
414
522
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
523
+ it "fails if no valid trailer is found" do
524
+ create_parser("1 0 obj\n5\nendobj")
525
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
526
  end
421
527
  end
422
528
  end