hexapdf 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.12.3'
40
+ VERSION = '0.13.0'
41
41
 
42
42
  end
@@ -122,7 +122,7 @@ module CommonTokenizerTests
122
122
  end
123
123
 
124
124
  it "next_token: should not fail when reading super long numbers" do
125
- create_tokenizer("1" + "0" * 10_000)
125
+ create_tokenizer("1" << "0" * 10_000)
126
126
  assert_equal(10**10_000, @tokenizer.next_token)
127
127
  end
128
128
 
@@ -162,7 +162,7 @@ module CommonTokenizerTests
162
162
  end
163
163
 
164
164
  it "returns the correct position on operations" do
165
- create_tokenizer("hallo du" + " " * 50000 + "hallo du")
165
+ create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
166
  @tokenizer.next_token
167
167
  assert_equal(5, @tokenizer.pos)
168
168
 
@@ -68,14 +68,14 @@ describe HexaPDF::Content::GraphicObject::Arc do
68
68
  arc.max_curves = 4
69
69
  curves = arc.curves
70
70
  assert_equal(2, curves.size)
71
- assert_curve_values([0, 1, p1: [1, 0.548584], p2: [0.548584, 1]], curves[0])
72
- assert_curve_values([-1, 0, p1: [-0.548584, 1], p2: [-1, 0.548584]], curves[1])
71
+ assert_curve_values([0, 1, {p1: [1, 0.548584], p2: [0.548584, 1]}], curves[0])
72
+ assert_curve_values([-1, 0, {p1: [-0.548584, 1], p2: [-1, 0.548584]}], curves[1])
73
73
 
74
74
  arc.configure(clockwise: true)
75
75
  curves = arc.curves
76
76
  assert_equal(2, curves.size)
77
- assert_curve_values([0, -1, p1: [1, -0.548584], p2: [0.548584, -1]], curves[0])
78
- assert_curve_values([-1, 0, p1: [-0.548584, -1], p2: [-1, -0.548584]], curves[1])
77
+ assert_curve_values([0, -1, {p1: [1, -0.548584], p2: [0.548584, -1]}], curves[0])
78
+ assert_curve_values([-1, 0, {p1: [-0.548584, -1], p2: [-1, -0.548584]}], curves[1])
79
79
  end
80
80
  end
81
81
 
@@ -531,7 +531,7 @@ describe HexaPDF::Content::Canvas do
531
531
  end
532
532
 
533
533
  it "invokes the polygon method when radius != 0" do
534
- args = [0, 0, 10, 0, 10, 10, 0, 10, radius: 5]
534
+ args = [0, 0, 10, 0, 10, 10, 0, 10, {radius: 5}]
535
535
  assert_method_invoked(@canvas, :polygon, args) do
536
536
  @canvas.rectangle(0, 0, 10, 10, radius: 5)
537
537
  end
@@ -631,7 +631,7 @@ describe HexaPDF::Content::Canvas do
631
631
 
632
632
  describe "circle" do
633
633
  it "uses arc for the hard work" do
634
- assert_method_invoked(@canvas, :arc, [5, 6, a: 7]) do
634
+ assert_method_invoked(@canvas, :arc, [5, 6, {a: 7}]) do
635
635
  @canvas.graphics_object = :path
636
636
  @canvas.circle(5, 6, 7)
637
637
  end
@@ -651,7 +651,7 @@ describe HexaPDF::Content::Canvas do
651
651
 
652
652
  describe "ellipse" do
653
653
  it "uses arc for the hard work" do
654
- assert_method_invoked(@canvas, :ellipse, [5, 6, a: 7, b: 5, inclination: 10]) do
654
+ assert_method_invoked(@canvas, :ellipse, [5, 6, {a: 7, b: 5, inclination: 10}]) do
655
655
  @canvas.ellipse(5, 6, a: 7, b: 5, inclination: 10)
656
656
  end
657
657
  end
@@ -155,7 +155,7 @@ describe HexaPDF::Content::ColorSpace::DeviceGray do
155
155
 
156
156
  before do
157
157
  @color_space = HexaPDF::Content::ColorSpace::DeviceGray.new
158
- @color_space_family = @color_space_definition = :DeviceGray
158
+ @color_space_family = @color_space_definition = :DeviceGray
159
159
  @color = @color_space.default_color
160
160
  @other_color = @color_space.color(128)
161
161
  @colors = [128]
@@ -101,13 +101,13 @@ describe HexaPDF::Encryption::AES do
101
101
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
102
102
  assert_equal('a' * 16, result)
103
103
 
104
- f = Fiber.new { 'a' * 31 + "\x00" }
104
+ f = Fiber.new { 'a' * 31 << "\x00" }
105
105
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
106
- assert_equal('a' * 15 + "\x00", result)
106
+ assert_equal('a' * 15 << "\x00", result)
107
107
 
108
- f = Fiber.new { 'a' * 29 + "\x00\x01\x03" }
108
+ f = Fiber.new { 'a' * 29 << "\x00\x01\x03" }
109
109
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
110
- assert_equal('a' * 13 + "\x00\x01\x03", result)
110
+ assert_equal('a' * 13 << "\x00\x01\x03", result)
111
111
  end
112
112
 
113
113
  it "fails on decryption if not enough bytes are provided" do
@@ -53,24 +53,24 @@ describe HexaPDF::Encryption::StandardEncryptionDictionary do
53
53
  end
54
54
 
55
55
  describe HexaPDF::Encryption::StandardSecurityHandler do
56
- TEST_FILES = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
57
- USER_PASSWORD = 'uhexapdf'
58
- OWNER_PASSWORD = 'ohexapdf'
56
+ test_files = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
57
+ user_password = 'uhexapdf'
58
+ owner_password = 'ohexapdf'
59
59
 
60
- MINIMAL_DOC = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
60
+ minimal_doc = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
61
61
 
62
- TEST_FILES.each do |file|
62
+ test_files.each do |file|
63
63
  basename = File.basename(file)
64
64
  it "can decrypt, encrypt and decrypt the encrypted file #{basename} with the user password" do
65
65
  begin
66
66
  doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
67
- decryption_opts: {password: USER_PASSWORD})
68
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
67
+ decryption_opts: {password: user_password})
68
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
69
69
 
70
70
  out = StringIO.new(''.b)
71
71
  HexaPDF::Writer.new(doc, out).write
72
- doc = HexaPDF::Document.new(io: out, decryption_opts: {password: USER_PASSWORD})
73
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
72
+ doc = HexaPDF::Document.new(io: out, decryption_opts: {password: user_password})
73
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
74
74
  rescue HexaPDF::EncryptionError => e
75
75
  flunk("Error processing #{basename}: #{e}")
76
76
  end
@@ -80,8 +80,8 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
80
80
  it "can decrypt the encrypted file #{basename} with the owner password" do
81
81
  begin
82
82
  doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
83
- decryption_opts: {password: OWNER_PASSWORD})
84
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
83
+ decryption_opts: {password: owner_password})
84
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
85
85
  rescue HexaPDF::EncryptionError => e
86
86
  flunk("Error processing #{basename}: #{e}")
87
87
  end
@@ -33,7 +33,7 @@ describe HexaPDF::Filter::ASCII85Decode do
33
33
  end
34
34
 
35
35
  it "ignores data after the EOD marker" do
36
- assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded.dup + "~>abcdefg"))))
36
+ assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << "~>abcdefg"))))
37
37
  end
38
38
 
39
39
  it "fails if the input contains invalid characters" do
@@ -24,7 +24,7 @@ describe HexaPDF::Filter::ASCIIHexDecode do
24
24
  end
25
25
 
26
26
  it "ignores data after the EOD marker" do
27
- assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded + '4e6f7gzz'))))
27
+ assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << '4e6f7gzz'))))
28
28
  end
29
29
 
30
30
  it "assumes the missing char is '0' if the input length is odd" do
@@ -674,10 +674,9 @@ describe HexaPDF::Layout::TextLayouter do
674
674
  pos = [0, 0]
675
675
  result.select! {|name, _| name == :set_text_matrix || name == :move_text_next_line }.
676
676
  map! do |name, ops|
677
- if name == :set_text_matrix
678
- pos = ops[-2, 2]
679
- elsif name == :move_text_next_line
680
- pos[1] -= leading
677
+ case name
678
+ when :set_text_matrix then pos = ops[-2, 2]
679
+ when :move_text_next_line then pos[1] -= leading
681
680
  end
682
681
  pos.dup
683
682
  end
@@ -66,8 +66,8 @@ describe HexaPDF::Configuration do
66
66
  assert_equal(HexaPDF, @config.constantize('test', 1))
67
67
  end
68
68
 
69
- def assert_constantize_error # :nodoc:
70
- exp = assert_raises(HexaPDF::Error) { yield }
69
+ def assert_constantize_error(&block) # :nodoc:
70
+ exp = assert_raises(HexaPDF::Error, &block)
71
71
  assert_match(/Error getting constant for configuration option/, exp.message)
72
72
  end
73
73
 
@@ -14,7 +14,9 @@ describe HexaPDF::Dictionary do
14
14
  end
15
15
 
16
16
  def add(obj)
17
- HexaPDF::Object.new(obj, oid: 1)
17
+ klass = HexaPDF::Object
18
+ klass = HexaPDF::Dictionary if obj.kind_of?(HexaPDF::Dictionary) || obj.kind_of?(Hash)
19
+ klass.new(obj, oid: 1)
18
20
  end
19
21
 
20
22
  def delete(_obj)
@@ -222,7 +222,7 @@ describe HexaPDF::DictionaryFields do
222
222
 
223
223
  it "allows conversion to a Rectangle from an Array" do
224
224
  doc = Minitest::Mock.new
225
- doc.expect(:wrap, :data, [[0, 1, 2, 3], type: HexaPDF::Rectangle])
225
+ doc.expect(:wrap, :data, [[0, 1, 2, 3], {type: HexaPDF::Rectangle}])
226
226
  @field.convert([0, 1, 2, 3], doc)
227
227
  doc.verify
228
228
  end
@@ -230,7 +230,7 @@ describe HexaPDF::DictionaryFields do
230
230
  it "allows conversion to a Rectangle from a HexaPDF::PDFArray" do
231
231
  data = HexaPDF::PDFArray.new([0, 1, 2, 3])
232
232
  doc = Minitest::Mock.new
233
- doc.expect(:wrap, :data, [data, type: HexaPDF::Rectangle])
233
+ doc.expect(:wrap, :data, [data, {type: HexaPDF::Rectangle}])
234
234
  @field.convert(data, doc)
235
235
  doc.verify
236
236
  end
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
441
441
 
442
442
  describe "validate" do
443
443
  before do
444
- @doc.trailer.validate # to create a valid document
444
+ @doc.validate # to create a valid document
445
445
  end
446
446
 
447
447
  it "validates indirect objects" do
448
- obj = @doc.add({Type: :Catalog})
448
+ obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
449
449
  refute(@doc.validate(auto_correct: false))
450
450
 
451
451
  called = false
452
- assert(@doc.validate {|o| assert_same(obj, o); called = true })
452
+ assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
453
453
  assert(called)
454
454
  end
455
455
 
456
456
  it "validates the trailer object" do
457
457
  @doc.trailer[:ID] = :Symbol
458
- refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
458
+ refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
459
459
  end
460
460
 
461
461
  it "validates only loaded objects" do
@@ -6,15 +6,6 @@ require 'hexapdf/reference'
6
6
 
7
7
  describe HexaPDF::Object do
8
8
  describe "class.deep_copy" do
9
- it "handles not-duplicatable classes" do
10
- assert_equal(5, HexaPDF::Object.deep_copy(5))
11
- assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
12
- assert_nil(HexaPDF::Object.deep_copy(nil))
13
- assert_equal(true, HexaPDF::Object.deep_copy(true))
14
- assert_equal(false, HexaPDF::Object.deep_copy(false))
15
- assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
16
- end
17
-
18
9
  it "handles general, duplicatable classes" do
19
10
  x = "test"
20
11
  assert_equal("test", HexaPDF::Object.deep_copy(x))
@@ -103,30 +94,57 @@ describe HexaPDF::Object do
103
94
  end
104
95
 
105
96
  describe "validate" do
106
- it "invokes perform_validation correctly via #validate" do
107
- obj = HexaPDF::Object.new(5)
108
- invoked = {}
109
- obj.define_singleton_method(:perform_validation) do |&block|
110
- invoked[:method] = true
97
+ before do
98
+ @obj = HexaPDF::Object.new(5)
99
+ end
100
+
101
+ it "invokes perform_validation correctly" do
102
+ invoked = false
103
+ @obj.define_singleton_method(:perform_validation) { invoked = true }
104
+ assert(@obj.validate)
105
+ assert(invoked)
106
+ end
107
+
108
+ it "yields all arguments yieled by perform_validation" do
109
+ invoked = []
110
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
+ block.call("error", true, :object)
112
+ end
113
+ assert(@obj.validate {|*a| invoked << a })
114
+ assert_equal([["error", true, :object]], invoked)
115
+ end
116
+
117
+ it "provides self as third argument if none is yielded by perform_validation" do
118
+ invoked = []
119
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
120
  block.call("error", true)
112
121
  end
113
- assert(obj.validate {|*a| invoked[:block] = a })
114
- assert_equal([:method, :block], invoked.keys)
115
- assert_equal(["error", true], invoked[:block])
122
+ assert(@obj.validate {|*a| invoked << a })
123
+ assert_equal([["error", true, @obj]], invoked)
124
+ end
116
125
 
117
- refute(obj.validate(auto_correct: false))
126
+ it "yields all problems when auto_correct is true" do
127
+ invoked = []
128
+ @obj.define_singleton_method(:perform_validation) do |&block|
129
+ invoked << :before
130
+ block.call("error", false)
131
+ invoked << :after
132
+ block.call("error2", true)
133
+ invoked << :last
134
+ end
135
+ refute(@obj.validate)
136
+ assert_equal([:before, :after, :last], invoked)
118
137
  end
119
138
 
120
- it "stops validating on an uncorrectable problem" do
121
- obj = HexaPDF::Object.new(5)
122
- invoked = {}
123
- obj.define_singleton_method(:perform_validation) do |&block|
124
- invoked[:before] = true
139
+ it "stops at the first uncorrectable problem if auto_correct is false" do
140
+ invoked = []
141
+ @obj.define_singleton_method(:perform_validation) do |&block|
142
+ invoked << :before
125
143
  block.call("error", false)
126
- invoked[:after] = true
144
+ invoked << :after
127
145
  end
128
- refute(obj.validate {|*a| invoked[:block] = a })
129
- refute(invoked.key?(:after))
146
+ refute(@obj.validate(auto_correct: false))
147
+ assert_equal([:before], invoked)
130
148
  end
131
149
  end
132
150
 
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
132
133
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
134
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
135
  end
136
+
137
+ describe "with strict parsing" do
138
+ before do
139
+ @document.config['parser.on_correctable_error'] = proc { true }
140
+ end
141
+
142
+ it "fails if an empty indirect object is found" do
143
+ create_parser("1 0 obj\nendobj")
144
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
145
+ assert_match(/no indirect object value/i, exp.message)
146
+ end
147
+
148
+ it "fails if keyword stream is followed only by CR without LF" do
149
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
150
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
151
+ assert_match(/not CR alone/, exp.message)
152
+ end
153
+
154
+ it "fails if the stream length value is invalid" do
155
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/invalid stream length/i, exp.message)
158
+ end
159
+
160
+ it "fails if the keyword endobj is mangled" do
161
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/keyword endobj/, exp.message)
164
+ end
165
+
166
+ it "fails if the keyword endobj is missing" do
167
+ create_parser("1 0 obj\n<< >>")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/keyword endobj/, exp.message)
170
+ end
171
+
172
+ it "fails if there is data between 'endstream' and 'endobj'" do
173
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
175
+ assert_match(/keyword endobj/, exp.message)
176
+ end
177
+ end
135
178
  end
136
179
 
137
180
  describe "load_object" do
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
205
248
  end
206
249
 
207
250
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
251
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
252
  assert_equal(5, @parser.startxref_offset)
210
253
  end
211
254
 
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
215
258
  end
216
259
 
217
260
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
261
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
262
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
263
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
264
  assert_equal(5, @parser.startxref_offset)
222
265
  end
223
266
 
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
242
285
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
286
  assert_match(/missing startxref/, exp.message)
244
287
  end
288
+
289
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
290
+ @document.config['parser.on_correctable_error'] = proc { true }
291
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
292
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
293
+ assert_match(/end-of-file marker not found/, exp.message)
294
+ end
245
295
  end
246
296
 
247
297
  describe "file_header_version" do
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
262
312
  end
263
313
 
264
314
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
315
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
316
  assert_equal('1.4', @parser.file_header_version)
267
317
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
318
  end
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
318
368
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
369
  end
320
370
 
371
+ it "fails if a sub section entry is mangled" do
372
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
373
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
374
+ assert_match(/invalid cross-reference entry/i, exp.message)
375
+ end
376
+
321
377
  it "fails if there is no trailer" do
322
378
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
379
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +385,30 @@ describe HexaPDF::Parser do
329
385
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
386
  assert_match(/dictionary/, exp.message)
331
387
  end
388
+
389
+ describe "with strict parsing" do
390
+ before do
391
+ @document.config['parser.on_correctable_error'] = proc { true }
392
+ end
393
+
394
+ it "fails if xref type=n with offset=0" do
395
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
396
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
397
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
398
+ end
399
+
400
+ it " fails xref type=n with gen>65535" do
401
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
402
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
403
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
404
+ end
405
+
406
+ it "fails if trailing second whitespace is missing" do
407
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
408
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
409
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
410
+ end
411
+ end
332
412
  end
333
413
 
334
414
  describe "load_revision" do
@@ -348,75 +428,55 @@ describe HexaPDF::Parser do
348
428
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
429
  assert_match(/not a cross-reference stream/, exp.message)
350
430
  end
351
- end
352
431
 
353
- describe "with strict parsing enabled" do
354
- before do
432
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
433
  @document.config['parser.on_correctable_error'] = proc { true }
434
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
435
+ "stream\n\x01\x0A\x00\nendstream endobj")
436
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
437
+ assert_match(/entry for itself/, exp.message)
356
438
  end
439
+ end
357
440
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
362
- end
363
-
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
368
- end
369
-
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
441
+ describe "reconstruct_revision" do
442
+ before do
443
+ @document.config['parser.try_xref_reconstruction'] = true
444
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
374
445
  end
375
446
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
447
+ it "serially parses the contents" do
448
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
449
+ assert_equal(6, @parser.load_object(@xref).value)
380
450
  end
381
451
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
452
+ it "ignores parts where the starting line is split across lines" do
453
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
454
+ assert_equal(5, @parser.load_object(@xref).value)
386
455
  end
387
456
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
457
+ it "ignores invalid objects" do
458
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
459
+ assert_equal(4, @parser.load_object(@xref).value)
392
460
  end
393
461
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
462
+ it "ignores invalid lines" do
463
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
464
+ assert_equal(6, @parser.load_object(@xref).value)
398
465
  end
399
466
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
467
+ it "uses the last trailer" do
468
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
469
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
470
  end
408
471
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
472
+ it "uses the first trailer in case of a linearized file" do
473
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
474
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
475
  end
414
476
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
477
+ it "fails if no valid trailer is found" do
478
+ create_parser("1 0 obj\n5\nendobj")
479
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
480
  end
421
481
  end
422
482
  end