hexapdf 0.12.3 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.12.3'
40
+ VERSION = '0.13.0'
41
41
 
42
42
  end
@@ -122,7 +122,7 @@ module CommonTokenizerTests
122
122
  end
123
123
 
124
124
  it "next_token: should not fail when reading super long numbers" do
125
- create_tokenizer("1" + "0" * 10_000)
125
+ create_tokenizer("1" << "0" * 10_000)
126
126
  assert_equal(10**10_000, @tokenizer.next_token)
127
127
  end
128
128
 
@@ -162,7 +162,7 @@ module CommonTokenizerTests
162
162
  end
163
163
 
164
164
  it "returns the correct position on operations" do
165
- create_tokenizer("hallo du" + " " * 50000 + "hallo du")
165
+ create_tokenizer("hallo du" << " " * 50000 << "hallo du")
166
166
  @tokenizer.next_token
167
167
  assert_equal(5, @tokenizer.pos)
168
168
 
@@ -68,14 +68,14 @@ describe HexaPDF::Content::GraphicObject::Arc do
68
68
  arc.max_curves = 4
69
69
  curves = arc.curves
70
70
  assert_equal(2, curves.size)
71
- assert_curve_values([0, 1, p1: [1, 0.548584], p2: [0.548584, 1]], curves[0])
72
- assert_curve_values([-1, 0, p1: [-0.548584, 1], p2: [-1, 0.548584]], curves[1])
71
+ assert_curve_values([0, 1, {p1: [1, 0.548584], p2: [0.548584, 1]}], curves[0])
72
+ assert_curve_values([-1, 0, {p1: [-0.548584, 1], p2: [-1, 0.548584]}], curves[1])
73
73
 
74
74
  arc.configure(clockwise: true)
75
75
  curves = arc.curves
76
76
  assert_equal(2, curves.size)
77
- assert_curve_values([0, -1, p1: [1, -0.548584], p2: [0.548584, -1]], curves[0])
78
- assert_curve_values([-1, 0, p1: [-0.548584, -1], p2: [-1, -0.548584]], curves[1])
77
+ assert_curve_values([0, -1, {p1: [1, -0.548584], p2: [0.548584, -1]}], curves[0])
78
+ assert_curve_values([-1, 0, {p1: [-0.548584, -1], p2: [-1, -0.548584]}], curves[1])
79
79
  end
80
80
  end
81
81
 
@@ -531,7 +531,7 @@ describe HexaPDF::Content::Canvas do
531
531
  end
532
532
 
533
533
  it "invokes the polygon method when radius != 0" do
534
- args = [0, 0, 10, 0, 10, 10, 0, 10, radius: 5]
534
+ args = [0, 0, 10, 0, 10, 10, 0, 10, {radius: 5}]
535
535
  assert_method_invoked(@canvas, :polygon, args) do
536
536
  @canvas.rectangle(0, 0, 10, 10, radius: 5)
537
537
  end
@@ -631,7 +631,7 @@ describe HexaPDF::Content::Canvas do
631
631
 
632
632
  describe "circle" do
633
633
  it "uses arc for the hard work" do
634
- assert_method_invoked(@canvas, :arc, [5, 6, a: 7]) do
634
+ assert_method_invoked(@canvas, :arc, [5, 6, {a: 7}]) do
635
635
  @canvas.graphics_object = :path
636
636
  @canvas.circle(5, 6, 7)
637
637
  end
@@ -651,7 +651,7 @@ describe HexaPDF::Content::Canvas do
651
651
 
652
652
  describe "ellipse" do
653
653
  it "uses arc for the hard work" do
654
- assert_method_invoked(@canvas, :ellipse, [5, 6, a: 7, b: 5, inclination: 10]) do
654
+ assert_method_invoked(@canvas, :ellipse, [5, 6, {a: 7, b: 5, inclination: 10}]) do
655
655
  @canvas.ellipse(5, 6, a: 7, b: 5, inclination: 10)
656
656
  end
657
657
  end
@@ -155,7 +155,7 @@ describe HexaPDF::Content::ColorSpace::DeviceGray do
155
155
 
156
156
  before do
157
157
  @color_space = HexaPDF::Content::ColorSpace::DeviceGray.new
158
- @color_space_family = @color_space_definition = :DeviceGray
158
+ @color_space_family = @color_space_definition = :DeviceGray
159
159
  @color = @color_space.default_color
160
160
  @other_color = @color_space.color(128)
161
161
  @colors = [128]
@@ -101,13 +101,13 @@ describe HexaPDF::Encryption::AES do
101
101
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
102
102
  assert_equal('a' * 16, result)
103
103
 
104
- f = Fiber.new { 'a' * 31 + "\x00" }
104
+ f = Fiber.new { 'a' * 31 << "\x00" }
105
105
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
106
- assert_equal('a' * 15 + "\x00", result)
106
+ assert_equal('a' * 15 << "\x00", result)
107
107
 
108
- f = Fiber.new { 'a' * 29 + "\x00\x01\x03" }
108
+ f = Fiber.new { 'a' * 29 << "\x00\x01\x03" }
109
109
  result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
110
- assert_equal('a' * 13 + "\x00\x01\x03", result)
110
+ assert_equal('a' * 13 << "\x00\x01\x03", result)
111
111
  end
112
112
 
113
113
  it "fails on decryption if not enough bytes are provided" do
@@ -53,24 +53,24 @@ describe HexaPDF::Encryption::StandardEncryptionDictionary do
53
53
  end
54
54
 
55
55
  describe HexaPDF::Encryption::StandardSecurityHandler do
56
- TEST_FILES = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
57
- USER_PASSWORD = 'uhexapdf'
58
- OWNER_PASSWORD = 'ohexapdf'
56
+ test_files = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
57
+ user_password = 'uhexapdf'
58
+ owner_password = 'ohexapdf'
59
59
 
60
- MINIMAL_DOC = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
60
+ minimal_doc = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
61
61
 
62
- TEST_FILES.each do |file|
62
+ test_files.each do |file|
63
63
  basename = File.basename(file)
64
64
  it "can decrypt, encrypt and decrypt the encrypted file #{basename} with the user password" do
65
65
  begin
66
66
  doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
67
- decryption_opts: {password: USER_PASSWORD})
68
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
67
+ decryption_opts: {password: user_password})
68
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
69
69
 
70
70
  out = StringIO.new(''.b)
71
71
  HexaPDF::Writer.new(doc, out).write
72
- doc = HexaPDF::Document.new(io: out, decryption_opts: {password: USER_PASSWORD})
73
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
72
+ doc = HexaPDF::Document.new(io: out, decryption_opts: {password: user_password})
73
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
74
74
  rescue HexaPDF::EncryptionError => e
75
75
  flunk("Error processing #{basename}: #{e}")
76
76
  end
@@ -80,8 +80,8 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
80
80
  it "can decrypt the encrypted file #{basename} with the owner password" do
81
81
  begin
82
82
  doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
83
- decryption_opts: {password: OWNER_PASSWORD})
84
- assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
83
+ decryption_opts: {password: owner_password})
84
+ assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
85
85
  rescue HexaPDF::EncryptionError => e
86
86
  flunk("Error processing #{basename}: #{e}")
87
87
  end
@@ -33,7 +33,7 @@ describe HexaPDF::Filter::ASCII85Decode do
33
33
  end
34
34
 
35
35
  it "ignores data after the EOD marker" do
36
- assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded.dup + "~>abcdefg"))))
36
+ assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << "~>abcdefg"))))
37
37
  end
38
38
 
39
39
  it "fails if the input contains invalid characters" do
@@ -24,7 +24,7 @@ describe HexaPDF::Filter::ASCIIHexDecode do
24
24
  end
25
25
 
26
26
  it "ignores data after the EOD marker" do
27
- assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded + '4e6f7gzz'))))
27
+ assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << '4e6f7gzz'))))
28
28
  end
29
29
 
30
30
  it "assumes the missing char is '0' if the input length is odd" do
@@ -674,10 +674,9 @@ describe HexaPDF::Layout::TextLayouter do
674
674
  pos = [0, 0]
675
675
  result.select! {|name, _| name == :set_text_matrix || name == :move_text_next_line }.
676
676
  map! do |name, ops|
677
- if name == :set_text_matrix
678
- pos = ops[-2, 2]
679
- elsif name == :move_text_next_line
680
- pos[1] -= leading
677
+ case name
678
+ when :set_text_matrix then pos = ops[-2, 2]
679
+ when :move_text_next_line then pos[1] -= leading
681
680
  end
682
681
  pos.dup
683
682
  end
@@ -66,8 +66,8 @@ describe HexaPDF::Configuration do
66
66
  assert_equal(HexaPDF, @config.constantize('test', 1))
67
67
  end
68
68
 
69
- def assert_constantize_error # :nodoc:
70
- exp = assert_raises(HexaPDF::Error) { yield }
69
+ def assert_constantize_error(&block) # :nodoc:
70
+ exp = assert_raises(HexaPDF::Error, &block)
71
71
  assert_match(/Error getting constant for configuration option/, exp.message)
72
72
  end
73
73
 
@@ -14,7 +14,9 @@ describe HexaPDF::Dictionary do
14
14
  end
15
15
 
16
16
  def add(obj)
17
- HexaPDF::Object.new(obj, oid: 1)
17
+ klass = HexaPDF::Object
18
+ klass = HexaPDF::Dictionary if obj.kind_of?(HexaPDF::Dictionary) || obj.kind_of?(Hash)
19
+ klass.new(obj, oid: 1)
18
20
  end
19
21
 
20
22
  def delete(_obj)
@@ -222,7 +222,7 @@ describe HexaPDF::DictionaryFields do
222
222
 
223
223
  it "allows conversion to a Rectangle from an Array" do
224
224
  doc = Minitest::Mock.new
225
- doc.expect(:wrap, :data, [[0, 1, 2, 3], type: HexaPDF::Rectangle])
225
+ doc.expect(:wrap, :data, [[0, 1, 2, 3], {type: HexaPDF::Rectangle}])
226
226
  @field.convert([0, 1, 2, 3], doc)
227
227
  doc.verify
228
228
  end
@@ -230,7 +230,7 @@ describe HexaPDF::DictionaryFields do
230
230
  it "allows conversion to a Rectangle from a HexaPDF::PDFArray" do
231
231
  data = HexaPDF::PDFArray.new([0, 1, 2, 3])
232
232
  doc = Minitest::Mock.new
233
- doc.expect(:wrap, :data, [data, type: HexaPDF::Rectangle])
233
+ doc.expect(:wrap, :data, [data, {type: HexaPDF::Rectangle}])
234
234
  @field.convert(data, doc)
235
235
  doc.verify
236
236
  end
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
441
441
 
442
442
  describe "validate" do
443
443
  before do
444
- @doc.trailer.validate # to create a valid document
444
+ @doc.validate # to create a valid document
445
445
  end
446
446
 
447
447
  it "validates indirect objects" do
448
- obj = @doc.add({Type: :Catalog})
448
+ obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
449
449
  refute(@doc.validate(auto_correct: false))
450
450
 
451
451
  called = false
452
- assert(@doc.validate {|o| assert_same(obj, o); called = true })
452
+ assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
453
453
  assert(called)
454
454
  end
455
455
 
456
456
  it "validates the trailer object" do
457
457
  @doc.trailer[:ID] = :Symbol
458
- refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
458
+ refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
459
459
  end
460
460
 
461
461
  it "validates only loaded objects" do
@@ -6,15 +6,6 @@ require 'hexapdf/reference'
6
6
 
7
7
  describe HexaPDF::Object do
8
8
  describe "class.deep_copy" do
9
- it "handles not-duplicatable classes" do
10
- assert_equal(5, HexaPDF::Object.deep_copy(5))
11
- assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
12
- assert_nil(HexaPDF::Object.deep_copy(nil))
13
- assert_equal(true, HexaPDF::Object.deep_copy(true))
14
- assert_equal(false, HexaPDF::Object.deep_copy(false))
15
- assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
16
- end
17
-
18
9
  it "handles general, duplicatable classes" do
19
10
  x = "test"
20
11
  assert_equal("test", HexaPDF::Object.deep_copy(x))
@@ -103,30 +94,57 @@ describe HexaPDF::Object do
103
94
  end
104
95
 
105
96
  describe "validate" do
106
- it "invokes perform_validation correctly via #validate" do
107
- obj = HexaPDF::Object.new(5)
108
- invoked = {}
109
- obj.define_singleton_method(:perform_validation) do |&block|
110
- invoked[:method] = true
97
+ before do
98
+ @obj = HexaPDF::Object.new(5)
99
+ end
100
+
101
+ it "invokes perform_validation correctly" do
102
+ invoked = false
103
+ @obj.define_singleton_method(:perform_validation) { invoked = true }
104
+ assert(@obj.validate)
105
+ assert(invoked)
106
+ end
107
+
108
+ it "yields all arguments yieled by perform_validation" do
109
+ invoked = []
110
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
+ block.call("error", true, :object)
112
+ end
113
+ assert(@obj.validate {|*a| invoked << a })
114
+ assert_equal([["error", true, :object]], invoked)
115
+ end
116
+
117
+ it "provides self as third argument if none is yielded by perform_validation" do
118
+ invoked = []
119
+ @obj.define_singleton_method(:perform_validation) do |&block|
111
120
  block.call("error", true)
112
121
  end
113
- assert(obj.validate {|*a| invoked[:block] = a })
114
- assert_equal([:method, :block], invoked.keys)
115
- assert_equal(["error", true], invoked[:block])
122
+ assert(@obj.validate {|*a| invoked << a })
123
+ assert_equal([["error", true, @obj]], invoked)
124
+ end
116
125
 
117
- refute(obj.validate(auto_correct: false))
126
+ it "yields all problems when auto_correct is true" do
127
+ invoked = []
128
+ @obj.define_singleton_method(:perform_validation) do |&block|
129
+ invoked << :before
130
+ block.call("error", false)
131
+ invoked << :after
132
+ block.call("error2", true)
133
+ invoked << :last
134
+ end
135
+ refute(@obj.validate)
136
+ assert_equal([:before, :after, :last], invoked)
118
137
  end
119
138
 
120
- it "stops validating on an uncorrectable problem" do
121
- obj = HexaPDF::Object.new(5)
122
- invoked = {}
123
- obj.define_singleton_method(:perform_validation) do |&block|
124
- invoked[:before] = true
139
+ it "stops at the first uncorrectable problem if auto_correct is false" do
140
+ invoked = []
141
+ @obj.define_singleton_method(:perform_validation) do |&block|
142
+ invoked << :before
125
143
  block.call("error", false)
126
- invoked[:after] = true
144
+ invoked << :after
127
145
  end
128
- refute(obj.validate {|*a| invoked[:block] = a })
129
- refute(invoked.key?(:after))
146
+ refute(@obj.validate(auto_correct: false))
147
+ assert_equal([:before], invoked)
130
148
  end
131
149
  end
132
150
 
@@ -8,6 +8,7 @@ require 'stringio'
8
8
  describe HexaPDF::Parser do
9
9
  before do
10
10
  @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
11
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
12
13
 
13
14
  create_parser(<<~EOF)
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
132
133
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
133
134
  assert_match(/stream.*followed by.*endstream/i, exp.message)
134
135
  end
136
+
137
+ describe "with strict parsing" do
138
+ before do
139
+ @document.config['parser.on_correctable_error'] = proc { true }
140
+ end
141
+
142
+ it "fails if an empty indirect object is found" do
143
+ create_parser("1 0 obj\nendobj")
144
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
145
+ assert_match(/no indirect object value/i, exp.message)
146
+ end
147
+
148
+ it "fails if keyword stream is followed only by CR without LF" do
149
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
150
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
151
+ assert_match(/not CR alone/, exp.message)
152
+ end
153
+
154
+ it "fails if the stream length value is invalid" do
155
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
157
+ assert_match(/invalid stream length/i, exp.message)
158
+ end
159
+
160
+ it "fails if the keyword endobj is mangled" do
161
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/keyword endobj/, exp.message)
164
+ end
165
+
166
+ it "fails if the keyword endobj is missing" do
167
+ create_parser("1 0 obj\n<< >>")
168
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
169
+ assert_match(/keyword endobj/, exp.message)
170
+ end
171
+
172
+ it "fails if there is data between 'endstream' and 'endobj'" do
173
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
174
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
175
+ assert_match(/keyword endobj/, exp.message)
176
+ end
177
+ end
135
178
  end
136
179
 
137
180
  describe "load_object" do
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
205
248
  end
206
249
 
207
250
  it "ignores garbage at the end of the file" do
208
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
251
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
209
252
  assert_equal(5, @parser.startxref_offset)
210
253
  end
211
254
 
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
215
258
  end
216
259
 
217
260
  it "finds the startxref anywhere in file" do
218
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
261
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
219
262
  assert_equal(5, @parser.startxref_offset)
220
- create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
263
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
221
264
  assert_equal(5, @parser.startxref_offset)
222
265
  end
223
266
 
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
242
285
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
243
286
  assert_match(/missing startxref/, exp.message)
244
287
  end
288
+
289
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
290
+ @document.config['parser.on_correctable_error'] = proc { true }
291
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
292
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
293
+ assert_match(/end-of-file marker not found/, exp.message)
294
+ end
245
295
  end
246
296
 
247
297
  describe "file_header_version" do
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
262
312
  end
263
313
 
264
314
  it "ignores junk at the beginning of the file and correctly calculates offset" do
265
- create_parser("junk" * 200 + "\n%PDF-1.4\n")
315
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
266
316
  assert_equal('1.4', @parser.file_header_version)
267
317
  assert_equal(801, @parser.instance_variable_get(:@header_offset))
268
318
  end
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
318
368
  assert_match(/invalid cross-reference subsection/i, exp.message)
319
369
  end
320
370
 
371
+ it "fails if a sub section entry is mangled" do
372
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
373
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
374
+ assert_match(/invalid cross-reference entry/i, exp.message)
375
+ end
376
+
321
377
  it "fails if there is no trailer" do
322
378
  create_parser("xref\n0 1\n0000000000 00000 n \n")
323
379
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +385,30 @@ describe HexaPDF::Parser do
329
385
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
330
386
  assert_match(/dictionary/, exp.message)
331
387
  end
388
+
389
+ describe "with strict parsing" do
390
+ before do
391
+ @document.config['parser.on_correctable_error'] = proc { true }
392
+ end
393
+
394
+ it "fails if xref type=n with offset=0" do
395
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
396
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
397
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
398
+ end
399
+
400
+ it " fails xref type=n with gen>65535" do
401
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
402
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
403
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
404
+ end
405
+
406
+ it "fails if trailing second whitespace is missing" do
407
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
408
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
409
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
410
+ end
411
+ end
332
412
  end
333
413
 
334
414
  describe "load_revision" do
@@ -348,75 +428,55 @@ describe HexaPDF::Parser do
348
428
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
349
429
  assert_match(/not a cross-reference stream/, exp.message)
350
430
  end
351
- end
352
431
 
353
- describe "with strict parsing enabled" do
354
- before do
432
+ it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
355
433
  @document.config['parser.on_correctable_error'] = proc { true }
434
+ create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
435
+ "stream\n\x01\x0A\x00\nendstream endobj")
436
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
437
+ assert_match(/entry for itself/, exp.message)
356
438
  end
439
+ end
357
440
 
358
- it "startxref_offset fails if the startxref is not in the last part of the file" do
359
- create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
360
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
361
- assert_match(/end-of-file marker not found/, exp.message)
362
- end
363
-
364
- it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
365
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
366
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
367
- assert_match(/invalid.*cross-reference entry/i, exp.message)
368
- end
369
-
370
- it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
371
- create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
372
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
373
- assert_match(/invalid.*cross-reference entry/i, exp.message)
441
+ describe "reconstruct_revision" do
442
+ before do
443
+ @document.config['parser.try_xref_reconstruction'] = true
444
+ @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
374
445
  end
375
446
 
376
- it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
377
- create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
378
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
379
- assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
447
+ it "serially parses the contents" do
448
+ create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
449
+ assert_equal(6, @parser.load_object(@xref).value)
380
450
  end
381
451
 
382
- it "parse_indirect_object fails if an empty indirect object is found" do
383
- create_parser("1 0 obj\nendobj")
384
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
385
- assert_match(/no indirect object value/i, exp.message)
452
+ it "ignores parts where the starting line is split across lines" do
453
+ create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
454
+ assert_equal(5, @parser.load_object(@xref).value)
386
455
  end
387
456
 
388
- it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
389
- create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
390
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
391
- assert_match(/not CR alone/, exp.message)
457
+ it "ignores invalid objects" do
458
+ create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
459
+ assert_equal(4, @parser.load_object(@xref).value)
392
460
  end
393
461
 
394
- it "parse_indirect_object fails if the stream length value is invalid" do
395
- create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
396
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
397
- assert_match(/invalid stream length/i, exp.message)
462
+ it "ignores invalid lines" do
463
+ create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
464
+ assert_equal(6, @parser.load_object(@xref).value)
398
465
  end
399
466
 
400
- it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
401
- create_parser("1 0 obj\n<< >>\nendobjd\n")
402
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
403
- assert_match(/keyword endobj/, exp.message)
404
- create_parser("1 0 obj\n<< >>")
405
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
406
- assert_match(/keyword endobj/, exp.message)
467
+ it "uses the last trailer" do
468
+ create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
469
+ assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
407
470
  end
408
471
 
409
- it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
410
- create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
411
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
412
- assert_match(/keyword endobj/, exp.message)
472
+ it "uses the first trailer in case of a linearized file" do
473
+ create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
474
+ assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
413
475
  end
414
476
 
415
- it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
416
- create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
417
- "stream\n\x01\x0A\x00\nendstream endobj")
418
- exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
419
- assert_match(/entry for itself/, exp.message)
477
+ it "fails if no valid trailer is found" do
478
+ create_parser("1 0 obj\n5\nendobj")
479
+ assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
420
480
  end
421
481
  end
422
482
  end