hexapdf 0.12.3 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +31 -12
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +30 -25
- data/lib/hexapdf/parser.rb +65 -3
- data/lib/hexapdf/pdf_array.rb +9 -2
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +6 -4
- data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +3 -1
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +1 -1
- data/lib/hexapdf/type/form.rb +2 -1
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +16 -7
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +3 -4
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +4 -4
- data/test/hexapdf/test_object.rb +44 -26
- data/test/hexapdf/test_parser.rb +115 -55
- data/test/hexapdf/test_pdf_array.rb +7 -0
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +4 -4
- data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +1 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- metadata +5 -12
data/lib/hexapdf/version.rb
CHANGED
@@ -122,7 +122,7 @@ module CommonTokenizerTests
|
|
122
122
|
end
|
123
123
|
|
124
124
|
it "next_token: should not fail when reading super long numbers" do
|
125
|
-
create_tokenizer("1"
|
125
|
+
create_tokenizer("1" << "0" * 10_000)
|
126
126
|
assert_equal(10**10_000, @tokenizer.next_token)
|
127
127
|
end
|
128
128
|
|
@@ -162,7 +162,7 @@ module CommonTokenizerTests
|
|
162
162
|
end
|
163
163
|
|
164
164
|
it "returns the correct position on operations" do
|
165
|
-
create_tokenizer("hallo du"
|
165
|
+
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
166
|
@tokenizer.next_token
|
167
167
|
assert_equal(5, @tokenizer.pos)
|
168
168
|
|
@@ -68,14 +68,14 @@ describe HexaPDF::Content::GraphicObject::Arc do
|
|
68
68
|
arc.max_curves = 4
|
69
69
|
curves = arc.curves
|
70
70
|
assert_equal(2, curves.size)
|
71
|
-
assert_curve_values([0, 1, p1: [1, 0.548584], p2: [0.548584, 1]], curves[0])
|
72
|
-
assert_curve_values([-1, 0, p1: [-0.548584, 1], p2: [-1, 0.548584]], curves[1])
|
71
|
+
assert_curve_values([0, 1, {p1: [1, 0.548584], p2: [0.548584, 1]}], curves[0])
|
72
|
+
assert_curve_values([-1, 0, {p1: [-0.548584, 1], p2: [-1, 0.548584]}], curves[1])
|
73
73
|
|
74
74
|
arc.configure(clockwise: true)
|
75
75
|
curves = arc.curves
|
76
76
|
assert_equal(2, curves.size)
|
77
|
-
assert_curve_values([0, -1, p1: [1, -0.548584], p2: [0.548584, -1]], curves[0])
|
78
|
-
assert_curve_values([-1, 0, p1: [-0.548584, -1], p2: [-1, -0.548584]], curves[1])
|
77
|
+
assert_curve_values([0, -1, {p1: [1, -0.548584], p2: [0.548584, -1]}], curves[0])
|
78
|
+
assert_curve_values([-1, 0, {p1: [-0.548584, -1], p2: [-1, -0.548584]}], curves[1])
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
@@ -531,7 +531,7 @@ describe HexaPDF::Content::Canvas do
|
|
531
531
|
end
|
532
532
|
|
533
533
|
it "invokes the polygon method when radius != 0" do
|
534
|
-
args = [0, 0, 10, 0, 10, 10, 0, 10, radius: 5]
|
534
|
+
args = [0, 0, 10, 0, 10, 10, 0, 10, {radius: 5}]
|
535
535
|
assert_method_invoked(@canvas, :polygon, args) do
|
536
536
|
@canvas.rectangle(0, 0, 10, 10, radius: 5)
|
537
537
|
end
|
@@ -631,7 +631,7 @@ describe HexaPDF::Content::Canvas do
|
|
631
631
|
|
632
632
|
describe "circle" do
|
633
633
|
it "uses arc for the hard work" do
|
634
|
-
assert_method_invoked(@canvas, :arc, [5, 6, a: 7]) do
|
634
|
+
assert_method_invoked(@canvas, :arc, [5, 6, {a: 7}]) do
|
635
635
|
@canvas.graphics_object = :path
|
636
636
|
@canvas.circle(5, 6, 7)
|
637
637
|
end
|
@@ -651,7 +651,7 @@ describe HexaPDF::Content::Canvas do
|
|
651
651
|
|
652
652
|
describe "ellipse" do
|
653
653
|
it "uses arc for the hard work" do
|
654
|
-
assert_method_invoked(@canvas, :ellipse, [5, 6, a: 7, b: 5, inclination: 10]) do
|
654
|
+
assert_method_invoked(@canvas, :ellipse, [5, 6, {a: 7, b: 5, inclination: 10}]) do
|
655
655
|
@canvas.ellipse(5, 6, a: 7, b: 5, inclination: 10)
|
656
656
|
end
|
657
657
|
end
|
@@ -155,7 +155,7 @@ describe HexaPDF::Content::ColorSpace::DeviceGray do
|
|
155
155
|
|
156
156
|
before do
|
157
157
|
@color_space = HexaPDF::Content::ColorSpace::DeviceGray.new
|
158
|
-
@color_space_family = @color_space_definition =
|
158
|
+
@color_space_family = @color_space_definition = :DeviceGray
|
159
159
|
@color = @color_space.default_color
|
160
160
|
@other_color = @color_space.color(128)
|
161
161
|
@colors = [128]
|
@@ -101,13 +101,13 @@ describe HexaPDF::Encryption::AES do
|
|
101
101
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
102
102
|
assert_equal('a' * 16, result)
|
103
103
|
|
104
|
-
f = Fiber.new { 'a' * 31
|
104
|
+
f = Fiber.new { 'a' * 31 << "\x00" }
|
105
105
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
106
|
-
assert_equal('a' * 15
|
106
|
+
assert_equal('a' * 15 << "\x00", result)
|
107
107
|
|
108
|
-
f = Fiber.new { 'a' * 29
|
108
|
+
f = Fiber.new { 'a' * 29 << "\x00\x01\x03" }
|
109
109
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
110
|
-
assert_equal('a' * 13
|
110
|
+
assert_equal('a' * 13 << "\x00\x01\x03", result)
|
111
111
|
end
|
112
112
|
|
113
113
|
it "fails on decryption if not enough bytes are provided" do
|
@@ -53,24 +53,24 @@ describe HexaPDF::Encryption::StandardEncryptionDictionary do
|
|
53
53
|
end
|
54
54
|
|
55
55
|
describe HexaPDF::Encryption::StandardSecurityHandler do
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
test_files = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
|
57
|
+
user_password = 'uhexapdf'
|
58
|
+
owner_password = 'ohexapdf'
|
59
59
|
|
60
|
-
|
60
|
+
minimal_doc = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
|
61
61
|
|
62
|
-
|
62
|
+
test_files.each do |file|
|
63
63
|
basename = File.basename(file)
|
64
64
|
it "can decrypt, encrypt and decrypt the encrypted file #{basename} with the user password" do
|
65
65
|
begin
|
66
66
|
doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
|
67
|
-
decryption_opts: {password:
|
68
|
-
assert_equal(
|
67
|
+
decryption_opts: {password: user_password})
|
68
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
69
69
|
|
70
70
|
out = StringIO.new(''.b)
|
71
71
|
HexaPDF::Writer.new(doc, out).write
|
72
|
-
doc = HexaPDF::Document.new(io: out, decryption_opts: {password:
|
73
|
-
assert_equal(
|
72
|
+
doc = HexaPDF::Document.new(io: out, decryption_opts: {password: user_password})
|
73
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
74
74
|
rescue HexaPDF::EncryptionError => e
|
75
75
|
flunk("Error processing #{basename}: #{e}")
|
76
76
|
end
|
@@ -80,8 +80,8 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
80
80
|
it "can decrypt the encrypted file #{basename} with the owner password" do
|
81
81
|
begin
|
82
82
|
doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
|
83
|
-
decryption_opts: {password:
|
84
|
-
assert_equal(
|
83
|
+
decryption_opts: {password: owner_password})
|
84
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
85
85
|
rescue HexaPDF::EncryptionError => e
|
86
86
|
flunk("Error processing #{basename}: #{e}")
|
87
87
|
end
|
@@ -33,7 +33,7 @@ describe HexaPDF::Filter::ASCII85Decode do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it "ignores data after the EOD marker" do
|
36
|
-
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded
|
36
|
+
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << "~>abcdefg"))))
|
37
37
|
end
|
38
38
|
|
39
39
|
it "fails if the input contains invalid characters" do
|
@@ -24,7 +24,7 @@ describe HexaPDF::Filter::ASCIIHexDecode do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it "ignores data after the EOD marker" do
|
27
|
-
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded
|
27
|
+
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << '4e6f7gzz'))))
|
28
28
|
end
|
29
29
|
|
30
30
|
it "assumes the missing char is '0' if the input length is odd" do
|
@@ -674,10 +674,9 @@ describe HexaPDF::Layout::TextLayouter do
|
|
674
674
|
pos = [0, 0]
|
675
675
|
result.select! {|name, _| name == :set_text_matrix || name == :move_text_next_line }.
|
676
676
|
map! do |name, ops|
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
pos[1] -= leading
|
677
|
+
case name
|
678
|
+
when :set_text_matrix then pos = ops[-2, 2]
|
679
|
+
when :move_text_next_line then pos[1] -= leading
|
681
680
|
end
|
682
681
|
pos.dup
|
683
682
|
end
|
@@ -66,8 +66,8 @@ describe HexaPDF::Configuration do
|
|
66
66
|
assert_equal(HexaPDF, @config.constantize('test', 1))
|
67
67
|
end
|
68
68
|
|
69
|
-
def assert_constantize_error # :nodoc:
|
70
|
-
exp = assert_raises(HexaPDF::Error)
|
69
|
+
def assert_constantize_error(&block) # :nodoc:
|
70
|
+
exp = assert_raises(HexaPDF::Error, &block)
|
71
71
|
assert_match(/Error getting constant for configuration option/, exp.message)
|
72
72
|
end
|
73
73
|
|
@@ -14,7 +14,9 @@ describe HexaPDF::Dictionary do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def add(obj)
|
17
|
-
HexaPDF::Object
|
17
|
+
klass = HexaPDF::Object
|
18
|
+
klass = HexaPDF::Dictionary if obj.kind_of?(HexaPDF::Dictionary) || obj.kind_of?(Hash)
|
19
|
+
klass.new(obj, oid: 1)
|
18
20
|
end
|
19
21
|
|
20
22
|
def delete(_obj)
|
@@ -222,7 +222,7 @@ describe HexaPDF::DictionaryFields do
|
|
222
222
|
|
223
223
|
it "allows conversion to a Rectangle from an Array" do
|
224
224
|
doc = Minitest::Mock.new
|
225
|
-
doc.expect(:wrap, :data, [[0, 1, 2, 3], type: HexaPDF::Rectangle])
|
225
|
+
doc.expect(:wrap, :data, [[0, 1, 2, 3], {type: HexaPDF::Rectangle}])
|
226
226
|
@field.convert([0, 1, 2, 3], doc)
|
227
227
|
doc.verify
|
228
228
|
end
|
@@ -230,7 +230,7 @@ describe HexaPDF::DictionaryFields do
|
|
230
230
|
it "allows conversion to a Rectangle from a HexaPDF::PDFArray" do
|
231
231
|
data = HexaPDF::PDFArray.new([0, 1, 2, 3])
|
232
232
|
doc = Minitest::Mock.new
|
233
|
-
doc.expect(:wrap, :data, [data, type: HexaPDF::Rectangle])
|
233
|
+
doc.expect(:wrap, :data, [data, {type: HexaPDF::Rectangle}])
|
234
234
|
@field.convert(data, doc)
|
235
235
|
doc.verify
|
236
236
|
end
|
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
|
|
441
441
|
|
442
442
|
describe "validate" do
|
443
443
|
before do
|
444
|
-
@doc.
|
444
|
+
@doc.validate # to create a valid document
|
445
445
|
end
|
446
446
|
|
447
447
|
it "validates indirect objects" do
|
448
|
-
obj = @doc.add({Type: :
|
448
|
+
obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
|
449
449
|
refute(@doc.validate(auto_correct: false))
|
450
450
|
|
451
451
|
called = false
|
452
|
-
assert(@doc.validate {|o| assert_same(obj, o); called = true })
|
452
|
+
assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
|
453
453
|
assert(called)
|
454
454
|
end
|
455
455
|
|
456
456
|
it "validates the trailer object" do
|
457
457
|
@doc.trailer[:ID] = :Symbol
|
458
|
-
refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
|
458
|
+
refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
|
459
459
|
end
|
460
460
|
|
461
461
|
it "validates only loaded objects" do
|
data/test/hexapdf/test_object.rb
CHANGED
@@ -6,15 +6,6 @@ require 'hexapdf/reference'
|
|
6
6
|
|
7
7
|
describe HexaPDF::Object do
|
8
8
|
describe "class.deep_copy" do
|
9
|
-
it "handles not-duplicatable classes" do
|
10
|
-
assert_equal(5, HexaPDF::Object.deep_copy(5))
|
11
|
-
assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
|
12
|
-
assert_nil(HexaPDF::Object.deep_copy(nil))
|
13
|
-
assert_equal(true, HexaPDF::Object.deep_copy(true))
|
14
|
-
assert_equal(false, HexaPDF::Object.deep_copy(false))
|
15
|
-
assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
|
16
|
-
end
|
17
|
-
|
18
9
|
it "handles general, duplicatable classes" do
|
19
10
|
x = "test"
|
20
11
|
assert_equal("test", HexaPDF::Object.deep_copy(x))
|
@@ -103,30 +94,57 @@ describe HexaPDF::Object do
|
|
103
94
|
end
|
104
95
|
|
105
96
|
describe "validate" do
|
106
|
-
|
107
|
-
obj = HexaPDF::Object.new(5)
|
108
|
-
|
109
|
-
|
110
|
-
|
97
|
+
before do
|
98
|
+
@obj = HexaPDF::Object.new(5)
|
99
|
+
end
|
100
|
+
|
101
|
+
it "invokes perform_validation correctly" do
|
102
|
+
invoked = false
|
103
|
+
@obj.define_singleton_method(:perform_validation) { invoked = true }
|
104
|
+
assert(@obj.validate)
|
105
|
+
assert(invoked)
|
106
|
+
end
|
107
|
+
|
108
|
+
it "yields all arguments yieled by perform_validation" do
|
109
|
+
invoked = []
|
110
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
111
|
+
block.call("error", true, :object)
|
112
|
+
end
|
113
|
+
assert(@obj.validate {|*a| invoked << a })
|
114
|
+
assert_equal([["error", true, :object]], invoked)
|
115
|
+
end
|
116
|
+
|
117
|
+
it "provides self as third argument if none is yielded by perform_validation" do
|
118
|
+
invoked = []
|
119
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
111
120
|
block.call("error", true)
|
112
121
|
end
|
113
|
-
assert(obj.validate {|*a| invoked
|
114
|
-
assert_equal([
|
115
|
-
|
122
|
+
assert(@obj.validate {|*a| invoked << a })
|
123
|
+
assert_equal([["error", true, @obj]], invoked)
|
124
|
+
end
|
116
125
|
|
117
|
-
|
126
|
+
it "yields all problems when auto_correct is true" do
|
127
|
+
invoked = []
|
128
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
129
|
+
invoked << :before
|
130
|
+
block.call("error", false)
|
131
|
+
invoked << :after
|
132
|
+
block.call("error2", true)
|
133
|
+
invoked << :last
|
134
|
+
end
|
135
|
+
refute(@obj.validate)
|
136
|
+
assert_equal([:before, :after, :last], invoked)
|
118
137
|
end
|
119
138
|
|
120
|
-
it "stops
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
invoked[:before] = true
|
139
|
+
it "stops at the first uncorrectable problem if auto_correct is false" do
|
140
|
+
invoked = []
|
141
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
142
|
+
invoked << :before
|
125
143
|
block.call("error", false)
|
126
|
-
invoked
|
144
|
+
invoked << :after
|
127
145
|
end
|
128
|
-
refute(obj.validate
|
129
|
-
|
146
|
+
refute(@obj.validate(auto_correct: false))
|
147
|
+
assert_equal([:before], invoked)
|
130
148
|
end
|
131
149
|
end
|
132
150
|
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -8,6 +8,7 @@ require 'stringio'
|
|
8
8
|
describe HexaPDF::Parser do
|
9
9
|
before do
|
10
10
|
@document = HexaPDF::Document.new
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
12
13
|
|
13
14
|
create_parser(<<~EOF)
|
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
|
|
132
133
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
133
134
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
134
135
|
end
|
136
|
+
|
137
|
+
describe "with strict parsing" do
|
138
|
+
before do
|
139
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
140
|
+
end
|
141
|
+
|
142
|
+
it "fails if an empty indirect object is found" do
|
143
|
+
create_parser("1 0 obj\nendobj")
|
144
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
145
|
+
assert_match(/no indirect object value/i, exp.message)
|
146
|
+
end
|
147
|
+
|
148
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
149
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
150
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
151
|
+
assert_match(/not CR alone/, exp.message)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "fails if the stream length value is invalid" do
|
155
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
157
|
+
assert_match(/invalid stream length/i, exp.message)
|
158
|
+
end
|
159
|
+
|
160
|
+
it "fails if the keyword endobj is mangled" do
|
161
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
163
|
+
assert_match(/keyword endobj/, exp.message)
|
164
|
+
end
|
165
|
+
|
166
|
+
it "fails if the keyword endobj is missing" do
|
167
|
+
create_parser("1 0 obj\n<< >>")
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
169
|
+
assert_match(/keyword endobj/, exp.message)
|
170
|
+
end
|
171
|
+
|
172
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
173
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
175
|
+
assert_match(/keyword endobj/, exp.message)
|
176
|
+
end
|
177
|
+
end
|
135
178
|
end
|
136
179
|
|
137
180
|
describe "load_object" do
|
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
|
|
205
248
|
end
|
206
249
|
|
207
250
|
it "ignores garbage at the end of the file" do
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
251
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
209
252
|
assert_equal(5, @parser.startxref_offset)
|
210
253
|
end
|
211
254
|
|
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
|
|
215
258
|
end
|
216
259
|
|
217
260
|
it "finds the startxref anywhere in file" do
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
261
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
219
262
|
assert_equal(5, @parser.startxref_offset)
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
263
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
221
264
|
assert_equal(5, @parser.startxref_offset)
|
222
265
|
end
|
223
266
|
|
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
|
|
242
285
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
243
286
|
assert_match(/missing startxref/, exp.message)
|
244
287
|
end
|
288
|
+
|
289
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
290
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
291
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
292
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
293
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
294
|
+
end
|
245
295
|
end
|
246
296
|
|
247
297
|
describe "file_header_version" do
|
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
|
|
262
312
|
end
|
263
313
|
|
264
314
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
265
|
-
create_parser("junk" * 200
|
315
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
266
316
|
assert_equal('1.4', @parser.file_header_version)
|
267
317
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
268
318
|
end
|
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
|
|
318
368
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
319
369
|
end
|
320
370
|
|
371
|
+
it "fails if a sub section entry is mangled" do
|
372
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
373
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
374
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
375
|
+
end
|
376
|
+
|
321
377
|
it "fails if there is no trailer" do
|
322
378
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
323
379
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
@@ -329,6 +385,30 @@ describe HexaPDF::Parser do
|
|
329
385
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
330
386
|
assert_match(/dictionary/, exp.message)
|
331
387
|
end
|
388
|
+
|
389
|
+
describe "with strict parsing" do
|
390
|
+
before do
|
391
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
392
|
+
end
|
393
|
+
|
394
|
+
it "fails if xref type=n with offset=0" do
|
395
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
396
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
397
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
398
|
+
end
|
399
|
+
|
400
|
+
it " fails xref type=n with gen>65535" do
|
401
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
402
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
403
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
404
|
+
end
|
405
|
+
|
406
|
+
it "fails if trailing second whitespace is missing" do
|
407
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
408
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
409
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
410
|
+
end
|
411
|
+
end
|
332
412
|
end
|
333
413
|
|
334
414
|
describe "load_revision" do
|
@@ -348,75 +428,55 @@ describe HexaPDF::Parser do
|
|
348
428
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
349
429
|
assert_match(/not a cross-reference stream/, exp.message)
|
350
430
|
end
|
351
|
-
end
|
352
431
|
|
353
|
-
|
354
|
-
before do
|
432
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
355
433
|
@document.config['parser.on_correctable_error'] = proc { true }
|
434
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
435
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
436
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
437
|
+
assert_match(/entry for itself/, exp.message)
|
356
438
|
end
|
439
|
+
end
|
357
440
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
|
364
|
-
it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
|
365
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
366
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
368
|
-
end
|
369
|
-
|
370
|
-
it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
|
371
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
372
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
441
|
+
describe "reconstruct_revision" do
|
442
|
+
before do
|
443
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
444
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
374
445
|
end
|
375
446
|
|
376
|
-
it "
|
377
|
-
create_parser("
|
378
|
-
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
447
|
+
it "serially parses the contents" do
|
448
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
449
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
380
450
|
end
|
381
451
|
|
382
|
-
it "
|
383
|
-
create_parser("1 0 obj\nendobj")
|
384
|
-
|
385
|
-
assert_match(/no indirect object value/i, exp.message)
|
452
|
+
it "ignores parts where the starting line is split across lines" do
|
453
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
454
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
386
455
|
end
|
387
456
|
|
388
|
-
it "
|
389
|
-
create_parser("1
|
390
|
-
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
457
|
+
it "ignores invalid objects" do
|
458
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
459
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
392
460
|
end
|
393
461
|
|
394
|
-
it "
|
395
|
-
create_parser("1 0 obj
|
396
|
-
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
462
|
+
it "ignores invalid lines" do
|
463
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
464
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
398
465
|
end
|
399
466
|
|
400
|
-
it "
|
401
|
-
create_parser("1
|
402
|
-
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
467
|
+
it "uses the last trailer" do
|
468
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
469
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
407
470
|
end
|
408
471
|
|
409
|
-
it "
|
410
|
-
create_parser("1
|
411
|
-
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
472
|
+
it "uses the first trailer in case of a linearized file" do
|
473
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
474
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
413
475
|
end
|
414
476
|
|
415
|
-
it "
|
416
|
-
create_parser("
|
417
|
-
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
477
|
+
it "fails if no valid trailer is found" do
|
478
|
+
create_parser("1 0 obj\n5\nendobj")
|
479
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
420
480
|
end
|
421
481
|
end
|
422
482
|
end
|