hexapdf 0.12.3 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +31 -12
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +30 -25
- data/lib/hexapdf/parser.rb +65 -3
- data/lib/hexapdf/pdf_array.rb +9 -2
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +6 -4
- data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +3 -1
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +1 -1
- data/lib/hexapdf/type/form.rb +2 -1
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +16 -7
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +3 -4
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +4 -4
- data/test/hexapdf/test_object.rb +44 -26
- data/test/hexapdf/test_parser.rb +115 -55
- data/test/hexapdf/test_pdf_array.rb +7 -0
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +4 -4
- data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +1 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- metadata +5 -12
data/lib/hexapdf/version.rb
CHANGED
@@ -122,7 +122,7 @@ module CommonTokenizerTests
|
|
122
122
|
end
|
123
123
|
|
124
124
|
it "next_token: should not fail when reading super long numbers" do
|
125
|
-
create_tokenizer("1"
|
125
|
+
create_tokenizer("1" << "0" * 10_000)
|
126
126
|
assert_equal(10**10_000, @tokenizer.next_token)
|
127
127
|
end
|
128
128
|
|
@@ -162,7 +162,7 @@ module CommonTokenizerTests
|
|
162
162
|
end
|
163
163
|
|
164
164
|
it "returns the correct position on operations" do
|
165
|
-
create_tokenizer("hallo du"
|
165
|
+
create_tokenizer("hallo du" << " " * 50000 << "hallo du")
|
166
166
|
@tokenizer.next_token
|
167
167
|
assert_equal(5, @tokenizer.pos)
|
168
168
|
|
@@ -68,14 +68,14 @@ describe HexaPDF::Content::GraphicObject::Arc do
|
|
68
68
|
arc.max_curves = 4
|
69
69
|
curves = arc.curves
|
70
70
|
assert_equal(2, curves.size)
|
71
|
-
assert_curve_values([0, 1, p1: [1, 0.548584], p2: [0.548584, 1]], curves[0])
|
72
|
-
assert_curve_values([-1, 0, p1: [-0.548584, 1], p2: [-1, 0.548584]], curves[1])
|
71
|
+
assert_curve_values([0, 1, {p1: [1, 0.548584], p2: [0.548584, 1]}], curves[0])
|
72
|
+
assert_curve_values([-1, 0, {p1: [-0.548584, 1], p2: [-1, 0.548584]}], curves[1])
|
73
73
|
|
74
74
|
arc.configure(clockwise: true)
|
75
75
|
curves = arc.curves
|
76
76
|
assert_equal(2, curves.size)
|
77
|
-
assert_curve_values([0, -1, p1: [1, -0.548584], p2: [0.548584, -1]], curves[0])
|
78
|
-
assert_curve_values([-1, 0, p1: [-0.548584, -1], p2: [-1, -0.548584]], curves[1])
|
77
|
+
assert_curve_values([0, -1, {p1: [1, -0.548584], p2: [0.548584, -1]}], curves[0])
|
78
|
+
assert_curve_values([-1, 0, {p1: [-0.548584, -1], p2: [-1, -0.548584]}], curves[1])
|
79
79
|
end
|
80
80
|
end
|
81
81
|
|
@@ -531,7 +531,7 @@ describe HexaPDF::Content::Canvas do
|
|
531
531
|
end
|
532
532
|
|
533
533
|
it "invokes the polygon method when radius != 0" do
|
534
|
-
args = [0, 0, 10, 0, 10, 10, 0, 10, radius: 5]
|
534
|
+
args = [0, 0, 10, 0, 10, 10, 0, 10, {radius: 5}]
|
535
535
|
assert_method_invoked(@canvas, :polygon, args) do
|
536
536
|
@canvas.rectangle(0, 0, 10, 10, radius: 5)
|
537
537
|
end
|
@@ -631,7 +631,7 @@ describe HexaPDF::Content::Canvas do
|
|
631
631
|
|
632
632
|
describe "circle" do
|
633
633
|
it "uses arc for the hard work" do
|
634
|
-
assert_method_invoked(@canvas, :arc, [5, 6, a: 7]) do
|
634
|
+
assert_method_invoked(@canvas, :arc, [5, 6, {a: 7}]) do
|
635
635
|
@canvas.graphics_object = :path
|
636
636
|
@canvas.circle(5, 6, 7)
|
637
637
|
end
|
@@ -651,7 +651,7 @@ describe HexaPDF::Content::Canvas do
|
|
651
651
|
|
652
652
|
describe "ellipse" do
|
653
653
|
it "uses arc for the hard work" do
|
654
|
-
assert_method_invoked(@canvas, :ellipse, [5, 6, a: 7, b: 5, inclination: 10]) do
|
654
|
+
assert_method_invoked(@canvas, :ellipse, [5, 6, {a: 7, b: 5, inclination: 10}]) do
|
655
655
|
@canvas.ellipse(5, 6, a: 7, b: 5, inclination: 10)
|
656
656
|
end
|
657
657
|
end
|
@@ -155,7 +155,7 @@ describe HexaPDF::Content::ColorSpace::DeviceGray do
|
|
155
155
|
|
156
156
|
before do
|
157
157
|
@color_space = HexaPDF::Content::ColorSpace::DeviceGray.new
|
158
|
-
@color_space_family = @color_space_definition =
|
158
|
+
@color_space_family = @color_space_definition = :DeviceGray
|
159
159
|
@color = @color_space.default_color
|
160
160
|
@other_color = @color_space.color(128)
|
161
161
|
@colors = [128]
|
@@ -101,13 +101,13 @@ describe HexaPDF::Encryption::AES do
|
|
101
101
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
102
102
|
assert_equal('a' * 16, result)
|
103
103
|
|
104
|
-
f = Fiber.new { 'a' * 31
|
104
|
+
f = Fiber.new { 'a' * 31 << "\x00" }
|
105
105
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
106
|
-
assert_equal('a' * 15
|
106
|
+
assert_equal('a' * 15 << "\x00", result)
|
107
107
|
|
108
|
-
f = Fiber.new { 'a' * 29
|
108
|
+
f = Fiber.new { 'a' * 29 << "\x00\x01\x03" }
|
109
109
|
result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
|
110
|
-
assert_equal('a' * 13
|
110
|
+
assert_equal('a' * 13 << "\x00\x01\x03", result)
|
111
111
|
end
|
112
112
|
|
113
113
|
it "fails on decryption if not enough bytes are provided" do
|
@@ -53,24 +53,24 @@ describe HexaPDF::Encryption::StandardEncryptionDictionary do
|
|
53
53
|
end
|
54
54
|
|
55
55
|
describe HexaPDF::Encryption::StandardSecurityHandler do
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
test_files = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
|
57
|
+
user_password = 'uhexapdf'
|
58
|
+
owner_password = 'ohexapdf'
|
59
59
|
|
60
|
-
|
60
|
+
minimal_doc = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
|
61
61
|
|
62
|
-
|
62
|
+
test_files.each do |file|
|
63
63
|
basename = File.basename(file)
|
64
64
|
it "can decrypt, encrypt and decrypt the encrypted file #{basename} with the user password" do
|
65
65
|
begin
|
66
66
|
doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
|
67
|
-
decryption_opts: {password:
|
68
|
-
assert_equal(
|
67
|
+
decryption_opts: {password: user_password})
|
68
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
69
69
|
|
70
70
|
out = StringIO.new(''.b)
|
71
71
|
HexaPDF::Writer.new(doc, out).write
|
72
|
-
doc = HexaPDF::Document.new(io: out, decryption_opts: {password:
|
73
|
-
assert_equal(
|
72
|
+
doc = HexaPDF::Document.new(io: out, decryption_opts: {password: user_password})
|
73
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
74
74
|
rescue HexaPDF::EncryptionError => e
|
75
75
|
flunk("Error processing #{basename}: #{e}")
|
76
76
|
end
|
@@ -80,8 +80,8 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
|
|
80
80
|
it "can decrypt the encrypted file #{basename} with the owner password" do
|
81
81
|
begin
|
82
82
|
doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
|
83
|
-
decryption_opts: {password:
|
84
|
-
assert_equal(
|
83
|
+
decryption_opts: {password: owner_password})
|
84
|
+
assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
|
85
85
|
rescue HexaPDF::EncryptionError => e
|
86
86
|
flunk("Error processing #{basename}: #{e}")
|
87
87
|
end
|
@@ -33,7 +33,7 @@ describe HexaPDF::Filter::ASCII85Decode do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it "ignores data after the EOD marker" do
|
36
|
-
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded
|
36
|
+
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << "~>abcdefg"))))
|
37
37
|
end
|
38
38
|
|
39
39
|
it "fails if the input contains invalid characters" do
|
@@ -24,7 +24,7 @@ describe HexaPDF::Filter::ASCIIHexDecode do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it "ignores data after the EOD marker" do
|
27
|
-
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded
|
27
|
+
assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << '4e6f7gzz'))))
|
28
28
|
end
|
29
29
|
|
30
30
|
it "assumes the missing char is '0' if the input length is odd" do
|
@@ -674,10 +674,9 @@ describe HexaPDF::Layout::TextLayouter do
|
|
674
674
|
pos = [0, 0]
|
675
675
|
result.select! {|name, _| name == :set_text_matrix || name == :move_text_next_line }.
|
676
676
|
map! do |name, ops|
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
pos[1] -= leading
|
677
|
+
case name
|
678
|
+
when :set_text_matrix then pos = ops[-2, 2]
|
679
|
+
when :move_text_next_line then pos[1] -= leading
|
681
680
|
end
|
682
681
|
pos.dup
|
683
682
|
end
|
@@ -66,8 +66,8 @@ describe HexaPDF::Configuration do
|
|
66
66
|
assert_equal(HexaPDF, @config.constantize('test', 1))
|
67
67
|
end
|
68
68
|
|
69
|
-
def assert_constantize_error # :nodoc:
|
70
|
-
exp = assert_raises(HexaPDF::Error)
|
69
|
+
def assert_constantize_error(&block) # :nodoc:
|
70
|
+
exp = assert_raises(HexaPDF::Error, &block)
|
71
71
|
assert_match(/Error getting constant for configuration option/, exp.message)
|
72
72
|
end
|
73
73
|
|
@@ -14,7 +14,9 @@ describe HexaPDF::Dictionary do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def add(obj)
|
17
|
-
HexaPDF::Object
|
17
|
+
klass = HexaPDF::Object
|
18
|
+
klass = HexaPDF::Dictionary if obj.kind_of?(HexaPDF::Dictionary) || obj.kind_of?(Hash)
|
19
|
+
klass.new(obj, oid: 1)
|
18
20
|
end
|
19
21
|
|
20
22
|
def delete(_obj)
|
@@ -222,7 +222,7 @@ describe HexaPDF::DictionaryFields do
|
|
222
222
|
|
223
223
|
it "allows conversion to a Rectangle from an Array" do
|
224
224
|
doc = Minitest::Mock.new
|
225
|
-
doc.expect(:wrap, :data, [[0, 1, 2, 3], type: HexaPDF::Rectangle])
|
225
|
+
doc.expect(:wrap, :data, [[0, 1, 2, 3], {type: HexaPDF::Rectangle}])
|
226
226
|
@field.convert([0, 1, 2, 3], doc)
|
227
227
|
doc.verify
|
228
228
|
end
|
@@ -230,7 +230,7 @@ describe HexaPDF::DictionaryFields do
|
|
230
230
|
it "allows conversion to a Rectangle from a HexaPDF::PDFArray" do
|
231
231
|
data = HexaPDF::PDFArray.new([0, 1, 2, 3])
|
232
232
|
doc = Minitest::Mock.new
|
233
|
-
doc.expect(:wrap, :data, [data, type: HexaPDF::Rectangle])
|
233
|
+
doc.expect(:wrap, :data, [data, {type: HexaPDF::Rectangle}])
|
234
234
|
@field.convert(data, doc)
|
235
235
|
doc.verify
|
236
236
|
end
|
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
|
|
441
441
|
|
442
442
|
describe "validate" do
|
443
443
|
before do
|
444
|
-
@doc.
|
444
|
+
@doc.validate # to create a valid document
|
445
445
|
end
|
446
446
|
|
447
447
|
it "validates indirect objects" do
|
448
|
-
obj = @doc.add({Type: :
|
448
|
+
obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
|
449
449
|
refute(@doc.validate(auto_correct: false))
|
450
450
|
|
451
451
|
called = false
|
452
|
-
assert(@doc.validate {|o| assert_same(obj, o); called = true })
|
452
|
+
assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
|
453
453
|
assert(called)
|
454
454
|
end
|
455
455
|
|
456
456
|
it "validates the trailer object" do
|
457
457
|
@doc.trailer[:ID] = :Symbol
|
458
|
-
refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
|
458
|
+
refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
|
459
459
|
end
|
460
460
|
|
461
461
|
it "validates only loaded objects" do
|
data/test/hexapdf/test_object.rb
CHANGED
@@ -6,15 +6,6 @@ require 'hexapdf/reference'
|
|
6
6
|
|
7
7
|
describe HexaPDF::Object do
|
8
8
|
describe "class.deep_copy" do
|
9
|
-
it "handles not-duplicatable classes" do
|
10
|
-
assert_equal(5, HexaPDF::Object.deep_copy(5))
|
11
|
-
assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
|
12
|
-
assert_nil(HexaPDF::Object.deep_copy(nil))
|
13
|
-
assert_equal(true, HexaPDF::Object.deep_copy(true))
|
14
|
-
assert_equal(false, HexaPDF::Object.deep_copy(false))
|
15
|
-
assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
|
16
|
-
end
|
17
|
-
|
18
9
|
it "handles general, duplicatable classes" do
|
19
10
|
x = "test"
|
20
11
|
assert_equal("test", HexaPDF::Object.deep_copy(x))
|
@@ -103,30 +94,57 @@ describe HexaPDF::Object do
|
|
103
94
|
end
|
104
95
|
|
105
96
|
describe "validate" do
|
106
|
-
|
107
|
-
obj = HexaPDF::Object.new(5)
|
108
|
-
|
109
|
-
|
110
|
-
|
97
|
+
before do
|
98
|
+
@obj = HexaPDF::Object.new(5)
|
99
|
+
end
|
100
|
+
|
101
|
+
it "invokes perform_validation correctly" do
|
102
|
+
invoked = false
|
103
|
+
@obj.define_singleton_method(:perform_validation) { invoked = true }
|
104
|
+
assert(@obj.validate)
|
105
|
+
assert(invoked)
|
106
|
+
end
|
107
|
+
|
108
|
+
it "yields all arguments yieled by perform_validation" do
|
109
|
+
invoked = []
|
110
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
111
|
+
block.call("error", true, :object)
|
112
|
+
end
|
113
|
+
assert(@obj.validate {|*a| invoked << a })
|
114
|
+
assert_equal([["error", true, :object]], invoked)
|
115
|
+
end
|
116
|
+
|
117
|
+
it "provides self as third argument if none is yielded by perform_validation" do
|
118
|
+
invoked = []
|
119
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
111
120
|
block.call("error", true)
|
112
121
|
end
|
113
|
-
assert(obj.validate {|*a| invoked
|
114
|
-
assert_equal([
|
115
|
-
|
122
|
+
assert(@obj.validate {|*a| invoked << a })
|
123
|
+
assert_equal([["error", true, @obj]], invoked)
|
124
|
+
end
|
116
125
|
|
117
|
-
|
126
|
+
it "yields all problems when auto_correct is true" do
|
127
|
+
invoked = []
|
128
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
129
|
+
invoked << :before
|
130
|
+
block.call("error", false)
|
131
|
+
invoked << :after
|
132
|
+
block.call("error2", true)
|
133
|
+
invoked << :last
|
134
|
+
end
|
135
|
+
refute(@obj.validate)
|
136
|
+
assert_equal([:before, :after, :last], invoked)
|
118
137
|
end
|
119
138
|
|
120
|
-
it "stops
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
invoked[:before] = true
|
139
|
+
it "stops at the first uncorrectable problem if auto_correct is false" do
|
140
|
+
invoked = []
|
141
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
142
|
+
invoked << :before
|
125
143
|
block.call("error", false)
|
126
|
-
invoked
|
144
|
+
invoked << :after
|
127
145
|
end
|
128
|
-
refute(obj.validate
|
129
|
-
|
146
|
+
refute(@obj.validate(auto_correct: false))
|
147
|
+
assert_equal([:before], invoked)
|
130
148
|
end
|
131
149
|
end
|
132
150
|
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -8,6 +8,7 @@ require 'stringio'
|
|
8
8
|
describe HexaPDF::Parser do
|
9
9
|
before do
|
10
10
|
@document = HexaPDF::Document.new
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
12
13
|
|
13
14
|
create_parser(<<~EOF)
|
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
|
|
132
133
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
133
134
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
134
135
|
end
|
136
|
+
|
137
|
+
describe "with strict parsing" do
|
138
|
+
before do
|
139
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
140
|
+
end
|
141
|
+
|
142
|
+
it "fails if an empty indirect object is found" do
|
143
|
+
create_parser("1 0 obj\nendobj")
|
144
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
145
|
+
assert_match(/no indirect object value/i, exp.message)
|
146
|
+
end
|
147
|
+
|
148
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
149
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
150
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
151
|
+
assert_match(/not CR alone/, exp.message)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "fails if the stream length value is invalid" do
|
155
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
157
|
+
assert_match(/invalid stream length/i, exp.message)
|
158
|
+
end
|
159
|
+
|
160
|
+
it "fails if the keyword endobj is mangled" do
|
161
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
163
|
+
assert_match(/keyword endobj/, exp.message)
|
164
|
+
end
|
165
|
+
|
166
|
+
it "fails if the keyword endobj is missing" do
|
167
|
+
create_parser("1 0 obj\n<< >>")
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
169
|
+
assert_match(/keyword endobj/, exp.message)
|
170
|
+
end
|
171
|
+
|
172
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
173
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
175
|
+
assert_match(/keyword endobj/, exp.message)
|
176
|
+
end
|
177
|
+
end
|
135
178
|
end
|
136
179
|
|
137
180
|
describe "load_object" do
|
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
|
|
205
248
|
end
|
206
249
|
|
207
250
|
it "ignores garbage at the end of the file" do
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
251
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
209
252
|
assert_equal(5, @parser.startxref_offset)
|
210
253
|
end
|
211
254
|
|
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
|
|
215
258
|
end
|
216
259
|
|
217
260
|
it "finds the startxref anywhere in file" do
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
261
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
219
262
|
assert_equal(5, @parser.startxref_offset)
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
263
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
221
264
|
assert_equal(5, @parser.startxref_offset)
|
222
265
|
end
|
223
266
|
|
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
|
|
242
285
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
243
286
|
assert_match(/missing startxref/, exp.message)
|
244
287
|
end
|
288
|
+
|
289
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
290
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
291
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
292
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
293
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
294
|
+
end
|
245
295
|
end
|
246
296
|
|
247
297
|
describe "file_header_version" do
|
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
|
|
262
312
|
end
|
263
313
|
|
264
314
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
265
|
-
create_parser("junk" * 200
|
315
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
266
316
|
assert_equal('1.4', @parser.file_header_version)
|
267
317
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
268
318
|
end
|
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
|
|
318
368
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
319
369
|
end
|
320
370
|
|
371
|
+
it "fails if a sub section entry is mangled" do
|
372
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
373
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
374
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
375
|
+
end
|
376
|
+
|
321
377
|
it "fails if there is no trailer" do
|
322
378
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
323
379
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
@@ -329,6 +385,30 @@ describe HexaPDF::Parser do
|
|
329
385
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
330
386
|
assert_match(/dictionary/, exp.message)
|
331
387
|
end
|
388
|
+
|
389
|
+
describe "with strict parsing" do
|
390
|
+
before do
|
391
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
392
|
+
end
|
393
|
+
|
394
|
+
it "fails if xref type=n with offset=0" do
|
395
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
396
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
397
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
398
|
+
end
|
399
|
+
|
400
|
+
it " fails xref type=n with gen>65535" do
|
401
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
402
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
403
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
404
|
+
end
|
405
|
+
|
406
|
+
it "fails if trailing second whitespace is missing" do
|
407
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
408
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
409
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
410
|
+
end
|
411
|
+
end
|
332
412
|
end
|
333
413
|
|
334
414
|
describe "load_revision" do
|
@@ -348,75 +428,55 @@ describe HexaPDF::Parser do
|
|
348
428
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
349
429
|
assert_match(/not a cross-reference stream/, exp.message)
|
350
430
|
end
|
351
|
-
end
|
352
431
|
|
353
|
-
|
354
|
-
before do
|
432
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
355
433
|
@document.config['parser.on_correctable_error'] = proc { true }
|
434
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
435
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
436
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
437
|
+
assert_match(/entry for itself/, exp.message)
|
356
438
|
end
|
439
|
+
end
|
357
440
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
|
364
|
-
it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
|
365
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
366
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
368
|
-
end
|
369
|
-
|
370
|
-
it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
|
371
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
372
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
441
|
+
describe "reconstruct_revision" do
|
442
|
+
before do
|
443
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
444
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
374
445
|
end
|
375
446
|
|
376
|
-
it "
|
377
|
-
create_parser("
|
378
|
-
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
447
|
+
it "serially parses the contents" do
|
448
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
449
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
380
450
|
end
|
381
451
|
|
382
|
-
it "
|
383
|
-
create_parser("1 0 obj\nendobj")
|
384
|
-
|
385
|
-
assert_match(/no indirect object value/i, exp.message)
|
452
|
+
it "ignores parts where the starting line is split across lines" do
|
453
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
454
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
386
455
|
end
|
387
456
|
|
388
|
-
it "
|
389
|
-
create_parser("1
|
390
|
-
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
457
|
+
it "ignores invalid objects" do
|
458
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
459
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
392
460
|
end
|
393
461
|
|
394
|
-
it "
|
395
|
-
create_parser("1 0 obj
|
396
|
-
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
462
|
+
it "ignores invalid lines" do
|
463
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
464
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
398
465
|
end
|
399
466
|
|
400
|
-
it "
|
401
|
-
create_parser("1
|
402
|
-
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
467
|
+
it "uses the last trailer" do
|
468
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
469
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
407
470
|
end
|
408
471
|
|
409
|
-
it "
|
410
|
-
create_parser("1
|
411
|
-
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
472
|
+
it "uses the first trailer in case of a linearized file" do
|
473
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
474
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
413
475
|
end
|
414
476
|
|
415
|
-
it "
|
416
|
-
create_parser("
|
417
|
-
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
477
|
+
it "fails if no valid trailer is found" do
|
478
|
+
create_parser("1 0 obj\n5\nendobj")
|
479
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
420
480
|
end
|
421
481
|
end
|
422
482
|
end
|