hexapdf 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +126 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/encoding/base.rb +8 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +87 -3
- data/lib/hexapdf/pdf_array.rb +11 -4
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/encoding/test_base.rb +10 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +159 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
|
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
|
|
|
441
441
|
|
|
442
442
|
describe "validate" do
|
|
443
443
|
before do
|
|
444
|
-
@doc.
|
|
444
|
+
@doc.validate # to create a valid document
|
|
445
445
|
end
|
|
446
446
|
|
|
447
447
|
it "validates indirect objects" do
|
|
448
|
-
obj = @doc.add({Type: :
|
|
448
|
+
obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
|
|
449
449
|
refute(@doc.validate(auto_correct: false))
|
|
450
450
|
|
|
451
451
|
called = false
|
|
452
|
-
assert(@doc.validate {|o| assert_same(obj, o); called = true })
|
|
452
|
+
assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
|
|
453
453
|
assert(called)
|
|
454
454
|
end
|
|
455
455
|
|
|
456
456
|
it "validates the trailer object" do
|
|
457
457
|
@doc.trailer[:ID] = :Symbol
|
|
458
|
-
refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
|
|
458
|
+
refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
|
|
459
459
|
end
|
|
460
460
|
|
|
461
461
|
it "validates only loaded objects" do
|
|
@@ -609,16 +609,24 @@ describe HexaPDF::Document do
|
|
|
609
609
|
|
|
610
610
|
describe "caching interface" do
|
|
611
611
|
it "allows setting and retrieving values" do
|
|
612
|
-
assert_equal(:test, @doc.cache(:a, :b, :test))
|
|
613
|
-
assert_equal(:test, @doc.cache(:a, :b
|
|
614
|
-
assert_equal(:
|
|
612
|
+
assert_equal(:test, @doc.cache(:a, :b, :test) { :notused })
|
|
613
|
+
assert_equal(:test, @doc.cache(:a, :b) { :other })
|
|
614
|
+
assert_equal(:test, @doc.cache(:a, :b))
|
|
615
|
+
assert_nil(@doc.cache(:a, :c, nil))
|
|
616
|
+
assert_nil(@doc.cache(:a, :c) { :other })
|
|
617
|
+
assert_nil(@doc.cache(:a, :c))
|
|
615
618
|
assert(@doc.cached?(:a, :b))
|
|
616
619
|
assert(@doc.cached?(:a, :c))
|
|
617
620
|
end
|
|
618
621
|
|
|
622
|
+
it "allows updating a value" do
|
|
623
|
+
@doc.cache(:a, :b) { :test }
|
|
624
|
+
assert_equal(:new, @doc.cache(:a, :b, update: true) { :new })
|
|
625
|
+
end
|
|
626
|
+
|
|
619
627
|
it "allows clearing cached values" do
|
|
620
|
-
@doc.cache(:a, :b
|
|
621
|
-
@doc.cache(:b, :c
|
|
628
|
+
@doc.cache(:a, :b) { :c }
|
|
629
|
+
@doc.cache(:b, :c) { :d }
|
|
622
630
|
@doc.clear_cache(:a)
|
|
623
631
|
refute(@doc.cached?(:a, :b))
|
|
624
632
|
assert(@doc.cached?(:b, :c))
|
|
@@ -626,7 +634,7 @@ describe HexaPDF::Document do
|
|
|
626
634
|
refute(@doc.cached?(:a, :c))
|
|
627
635
|
end
|
|
628
636
|
|
|
629
|
-
it "fails if no cached value exists and
|
|
637
|
+
it "fails if no cached value exists and no block is given" do
|
|
630
638
|
assert_raises(LocalJumpError) { @doc.cache(:a, :b) }
|
|
631
639
|
end
|
|
632
640
|
end
|
data/test/hexapdf/test_object.rb
CHANGED
|
@@ -3,18 +3,10 @@
|
|
|
3
3
|
require 'test_helper'
|
|
4
4
|
require 'hexapdf/object'
|
|
5
5
|
require 'hexapdf/reference'
|
|
6
|
+
require 'hexapdf/document'
|
|
6
7
|
|
|
7
8
|
describe HexaPDF::Object do
|
|
8
9
|
describe "class.deep_copy" do
|
|
9
|
-
it "handles not-duplicatable classes" do
|
|
10
|
-
assert_equal(5, HexaPDF::Object.deep_copy(5))
|
|
11
|
-
assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
|
|
12
|
-
assert_nil(HexaPDF::Object.deep_copy(nil))
|
|
13
|
-
assert_equal(true, HexaPDF::Object.deep_copy(true))
|
|
14
|
-
assert_equal(false, HexaPDF::Object.deep_copy(false))
|
|
15
|
-
assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
|
|
16
|
-
end
|
|
17
|
-
|
|
18
10
|
it "handles general, duplicatable classes" do
|
|
19
11
|
x = "test"
|
|
20
12
|
assert_equal("test", HexaPDF::Object.deep_copy(x))
|
|
@@ -103,30 +95,57 @@ describe HexaPDF::Object do
|
|
|
103
95
|
end
|
|
104
96
|
|
|
105
97
|
describe "validate" do
|
|
106
|
-
|
|
107
|
-
obj = HexaPDF::Object.new(5)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
98
|
+
before do
|
|
99
|
+
@obj = HexaPDF::Object.new(5)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it "invokes perform_validation correctly" do
|
|
103
|
+
invoked = false
|
|
104
|
+
@obj.define_singleton_method(:perform_validation) { invoked = true }
|
|
105
|
+
assert(@obj.validate)
|
|
106
|
+
assert(invoked)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "yields all arguments yieled by perform_validation" do
|
|
110
|
+
invoked = []
|
|
111
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
|
112
|
+
block.call("error", true, :object)
|
|
113
|
+
end
|
|
114
|
+
assert(@obj.validate {|*a| invoked << a })
|
|
115
|
+
assert_equal([["error", true, :object]], invoked)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "provides self as third argument if none is yielded by perform_validation" do
|
|
119
|
+
invoked = []
|
|
120
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
|
111
121
|
block.call("error", true)
|
|
112
122
|
end
|
|
113
|
-
assert(obj.validate {|*a| invoked
|
|
114
|
-
assert_equal([
|
|
115
|
-
|
|
123
|
+
assert(@obj.validate {|*a| invoked << a })
|
|
124
|
+
assert_equal([["error", true, @obj]], invoked)
|
|
125
|
+
end
|
|
116
126
|
|
|
117
|
-
|
|
127
|
+
it "yields all problems when auto_correct is true" do
|
|
128
|
+
invoked = []
|
|
129
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
|
130
|
+
invoked << :before
|
|
131
|
+
block.call("error", false)
|
|
132
|
+
invoked << :after
|
|
133
|
+
block.call("error2", true)
|
|
134
|
+
invoked << :last
|
|
135
|
+
end
|
|
136
|
+
refute(@obj.validate)
|
|
137
|
+
assert_equal([:before, :after, :last], invoked)
|
|
118
138
|
end
|
|
119
139
|
|
|
120
|
-
it "stops
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
invoked[:before] = true
|
|
140
|
+
it "stops at the first uncorrectable problem if auto_correct is false" do
|
|
141
|
+
invoked = []
|
|
142
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
|
143
|
+
invoked << :before
|
|
125
144
|
block.call("error", false)
|
|
126
|
-
invoked
|
|
145
|
+
invoked << :after
|
|
127
146
|
end
|
|
128
|
-
refute(obj.validate
|
|
129
|
-
|
|
147
|
+
refute(@obj.validate(auto_correct: false))
|
|
148
|
+
assert_equal([:before], invoked)
|
|
130
149
|
end
|
|
131
150
|
end
|
|
132
151
|
|
|
@@ -181,6 +200,32 @@ describe HexaPDF::Object do
|
|
|
181
200
|
end
|
|
182
201
|
end
|
|
183
202
|
|
|
203
|
+
describe "caching" do
|
|
204
|
+
before do
|
|
205
|
+
@obj = HexaPDF::Object.new({}, document: HexaPDF::Document.new)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
it "can set and return a cached value" do
|
|
209
|
+
assert_equal(:value, @obj.cache(:data, :value))
|
|
210
|
+
assert_equal(:value, @obj.cache(:data, :other))
|
|
211
|
+
assert_equal(:value, @obj.cache(:block) { :value })
|
|
212
|
+
assert_equal(:other, @obj.cache(:data, :other, update: true))
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
it "can check for the existence of a cached value" do
|
|
216
|
+
refute(@obj.cached?(:data))
|
|
217
|
+
@obj.cache(:data, :value)
|
|
218
|
+
assert(@obj.cached?(:data))
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
it "can clear all cached values" do
|
|
222
|
+
@obj.cache(:data, :value)
|
|
223
|
+
assert(@obj.cached?(:data))
|
|
224
|
+
@obj.clear_cache
|
|
225
|
+
refute(@obj.cached?(:data))
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
|
|
184
229
|
describe "validation" do
|
|
185
230
|
before do
|
|
186
231
|
@doc = Object.new
|
data/test/hexapdf/test_parser.rb
CHANGED
|
@@ -8,6 +8,7 @@ require 'stringio'
|
|
|
8
8
|
describe HexaPDF::Parser do
|
|
9
9
|
before do
|
|
10
10
|
@document = HexaPDF::Document.new
|
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
|
12
13
|
|
|
13
14
|
create_parser(<<~EOF)
|
|
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
|
|
|
132
133
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
|
133
134
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
|
134
135
|
end
|
|
136
|
+
|
|
137
|
+
describe "with strict parsing" do
|
|
138
|
+
before do
|
|
139
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it "fails if an empty indirect object is found" do
|
|
143
|
+
create_parser("1 0 obj\nendobj")
|
|
144
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
145
|
+
assert_match(/no indirect object value/i, exp.message)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
|
149
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
|
150
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
151
|
+
assert_match(/not CR alone/, exp.message)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
it "fails if the stream length value is invalid" do
|
|
155
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
157
|
+
assert_match(/invalid stream length/i, exp.message)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it "fails if the keyword endobj is mangled" do
|
|
161
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
163
|
+
assert_match(/keyword endobj/, exp.message)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it "fails if the keyword endobj is missing" do
|
|
167
|
+
create_parser("1 0 obj\n<< >>")
|
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
169
|
+
assert_match(/keyword endobj/, exp.message)
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
|
173
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
|
175
|
+
assert_match(/keyword endobj/, exp.message)
|
|
176
|
+
end
|
|
177
|
+
end
|
|
135
178
|
end
|
|
136
179
|
|
|
137
180
|
describe "load_object" do
|
|
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
|
|
|
205
248
|
end
|
|
206
249
|
|
|
207
250
|
it "ignores garbage at the end of the file" do
|
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
|
251
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
|
209
252
|
assert_equal(5, @parser.startxref_offset)
|
|
210
253
|
end
|
|
211
254
|
|
|
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
|
|
|
215
258
|
end
|
|
216
259
|
|
|
217
260
|
it "finds the startxref anywhere in file" do
|
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
|
261
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
|
219
262
|
assert_equal(5, @parser.startxref_offset)
|
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
|
263
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
|
221
264
|
assert_equal(5, @parser.startxref_offset)
|
|
222
265
|
end
|
|
223
266
|
|
|
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
|
|
|
242
285
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
|
243
286
|
assert_match(/missing startxref/, exp.message)
|
|
244
287
|
end
|
|
288
|
+
|
|
289
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
|
290
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
291
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
|
292
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
|
293
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
|
294
|
+
end
|
|
245
295
|
end
|
|
246
296
|
|
|
247
297
|
describe "file_header_version" do
|
|
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
|
|
|
262
312
|
end
|
|
263
313
|
|
|
264
314
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
|
265
|
-
create_parser("junk" * 200
|
|
315
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
|
266
316
|
assert_equal('1.4', @parser.file_header_version)
|
|
267
317
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
|
268
318
|
end
|
|
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
|
|
|
318
368
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
|
319
369
|
end
|
|
320
370
|
|
|
371
|
+
it "fails if a sub section entry is mangled" do
|
|
372
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
|
373
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
374
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
|
375
|
+
end
|
|
376
|
+
|
|
321
377
|
it "fails if there is no trailer" do
|
|
322
378
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
|
323
379
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
@@ -329,6 +385,71 @@ describe HexaPDF::Parser do
|
|
|
329
385
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
330
386
|
assert_match(/dictionary/, exp.message)
|
|
331
387
|
end
|
|
388
|
+
|
|
389
|
+
describe "invalid numbering of main xref section" do
|
|
390
|
+
it "handles the xref if the numbering is off by N" do
|
|
391
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
392
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
393
|
+
section, _trailer = @parser.parse_xref_section_and_trailer(17)
|
|
394
|
+
assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it "fails if the first entry is not the one for oid=0" do
|
|
398
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
399
|
+
"xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
400
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
401
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
402
|
+
|
|
403
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
404
|
+
"xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
405
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
406
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
it "fails if the tested entry position is invalid" do
|
|
410
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
411
|
+
"xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
|
|
412
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
413
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
414
|
+
end
|
|
415
|
+
|
|
416
|
+
it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
|
|
417
|
+
create_parser(" 2 0 obj 1 endobj\n" \
|
|
418
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
419
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
420
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
describe "with strict parsing" do
|
|
425
|
+
before do
|
|
426
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
it "fails if xref type=n with offset=0" do
|
|
430
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
|
431
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
432
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
it " fails xref type=n with gen>65535" do
|
|
436
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
|
437
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
438
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
it "fails if trailing second whitespace is missing" do
|
|
442
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
|
443
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
444
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
it "fails if the main cross-reference section has invalid numbering" do
|
|
448
|
+
create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
449
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
450
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
451
|
+
end
|
|
452
|
+
end
|
|
332
453
|
end
|
|
333
454
|
|
|
334
455
|
describe "load_revision" do
|
|
@@ -348,75 +469,60 @@ describe HexaPDF::Parser do
|
|
|
348
469
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
|
349
470
|
assert_match(/not a cross-reference stream/, exp.message)
|
|
350
471
|
end
|
|
351
|
-
end
|
|
352
472
|
|
|
353
|
-
|
|
354
|
-
before do
|
|
473
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
|
355
474
|
@document.config['parser.on_correctable_error'] = proc { true }
|
|
475
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
|
476
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
|
477
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
|
478
|
+
assert_match(/entry for itself/, exp.message)
|
|
356
479
|
end
|
|
480
|
+
end
|
|
357
481
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
|
|
365
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
|
366
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
482
|
+
describe "reconstruct_revision" do
|
|
483
|
+
before do
|
|
484
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
|
485
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
|
368
486
|
end
|
|
369
487
|
|
|
370
|
-
it "
|
|
371
|
-
create_parser("
|
|
372
|
-
|
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
488
|
+
it "serially parses the contents" do
|
|
489
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
490
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
374
491
|
end
|
|
375
492
|
|
|
376
|
-
it "
|
|
377
|
-
create_parser("
|
|
378
|
-
|
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
|
493
|
+
it "ignores parts where the starting line is split across lines" do
|
|
494
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
495
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
|
380
496
|
end
|
|
381
497
|
|
|
382
|
-
it "
|
|
383
|
-
create_parser("1 0 obj\nendobj")
|
|
384
|
-
|
|
385
|
-
assert_match(/no indirect object value/i, exp.message)
|
|
498
|
+
it "handles cases where the line contains an invalid string that exceeds the read buffer" do
|
|
499
|
+
create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
500
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
386
501
|
end
|
|
387
502
|
|
|
388
|
-
it "
|
|
389
|
-
create_parser("1
|
|
390
|
-
|
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
|
503
|
+
it "ignores invalid objects" do
|
|
504
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
|
505
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
|
392
506
|
end
|
|
393
507
|
|
|
394
|
-
it "
|
|
395
|
-
create_parser("1 0 obj
|
|
396
|
-
|
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
|
508
|
+
it "ignores invalid lines" do
|
|
509
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
510
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
398
511
|
end
|
|
399
512
|
|
|
400
|
-
it "
|
|
401
|
-
create_parser("1
|
|
402
|
-
|
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
|
513
|
+
it "uses the last trailer" do
|
|
514
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
|
515
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
|
407
516
|
end
|
|
408
517
|
|
|
409
|
-
it "
|
|
410
|
-
create_parser("1
|
|
411
|
-
|
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
|
518
|
+
it "uses the first trailer in case of a linearized file" do
|
|
519
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
|
520
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
|
413
521
|
end
|
|
414
522
|
|
|
415
|
-
it "
|
|
416
|
-
create_parser("
|
|
417
|
-
|
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
|
523
|
+
it "fails if no valid trailer is found" do
|
|
524
|
+
create_parser("1 0 obj\n5\nendobj")
|
|
525
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
|
420
526
|
end
|
|
421
527
|
end
|
|
422
528
|
end
|