hexapdf 0.12.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +126 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/encoding/base.rb +8 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +87 -3
- data/lib/hexapdf/pdf_array.rb +11 -4
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/encoding/test_base.rb +10 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +159 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
@@ -441,21 +441,21 @@ describe HexaPDF::Document do
|
|
441
441
|
|
442
442
|
describe "validate" do
|
443
443
|
before do
|
444
|
-
@doc.
|
444
|
+
@doc.validate # to create a valid document
|
445
445
|
end
|
446
446
|
|
447
447
|
it "validates indirect objects" do
|
448
|
-
obj = @doc.add({Type: :
|
448
|
+
obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
|
449
449
|
refute(@doc.validate(auto_correct: false))
|
450
450
|
|
451
451
|
called = false
|
452
|
-
assert(@doc.validate {|o| assert_same(obj, o); called = true })
|
452
|
+
assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
|
453
453
|
assert(called)
|
454
454
|
end
|
455
455
|
|
456
456
|
it "validates the trailer object" do
|
457
457
|
@doc.trailer[:ID] = :Symbol
|
458
|
-
refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
|
458
|
+
refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
|
459
459
|
end
|
460
460
|
|
461
461
|
it "validates only loaded objects" do
|
@@ -609,16 +609,24 @@ describe HexaPDF::Document do
|
|
609
609
|
|
610
610
|
describe "caching interface" do
|
611
611
|
it "allows setting and retrieving values" do
|
612
|
-
assert_equal(:test, @doc.cache(:a, :b, :test))
|
613
|
-
assert_equal(:test, @doc.cache(:a, :b
|
614
|
-
assert_equal(:
|
612
|
+
assert_equal(:test, @doc.cache(:a, :b, :test) { :notused })
|
613
|
+
assert_equal(:test, @doc.cache(:a, :b) { :other })
|
614
|
+
assert_equal(:test, @doc.cache(:a, :b))
|
615
|
+
assert_nil(@doc.cache(:a, :c, nil))
|
616
|
+
assert_nil(@doc.cache(:a, :c) { :other })
|
617
|
+
assert_nil(@doc.cache(:a, :c))
|
615
618
|
assert(@doc.cached?(:a, :b))
|
616
619
|
assert(@doc.cached?(:a, :c))
|
617
620
|
end
|
618
621
|
|
622
|
+
it "allows updating a value" do
|
623
|
+
@doc.cache(:a, :b) { :test }
|
624
|
+
assert_equal(:new, @doc.cache(:a, :b, update: true) { :new })
|
625
|
+
end
|
626
|
+
|
619
627
|
it "allows clearing cached values" do
|
620
|
-
@doc.cache(:a, :b
|
621
|
-
@doc.cache(:b, :c
|
628
|
+
@doc.cache(:a, :b) { :c }
|
629
|
+
@doc.cache(:b, :c) { :d }
|
622
630
|
@doc.clear_cache(:a)
|
623
631
|
refute(@doc.cached?(:a, :b))
|
624
632
|
assert(@doc.cached?(:b, :c))
|
@@ -626,7 +634,7 @@ describe HexaPDF::Document do
|
|
626
634
|
refute(@doc.cached?(:a, :c))
|
627
635
|
end
|
628
636
|
|
629
|
-
it "fails if no cached value exists and
|
637
|
+
it "fails if no cached value exists and no block is given" do
|
630
638
|
assert_raises(LocalJumpError) { @doc.cache(:a, :b) }
|
631
639
|
end
|
632
640
|
end
|
data/test/hexapdf/test_object.rb
CHANGED
@@ -3,18 +3,10 @@
|
|
3
3
|
require 'test_helper'
|
4
4
|
require 'hexapdf/object'
|
5
5
|
require 'hexapdf/reference'
|
6
|
+
require 'hexapdf/document'
|
6
7
|
|
7
8
|
describe HexaPDF::Object do
|
8
9
|
describe "class.deep_copy" do
|
9
|
-
it "handles not-duplicatable classes" do
|
10
|
-
assert_equal(5, HexaPDF::Object.deep_copy(5))
|
11
|
-
assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
|
12
|
-
assert_nil(HexaPDF::Object.deep_copy(nil))
|
13
|
-
assert_equal(true, HexaPDF::Object.deep_copy(true))
|
14
|
-
assert_equal(false, HexaPDF::Object.deep_copy(false))
|
15
|
-
assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
|
16
|
-
end
|
17
|
-
|
18
10
|
it "handles general, duplicatable classes" do
|
19
11
|
x = "test"
|
20
12
|
assert_equal("test", HexaPDF::Object.deep_copy(x))
|
@@ -103,30 +95,57 @@ describe HexaPDF::Object do
|
|
103
95
|
end
|
104
96
|
|
105
97
|
describe "validate" do
|
106
|
-
|
107
|
-
obj = HexaPDF::Object.new(5)
|
108
|
-
|
109
|
-
|
110
|
-
|
98
|
+
before do
|
99
|
+
@obj = HexaPDF::Object.new(5)
|
100
|
+
end
|
101
|
+
|
102
|
+
it "invokes perform_validation correctly" do
|
103
|
+
invoked = false
|
104
|
+
@obj.define_singleton_method(:perform_validation) { invoked = true }
|
105
|
+
assert(@obj.validate)
|
106
|
+
assert(invoked)
|
107
|
+
end
|
108
|
+
|
109
|
+
it "yields all arguments yieled by perform_validation" do
|
110
|
+
invoked = []
|
111
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
112
|
+
block.call("error", true, :object)
|
113
|
+
end
|
114
|
+
assert(@obj.validate {|*a| invoked << a })
|
115
|
+
assert_equal([["error", true, :object]], invoked)
|
116
|
+
end
|
117
|
+
|
118
|
+
it "provides self as third argument if none is yielded by perform_validation" do
|
119
|
+
invoked = []
|
120
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
111
121
|
block.call("error", true)
|
112
122
|
end
|
113
|
-
assert(obj.validate {|*a| invoked
|
114
|
-
assert_equal([
|
115
|
-
|
123
|
+
assert(@obj.validate {|*a| invoked << a })
|
124
|
+
assert_equal([["error", true, @obj]], invoked)
|
125
|
+
end
|
116
126
|
|
117
|
-
|
127
|
+
it "yields all problems when auto_correct is true" do
|
128
|
+
invoked = []
|
129
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
130
|
+
invoked << :before
|
131
|
+
block.call("error", false)
|
132
|
+
invoked << :after
|
133
|
+
block.call("error2", true)
|
134
|
+
invoked << :last
|
135
|
+
end
|
136
|
+
refute(@obj.validate)
|
137
|
+
assert_equal([:before, :after, :last], invoked)
|
118
138
|
end
|
119
139
|
|
120
|
-
it "stops
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
invoked[:before] = true
|
140
|
+
it "stops at the first uncorrectable problem if auto_correct is false" do
|
141
|
+
invoked = []
|
142
|
+
@obj.define_singleton_method(:perform_validation) do |&block|
|
143
|
+
invoked << :before
|
125
144
|
block.call("error", false)
|
126
|
-
invoked
|
145
|
+
invoked << :after
|
127
146
|
end
|
128
|
-
refute(obj.validate
|
129
|
-
|
147
|
+
refute(@obj.validate(auto_correct: false))
|
148
|
+
assert_equal([:before], invoked)
|
130
149
|
end
|
131
150
|
end
|
132
151
|
|
@@ -181,6 +200,32 @@ describe HexaPDF::Object do
|
|
181
200
|
end
|
182
201
|
end
|
183
202
|
|
203
|
+
describe "caching" do
|
204
|
+
before do
|
205
|
+
@obj = HexaPDF::Object.new({}, document: HexaPDF::Document.new)
|
206
|
+
end
|
207
|
+
|
208
|
+
it "can set and return a cached value" do
|
209
|
+
assert_equal(:value, @obj.cache(:data, :value))
|
210
|
+
assert_equal(:value, @obj.cache(:data, :other))
|
211
|
+
assert_equal(:value, @obj.cache(:block) { :value })
|
212
|
+
assert_equal(:other, @obj.cache(:data, :other, update: true))
|
213
|
+
end
|
214
|
+
|
215
|
+
it "can check for the existence of a cached value" do
|
216
|
+
refute(@obj.cached?(:data))
|
217
|
+
@obj.cache(:data, :value)
|
218
|
+
assert(@obj.cached?(:data))
|
219
|
+
end
|
220
|
+
|
221
|
+
it "can clear all cached values" do
|
222
|
+
@obj.cache(:data, :value)
|
223
|
+
assert(@obj.cached?(:data))
|
224
|
+
@obj.clear_cache
|
225
|
+
refute(@obj.cached?(:data))
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
184
229
|
describe "validation" do
|
185
230
|
before do
|
186
231
|
@doc = Object.new
|
data/test/hexapdf/test_parser.rb
CHANGED
@@ -8,6 +8,7 @@ require 'stringio'
|
|
8
8
|
describe HexaPDF::Parser do
|
9
9
|
before do
|
10
10
|
@document = HexaPDF::Document.new
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
12
13
|
|
13
14
|
create_parser(<<~EOF)
|
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
|
|
132
133
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
133
134
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
134
135
|
end
|
136
|
+
|
137
|
+
describe "with strict parsing" do
|
138
|
+
before do
|
139
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
140
|
+
end
|
141
|
+
|
142
|
+
it "fails if an empty indirect object is found" do
|
143
|
+
create_parser("1 0 obj\nendobj")
|
144
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
145
|
+
assert_match(/no indirect object value/i, exp.message)
|
146
|
+
end
|
147
|
+
|
148
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
149
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
150
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
151
|
+
assert_match(/not CR alone/, exp.message)
|
152
|
+
end
|
153
|
+
|
154
|
+
it "fails if the stream length value is invalid" do
|
155
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
157
|
+
assert_match(/invalid stream length/i, exp.message)
|
158
|
+
end
|
159
|
+
|
160
|
+
it "fails if the keyword endobj is mangled" do
|
161
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
163
|
+
assert_match(/keyword endobj/, exp.message)
|
164
|
+
end
|
165
|
+
|
166
|
+
it "fails if the keyword endobj is missing" do
|
167
|
+
create_parser("1 0 obj\n<< >>")
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
169
|
+
assert_match(/keyword endobj/, exp.message)
|
170
|
+
end
|
171
|
+
|
172
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
173
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
175
|
+
assert_match(/keyword endobj/, exp.message)
|
176
|
+
end
|
177
|
+
end
|
135
178
|
end
|
136
179
|
|
137
180
|
describe "load_object" do
|
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
|
|
205
248
|
end
|
206
249
|
|
207
250
|
it "ignores garbage at the end of the file" do
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
251
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
209
252
|
assert_equal(5, @parser.startxref_offset)
|
210
253
|
end
|
211
254
|
|
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
|
|
215
258
|
end
|
216
259
|
|
217
260
|
it "finds the startxref anywhere in file" do
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
261
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
219
262
|
assert_equal(5, @parser.startxref_offset)
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
263
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
221
264
|
assert_equal(5, @parser.startxref_offset)
|
222
265
|
end
|
223
266
|
|
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
|
|
242
285
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
243
286
|
assert_match(/missing startxref/, exp.message)
|
244
287
|
end
|
288
|
+
|
289
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
290
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
291
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
292
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
293
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
294
|
+
end
|
245
295
|
end
|
246
296
|
|
247
297
|
describe "file_header_version" do
|
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
|
|
262
312
|
end
|
263
313
|
|
264
314
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
265
|
-
create_parser("junk" * 200
|
315
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
266
316
|
assert_equal('1.4', @parser.file_header_version)
|
267
317
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
268
318
|
end
|
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
|
|
318
368
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
319
369
|
end
|
320
370
|
|
371
|
+
it "fails if a sub section entry is mangled" do
|
372
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
373
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
374
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
375
|
+
end
|
376
|
+
|
321
377
|
it "fails if there is no trailer" do
|
322
378
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
323
379
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
@@ -329,6 +385,71 @@ describe HexaPDF::Parser do
|
|
329
385
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
330
386
|
assert_match(/dictionary/, exp.message)
|
331
387
|
end
|
388
|
+
|
389
|
+
describe "invalid numbering of main xref section" do
|
390
|
+
it "handles the xref if the numbering is off by N" do
|
391
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
392
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
393
|
+
section, _trailer = @parser.parse_xref_section_and_trailer(17)
|
394
|
+
assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
|
395
|
+
end
|
396
|
+
|
397
|
+
it "fails if the first entry is not the one for oid=0" do
|
398
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
399
|
+
"xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
400
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
401
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
402
|
+
|
403
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
404
|
+
"xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
|
405
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
406
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
407
|
+
end
|
408
|
+
|
409
|
+
it "fails if the tested entry position is invalid" do
|
410
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
411
|
+
"xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
|
412
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
413
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
414
|
+
end
|
415
|
+
|
416
|
+
it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
|
417
|
+
create_parser(" 2 0 obj 1 endobj\n" \
|
418
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
419
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
420
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
421
|
+
end
|
422
|
+
end
|
423
|
+
|
424
|
+
describe "with strict parsing" do
|
425
|
+
before do
|
426
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
427
|
+
end
|
428
|
+
|
429
|
+
it "fails if xref type=n with offset=0" do
|
430
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
431
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
432
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
433
|
+
end
|
434
|
+
|
435
|
+
it " fails xref type=n with gen>65535" do
|
436
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
437
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
438
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
439
|
+
end
|
440
|
+
|
441
|
+
it "fails if trailing second whitespace is missing" do
|
442
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
443
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
444
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
445
|
+
end
|
446
|
+
|
447
|
+
it "fails if the main cross-reference section has invalid numbering" do
|
448
|
+
create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
|
449
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
450
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
451
|
+
end
|
452
|
+
end
|
332
453
|
end
|
333
454
|
|
334
455
|
describe "load_revision" do
|
@@ -348,75 +469,60 @@ describe HexaPDF::Parser do
|
|
348
469
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
349
470
|
assert_match(/not a cross-reference stream/, exp.message)
|
350
471
|
end
|
351
|
-
end
|
352
472
|
|
353
|
-
|
354
|
-
before do
|
473
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
355
474
|
@document.config['parser.on_correctable_error'] = proc { true }
|
475
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
476
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
477
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
478
|
+
assert_match(/entry for itself/, exp.message)
|
356
479
|
end
|
480
|
+
end
|
357
481
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
end
|
363
|
-
|
364
|
-
it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
|
365
|
-
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
366
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
482
|
+
describe "reconstruct_revision" do
|
483
|
+
before do
|
484
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
485
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
368
486
|
end
|
369
487
|
|
370
|
-
it "
|
371
|
-
create_parser("
|
372
|
-
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
488
|
+
it "serially parses the contents" do
|
489
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
490
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
374
491
|
end
|
375
492
|
|
376
|
-
it "
|
377
|
-
create_parser("
|
378
|
-
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
493
|
+
it "ignores parts where the starting line is split across lines" do
|
494
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
495
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
380
496
|
end
|
381
497
|
|
382
|
-
it "
|
383
|
-
create_parser("1 0 obj\nendobj")
|
384
|
-
|
385
|
-
assert_match(/no indirect object value/i, exp.message)
|
498
|
+
it "handles cases where the line contains an invalid string that exceeds the read buffer" do
|
499
|
+
create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
500
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
386
501
|
end
|
387
502
|
|
388
|
-
it "
|
389
|
-
create_parser("1
|
390
|
-
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
503
|
+
it "ignores invalid objects" do
|
504
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
505
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
392
506
|
end
|
393
507
|
|
394
|
-
it "
|
395
|
-
create_parser("1 0 obj
|
396
|
-
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
508
|
+
it "ignores invalid lines" do
|
509
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
510
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
398
511
|
end
|
399
512
|
|
400
|
-
it "
|
401
|
-
create_parser("1
|
402
|
-
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
513
|
+
it "uses the last trailer" do
|
514
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
515
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
407
516
|
end
|
408
517
|
|
409
|
-
it "
|
410
|
-
create_parser("1
|
411
|
-
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
518
|
+
it "uses the first trailer in case of a linearized file" do
|
519
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
520
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
413
521
|
end
|
414
522
|
|
415
|
-
it "
|
416
|
-
create_parser("
|
417
|
-
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
523
|
+
it "fails if no valid trailer is found" do
|
524
|
+
create_parser("1 0 obj\n5\nendobj")
|
525
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
420
526
|
end
|
421
527
|
end
|
422
528
|
end
|