hexapdf 0.12.3 → 0.14.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +132 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +12 -6
- data/lib/hexapdf/dictionary_fields.rb +2 -10
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +107 -7
- data/lib/hexapdf/pdf_array.rb +15 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +37 -10
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +22 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
- data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +6 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +21 -12
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +5 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +9 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +205 -51
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_serializer.rb +7 -0
- data/test/hexapdf/test_tokenizer.rb +28 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
- data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +8 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -12
data/test/hexapdf/test_parser.rb
CHANGED
@@ -8,6 +8,7 @@ require 'stringio'
|
|
8
8
|
describe HexaPDF::Parser do
|
9
9
|
before do
|
10
10
|
@document = HexaPDF::Document.new
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
12
13
|
|
13
14
|
create_parser(<<~EOF)
|
@@ -87,6 +88,18 @@ describe HexaPDF::Parser do
|
|
87
88
|
assert_equal('12', TestHelper.collector(stream.fiber))
|
88
89
|
end
|
89
90
|
|
91
|
+
it "handles keyword stream followed by space and CR or LF" do
|
92
|
+
create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
|
93
|
+
*, stream = @parser.parse_indirect_object
|
94
|
+
assert_equal('12', TestHelper.collector(stream.fiber))
|
95
|
+
end
|
96
|
+
|
97
|
+
it "handles invalid indirect object value consisting of number followed by endobj without space" do
|
98
|
+
create_parser("1 0 obj 749endobj")
|
99
|
+
object, * = @parser.parse_indirect_object
|
100
|
+
assert_equal(749, object)
|
101
|
+
end
|
102
|
+
|
90
103
|
it "recovers from an invalid stream length value" do
|
91
104
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
92
105
|
obj, _, _, stream = @parser.parse_indirect_object
|
@@ -132,6 +145,60 @@ describe HexaPDF::Parser do
|
|
132
145
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
133
146
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
134
147
|
end
|
148
|
+
|
149
|
+
describe "with strict parsing" do
|
150
|
+
before do
|
151
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
152
|
+
end
|
153
|
+
|
154
|
+
it "fails if an empty indirect object is found" do
|
155
|
+
create_parser("1 0 obj\nendobj")
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
157
|
+
assert_match(/no indirect object value/i, exp.message)
|
158
|
+
end
|
159
|
+
|
160
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
161
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
163
|
+
assert_match(/not CR alone/, exp.message)
|
164
|
+
end
|
165
|
+
|
166
|
+
it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
|
167
|
+
create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
169
|
+
assert_match(/must be followed by LF or CR\/LF/, exp.message)
|
170
|
+
end
|
171
|
+
|
172
|
+
it "fails for numbers followed by endobj without space" do
|
173
|
+
create_parser("1 0 obj 749endobj")
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
175
|
+
assert_match(/Invalid object value after 'obj'/, exp.message)
|
176
|
+
end
|
177
|
+
|
178
|
+
it "fails if the stream length value is invalid" do
|
179
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
180
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
181
|
+
assert_match(/invalid stream length/i, exp.message)
|
182
|
+
end
|
183
|
+
|
184
|
+
it "fails if the keyword endobj is mangled" do
|
185
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
186
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
187
|
+
assert_match(/keyword endobj/, exp.message)
|
188
|
+
end
|
189
|
+
|
190
|
+
it "fails if the keyword endobj is missing" do
|
191
|
+
create_parser("1 0 obj\n<< >>")
|
192
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
193
|
+
assert_match(/keyword endobj/, exp.message)
|
194
|
+
end
|
195
|
+
|
196
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
197
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
198
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
199
|
+
assert_match(/keyword endobj/, exp.message)
|
200
|
+
end
|
201
|
+
end
|
135
202
|
end
|
136
203
|
|
137
204
|
describe "load_object" do
|
@@ -167,6 +234,23 @@ describe HexaPDF::Parser do
|
|
167
234
|
assert_equal([1, 2], obj.value)
|
168
235
|
end
|
169
236
|
|
237
|
+
it "handles an invalid indirect object offset of 0" do
|
238
|
+
obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
|
239
|
+
assert(obj.null?)
|
240
|
+
assert_equal(2, obj.oid)
|
241
|
+
assert_equal(0, obj.gen)
|
242
|
+
end
|
243
|
+
|
244
|
+
describe "with strict parsing" do
|
245
|
+
it "raises an error if an indirect object has an offset of 0" do
|
246
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
247
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
248
|
+
@parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
|
249
|
+
end
|
250
|
+
assert_match(/has offset 0/, exp.message)
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
170
254
|
it "fails if another object is found instead of an object stream" do
|
171
255
|
def (@document).object(_oid)
|
172
256
|
:invalid
|
@@ -205,7 +289,7 @@ describe HexaPDF::Parser do
|
|
205
289
|
end
|
206
290
|
|
207
291
|
it "ignores garbage at the end of the file" do
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
292
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
209
293
|
assert_equal(5, @parser.startxref_offset)
|
210
294
|
end
|
211
295
|
|
@@ -215,9 +299,9 @@ describe HexaPDF::Parser do
|
|
215
299
|
end
|
216
300
|
|
217
301
|
it "finds the startxref anywhere in file" do
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
302
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
219
303
|
assert_equal(5, @parser.startxref_offset)
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
304
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
221
305
|
assert_equal(5, @parser.startxref_offset)
|
222
306
|
end
|
223
307
|
|
@@ -242,6 +326,13 @@ describe HexaPDF::Parser do
|
|
242
326
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
243
327
|
assert_match(/missing startxref/, exp.message)
|
244
328
|
end
|
329
|
+
|
330
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
331
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
332
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
333
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
334
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
335
|
+
end
|
245
336
|
end
|
246
337
|
|
247
338
|
describe "file_header_version" do
|
@@ -262,7 +353,7 @@ describe HexaPDF::Parser do
|
|
262
353
|
end
|
263
354
|
|
264
355
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
265
|
-
create_parser("junk" * 200
|
356
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
266
357
|
assert_equal('1.4', @parser.file_header_version)
|
267
358
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
268
359
|
end
|
@@ -318,6 +409,12 @@ describe HexaPDF::Parser do
|
|
318
409
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
319
410
|
end
|
320
411
|
|
412
|
+
it "fails if a sub section entry is mangled" do
|
413
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
414
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
415
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
416
|
+
end
|
417
|
+
|
321
418
|
it "fails if there is no trailer" do
|
322
419
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
323
420
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
@@ -329,6 +426,71 @@ describe HexaPDF::Parser do
|
|
329
426
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
330
427
|
assert_match(/dictionary/, exp.message)
|
331
428
|
end
|
429
|
+
|
430
|
+
describe "invalid numbering of main xref section" do
|
431
|
+
it "handles the xref if the numbering is off by N" do
|
432
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
433
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
434
|
+
section, _trailer = @parser.parse_xref_section_and_trailer(17)
|
435
|
+
assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
|
436
|
+
end
|
437
|
+
|
438
|
+
it "fails if the first entry is not the one for oid=0" do
|
439
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
440
|
+
"xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
441
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
442
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
443
|
+
|
444
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
445
|
+
"xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
|
446
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
447
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
448
|
+
end
|
449
|
+
|
450
|
+
it "fails if the tested entry position is invalid" do
|
451
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
452
|
+
"xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
|
453
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
454
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
455
|
+
end
|
456
|
+
|
457
|
+
it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
|
458
|
+
create_parser(" 2 0 obj 1 endobj\n" \
|
459
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
460
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
461
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
462
|
+
end
|
463
|
+
end
|
464
|
+
|
465
|
+
describe "with strict parsing" do
|
466
|
+
before do
|
467
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
468
|
+
end
|
469
|
+
|
470
|
+
it "fails if xref type=n with offset=0" do
|
471
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
472
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
473
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
474
|
+
end
|
475
|
+
|
476
|
+
it " fails xref type=n with gen>65535" do
|
477
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
478
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
479
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
480
|
+
end
|
481
|
+
|
482
|
+
it "fails if trailing second whitespace is missing" do
|
483
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
484
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
485
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
486
|
+
end
|
487
|
+
|
488
|
+
it "fails if the main cross-reference section has invalid numbering" do
|
489
|
+
create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
|
490
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
491
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
492
|
+
end
|
493
|
+
end
|
332
494
|
end
|
333
495
|
|
334
496
|
describe "load_revision" do
|
@@ -348,75 +510,67 @@ describe HexaPDF::Parser do
|
|
348
510
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
349
511
|
assert_match(/not a cross-reference stream/, exp.message)
|
350
512
|
end
|
351
|
-
end
|
352
513
|
|
353
|
-
|
354
|
-
before do
|
514
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
355
515
|
@document.config['parser.on_correctable_error'] = proc { true }
|
516
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
517
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
518
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
519
|
+
assert_match(/entry for itself/, exp.message)
|
356
520
|
end
|
521
|
+
end
|
357
522
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
523
|
+
describe "reconstruct_revision" do
|
524
|
+
before do
|
525
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
526
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
362
527
|
end
|
363
528
|
|
364
|
-
it "
|
365
|
-
create_parser("
|
366
|
-
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
529
|
+
it "serially parses the contents" do
|
530
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
531
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
368
532
|
end
|
369
533
|
|
370
|
-
it "
|
371
|
-
create_parser("
|
372
|
-
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
534
|
+
it "ignores parts where the starting line is split across lines" do
|
535
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
536
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
374
537
|
end
|
375
538
|
|
376
|
-
it "
|
377
|
-
create_parser("
|
378
|
-
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
539
|
+
it "handles cases where the line contains an invalid string that exceeds the read buffer" do
|
540
|
+
create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
541
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
380
542
|
end
|
381
543
|
|
382
|
-
it "
|
383
|
-
|
384
|
-
|
385
|
-
|
544
|
+
it "handles pathalogical cases which contain many opened literal strings" do
|
545
|
+
time = Time.now
|
546
|
+
create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
547
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
548
|
+
assert(Time.now - time < 0.5, "Xref reconstruction takes too long")
|
386
549
|
end
|
387
550
|
|
388
|
-
it "
|
389
|
-
create_parser("1
|
390
|
-
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
551
|
+
it "ignores invalid objects" do
|
552
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
553
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
392
554
|
end
|
393
555
|
|
394
|
-
it "
|
395
|
-
create_parser("1 0 obj
|
396
|
-
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
556
|
+
it "ignores invalid lines" do
|
557
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
558
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
398
559
|
end
|
399
560
|
|
400
|
-
it "
|
401
|
-
create_parser("1
|
402
|
-
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
561
|
+
it "uses the last trailer" do
|
562
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
563
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
407
564
|
end
|
408
565
|
|
409
|
-
it "
|
410
|
-
create_parser("1
|
411
|
-
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
566
|
+
it "uses the first trailer in case of a linearized file" do
|
567
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
568
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
413
569
|
end
|
414
570
|
|
415
|
-
it "
|
416
|
-
create_parser("
|
417
|
-
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
571
|
+
it "fails if no valid trailer is found" do
|
572
|
+
create_parser("1 0 obj\n5\nendobj")
|
573
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
420
574
|
end
|
421
575
|
end
|
422
576
|
end
|
@@ -107,6 +107,13 @@ describe HexaPDF::PDFArray do
|
|
107
107
|
assert_equal([1, :data, @array[2]], @array[0, 5])
|
108
108
|
end
|
109
109
|
|
110
|
+
it "allows deleting an object" do
|
111
|
+
obj = @array.value[1]
|
112
|
+
assert_same(obj, @array.delete(obj))
|
113
|
+
ref = HexaPDF::Object.new(:test, oid: 1)
|
114
|
+
assert_equal(ref, @array.delete(ref))
|
115
|
+
end
|
116
|
+
|
110
117
|
describe "slice!" do
|
111
118
|
it "allows deleting a single element" do
|
112
119
|
@array.slice!(2)
|
@@ -157,6 +164,6 @@ describe HexaPDF::PDFArray do
|
|
157
164
|
end
|
158
165
|
|
159
166
|
it "can be converted to a simple array" do
|
160
|
-
assert_equal(@array
|
167
|
+
assert_equal([1, :data, "deref", @array[3]], @array.to_ary)
|
161
168
|
end
|
162
169
|
end
|
@@ -158,4 +158,39 @@ describe HexaPDF::Revisions do
|
|
158
158
|
doc = HexaPDF::Document.new(io: io)
|
159
159
|
assert_equal(2, doc.revisions.count)
|
160
160
|
end
|
161
|
+
|
162
|
+
it "uses the reconstructed revision if errors are found when loading from an IO" do
|
163
|
+
io = StringIO.new(<<~EOF)
|
164
|
+
%PDF-1.7
|
165
|
+
1 0 obj
|
166
|
+
10
|
167
|
+
endobj
|
168
|
+
|
169
|
+
xref
|
170
|
+
0 2
|
171
|
+
0000000000 65535 f
|
172
|
+
0000000009 00000 n
|
173
|
+
trailer
|
174
|
+
<< /Size 5 >>
|
175
|
+
startxref
|
176
|
+
28
|
177
|
+
%%EOF
|
178
|
+
|
179
|
+
2 0 obj
|
180
|
+
300
|
181
|
+
endobj
|
182
|
+
|
183
|
+
xref
|
184
|
+
2 1
|
185
|
+
0000000301 00000 n
|
186
|
+
trailer
|
187
|
+
<< /Size 3 /Prev 100>>
|
188
|
+
startxref
|
189
|
+
139
|
190
|
+
%%EOF
|
191
|
+
EOF
|
192
|
+
doc = HexaPDF::Document.new(io: io)
|
193
|
+
assert_equal(2, doc.revisions.count)
|
194
|
+
assert_same(doc.revisions[0].trailer.value, doc.revisions[1].trailer.value)
|
195
|
+
end
|
161
196
|
end
|
@@ -153,6 +153,13 @@ describe HexaPDF::Serializer do
|
|
153
153
|
assert_equal("<</Key(value)/Length 6>>stream\nsome\nendstream", io.string)
|
154
154
|
end
|
155
155
|
|
156
|
+
it "doesn't reset the internal recursion flag if the stream is serialized as part of another object" do
|
157
|
+
object = HexaPDF::Dictionary.new({}, oid: 5)
|
158
|
+
object[:Stream] = @stream
|
159
|
+
object[:Self] = object # needs to be the last entry so that :Stream gets serialized first!
|
160
|
+
assert_serialized("<</Stream 2 0 R/Self 5 0 R>>", object)
|
161
|
+
end
|
162
|
+
|
156
163
|
it "fails if a stream without object identifier is serialized" do
|
157
164
|
@stream.oid = 0
|
158
165
|
assert_raises(HexaPDF::Error) { @serializer.serialize(@stream) }
|
@@ -27,4 +27,32 @@ describe HexaPDF::Tokenizer do
|
|
27
27
|
5.times {|i| assert_equal(i, @tokenizer.next_token) }
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
it "has a special token scanning method for use with xref reconstruction" do
|
32
|
+
create_tokenizer(<<-EOF.chomp.gsub(/^ {8}/, ''))
|
33
|
+
% Comment
|
34
|
+
true
|
35
|
+
123 50
|
36
|
+
obj
|
37
|
+
(ignored)
|
38
|
+
/Ignored
|
39
|
+
[/Ignored]
|
40
|
+
<</Ignored /Values>>
|
41
|
+
EOF
|
42
|
+
|
43
|
+
scan_to_newline = proc { @tokenizer.scan_until(/(\n|\r\n?)+|\z/) }
|
44
|
+
|
45
|
+
assert_nil(@tokenizer.next_integer_or_keyword)
|
46
|
+
scan_to_newline.call
|
47
|
+
assert_equal(true, @tokenizer.next_integer_or_keyword)
|
48
|
+
assert_equal(123, @tokenizer.next_integer_or_keyword)
|
49
|
+
assert_equal(50, @tokenizer.next_integer_or_keyword)
|
50
|
+
assert_equal('obj', @tokenizer.next_integer_or_keyword)
|
51
|
+
4.times do
|
52
|
+
assert_nil(@tokenizer.next_integer_or_keyword)
|
53
|
+
scan_to_newline.call
|
54
|
+
end
|
55
|
+
assert_equal(HexaPDF::Tokenizer::NO_MORE_TOKENS, @tokenizer.next_integer_or_keyword)
|
56
|
+
end
|
57
|
+
|
30
58
|
end
|