hexapdf 0.12.3 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +132 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +12 -6
- data/lib/hexapdf/dictionary_fields.rb +2 -10
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +107 -7
- data/lib/hexapdf/pdf_array.rb +15 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +37 -10
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +22 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
- data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +6 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +21 -12
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +5 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +9 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +205 -51
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_serializer.rb +7 -0
- data/test/hexapdf/test_tokenizer.rb +28 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
- data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +8 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -12
data/test/hexapdf/test_parser.rb
CHANGED
|
@@ -8,6 +8,7 @@ require 'stringio'
|
|
|
8
8
|
describe HexaPDF::Parser do
|
|
9
9
|
before do
|
|
10
10
|
@document = HexaPDF::Document.new
|
|
11
|
+
@document.config['parser.try_xref_reconstruction'] = false
|
|
11
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
|
12
13
|
|
|
13
14
|
create_parser(<<~EOF)
|
|
@@ -87,6 +88,18 @@ describe HexaPDF::Parser do
|
|
|
87
88
|
assert_equal('12', TestHelper.collector(stream.fiber))
|
|
88
89
|
end
|
|
89
90
|
|
|
91
|
+
it "handles keyword stream followed by space and CR or LF" do
|
|
92
|
+
create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
|
|
93
|
+
*, stream = @parser.parse_indirect_object
|
|
94
|
+
assert_equal('12', TestHelper.collector(stream.fiber))
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it "handles invalid indirect object value consisting of number followed by endobj without space" do
|
|
98
|
+
create_parser("1 0 obj 749endobj")
|
|
99
|
+
object, * = @parser.parse_indirect_object
|
|
100
|
+
assert_equal(749, object)
|
|
101
|
+
end
|
|
102
|
+
|
|
90
103
|
it "recovers from an invalid stream length value" do
|
|
91
104
|
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
|
92
105
|
obj, _, _, stream = @parser.parse_indirect_object
|
|
@@ -132,6 +145,60 @@ describe HexaPDF::Parser do
|
|
|
132
145
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
|
133
146
|
assert_match(/stream.*followed by.*endstream/i, exp.message)
|
|
134
147
|
end
|
|
148
|
+
|
|
149
|
+
describe "with strict parsing" do
|
|
150
|
+
before do
|
|
151
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
it "fails if an empty indirect object is found" do
|
|
155
|
+
create_parser("1 0 obj\nendobj")
|
|
156
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
157
|
+
assert_match(/no indirect object value/i, exp.message)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it "fails if keyword stream is followed only by CR without LF" do
|
|
161
|
+
create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
|
|
162
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
163
|
+
assert_match(/not CR alone/, exp.message)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
|
|
167
|
+
create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
|
|
168
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
169
|
+
assert_match(/must be followed by LF or CR\/LF/, exp.message)
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
it "fails for numbers followed by endobj without space" do
|
|
173
|
+
create_parser("1 0 obj 749endobj")
|
|
174
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
175
|
+
assert_match(/Invalid object value after 'obj'/, exp.message)
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
it "fails if the stream length value is invalid" do
|
|
179
|
+
create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
|
|
180
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
181
|
+
assert_match(/invalid stream length/i, exp.message)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
it "fails if the keyword endobj is mangled" do
|
|
185
|
+
create_parser("1 0 obj\n<< >>\nendobjd\n")
|
|
186
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
187
|
+
assert_match(/keyword endobj/, exp.message)
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
it "fails if the keyword endobj is missing" do
|
|
191
|
+
create_parser("1 0 obj\n<< >>")
|
|
192
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
193
|
+
assert_match(/keyword endobj/, exp.message)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
it "fails if there is data between 'endstream' and 'endobj'" do
|
|
197
|
+
create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
|
|
198
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
|
199
|
+
assert_match(/keyword endobj/, exp.message)
|
|
200
|
+
end
|
|
201
|
+
end
|
|
135
202
|
end
|
|
136
203
|
|
|
137
204
|
describe "load_object" do
|
|
@@ -167,6 +234,23 @@ describe HexaPDF::Parser do
|
|
|
167
234
|
assert_equal([1, 2], obj.value)
|
|
168
235
|
end
|
|
169
236
|
|
|
237
|
+
it "handles an invalid indirect object offset of 0" do
|
|
238
|
+
obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
|
|
239
|
+
assert(obj.null?)
|
|
240
|
+
assert_equal(2, obj.oid)
|
|
241
|
+
assert_equal(0, obj.gen)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
describe "with strict parsing" do
|
|
245
|
+
it "raises an error if an indirect object has an offset of 0" do
|
|
246
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
247
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
|
248
|
+
@parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
|
|
249
|
+
end
|
|
250
|
+
assert_match(/has offset 0/, exp.message)
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
|
|
170
254
|
it "fails if another object is found instead of an object stream" do
|
|
171
255
|
def (@document).object(_oid)
|
|
172
256
|
:invalid
|
|
@@ -205,7 +289,7 @@ describe HexaPDF::Parser do
|
|
|
205
289
|
end
|
|
206
290
|
|
|
207
291
|
it "ignores garbage at the end of the file" do
|
|
208
|
-
create_parser("startxref\n5\n%%EOF"
|
|
292
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
|
|
209
293
|
assert_equal(5, @parser.startxref_offset)
|
|
210
294
|
end
|
|
211
295
|
|
|
@@ -215,9 +299,9 @@ describe HexaPDF::Parser do
|
|
|
215
299
|
end
|
|
216
300
|
|
|
217
301
|
it "finds the startxref anywhere in file" do
|
|
218
|
-
create_parser("startxref\n5\n%%EOF"
|
|
302
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
|
219
303
|
assert_equal(5, @parser.startxref_offset)
|
|
220
|
-
create_parser("startxref\n5\n%%EOF\n"
|
|
304
|
+
create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
|
|
221
305
|
assert_equal(5, @parser.startxref_offset)
|
|
222
306
|
end
|
|
223
307
|
|
|
@@ -242,6 +326,13 @@ describe HexaPDF::Parser do
|
|
|
242
326
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
|
243
327
|
assert_match(/missing startxref/, exp.message)
|
|
244
328
|
end
|
|
329
|
+
|
|
330
|
+
it "fails on strict parsing if the startxref is not in the last part of the file" do
|
|
331
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
332
|
+
create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
|
|
333
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
|
|
334
|
+
assert_match(/end-of-file marker not found/, exp.message)
|
|
335
|
+
end
|
|
245
336
|
end
|
|
246
337
|
|
|
247
338
|
describe "file_header_version" do
|
|
@@ -262,7 +353,7 @@ describe HexaPDF::Parser do
|
|
|
262
353
|
end
|
|
263
354
|
|
|
264
355
|
it "ignores junk at the beginning of the file and correctly calculates offset" do
|
|
265
|
-
create_parser("junk" * 200
|
|
356
|
+
create_parser("junk" * 200 << "\n%PDF-1.4\n")
|
|
266
357
|
assert_equal('1.4', @parser.file_header_version)
|
|
267
358
|
assert_equal(801, @parser.instance_variable_get(:@header_offset))
|
|
268
359
|
end
|
|
@@ -318,6 +409,12 @@ describe HexaPDF::Parser do
|
|
|
318
409
|
assert_match(/invalid cross-reference subsection/i, exp.message)
|
|
319
410
|
end
|
|
320
411
|
|
|
412
|
+
it "fails if a sub section entry is mangled" do
|
|
413
|
+
create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
|
|
414
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
415
|
+
assert_match(/invalid cross-reference entry/i, exp.message)
|
|
416
|
+
end
|
|
417
|
+
|
|
321
418
|
it "fails if there is no trailer" do
|
|
322
419
|
create_parser("xref\n0 1\n0000000000 00000 n \n")
|
|
323
420
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
@@ -329,6 +426,71 @@ describe HexaPDF::Parser do
|
|
|
329
426
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
330
427
|
assert_match(/dictionary/, exp.message)
|
|
331
428
|
end
|
|
429
|
+
|
|
430
|
+
describe "invalid numbering of main xref section" do
|
|
431
|
+
it "handles the xref if the numbering is off by N" do
|
|
432
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
433
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
434
|
+
section, _trailer = @parser.parse_xref_section_and_trailer(17)
|
|
435
|
+
assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
it "fails if the first entry is not the one for oid=0" do
|
|
439
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
440
|
+
"xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
441
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
442
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
443
|
+
|
|
444
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
445
|
+
"xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
446
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
447
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
it "fails if the tested entry position is invalid" do
|
|
451
|
+
create_parser(" 1 0 obj 1 endobj\n" \
|
|
452
|
+
"xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
|
|
453
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
454
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
455
|
+
end
|
|
456
|
+
|
|
457
|
+
it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
|
|
458
|
+
create_parser(" 2 0 obj 1 endobj\n" \
|
|
459
|
+
"xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
460
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
|
|
461
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
describe "with strict parsing" do
|
|
466
|
+
before do
|
|
467
|
+
@document.config['parser.on_correctable_error'] = proc { true }
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
it "fails if xref type=n with offset=0" do
|
|
471
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
|
|
472
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
473
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
474
|
+
end
|
|
475
|
+
|
|
476
|
+
it " fails xref type=n with gen>65535" do
|
|
477
|
+
create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
|
|
478
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
479
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
it "fails if trailing second whitespace is missing" do
|
|
483
|
+
create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
|
|
484
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
485
|
+
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
it "fails if the main cross-reference section has invalid numbering" do
|
|
489
|
+
create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
|
|
490
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
|
|
491
|
+
assert_match(/Main.*invalid numbering/i, exp.message)
|
|
492
|
+
end
|
|
493
|
+
end
|
|
332
494
|
end
|
|
333
495
|
|
|
334
496
|
describe "load_revision" do
|
|
@@ -348,75 +510,67 @@ describe HexaPDF::Parser do
|
|
|
348
510
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
|
|
349
511
|
assert_match(/not a cross-reference stream/, exp.message)
|
|
350
512
|
end
|
|
351
|
-
end
|
|
352
513
|
|
|
353
|
-
|
|
354
|
-
before do
|
|
514
|
+
it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
|
|
355
515
|
@document.config['parser.on_correctable_error'] = proc { true }
|
|
516
|
+
create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
|
|
517
|
+
"stream\n\x01\x0A\x00\nendstream endobj")
|
|
518
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
|
519
|
+
assert_match(/entry for itself/, exp.message)
|
|
356
520
|
end
|
|
521
|
+
end
|
|
357
522
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
523
|
+
describe "reconstruct_revision" do
|
|
524
|
+
before do
|
|
525
|
+
@document.config['parser.try_xref_reconstruction'] = true
|
|
526
|
+
@xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
|
|
362
527
|
end
|
|
363
528
|
|
|
364
|
-
it "
|
|
365
|
-
create_parser("
|
|
366
|
-
|
|
367
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
529
|
+
it "serially parses the contents" do
|
|
530
|
+
create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
531
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
368
532
|
end
|
|
369
533
|
|
|
370
|
-
it "
|
|
371
|
-
create_parser("
|
|
372
|
-
|
|
373
|
-
assert_match(/invalid.*cross-reference entry/i, exp.message)
|
|
534
|
+
it "ignores parts where the starting line is split across lines" do
|
|
535
|
+
create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
536
|
+
assert_equal(5, @parser.load_object(@xref).value)
|
|
374
537
|
end
|
|
375
538
|
|
|
376
|
-
it "
|
|
377
|
-
create_parser("
|
|
378
|
-
|
|
379
|
-
assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
|
|
539
|
+
it "handles cases where the line contains an invalid string that exceeds the read buffer" do
|
|
540
|
+
create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
541
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
380
542
|
end
|
|
381
543
|
|
|
382
|
-
it "
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
544
|
+
it "handles pathalogical cases which contain many opened literal strings" do
|
|
545
|
+
time = Time.now
|
|
546
|
+
create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
547
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
548
|
+
assert(Time.now - time < 0.5, "Xref reconstruction takes too long")
|
|
386
549
|
end
|
|
387
550
|
|
|
388
|
-
it "
|
|
389
|
-
create_parser("1
|
|
390
|
-
|
|
391
|
-
assert_match(/not CR alone/, exp.message)
|
|
551
|
+
it "ignores invalid objects" do
|
|
552
|
+
create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
|
|
553
|
+
assert_equal(4, @parser.load_object(@xref).value)
|
|
392
554
|
end
|
|
393
555
|
|
|
394
|
-
it "
|
|
395
|
-
create_parser("1 0 obj
|
|
396
|
-
|
|
397
|
-
assert_match(/invalid stream length/i, exp.message)
|
|
556
|
+
it "ignores invalid lines" do
|
|
557
|
+
create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
|
|
558
|
+
assert_equal(6, @parser.load_object(@xref).value)
|
|
398
559
|
end
|
|
399
560
|
|
|
400
|
-
it "
|
|
401
|
-
create_parser("1
|
|
402
|
-
|
|
403
|
-
assert_match(/keyword endobj/, exp.message)
|
|
404
|
-
create_parser("1 0 obj\n<< >>")
|
|
405
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
406
|
-
assert_match(/keyword endobj/, exp.message)
|
|
561
|
+
it "uses the last trailer" do
|
|
562
|
+
create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
|
|
563
|
+
assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
|
|
407
564
|
end
|
|
408
565
|
|
|
409
|
-
it "
|
|
410
|
-
create_parser("1
|
|
411
|
-
|
|
412
|
-
assert_match(/keyword endobj/, exp.message)
|
|
566
|
+
it "uses the first trailer in case of a linearized file" do
|
|
567
|
+
create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
|
|
568
|
+
assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
|
|
413
569
|
end
|
|
414
570
|
|
|
415
|
-
it "
|
|
416
|
-
create_parser("
|
|
417
|
-
|
|
418
|
-
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
|
|
419
|
-
assert_match(/entry for itself/, exp.message)
|
|
571
|
+
it "fails if no valid trailer is found" do
|
|
572
|
+
create_parser("1 0 obj\n5\nendobj")
|
|
573
|
+
assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
|
|
420
574
|
end
|
|
421
575
|
end
|
|
422
576
|
end
|
|
@@ -107,6 +107,13 @@ describe HexaPDF::PDFArray do
|
|
|
107
107
|
assert_equal([1, :data, @array[2]], @array[0, 5])
|
|
108
108
|
end
|
|
109
109
|
|
|
110
|
+
it "allows deleting an object" do
|
|
111
|
+
obj = @array.value[1]
|
|
112
|
+
assert_same(obj, @array.delete(obj))
|
|
113
|
+
ref = HexaPDF::Object.new(:test, oid: 1)
|
|
114
|
+
assert_equal(ref, @array.delete(ref))
|
|
115
|
+
end
|
|
116
|
+
|
|
110
117
|
describe "slice!" do
|
|
111
118
|
it "allows deleting a single element" do
|
|
112
119
|
@array.slice!(2)
|
|
@@ -157,6 +164,6 @@ describe HexaPDF::PDFArray do
|
|
|
157
164
|
end
|
|
158
165
|
|
|
159
166
|
it "can be converted to a simple array" do
|
|
160
|
-
assert_equal(@array
|
|
167
|
+
assert_equal([1, :data, "deref", @array[3]], @array.to_ary)
|
|
161
168
|
end
|
|
162
169
|
end
|
|
@@ -158,4 +158,39 @@ describe HexaPDF::Revisions do
|
|
|
158
158
|
doc = HexaPDF::Document.new(io: io)
|
|
159
159
|
assert_equal(2, doc.revisions.count)
|
|
160
160
|
end
|
|
161
|
+
|
|
162
|
+
it "uses the reconstructed revision if errors are found when loading from an IO" do
|
|
163
|
+
io = StringIO.new(<<~EOF)
|
|
164
|
+
%PDF-1.7
|
|
165
|
+
1 0 obj
|
|
166
|
+
10
|
|
167
|
+
endobj
|
|
168
|
+
|
|
169
|
+
xref
|
|
170
|
+
0 2
|
|
171
|
+
0000000000 65535 f
|
|
172
|
+
0000000009 00000 n
|
|
173
|
+
trailer
|
|
174
|
+
<< /Size 5 >>
|
|
175
|
+
startxref
|
|
176
|
+
28
|
|
177
|
+
%%EOF
|
|
178
|
+
|
|
179
|
+
2 0 obj
|
|
180
|
+
300
|
|
181
|
+
endobj
|
|
182
|
+
|
|
183
|
+
xref
|
|
184
|
+
2 1
|
|
185
|
+
0000000301 00000 n
|
|
186
|
+
trailer
|
|
187
|
+
<< /Size 3 /Prev 100>>
|
|
188
|
+
startxref
|
|
189
|
+
139
|
|
190
|
+
%%EOF
|
|
191
|
+
EOF
|
|
192
|
+
doc = HexaPDF::Document.new(io: io)
|
|
193
|
+
assert_equal(2, doc.revisions.count)
|
|
194
|
+
assert_same(doc.revisions[0].trailer.value, doc.revisions[1].trailer.value)
|
|
195
|
+
end
|
|
161
196
|
end
|
|
@@ -153,6 +153,13 @@ describe HexaPDF::Serializer do
|
|
|
153
153
|
assert_equal("<</Key(value)/Length 6>>stream\nsome\nendstream", io.string)
|
|
154
154
|
end
|
|
155
155
|
|
|
156
|
+
it "doesn't reset the internal recursion flag if the stream is serialized as part of another object" do
|
|
157
|
+
object = HexaPDF::Dictionary.new({}, oid: 5)
|
|
158
|
+
object[:Stream] = @stream
|
|
159
|
+
object[:Self] = object # needs to be the last entry so that :Stream gets serialized first!
|
|
160
|
+
assert_serialized("<</Stream 2 0 R/Self 5 0 R>>", object)
|
|
161
|
+
end
|
|
162
|
+
|
|
156
163
|
it "fails if a stream without object identifier is serialized" do
|
|
157
164
|
@stream.oid = 0
|
|
158
165
|
assert_raises(HexaPDF::Error) { @serializer.serialize(@stream) }
|
|
@@ -27,4 +27,32 @@ describe HexaPDF::Tokenizer do
|
|
|
27
27
|
5.times {|i| assert_equal(i, @tokenizer.next_token) }
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
|
+
|
|
31
|
+
it "has a special token scanning method for use with xref reconstruction" do
|
|
32
|
+
create_tokenizer(<<-EOF.chomp.gsub(/^ {8}/, ''))
|
|
33
|
+
% Comment
|
|
34
|
+
true
|
|
35
|
+
123 50
|
|
36
|
+
obj
|
|
37
|
+
(ignored)
|
|
38
|
+
/Ignored
|
|
39
|
+
[/Ignored]
|
|
40
|
+
<</Ignored /Values>>
|
|
41
|
+
EOF
|
|
42
|
+
|
|
43
|
+
scan_to_newline = proc { @tokenizer.scan_until(/(\n|\r\n?)+|\z/) }
|
|
44
|
+
|
|
45
|
+
assert_nil(@tokenizer.next_integer_or_keyword)
|
|
46
|
+
scan_to_newline.call
|
|
47
|
+
assert_equal(true, @tokenizer.next_integer_or_keyword)
|
|
48
|
+
assert_equal(123, @tokenizer.next_integer_or_keyword)
|
|
49
|
+
assert_equal(50, @tokenizer.next_integer_or_keyword)
|
|
50
|
+
assert_equal('obj', @tokenizer.next_integer_or_keyword)
|
|
51
|
+
4.times do
|
|
52
|
+
assert_nil(@tokenizer.next_integer_or_keyword)
|
|
53
|
+
scan_to_newline.call
|
|
54
|
+
end
|
|
55
|
+
assert_equal(HexaPDF::Tokenizer::NO_MORE_TOKENS, @tokenizer.next_integer_or_keyword)
|
|
56
|
+
end
|
|
57
|
+
|
|
30
58
|
end
|