hexapdf 0.17.1 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1024 -0
- data/LICENSE +29 -0
- data/README.md +129 -0
- data/Rakefile +109 -0
- data/agpl-3.0.txt +661 -0
- data/examples/001-hello_world.rb +16 -0
- data/examples/002-graphics.rb +275 -0
- data/examples/003-arcs.rb +50 -0
- data/examples/004-optimizing.rb +23 -0
- data/examples/005-merging.rb +27 -0
- data/examples/006-standard_pdf_fonts.rb +73 -0
- data/examples/007-truetype.rb +42 -0
- data/examples/008-show_char_bboxes.rb +55 -0
- data/examples/009-text_layouter_alignment.rb +47 -0
- data/examples/010-text_layouter_inline_boxes.rb +64 -0
- data/examples/011-text_layouter_line_wrapping.rb +57 -0
- data/examples/012-text_layouter_styling.rb +122 -0
- data/examples/013-text_layouter_shapes.rb +176 -0
- data/examples/014-text_in_polygon.rb +60 -0
- data/examples/015-boxes.rb +76 -0
- data/examples/016-frame_automatic_box_placement.rb +90 -0
- data/examples/017-frame_text_flow.rb +60 -0
- data/examples/018-composer.rb +44 -0
- data/examples/019-acro_form.rb +88 -0
- data/examples/emoji-smile.png +0 -0
- data/examples/emoji-wink.png +0 -0
- data/examples/machupicchu.jpg +0 -0
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -0
- data/lib/hexapdf/content/graphic_object/geom2d.rb +13 -0
- data/lib/hexapdf/version.rb +1 -1
- data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
- data/test/data/fonts/Ubuntu-Title.ttf +0 -0
- data/test/data/images/cmyk.jpg +0 -0
- data/test/data/images/fillbytes.jpg +0 -0
- data/test/data/images/gray.jpg +0 -0
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-4bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/greyscale-alpha-8bit.png +0 -0
- data/test/data/images/greyscale-trns-8bit.png +0 -0
- data/test/data/images/greyscale-with-gamma1.0.png +0 -0
- data/test/data/images/greyscale-with-gamma1.5.png +0 -0
- data/test/data/images/indexed-1bit.png +0 -0
- data/test/data/images/indexed-2bit.png +0 -0
- data/test/data/images/indexed-4bit.png +0 -0
- data/test/data/images/indexed-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/indexed-alpha-8bit.png +0 -0
- data/test/data/images/rgb.jpg +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/data/images/truecolour-alpha-8bit.png +0 -0
- data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
- data/test/data/images/truecolour-srgb-8bit.png +0 -0
- data/test/data/images/ycck.jpg +0 -0
- data/test/data/minimal.pdf +44 -0
- data/test/data/standard-security-handler/README +9 -0
- data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
- data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
- data/test/hexapdf/common_tokenizer_tests.rb +236 -0
- data/test/hexapdf/content/common.rb +39 -0
- data/test/hexapdf/content/graphic_object/test_arc.rb +102 -0
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +90 -0
- data/test/hexapdf/content/graphic_object/test_geom2d.rb +79 -0
- data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
- data/test/hexapdf/content/test_canvas.rb +1279 -0
- data/test/hexapdf/content/test_color_space.rb +176 -0
- data/test/hexapdf/content/test_graphics_state.rb +151 -0
- data/test/hexapdf/content/test_operator.rb +619 -0
- data/test/hexapdf/content/test_parser.rb +99 -0
- data/test/hexapdf/content/test_processor.rb +163 -0
- data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
- data/test/hexapdf/document/test_files.rb +72 -0
- data/test/hexapdf/document/test_fonts.rb +60 -0
- data/test/hexapdf/document/test_images.rb +72 -0
- data/test/hexapdf/document/test_pages.rb +130 -0
- data/test/hexapdf/encryption/common.rb +87 -0
- data/test/hexapdf/encryption/test_aes.rb +129 -0
- data/test/hexapdf/encryption/test_arc4.rb +39 -0
- data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
- data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
- data/test/hexapdf/encryption/test_identity.rb +21 -0
- data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
- data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
- data/test/hexapdf/encryption/test_security_handler.rb +380 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +322 -0
- data/test/hexapdf/filter/common.rb +53 -0
- data/test/hexapdf/filter/test_ascii85_decode.rb +59 -0
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +38 -0
- data/test/hexapdf/filter/test_crypt.rb +21 -0
- data/test/hexapdf/filter/test_encryption.rb +24 -0
- data/test/hexapdf/filter/test_flate_decode.rb +44 -0
- data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
- data/test/hexapdf/filter/test_predictor.rb +219 -0
- data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
- data/test/hexapdf/font/cmap/test_parser.rb +102 -0
- data/test/hexapdf/font/cmap/test_writer.rb +66 -0
- data/test/hexapdf/font/encoding/test_base.rb +45 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +29 -0
- data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
- data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
- data/test/hexapdf/font/test_cmap.rb +104 -0
- data/test/hexapdf/font/test_encoding.rb +27 -0
- data/test/hexapdf/font/test_invalid_glyph.rb +34 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +186 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +107 -0
- data/test/hexapdf/font/true_type/common.rb +17 -0
- data/test/hexapdf/font/true_type/table/common.rb +27 -0
- data/test/hexapdf/font/true_type/table/test_cmap.rb +47 -0
- data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +141 -0
- data/test/hexapdf/font/true_type/table/test_directory.rb +30 -0
- data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
- data/test/hexapdf/font/true_type/table/test_head.rb +56 -0
- data/test/hexapdf/font/true_type/table/test_hhea.rb +26 -0
- data/test/hexapdf/font/true_type/table/test_hmtx.rb +30 -0
- data/test/hexapdf/font/true_type/table/test_kern.rb +61 -0
- data/test/hexapdf/font/true_type/table/test_loca.rb +33 -0
- data/test/hexapdf/font/true_type/table/test_maxp.rb +50 -0
- data/test/hexapdf/font/true_type/table/test_name.rb +76 -0
- data/test/hexapdf/font/true_type/table/test_os2.rb +55 -0
- data/test/hexapdf/font/true_type/table/test_post.rb +78 -0
- data/test/hexapdf/font/true_type/test_builder.rb +42 -0
- data/test/hexapdf/font/true_type/test_font.rb +116 -0
- data/test/hexapdf/font/true_type/test_optimizer.rb +26 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +73 -0
- data/test/hexapdf/font/true_type/test_table.rb +48 -0
- data/test/hexapdf/font/type1/common.rb +6 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +65 -0
- data/test/hexapdf/font/type1/test_font.rb +104 -0
- data/test/hexapdf/font/type1/test_font_metrics.rb +22 -0
- data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +43 -0
- data/test/hexapdf/font_loader/test_from_file.rb +36 -0
- data/test/hexapdf/font_loader/test_standard14.rb +33 -0
- data/test/hexapdf/image_loader/test_jpeg.rb +93 -0
- data/test/hexapdf/image_loader/test_pdf.rb +47 -0
- data/test/hexapdf/image_loader/test_png.rb +259 -0
- data/test/hexapdf/layout/test_box.rb +154 -0
- data/test/hexapdf/layout/test_frame.rb +350 -0
- data/test/hexapdf/layout/test_image_box.rb +73 -0
- data/test/hexapdf/layout/test_inline_box.rb +71 -0
- data/test/hexapdf/layout/test_line.rb +206 -0
- data/test/hexapdf/layout/test_style.rb +790 -0
- data/test/hexapdf/layout/test_text_box.rb +140 -0
- data/test/hexapdf/layout/test_text_fragment.rb +375 -0
- data/test/hexapdf/layout/test_text_layouter.rb +758 -0
- data/test/hexapdf/layout/test_text_shaper.rb +62 -0
- data/test/hexapdf/layout/test_width_from_polygon.rb +109 -0
- data/test/hexapdf/task/test_dereference.rb +51 -0
- data/test/hexapdf/task/test_optimize.rb +162 -0
- data/test/hexapdf/test_composer.rb +258 -0
- data/test/hexapdf/test_configuration.rb +93 -0
- data/test/hexapdf/test_data_dir.rb +32 -0
- data/test/hexapdf/test_dictionary.rb +340 -0
- data/test/hexapdf/test_dictionary_fields.rb +269 -0
- data/test/hexapdf/test_document.rb +641 -0
- data/test/hexapdf/test_filter.rb +100 -0
- data/test/hexapdf/test_importer.rb +106 -0
- data/test/hexapdf/test_object.rb +258 -0
- data/test/hexapdf/test_parser.rb +645 -0
- data/test/hexapdf/test_pdf_array.rb +169 -0
- data/test/hexapdf/test_rectangle.rb +73 -0
- data/test/hexapdf/test_reference.rb +50 -0
- data/test/hexapdf/test_revision.rb +188 -0
- data/test/hexapdf/test_revisions.rb +196 -0
- data/test/hexapdf/test_serializer.rb +195 -0
- data/test/hexapdf/test_stream.rb +274 -0
- data/test/hexapdf/test_tokenizer.rb +80 -0
- data/test/hexapdf/test_type.rb +18 -0
- data/test/hexapdf/test_writer.rb +140 -0
- data/test/hexapdf/test_xref_section.rb +61 -0
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +795 -0
- data/test/hexapdf/type/acro_form/test_button_field.rb +308 -0
- data/test/hexapdf/type/acro_form/test_choice_field.rb +220 -0
- data/test/hexapdf/type/acro_form/test_field.rb +259 -0
- data/test/hexapdf/type/acro_form/test_form.rb +357 -0
- data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
- data/test/hexapdf/type/acro_form/test_text_field.rb +201 -0
- data/test/hexapdf/type/acro_form/test_variable_text_field.rb +88 -0
- data/test/hexapdf/type/actions/test_launch.rb +24 -0
- data/test/hexapdf/type/actions/test_uri.rb +23 -0
- data/test/hexapdf/type/annotations/test_markup_annotation.rb +22 -0
- data/test/hexapdf/type/annotations/test_text.rb +34 -0
- data/test/hexapdf/type/annotations/test_widget.rb +225 -0
- data/test/hexapdf/type/test_annotation.rb +97 -0
- data/test/hexapdf/type/test_catalog.rb +48 -0
- data/test/hexapdf/type/test_cid_font.rb +61 -0
- data/test/hexapdf/type/test_file_specification.rb +141 -0
- data/test/hexapdf/type/test_font.rb +67 -0
- data/test/hexapdf/type/test_font_descriptor.rb +61 -0
- data/test/hexapdf/type/test_font_simple.rb +176 -0
- data/test/hexapdf/type/test_font_true_type.rb +31 -0
- data/test/hexapdf/type/test_font_type0.rb +120 -0
- data/test/hexapdf/type/test_font_type1.rb +142 -0
- data/test/hexapdf/type/test_font_type3.rb +26 -0
- data/test/hexapdf/type/test_form.rb +120 -0
- data/test/hexapdf/type/test_image.rb +261 -0
- data/test/hexapdf/type/test_info.rb +9 -0
- data/test/hexapdf/type/test_object_stream.rb +117 -0
- data/test/hexapdf/type/test_page.rb +598 -0
- data/test/hexapdf/type/test_page_tree_node.rb +315 -0
- data/test/hexapdf/type/test_resources.rb +209 -0
- data/test/hexapdf/type/test_trailer.rb +116 -0
- data/test/hexapdf/type/test_xref_stream.rb +143 -0
- data/test/hexapdf/utils/test_bit_field.rb +63 -0
- data/test/hexapdf/utils/test_bit_stream.rb +69 -0
- data/test/hexapdf/utils/test_graphics_helpers.rb +37 -0
- data/test/hexapdf/utils/test_lru_cache.rb +22 -0
- data/test/hexapdf/utils/test_object_hash.rb +120 -0
- data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +239 -0
- data/test/test_helper.rb +58 -0
- metadata +263 -3
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
require 'test_helper'
|
|
4
|
+
require 'hexapdf/document'
|
|
5
|
+
require 'hexapdf/parser'
|
|
6
|
+
require 'stringio'
|
|
7
|
+
|
|
8
|
+
describe HexaPDF::Parser do
|
|
9
|
+
before do
  # Every example starts with a fresh document. Cross-reference reconstruction
  # is switched off via the configuration, and an object with value 10 is
  # registered as oid 1 / gen 0 (the stream in object "3 15" refers to it
  # through "/Length 1 0 R").
  @document = HexaPDF::Document.new.tap do |doc|
    doc.config['parser.try_xref_reconstruction'] = false
    doc.add(doc.wrap(10, oid: 1, gen: 0))
  end

  # NOTE: the xref entries (10, 29) and the startxref value (308) are byte
  # offsets into this text, so the content must not be reformatted.
  pdf_source = <<~PDF
    %PDF-1.7

    1 0 obj
    10
    endobj

    2 0 obj
    [ 5 6 <</Length 10 >> (name) <4E6F762073 686D6F7A20 6B612070
    6F702E>]
    endobj

    3 15 obj<< /Length 1 0 R/Hallo 6/Filter /Fl/DecodeParms<<>> >>stream
    Hallo PDF!endstream
    endobj

    4 0 obj
    <</Type /XRef /Length 3 /W [1 1 1] /Index [1 1] /Size 2 >> stream
    \x01\x0A\x00
    endstream
    endobj

    xref
    0 4
    0000000000 65535 f
    0000000010 00000 n
    0000000029 00000 n
    0000000000 65535 f
    3 1
    0000000556 00000 n
    trailer
    << /Test (now) >>
    startxref
    308
    %%EOF
  PDF
  create_parser(pdf_source)
end
|
|
51
|
+
|
|
52
|
+
# Wraps +str+ in a StringIO (kept in @parse_io) and stores a new
# HexaPDF::Parser reading from it, bound to @document, in @parser.
def create_parser(str)
  io = StringIO.new(str)
  @parse_io = io
  @parser = HexaPDF::Parser.new(io, @document)
end
|
|
56
|
+
|
|
57
|
+
describe "parse_indirect_object" do
  it "reads indirect objects sequentially" do
    # First object of the fixture: the plain integer 10, no stream.
    value, oid, gen, stream = @parser.parse_indirect_object
    assert_equal(1, oid)
    assert_equal(0, gen)
    assert_equal(10, value)
    assert_nil(stream)

    # Second object: an array whose hex string spans two lines.
    value, oid, gen, stream = @parser.parse_indirect_object
    assert_equal(2, oid)
    assert_equal(0, gen)
    assert_equal([5, 6, {Length: 10}, "name", "Nov shmoz ka pop."], value)
    assert_nil(stream)

    # Third object: a stream dictionary with filter and decode parameters.
    value, oid, gen, stream = @parser.parse_indirect_object
    assert_equal(3, oid)
    assert_equal(15, gen)
    assert_kind_of(HexaPDF::StreamData, stream)
    assert_equal([:Fl], stream.filter)
    assert_equal([{}], stream.decode_parms)
    assert_equal({Length: 10, Hallo: 6, Filter: :Fl, DecodeParms: {}}, value)
  end

  it "handles empty indirect objects by using PDF null for them" do
    create_parser("1 0 obj\nendobj")
    value, = @parser.parse_indirect_object
    assert_nil(value)
  end

  it "handles keyword stream followed only by CR without LF" do
    create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
    *_ignored, stream = @parser.parse_indirect_object
    assert_equal('12', TestHelper.collector(stream.fiber))
  end

  it "handles keyword stream followed by space and CR or LF" do
    create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
    *_ignored, stream = @parser.parse_indirect_object
    assert_equal('12', TestHelper.collector(stream.fiber))
  end

  it "handles keyword stream followed by space and CR LF" do
    create_parser("1 0 obj<</Length 2>> stream \r\n12\nendstream endobj")
    *_ignored, stream = @parser.parse_indirect_object
    assert_equal('12', TestHelper.collector(stream.fiber))
  end

  it "handles invalid indirect object value consisting of number followed by endobj without space" do
    create_parser("1 0 obj 749endobj")
    value, = @parser.parse_indirect_object
    assert_equal(749, value)
  end

  it "treats indirect objects with invalid values as null objects" do
    create_parser("1 0 obj <</test ( /other (end)>> endobj")
    value, = @parser.parse_indirect_object
    assert_nil(value)
  end

  it "recovers from a stream length value that doesn't reflect the correct length" do
    create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
    dict, _, _, stream = @parser.parse_indirect_object
    assert_equal(2, dict[:Length])
    assert_equal('12', TestHelper.collector(stream.fiber))
  end

  it "recovers from an invalid stream length value" do
    create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
    # Object 2 is an array, i.e. not usable as a stream length.
    @document.add([5], oid: 2)
    dict, _, _, stream = @parser.parse_indirect_object
    assert_equal(2, dict[:Length])
    assert_equal('12', TestHelper.collector(stream.fiber))
  end

  it "works even if the keyword endobj is missing or mangled" do
    ["1 0 obj<</Length 4>>5", "1 0 obj<</Length 4>>endobjk"].each do |input|
      create_parser(input)
      value, = @parser.parse_indirect_object
      assert_equal({Length: 4}, value)
    end
  end

  it "fails if the oid, gen or 'obj' keyword is invalid" do
    # One input per broken part: oid, gen, and the 'obj' keyword.
    ["a 0 obj\n5\nendobj", "1 a obj\n5\nendobj", "1 0 dobj\n5\nendobj"].each do |input|
      create_parser(input)
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/No valid object/, error.message)
    end
  end

  it "fails if the value of a stream is not a dictionary" do
    create_parser("1 0 obj\n(fail)\nstream\nendstream\nendobj\n")
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.parse_indirect_object
    end
    assert_match(/stream.*dictionary/, error.message)
  end

  it "fails if the 'stream' keyword isn't followed by EOL" do
    create_parser("1 0 obj\n<< >>\nstream endstream\nendobj\n")
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.parse_indirect_object(0)
    end
    assert_match(/stream.*followed by LF/, error.message)
  end

  it "fails if the 'endstream' keyword is missing" do
    create_parser("1 0 obj\n<< >>\nstream\nendobj\n")
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.parse_indirect_object(0)
    end
    assert_match(/stream.*followed by.*endstream/i, error.message)
  end

  describe "with strict parsing" do
    before do
      # The callback always returns true; the examples below expect the
      # same inputs that are tolerated above to raise in this mode.
      @document.config['parser.on_correctable_error'] = proc { true }
    end

    it "fails if an empty indirect object is found" do
      create_parser("1 0 obj\nendobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/no indirect object value/i, error.message)
    end

    it "fails if keyword stream is followed only by CR without LF" do
      create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/not CR alone/, error.message)
    end

    it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
      create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/followed by space instead/, error.message)
    end

    it "fails if keyword stream is followed by space and CR LF instead of LF or CR/LF" do
      create_parser("1 0 obj<</Length 2>> stream \r\n12\nendstream endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/followed by space instead/, error.message)
    end

    it "fails for numbers followed by endobj without space" do
      create_parser("1 0 obj 749endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/Missing whitespace after number/, error.message)
    end

    it "fails for invalid values" do
      create_parser("1 0 obj <</test ( /other (end)>> endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/Invalid value after '1 0 obj'/, error.message)
    end

    it "fails if the stream length value is invalid" do
      create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/invalid stream length/i, error.message)
    end

    it "fails if the keyword endobj is mangled" do
      create_parser("1 0 obj\n<< >>\nendobjd\n")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/keyword endobj/, error.message)
    end

    it "fails if the keyword endobj is missing" do
      create_parser("1 0 obj\n<< >>")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object
      end
      assert_match(/keyword endobj/, error.message)
    end

    it "fails if there is data between 'endstream' and 'endobj'" do
      create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.parse_indirect_object(0)
      end
      assert_match(/keyword endobj/, error.message)
    end
  end
end
|
|
236
|
+
|
|
237
|
+
describe "load_object" do
  before do
    # In-use entry for oid 2 / gen 0 at byte offset 29, which is where
    # "2 0 obj" starts in the fixture document.
    @entry = HexaPDF::XRefSection.in_use_entry(2, 0, 29)
  end

  it "can load an indirect object" do
    loaded = @parser.load_object(@entry)
    assert_kind_of(HexaPDF::Object, loaded)
    assert_equal(5, loaded.value[0])
    assert_equal(2, loaded.oid)
    assert_equal(0, loaded.gen)
  end

  it "can load a free object" do
    free_entry = HexaPDF::XRefSection.free_entry(0, 0)
    loaded = @parser.load_object(free_entry)
    assert_kind_of(HexaPDF::Object, loaded)
    assert_nil(loaded.value)
  end

  it "can load a compressed object" do
    # Replace Document#object on this instance with a stub whose result only
    # answers #parse_stream with canned object stream data.
    @document.define_singleton_method(:object) do |_oid|
      Object.new.tap do |stub|
        stub.define_singleton_method(:parse_stream) do
          HexaPDF::Type::ObjectStream::Data.new("5 [1 2]", [1, 2], [0, 2])
        end
      end
    end

    compressed_entry = HexaPDF::XRefSection.compressed_entry(2, 3, 1)
    loaded = @parser.load_object(compressed_entry)
    assert_kind_of(HexaPDF::Object, loaded)
    assert_equal([1, 2], loaded.value)
  end

  it "handles an invalid indirect object offset of 0" do
    zero_offset_entry = HexaPDF::XRefSection.in_use_entry(2, 0, 0)
    loaded = @parser.load_object(zero_offset_entry)
    assert(loaded.null?)
    assert_equal(2, loaded.oid)
    assert_equal(0, loaded.gen)
  end

  describe "with strict parsing" do
    it "raises an error if an indirect object has an offset of 0" do
      @document.config['parser.on_correctable_error'] = proc { true }
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
      end
      assert_match(/has offset 0/, error.message)
    end
  end

  it "fails if another object is found instead of an object stream" do
    # The stubbed lookup returns a symbol instead of an object stream.
    @document.define_singleton_method(:object) { |_oid| :invalid }
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_object(HexaPDF::XRefSection.compressed_entry(2, 1, 1))
    end
    assert_match(/not an object stream/, error.message)
  end

  it "fails if the xref entry type is invalid" do
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_object(HexaPDF::XRefSection::Entry.new(:invalid))
    end
    assert_match(/invalid cross-reference type/i, error.message)
  end

  it "fails if the object/generation numbers don't match" do
    error = assert_raises(HexaPDF::MalformedPDFError) do
      # The object at the entry's offset has generation 0, not 2.
      @entry.gen = 2
      @parser.load_object(@entry)
    end
    assert_match(/oid,gen.*don't match/, error.message)
  end
end
|
|
312
|
+
|
|
313
|
+
describe "startxref_offset" do
  it "caches the offset value" do
    assert_equal(308, @parser.startxref_offset)
    # Change the underlying data; the second call must still report the
    # value from the first call.
    @parser.instance_variable_get(:@io).string.sub!(/308\n/, "309\n")
    assert_equal(308, @parser.startxref_offset)
  end

  it "returns the correct offset" do
    assert_equal(308, @parser.startxref_offset)
  end

  it "ignores garbage at the end of the file" do
    create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
    assert_equal(5, @parser.startxref_offset)
  end

  it "uses the last startxref if there are more than one" do
    create_parser("startxref\n5\n%%EOF\n\nsome garbage\n\nstartxref\n555\n%%EOF\n")
    assert_equal(555, @parser.startxref_offset)
  end

  it "finds the startxref anywhere in file" do
    inputs = [
      "startxref\n5\n%%EOF" + "\nhallo" * 5000,
      "startxref\n5\n%%EOF\n" + "h" * 1017,
    ]
    inputs.each do |input|
      create_parser(input)
      assert_equal(5, @parser.startxref_offset)
    end
  end

  it "fails even in big files when nothing is found" do
    create_parser("\nhallo" * 5000)
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.startxref_offset
    end
    assert_match(/end-of-file marker not found/, error.message)
  end

  it "fails if the %%EOF marker is missing" do
    # Checked for a file without the marker and for an empty file.
    ["startxref\n5", ""].each do |input|
      create_parser(input)
      error = assert_raises(HexaPDF::MalformedPDFError) do
        @parser.startxref_offset
      end
      assert_match(/end-of-file marker not found/, error.message)
    end
  end

  it "fails if the startxref keyword is missing" do
    create_parser("somexref\n5\n%%EOF")
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.startxref_offset
    end
    assert_match(/missing startxref/, error.message)
  end

  it "fails on strict parsing if the startxref is not in the last part of the file" do
    @document.config['parser.on_correctable_error'] = proc { true }
    create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.startxref_offset
    end
    assert_match(/end-of-file marker not found/, error.message)
  end
end
|
|
370
|
+
|
|
371
|
+
# Specs for reading the PDF version out of the "%PDF-x.y" file header.
describe "file_header_version" do
  # Uses the parser that the surrounding spec sets up by default.
  it "returns the correct version" do
    version = @parser.file_header_version
    assert_equal('1.7', version)
  end

  # "%PDF-1" carries no minor version and is therefore not a valid header.
  it "fails if the header is mangled" do
    create_parser("%PDF-1\n")

    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.file_header_version
    end
    assert_match(/file header/, error.message)
  end

  it "fails if the header is missing" do
    create_parser("no header")

    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.file_header_version
    end
    assert_match(/file header/, error.message)
  end

  # 200 * "junk" (800 bytes) plus one newline precede the header, so the
  # header is expected to be recorded at offset 801.
  it "ignores junk at the beginning of the file and correctly calculates offset" do
    leading_junk = "junk" * 200
    create_parser(leading_junk << "\n%PDF-1.4\n")

    assert_equal('1.4', @parser.file_header_version)

    header_offset = @parser.instance_variable_get(:@header_offset)
    assert_equal(801, header_offset)
  end
end
|
|
394
|
+
|
|
395
|
+
# The offset reported by startxref_offset must be recognised as a
# cross-reference section, whereas offset 53 of the default test data must not.
it "xref_section?" do
  table_offset = @parser.startxref_offset

  assert(@parser.xref_section?(table_offset))
  refute(@parser.xref_section?(53))
end
|
|
399
|
+
|
|
400
|
+
# Specs for parsing a classic cross-reference table together with its trailer.
#
# Most examples feed a small hand-written table to create_parser and call
# parse_xref_section_and_trailer with the byte offset at which the table starts.
describe "parse_xref_section_and_trailer" do
  # Runs against the default test data of the surrounding spec.
  it "works on a section with multiple sub sections" do
    section, trailer = @parser.parse_xref_section_and_trailer(@parser.startxref_offset)
    assert_equal({Test: 'now'}, trailer)
    assert_equal(HexaPDF::XRefSection.free_entry(0, 65535), section[0, 65535])
    assert_equal(HexaPDF::XRefSection.free_entry(3, 65535), section[3, 65535])
    assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 10), section[1])
  end

  it "works for an empty section" do
    create_parser("xref\n0 0\ntrailer\n<</Name /Value >>\n")
    _, trailer = @parser.parse_xref_section_and_trailer(0)
    assert_equal({Name: :Value}, trailer)
  end

  # Lenient mode: an in-use ("n") entry with offset 0 is turned into a free entry.
  it "handles xref type=n with offset=0" do
    create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
    section, _trailer = @parser.parse_xref_section_and_trailer(0)
    assert_equal(HexaPDF::XRefSection.free_entry(1, 0), section[1])
  end

  # Lenient mode: an in-use entry with a generation above 65535 becomes a free entry.
  it "handles xref type=n with gen>65535" do
    create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
    section, _trailer = @parser.parse_xref_section_and_trailer(0)
    assert_equal(HexaPDF::XRefSection.free_entry(1, 65536), section[1])
  end

  # Same entries as above but without the space between the type and the newline.
  it "handles xref with missing whitespace at end" do
    create_parser("xref\n0 2\n0000000000 00000 n\n0000000000 65536 n\ntrailer\n<<>>\n")
    section, _trailer = @parser.parse_xref_section_and_trailer(0)
    assert_equal(HexaPDF::XRefSection.free_entry(1, 65536), section[1])
  end

  it "fails if the xref keyword is missing/mangled" do
    create_parser("xTEf\n0 d\n0000000000 00000 n \ntrailer\n<< >>\n")
    exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
    assert_match(/keyword xref/, exp.message)
  end

  it "fails if a sub section header is mangled" do
    create_parser("xref\n0 d\n0000000000 00000 n \ntrailer\n<< >>\n")
    exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
    assert_match(/invalid cross-reference subsection/i, exp.message)
  end

  it "fails if a sub section entry is mangled" do
    create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
    exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
    assert_match(/invalid cross-reference entry/i, exp.message)
  end

  it "fails if there is no trailer" do
    create_parser("xref\n0 1\n0000000000 00000 n \n")
    exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
    assert_match(/keyword trailer/i, exp.message)
  end

  it "fails if the trailer is not a PDF dictionary" do
    create_parser("xref\n0 1\n0000000000 00000 n \ntrailer\n(base)")
    exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
    assert_match(/dictionary/, exp.message)
  end

  # The tables in this group start their numbering at 1 instead of 0. Each
  # input consists of one indirect object followed by the table, and the table
  # is parsed starting at offset 17.
  describe "invalid numbering of main xref section" do
    it "handles the xref if the numbering is off by N" do
      create_parser(" 1 0 obj 1 endobj\n" \
                    "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
      section, _trailer = @parser.parse_xref_section_and_trailer(17)
      assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
    end

    it "fails if the first entry is not the one for oid=0" do
      # First entry is free but has generation 5 instead of 65535.
      create_parser(" 1 0 obj 1 endobj\n" \
                    "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
      assert_match(/Main.*invalid numbering/i, exp.message)

      # First entry is an in-use entry instead of a free one.
      create_parser(" 1 0 obj 1 endobj\n" \
                    "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
      assert_match(/Main.*invalid numbering/i, exp.message)
    end

    it "fails if the tested entry position is invalid" do
      create_parser(" 1 0 obj 1 endobj\n" \
                    "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
      assert_match(/Main.*invalid numbering/i, exp.message)
    end

    it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
      create_parser(" 2 0 obj 1 endobj\n" \
                    "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
      assert_match(/Main.*invalid numbering/i, exp.message)
    end
  end

  # Strict parsing is enabled through an on_correctable_error handler that
  # returns true; inputs that the lenient examples above accept must now raise.
  describe "with strict parsing" do
    before do
      @document.config['parser.on_correctable_error'] = proc { true }
    end

    it "fails if xref type=n with offset=0" do
      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
      assert_match(/invalid.*cross-reference entry/i, exp.message)
    end

    # Description fixed: it used to start with a stray space and lacked the
    # "if" that the sibling examples use.
    it "fails if xref type=n with gen>65535" do
      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
      assert_match(/invalid.*cross-reference entry/i, exp.message)
    end

    it "fails if trailing second whitespace is missing" do
      create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
      assert_match(/invalid.*cross-reference entry/i, exp.message)
    end

    it "fails if the main cross-reference section has invalid numbering" do
      create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
      assert_match(/Main.*invalid numbering/i, exp.message)
    end
  end
end
|
|
528
|
+
|
|
529
|
+
# Specs for load_revision, which returns a cross-reference section and a
# trailer for the revision found at a given byte offset.
describe "load_revision" do
  # Classic xref table of the default test data.
  it "works for a simple cross-reference section" do
    table_offset = @parser.startxref_offset
    xref_section, trailer = @parser.load_revision(table_offset)

    expected_trailer = {Test: 'now'}
    assert_equal(expected_trailer, trailer)
    assert(xref_section[1].in_use?)
  end

  # Offset 212 of the default test data holds a cross-reference stream.
  it "works for a cross-reference stream" do
    xref_section, trailer = @parser.load_revision(212)

    expected_trailer = {Size: 2}
    assert_equal(expected_trailer, trailer)
    assert(xref_section[1].in_use?)
  end

  # Offset 10 of the default test data holds an object that is not an xref stream.
  it "fails if another object is found instead of a cross-reference stream" do
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(10)
    end
    assert_match(/not a cross-reference stream/, error.message)
  end

  # Two bytes are cut out of the underlying test data before the stream at
  # offset 212 is loaded; the error has to point back at that offset.
  it "fails if the cross-reference stream is missing data" do
    @parse_io.string[287..288] = ''

    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(212)
    end
    assert_match(/missing data/, error.message)
    assert_equal(212, error.pos)
  end

  # Strict mode (handler returns true): the stream is object 2 but, with
  # /Size 1, only carries a single entry.
  it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
    strict_handler = proc { true }
    @document.config['parser.on_correctable_error'] = strict_handler

    create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
                  "stream\n\x01\x0A\x00\nendstream endobj")

    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(0)
    end
    assert_match(/entry for itself/, error.message)
  end
end
|
|
562
|
+
|
|
563
|
+
# Specs for rebuilding the cross-reference information by scanning the file
# when the regular xref data cannot be used.
describe "reconstruct_revision" do
  before do
    # Enable the reconstruction fallback for every example in this group.
    @document.config['parser.try_xref_reconstruction'] = true
    # In-use entry for object 1, generation 0. The third argument (100) is
    # presumably a byte position that none of the inputs below has an object
    # at -- TODO confirm against XRefSection.in_use_entry.
    @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
  end

  # Object 1 is defined twice; the later definition (value 6) has to win.
  it "serially parses the contents" do
    create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  it "uses a security handler for decrypting indirect objects if necessary" do
    handler = Minitest::Mock.new
    handler.expect(:decrypt, HexaPDF::Object.new(:result, oid: 1), [HexaPDF::Object])
    @document.instance_variable_set(:@security_handler, handler)
    create_parser("1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(:result, @parser.load_object(@xref).value)
    assert(handler.verify)
  end

  # The second definition has "1 0" and "obj" on different lines and is skipped.
  it "ignores parts where the starting line is split across lines" do
    create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(5, @parser.load_object(@xref).value)
  end

  # Only object 3 exists in the data, so loading object 1 yields a null object.
  it "handles the case when the specified object had an xref entry but is not found" do
    create_parser("3 0 obj\n5\nendobj\ntrailer\n<</Size 1>>")
    assert(@parser.load_object(@xref).null?)
  end

  it "handles cases where the line contains an invalid string that exceeds the read buffer" do
    create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  # Performance guard: 10000 lines that each open a literal string must not
  # slow the reconstruction down. The duration is taken from the monotonic
  # clock so that wall-clock adjustments cannot distort the measurement.
  it "handles pathological cases which contain many opened literal strings" do
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    assert(elapsed < 0.5, "Xref reconstruction takes too long")
  end

  # "1 x obj" and "1 0 xobj" are not valid object headers; only the last one counts.
  it "ignores invalid objects" do
    create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(4, @parser.load_object(@xref).value)
  end

  it "ignores invalid lines" do
    create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  # The expected trailer does not contain the /Prev entry of the input.
  it "uses the last trailer" do
    create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
    assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
  end

  it "uses the first trailer in case of a linearized file" do
    create_parser("1 0 obj\n<</Linearized true>>\nendobj\ntrailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
    assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
  end

  it "tries the trailer specified at the startxref position if no other is found" do
    create_parser("1 0 obj\n5\nendobj\nquack xref trailer <</Size 1/Prev 5>>\nstartxref\n22\n%%EOF")
    assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
  end

  # The only trailer is truncated, but object 1 is a /Catalog and is used as /Root.
  it "constructs a trailer with a /Root entry if no valid trailer was found" do
    create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
    assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
  end

  it "fails if no valid trailer is found and couldn't be constructed" do
    create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
    assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
  end

  it "fails if no valid trailer is found" do
    create_parser("1 0 obj\n5\nendobj")
    assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
  end
end
|
|
645
|
+
end
|