hexapdf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTERS +3 -0
- data/LICENSE +26 -0
- data/README.md +88 -0
- data/Rakefile +121 -0
- data/VERSION +1 -0
- data/agpl-3.0.txt +661 -0
- data/bin/hexapdf +6 -0
- data/data/hexapdf/afm/Courier-Bold.afm +342 -0
- data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
- data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
- data/data/hexapdf/afm/Courier.afm +342 -0
- data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
- data/data/hexapdf/afm/Helvetica.afm +3051 -0
- data/data/hexapdf/afm/MustRead.html +1 -0
- data/data/hexapdf/afm/Symbol.afm +213 -0
- data/data/hexapdf/afm/Times-Bold.afm +2588 -0
- data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
- data/data/hexapdf/afm/Times-Italic.afm +2667 -0
- data/data/hexapdf/afm/Times-Roman.afm +2419 -0
- data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
- data/data/hexapdf/encoding/glyphlist.txt +4305 -0
- data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
- data/examples/arc.rb +50 -0
- data/examples/graphics.rb +274 -0
- data/examples/hello_world.rb +16 -0
- data/examples/machupicchu.jpg +0 -0
- data/examples/merging.rb +24 -0
- data/examples/optimizing.rb +20 -0
- data/examples/show_char_bboxes.rb +55 -0
- data/examples/standard_pdf_fonts.rb +72 -0
- data/examples/truetype.rb +45 -0
- data/lib/hexapdf/cli/extract.rb +128 -0
- data/lib/hexapdf/cli/info.rb +121 -0
- data/lib/hexapdf/cli/inspect.rb +157 -0
- data/lib/hexapdf/cli/modify.rb +218 -0
- data/lib/hexapdf/cli.rb +121 -0
- data/lib/hexapdf/configuration.rb +392 -0
- data/lib/hexapdf/content/canvas.rb +1974 -0
- data/lib/hexapdf/content/color_space.rb +364 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
- data/lib/hexapdf/content/graphic_object.rb +81 -0
- data/lib/hexapdf/content/graphics_state.rb +579 -0
- data/lib/hexapdf/content/operator.rb +1072 -0
- data/lib/hexapdf/content/parser.rb +204 -0
- data/lib/hexapdf/content/processor.rb +451 -0
- data/lib/hexapdf/content/transformation_matrix.rb +172 -0
- data/lib/hexapdf/content.rb +47 -0
- data/lib/hexapdf/data_dir.rb +51 -0
- data/lib/hexapdf/dictionary.rb +303 -0
- data/lib/hexapdf/dictionary_fields.rb +382 -0
- data/lib/hexapdf/document.rb +589 -0
- data/lib/hexapdf/document_utils.rb +209 -0
- data/lib/hexapdf/encryption/aes.rb +206 -0
- data/lib/hexapdf/encryption/arc4.rb +93 -0
- data/lib/hexapdf/encryption/fast_aes.rb +79 -0
- data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
- data/lib/hexapdf/encryption/identity.rb +63 -0
- data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
- data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
- data/lib/hexapdf/encryption/security_handler.rb +494 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
- data/lib/hexapdf/encryption.rb +94 -0
- data/lib/hexapdf/error.rb +73 -0
- data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
- data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
- data/lib/hexapdf/filter/dct_decode.rb +57 -0
- data/lib/hexapdf/filter/encryption.rb +59 -0
- data/lib/hexapdf/filter/flate_decode.rb +93 -0
- data/lib/hexapdf/filter/jpx_decode.rb +56 -0
- data/lib/hexapdf/filter/lzw_decode.rb +191 -0
- data/lib/hexapdf/filter/predictor.rb +266 -0
- data/lib/hexapdf/filter/run_length_decode.rb +108 -0
- data/lib/hexapdf/filter.rb +176 -0
- data/lib/hexapdf/font/cmap/parser.rb +146 -0
- data/lib/hexapdf/font/cmap/writer.rb +176 -0
- data/lib/hexapdf/font/cmap.rb +90 -0
- data/lib/hexapdf/font/encoding/base.rb +77 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
- data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
- data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
- data/lib/hexapdf/font/encoding.rb +68 -0
- data/lib/hexapdf/font/true_type/font.rb +179 -0
- data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
- data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
- data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
- data/lib/hexapdf/font/true_type/table/head.rb +143 -0
- data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
- data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
- data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
- data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
- data/lib/hexapdf/font/true_type/table/name.rb +218 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
- data/lib/hexapdf/font/true_type/table/post.rb +230 -0
- data/lib/hexapdf/font/true_type/table.rb +155 -0
- data/lib/hexapdf/font/true_type.rb +48 -0
- data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
- data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
- data/lib/hexapdf/font/type1/font.rb +123 -0
- data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
- data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
- data/lib/hexapdf/font/type1.rb +52 -0
- data/lib/hexapdf/font/type1_wrapper.rb +193 -0
- data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
- data/lib/hexapdf/font_loader/standard14.rb +98 -0
- data/lib/hexapdf/font_loader.rb +85 -0
- data/lib/hexapdf/font_utils.rb +89 -0
- data/lib/hexapdf/image_loader/jpeg.rb +166 -0
- data/lib/hexapdf/image_loader/pdf.rb +89 -0
- data/lib/hexapdf/image_loader/png.rb +410 -0
- data/lib/hexapdf/image_loader.rb +68 -0
- data/lib/hexapdf/importer.rb +139 -0
- data/lib/hexapdf/name_tree_node.rb +78 -0
- data/lib/hexapdf/number_tree_node.rb +67 -0
- data/lib/hexapdf/object.rb +363 -0
- data/lib/hexapdf/parser.rb +349 -0
- data/lib/hexapdf/rectangle.rb +99 -0
- data/lib/hexapdf/reference.rb +98 -0
- data/lib/hexapdf/revision.rb +206 -0
- data/lib/hexapdf/revisions.rb +194 -0
- data/lib/hexapdf/serializer.rb +326 -0
- data/lib/hexapdf/stream.rb +279 -0
- data/lib/hexapdf/task/dereference.rb +109 -0
- data/lib/hexapdf/task/optimize.rb +230 -0
- data/lib/hexapdf/task.rb +68 -0
- data/lib/hexapdf/tokenizer.rb +406 -0
- data/lib/hexapdf/type/catalog.rb +107 -0
- data/lib/hexapdf/type/embedded_file.rb +87 -0
- data/lib/hexapdf/type/file_specification.rb +232 -0
- data/lib/hexapdf/type/font.rb +81 -0
- data/lib/hexapdf/type/font_descriptor.rb +109 -0
- data/lib/hexapdf/type/font_simple.rb +190 -0
- data/lib/hexapdf/type/font_true_type.rb +47 -0
- data/lib/hexapdf/type/font_type1.rb +162 -0
- data/lib/hexapdf/type/form.rb +103 -0
- data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
- data/lib/hexapdf/type/image.rb +73 -0
- data/lib/hexapdf/type/info.rb +70 -0
- data/lib/hexapdf/type/names.rb +69 -0
- data/lib/hexapdf/type/object_stream.rb +224 -0
- data/lib/hexapdf/type/page.rb +355 -0
- data/lib/hexapdf/type/page_tree_node.rb +269 -0
- data/lib/hexapdf/type/resources.rb +212 -0
- data/lib/hexapdf/type/trailer.rb +128 -0
- data/lib/hexapdf/type/viewer_preferences.rb +73 -0
- data/lib/hexapdf/type/xref_stream.rb +204 -0
- data/lib/hexapdf/type.rb +67 -0
- data/lib/hexapdf/utils/bit_field.rb +87 -0
- data/lib/hexapdf/utils/bit_stream.rb +148 -0
- data/lib/hexapdf/utils/lru_cache.rb +65 -0
- data/lib/hexapdf/utils/math_helpers.rb +55 -0
- data/lib/hexapdf/utils/object_hash.rb +130 -0
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
- data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
- data/lib/hexapdf/version.rb +39 -0
- data/lib/hexapdf/writer.rb +199 -0
- data/lib/hexapdf/xref_section.rb +152 -0
- data/lib/hexapdf.rb +34 -0
- data/man/man1/hexapdf.1 +249 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
- data/test/data/fonts/Ubuntu-Title.ttf +0 -0
- data/test/data/images/cmyk.jpg +0 -0
- data/test/data/images/fillbytes.jpg +0 -0
- data/test/data/images/gray.jpg +0 -0
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-4bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/greyscale-alpha-8bit.png +0 -0
- data/test/data/images/greyscale-trns-8bit.png +0 -0
- data/test/data/images/greyscale-with-gamma1.0.png +0 -0
- data/test/data/images/greyscale-with-gamma1.5.png +0 -0
- data/test/data/images/indexed-1bit.png +0 -0
- data/test/data/images/indexed-2bit.png +0 -0
- data/test/data/images/indexed-4bit.png +0 -0
- data/test/data/images/indexed-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/indexed-alpha-8bit.png +0 -0
- data/test/data/images/rgb.jpg +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/data/images/truecolour-alpha-8bit.png +0 -0
- data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
- data/test/data/images/truecolour-srgb-8bit.png +0 -0
- data/test/data/minimal.pdf +44 -0
- data/test/data/standard-security-handler/README +9 -0
- data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
- data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
- data/test/hexapdf/common_tokenizer_tests.rb +204 -0
- data/test/hexapdf/content/common.rb +31 -0
- data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
- data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
- data/test/hexapdf/content/test_canvas.rb +1113 -0
- data/test/hexapdf/content/test_color_space.rb +97 -0
- data/test/hexapdf/content/test_graphics_state.rb +138 -0
- data/test/hexapdf/content/test_operator.rb +619 -0
- data/test/hexapdf/content/test_parser.rb +66 -0
- data/test/hexapdf/content/test_processor.rb +156 -0
- data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
- data/test/hexapdf/encryption/common.rb +87 -0
- data/test/hexapdf/encryption/test_aes.rb +121 -0
- data/test/hexapdf/encryption/test_arc4.rb +39 -0
- data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
- data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
- data/test/hexapdf/encryption/test_identity.rb +21 -0
- data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
- data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
- data/test/hexapdf/encryption/test_security_handler.rb +356 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
- data/test/hexapdf/filter/common.rb +53 -0
- data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
- data/test/hexapdf/filter/test_encryption.rb +24 -0
- data/test/hexapdf/filter/test_flate_decode.rb +35 -0
- data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
- data/test/hexapdf/filter/test_predictor.rb +183 -0
- data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
- data/test/hexapdf/font/cmap/test_parser.rb +67 -0
- data/test/hexapdf/font/cmap/test_writer.rb +58 -0
- data/test/hexapdf/font/encoding/test_base.rb +35 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
- data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
- data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
- data/test/hexapdf/font/test_encoding.rb +27 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
- data/test/hexapdf/font/true_type/common.rb +19 -0
- data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
- data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
- data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
- data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
- data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
- data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
- data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
- data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
- data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
- data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
- data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
- data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
- data/test/hexapdf/font/true_type/test_font.rb +120 -0
- data/test/hexapdf/font/true_type/test_table.rb +41 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
- data/test/hexapdf/font/type1/test_font.rb +68 -0
- data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
- data/test/hexapdf/font_loader/test_standard14.rb +22 -0
- data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
- data/test/hexapdf/image_loader/test_pdf.rb +47 -0
- data/test/hexapdf/image_loader/test_png.rb +258 -0
- data/test/hexapdf/task/test_dereference.rb +46 -0
- data/test/hexapdf/task/test_optimize.rb +137 -0
- data/test/hexapdf/test_configuration.rb +82 -0
- data/test/hexapdf/test_data_dir.rb +32 -0
- data/test/hexapdf/test_dictionary.rb +284 -0
- data/test/hexapdf/test_dictionary_fields.rb +185 -0
- data/test/hexapdf/test_document.rb +574 -0
- data/test/hexapdf/test_document_utils.rb +144 -0
- data/test/hexapdf/test_filter.rb +96 -0
- data/test/hexapdf/test_font_utils.rb +47 -0
- data/test/hexapdf/test_importer.rb +78 -0
- data/test/hexapdf/test_object.rb +177 -0
- data/test/hexapdf/test_parser.rb +394 -0
- data/test/hexapdf/test_rectangle.rb +36 -0
- data/test/hexapdf/test_reference.rb +41 -0
- data/test/hexapdf/test_revision.rb +139 -0
- data/test/hexapdf/test_revisions.rb +93 -0
- data/test/hexapdf/test_serializer.rb +169 -0
- data/test/hexapdf/test_stream.rb +262 -0
- data/test/hexapdf/test_tokenizer.rb +30 -0
- data/test/hexapdf/test_writer.rb +120 -0
- data/test/hexapdf/test_xref_section.rb +35 -0
- data/test/hexapdf/type/test_catalog.rb +30 -0
- data/test/hexapdf/type/test_embedded_file.rb +16 -0
- data/test/hexapdf/type/test_file_specification.rb +148 -0
- data/test/hexapdf/type/test_font.rb +35 -0
- data/test/hexapdf/type/test_font_descriptor.rb +51 -0
- data/test/hexapdf/type/test_font_simple.rb +190 -0
- data/test/hexapdf/type/test_font_type1.rb +128 -0
- data/test/hexapdf/type/test_form.rb +60 -0
- data/test/hexapdf/type/test_info.rb +14 -0
- data/test/hexapdf/type/test_names.rb +9 -0
- data/test/hexapdf/type/test_object_stream.rb +84 -0
- data/test/hexapdf/type/test_page.rb +260 -0
- data/test/hexapdf/type/test_page_tree_node.rb +255 -0
- data/test/hexapdf/type/test_resources.rb +167 -0
- data/test/hexapdf/type/test_trailer.rb +109 -0
- data/test/hexapdf/type/test_xref_stream.rb +131 -0
- data/test/hexapdf/utils/test_bit_field.rb +47 -0
- data/test/hexapdf/utils/test_lru_cache.rb +22 -0
- data/test/hexapdf/utils/test_object_hash.rb +115 -0
- data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
- data/test/test_helper.rb +56 -0
- metadata +427 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'stringio'
|
35
|
+
require 'hexapdf/tokenizer'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
module Content
|
39
|
+
|
40
|
+
# More efficient tokenizer for content streams. This tokenizer class works directly on a
|
41
|
+
# string and not on an IO.
|
42
|
+
#
|
43
|
+
# Note: Indirect object references are *not* supported by this tokenizer!
|
44
|
+
#
|
45
|
+
# See: PDF1.7 s7.2
|
46
|
+
class Tokenizer < HexaPDF::Tokenizer #:nodoc:

  # Creates a new tokenizer that works directly on the given content stream +string+.
  def initialize(string)
    @ss = StringScanner.new(string)
  end

  # See: HexaPDF::Tokenizer#pos
  def pos
    @ss.pos
  end

  # See: HexaPDF::Tokenizer#pos=
  def pos=(pos)
    @ss.pos = pos
  end

  # See: HexaPDF::Tokenizer#scan_until
  def scan_until(re)
    @ss.scan_until(re)
  end

  # See: HexaPDF::Tokenizer#next_token
  #
  # Skips leading whitespace and then dispatches on the next byte of the string. Returns
  # NO_MORE_TOKENS when the end of the string has been reached.
  def next_token
    @ss.skip(WHITESPACE_MULTI_RE)
    case (@ss.eos? ? -1 : @ss.string.getbyte(@ss.pos))
    when 43, 45, 46, 48..57 # + - . 0..9
      parse_number
    when 65..90, 97..122 # A..Z a..z
      # Fast path for the common case; all other bytes reach parse_keyword via the else branch.
      parse_keyword
    when 47 # /
      parse_name
    when 40 # (
      parse_literal_string
    when 60 # <
      if @ss.string.getbyte(@ss.pos + 1) != 60
        parse_hex_string
      else
        @ss.pos += 2
        TOKEN_DICT_START
      end
    when 62 # >
      unless @ss.string.getbyte(@ss.pos + 1) == 62
        raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
      end
      @ss.pos += 2
      TOKEN_DICT_END
    when 91 # [
      @ss.pos += 1
      TOKEN_ARRAY_START
    when 93 # ]
      @ss.pos += 1
      TOKEN_ARRAY_END
    when 123, 125 # { }
      Token.new(@ss.get_byte)
    when 37 # %
      # Skip the comment up to (but not including) the line terminator; a comment without a
      # following line terminator means that nothing else can follow.
      return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
      next_token
    when -1
      NO_MORE_TOKENS
    else
      parse_keyword
    end
  end

  private

  # See: HexaPDF::Tokenizer#parse_number
  #
  # Returns an Integer for integer literals and a Float for real literals. If the text at the
  # current position starts like a number but is none (e.g. a lone '+', '-' or '.'), it is parsed
  # as keyword instead of failing with a NoMethodError on +nil+.
  def parse_number
    if (val = @ss.scan(/[+-]?\d++(?!\.)/))
      val.to_i
    elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
      val << '0'.freeze if val.getbyte(-1) == 46 # dot '.'; Float() rejects a trailing dot
      Float(val)
    else
      parse_keyword
    end
  end

  # Stub implementation to prevent errors for not-overridden methods.
  def prepare_string_scanner(*)
  end

end
|
129
|
+
|
130
|
+
|
131
|
+
# This class knows how to correctly parse a content stream.
|
132
|
+
#
|
133
|
+
# == Overview
|
134
|
+
#
|
135
|
+
# A content stream is mostly just a stream of PDF objects. However, there is one exception:
|
136
|
+
# inline images.
|
137
|
+
#
|
138
|
+
# Since inline images don't follow the normal PDF object parsing rules, they need to be
|
139
|
+
# handled specially and this is the reason for this class. Therefore only the BI operator is
|
140
|
+
# ever called for inline images because the ID and EI operators are handled by the parser.
|
141
|
+
#
|
142
|
+
# To parse some contents the #parse method needs to be called with the contents to be parsed
|
143
|
+
# and a Processor object which is used for processing the parsed operators.
|
144
|
+
class Parser

  # Creates a new Parser object and calls #parse.
  def self.parse(contents, processor)
    new.parse(contents, processor)
  end

  # Parses the contents and calls the processor object for each parsed operator.
  #
  # Operands are collected until an operator token is read, then
  # <tt>processor.process(operator_as_symbol, operands)</tt> is invoked. The operands array is
  # cleared right after the call, so the processor needs to copy it if it wants to keep it.
  def parse(contents, processor)
    tokenizer = Tokenizer.new(contents)
    params = []
    while (obj = tokenizer.next_object(allow_keyword: true)) != Tokenizer::NO_MORE_TOKENS
      if obj.kind_of?(Tokenizer::Token)
        if obj == 'BI'.freeze
          params = parse_inline_image(tokenizer)
        end
        processor.process(obj.to_sym, params)
        params.clear
      else
        params << obj
      end
    end
  end

  private

  # Parses the inline image at the current position and returns the operands for the BI
  # operator, i.e. an array of the form [image dictionary, image data].
  #
  # Raises a HexaPDF::Error if the image dictionary is invalid or if the end of the contents is
  # reached before the inline image is complete.
  def parse_inline_image(tokenizer)
    # BI has already been read, so read the image dictionary
    dict = {}
    while (key = tokenizer.next_object(allow_keyword: true))
      # Only a keyword token ends the dictionary, a string operand "ID" is just an invalid key
      if key.kind_of?(Tokenizer::Token) && key == 'ID'.freeze
        break
      elsif key == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary key for inline image"
      elsif !key.kind_of?(Symbol)
        raise HexaPDF::Error, "Inline image dictionary keys must be PDF name objects"
      end
      value = tokenizer.next_object
      if value == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary value for inline image"
      end
      dict[key] = value
    end

    # one whitespace character after ID
    tokenizer.next_byte

    # find the EI operator which must be followed by whitespace or by the end of the contents
    data = tokenizer.scan_until(/(?=EI(?:[#{Tokenizer::WHITESPACE}]|\z))/o)
    if data.nil?
      raise HexaPDF::Error, "End inline image marker EI not found"
    end

    # Only skip the EI marker itself: there may be no byte after it and any following whitespace
    # is skipped when the next token is read.
    tokenizer.pos += 2
    [dict, data]
  end

end
|
202
|
+
|
203
|
+
end
|
204
|
+
end
|
@@ -0,0 +1,451 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/content/operator'
|
35
|
+
require 'hexapdf/content/graphics_state'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
module Content
|
39
|
+
|
40
|
+
# This class is used for processing content operators extracted from a content stream.
|
41
|
+
#
|
42
|
+
# == General Information
|
43
|
+
#
|
44
|
+
# When a content stream is read, operators and their operands are extracted. After extracting
|
45
|
+
# these operators are normally processed with a Processor instance that ensures that the needed
|
46
|
+
# setup (like modifying the graphics state) is done before further processing.
|
47
|
+
#
|
48
|
+
# == How Processing Works
|
49
|
+
#
|
50
|
+
# The operator implementations (see the Operator module) are called first and they ensure that
|
51
|
+
# the processing state is consistent. For example, operators that modify the graphics state do
|
52
|
+
# actually modify the #graphics_state object. However, operator implementations are *only* used
|
53
|
+
# for this task and not more, so they are very specific and normally don't need to be changed.
|
54
|
+
#
|
55
|
+
# After that, methods corresponding to the operator names are invoked on the processor object (if
|
56
|
+
# they exist). Each PDF operator name is mapped to a nicer message name via the
|
57
|
+
# OPERATOR_MESSAGE_NAME_MAP constant. For example, the operator 'q' is mapped to
|
58
|
+
# 'save_graphics_state'.
|
59
|
+
#
|
60
|
+
# The task of these methods is to do something useful with the content itself, it doesn't need
|
61
|
+
# to concern itself with ensuring the consistency of the processing state. For example, the
|
62
|
+
# processor could use the processing state to extract the text. Or paint the content on a
|
63
|
+
# canvas.
|
64
|
+
#
|
65
|
+
# For inline images only the 'BI' operator mapped to 'inline_image' is used. Although also the
|
66
|
+
# operators 'ID' and 'EI' exist for inline images, they are not used because they are consumed
|
67
|
+
# while parsing inline images and do not reflect separate operators.
|
68
|
+
#
|
69
|
+
# == Text Processing
|
70
|
+
#
|
71
|
+
# Two utility methods #decode_text and #decode_text_with_positioning for extracting text are
|
72
|
+
# provided. Both can directly be invoked from the 'show_text' and 'show_text_with_positioning'
|
73
|
+
# methods.
|
74
|
+
#
|
75
|
+
class Processor
|
76
|
+
|
77
|
+
# Represents an (immutable) glyph box with positioning information.
|
78
|
+
#
|
79
|
+
# Since the glyph may have been transformed by an affine matrix, the bounding may not be a
|
80
|
+
# rectangle in all cases but it is always a parallelogram.
|
81
|
+
class GlyphBox

  # The code point representing the glyph.
  attr_reader :code_point

  # The Unicode value of the code point (a frozen string).
  attr_reader :string

  # Creates a new glyph box for the given code point/Unicode value pair with the lower left
  # coordinate [llx, lly], the lower right coordinate [lrx, lry], and the upper left
  # coordinate [ulx, uly].
  #
  # The given +string+ itself is not modified: if it is not already frozen, a frozen copy is
  # stored so that the caller can continue to use its string object.
  def initialize(code_point, string, llx, lly, lrx, lry, ulx, uly)
    @code_point = code_point
    @string = (string.frozen? ? string : string.dup.freeze)
    @llx = llx
    @lly = lly
    @lrx = lrx
    @lry = lry
    @ulx = ulx
    @uly = uly
    freeze
  end

  # :call-seq:
  #   fragment.lower_left    -> [llx, lly]
  #
  # Returns the lower left coordinate
  def lower_left
    [@llx, @lly]
  end

  # :call-seq:
  #   fragment.lower_right    -> [lrx, lry]
  #
  # Returns the lower right coordinate
  def lower_right
    [@lrx, @lry]
  end

  # :call-seq:
  #   fragment.upper_left    -> [ulx, uly]
  #
  # Returns the upper left coordinate
  def upper_left
    [@ulx, @uly]
  end

  # :call-seq:
  #   fragment.upper_right    -> [urx, ury]
  #
  # Returns the upper right coordinate which is computed by using the other three points of
  # the parallelogram.
  def upper_right
    [@ulx + (@lrx - @llx), @uly + (@lry - @lly)]
  end

  # :call-seq:
  #   fragment.points         -> [llx, lly, lrx, lry, urx, ury, ulx, uly]
  #
  # Returns the four corners of the box as an array of coordinates, starting with the lower
  # left corner and going counterclockwise.
  def points
    [@llx, @lly, @lrx, @lry, *upper_right, @ulx, @uly]
  end

end
|
147
|
+
|
148
|
+
|
149
|
+
# Represents a box composed of GlyphBox objects.
|
150
|
+
#
|
151
|
+
# The bounding box methods #lower_left, #lower_right, #upper_left, #upper_right are computed
|
152
|
+
# by just using the first and last boxes, assuming the boxes are arranged from left to right
|
153
|
+
# in a straight line.
|
154
|
+
class CompositeBox

  # Makes #map, #select, #first and friends available; they are all built on top of #each.
  include Enumerable

  # The glyph boxes contained in this composite box, in the order in which they were appended.
  attr_reader :boxes

  # Creates an empty object.
  def initialize
    @boxes = []
  end

  # Appends the given glyph box and returns +self+ so that multiple appends can be chained.
  def <<(glyph_box)
    @boxes << glyph_box
    self
  end

  # Returns the glyph box at the given index, or +nil+ if the index is out of range.
  def [](index)
    @boxes[index]
  end

  # :call-seq:
  #   composite.each {|glyph_box| block}   -> composite
  #   composite.each                       -> Enumerator
  #
  # Iterates over all contained glyph boxes.
  def each(&block)
    return to_enum(__method__) unless block_given?
    @boxes.each(&block)
    self
  end

  # Returns the concatenated text of the boxes.
  def string
    @boxes.map(&:string).join('')
  end

  # :call-seq:
  #   composite.lower_left    -> [llx, lly]
  #
  # Returns the lower left coordinate, taken from the first box.
  #
  # Like the other corner methods, this needs at least one contained box.
  def lower_left
    @boxes[0].lower_left
  end

  # :call-seq:
  #   composite.lower_right    -> [lrx, lry]
  #
  # Returns the lower right coordinate, taken from the last box.
  def lower_right
    @boxes[-1].lower_right
  end

  # :call-seq:
  #   composite.upper_left    -> [ulx, uly]
  #
  # Returns the upper left coordinate, taken from the first box.
  def upper_left
    @boxes[0].upper_left
  end

  # :call-seq:
  #   composite.upper_right    -> [urx, ury]
  #
  # Returns the upper right coordinate, taken from the last box.
  def upper_right
    @boxes[-1].upper_right
  end

end
|
224
|
+
|
225
|
+
# Mapping of PDF operator names to message names that are sent to renderer implementations.
|
226
|
+
OPERATOR_MESSAGE_NAME_MAP = {
  # Graphics state operators
  q: :save_graphics_state,
  Q: :restore_graphics_state,
  cm: :concatenate_matrix,
  w: :set_line_width,
  J: :set_line_cap_style,
  j: :set_line_join_style,
  M: :set_miter_limit,
  d: :set_line_dash_pattern,
  ri: :set_rendering_intent,
  i: :set_flatness_tolerance,
  gs: :set_graphics_state_parameters,
  # Color operators (the SC/SCN and sc/scn pairs intentionally share one message each)
  CS: :set_stroking_color_space,
  cs: :set_non_stroking_color_space,
  SC: :set_stroking_color,
  SCN: :set_stroking_color,
  sc: :set_non_stroking_color,
  scn: :set_non_stroking_color,
  G: :set_device_gray_stroking_color,
  g: :set_device_gray_non_stroking_color,
  RG: :set_device_rgb_stroking_color,
  rg: :set_device_rgb_non_stroking_color,
  K: :set_device_cmyk_stroking_color,
  k: :set_device_cmyk_non_stroking_color,
  # Path construction operators
  m: :move_to,
  l: :line_to,
  c: :curve_to,
  v: :curve_to_no_first_control_point,
  y: :curve_to_no_second_control_point,
  h: :close_subpath,
  re: :append_rectangle,
  # Path painting and clipping operators (f and F share one message)
  S: :stroke_path,
  s: :close_and_stroke_path,
  f: :fill_path_non_zero,
  F: :fill_path_non_zero,
  :'f*' => :fill_path_even_odd,
  B: :fill_and_stroke_path_non_zero,
  :'B*' => :fill_and_stroke_path_even_odd,
  b: :close_fill_and_stroke_path_non_zero,
  :'b*' => :close_fill_and_stroke_path_even_odd,
  n: :end_path,
  W: :clip_path_non_zero,
  :'W*' => :clip_path_even_odd,
  # Text object, text state, text positioning and text showing operators
  BT: :begin_text,
  ET: :end_text,
  Tc: :set_character_spacing,
  Tw: :set_word_spacing,
  Tz: :set_horizontal_scaling,
  TL: :set_leading,
  Tf: :set_font_and_size,
  Tr: :set_text_rendering_mode,
  Ts: :set_text_rise,
  Td: :move_text,
  TD: :move_text_and_set_leading,
  Tm: :set_text_matrix,
  :'T*' => :move_text_next_line,
  Tj: :show_text,
  :"'" => :move_text_next_line_and_show_text,
  :'"' => :set_spacing_move_text_next_line_and_show_text,
  TJ: :show_text_with_positioning,
  d0: :set_glyph_width, # only for Type 3 fonts
  d1: :set_glyph_width_and_bounding_box, # only for Type 3 fonts
  # Shading, image, XObject, marked content and compatibility operators
  sh: :paint_shading,
  BI: :inline_image, # ID and EI are not sent because the complete image has been read
  Do: :paint_xobject,
  MP: :designate_marked_content_point,
  DP: :designate_marked_content_point_with_property_list,
  BMC: :begin_marked_content,
  BDC: :begin_marked_content_with_property_list,
  EMC: :end_marked_content,
  BX: :begin_compatibility_section,
  EX: :end_compatibility_section,
}.freeze # the lookup table is shared by all processors and must not be changed at runtime
|
299
|
+
|
300
|
+
# Mapping from operator name (Symbol) to a callable object.
|
301
|
+
#
|
302
|
+
# This hash is prepopulated with the default operator implementations (see
|
303
|
+
# Operator::DEFAULT_OPERATORS). If a default operator implementation is not satisfactory, it
|
304
|
+
# can easily be changed by modifying this hash.
|
305
|
+
attr_reader :operators
|
306
|
+
|
307
|
+
# The resources dictionary used during processing.
|
308
|
+
attr_accessor :resources
|
309
|
+
|
310
|
+
# The GraphicsState object containing the current graphics state.
|
311
|
+
#
|
312
|
+
# Changing this attribute manually is not advised because it is automatically adjusted
|
313
|
+
# according to the processed operators!
|
314
|
+
attr_reader :graphics_state
|
315
|
+
|
316
|
+
# The current graphics object.
|
317
|
+
#
|
318
|
+
# Changing this attribute manually is not advised because it is automatically adjusted
|
319
|
+
# according to the processed operators!
|
320
|
+
#
|
321
|
+
# This attribute can have the following values:
|
322
|
+
#
|
323
|
+
# :none:: No current graphics object, i.e. the processor is at the page description level.
|
324
|
+
# :path:: The current graphics object is a path.
|
325
|
+
# :clipping_path:: The current graphics object is a clipping path.
|
326
|
+
# :text:: The current graphics object is text.
|
327
|
+
#
|
328
|
+
# See: PDF1.7 s8.2
|
329
|
+
attr_accessor :graphics_object
|
330
|
+
|
331
|
+
# Initializes a new processor that uses the resources PDF dictionary for resolving resources
|
332
|
+
# while processing operators.
|
333
|
+
#
|
334
|
+
# It is not mandatory to set the resources dictionary on initialization but it needs to be set
|
335
|
+
# prior to processing operators!
|
336
|
+
def initialize(resources = nil)
  @resources = resources
  # Every processor gets its own copy so that replacing an operator implementation on one
  # processor does not touch the shared default table.
  @operators = Operator::DEFAULT_OPERATORS.dup
  @graphics_state = GraphicsState.new
  # Processing starts without a current graphics object.
  @graphics_object = :none
end
|
342
|
+
|
343
|
+
# Processes the operator with the given operands.
|
344
|
+
#
|
345
|
+
# The operator is first processed with an operator implementation (if any) to ensure correct
|
346
|
+
# operations and then the corresponding method on this object is invoked.
|
347
|
+
def process(operator, operands = [])
  # Step 1: let the registered operator implementation (if there is one) do its work.
  if @operators.key?(operator)
    @operators[operator].invoke(self, *operands)
  end

  # Step 2: forward the operator as a message to this object, but only if the operator is
  # known and a method (of any visibility) with the mapped name exists.
  message = OPERATOR_MESSAGE_NAME_MAP[operator]
  return unless message && respond_to?(message, true)

  send(message, *operands)
end
|
352
|
+
|
353
|
+
protected
|
354
|
+
|
355
|
+
# Provides a default implementation for the 'Do' operator.
|
356
|
+
#
|
357
|
+
# It checks if the XObject is a Form XObject and if so, processes the contents of the Form
|
358
|
+
# XObject.
|
359
|
+
def paint_xobject(name)
  xobject = resources.xobject(name)
  # Only Form XObjects have contents that can be processed.
  return unless xobject[:Subtype] == :Form

  # Remember the current resources so that they can be put back after the form's contents
  # have been processed.
  saved_resources = resources
  graphics_state.save

  begin
    graphics_state.ctm.premultiply(*xobject[:Matrix]) if xobject.key?(:Matrix)
    xobject.process_contents(self)
  ensure
    # Put everything back even if processing the contents raised an error; otherwise the
    # processor would be left with an unbalanced graphics state and the wrong resources.
    graphics_state.restore
    self.resources = saved_resources
  end

  # The return value is the resources dictionary that was active when the method was called.
  saved_resources
end
|
372
|
+
|
373
|
+
# Decodes the given text object and returns it as UTF-8 string.
|
374
|
+
#
|
375
|
+
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
|
376
|
+
# text strings together with positioning information (+TJ+ operator).
|
377
|
+
def decode_text(data)
  if data.kind_of?(Array)
    # Only the string parts carry text; everything else in the array (e.g. the positioning
    # numbers) is skipped. The parts are glued together into one binary string.
    data = data.grep(String).inject(''.b) {|bytes, part| bytes << part }
  end

  active_font = graphics_state.font
  code_points = active_font.decode(data)
  code_points.map {|code_point| active_font.to_utf8(code_point) }.join('')
end
|
384
|
+
|
385
|
+
# Decodes the given text object and returns it as a CompositeBox object.
|
386
|
+
#
|
387
|
+
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
|
388
|
+
# text strings together with positioning information (+TJ+ operator).
|
389
|
+
#
|
390
|
+
# For each glyph a GlyphBox object is computed. For horizontal fonts the width is
|
391
|
+
# predetermined but not the height. The latter is chosen to be the height and offset of the
|
392
|
+
# font's bounding box.
|
393
|
+
def decode_text_with_positioning(data)
  # A plain string (+Tj+) is wrapped so that both operators are handled as an array.
  items = Array(data)
  horizontal = (graphics_state.font.writing_mode == :horizontal)
  horizontal ? decode_horizontal_text(items) : decode_vertical_text(items)
end
|
401
|
+
|
402
|
+
private
|
403
|
+
|
404
|
+
# Decodes the given array containing text and positioning information while assuming that the
|
405
|
+
# writing direction is horizontal.
|
406
|
+
#
|
407
|
+
# See: PDF1.7 s9.4.4
|
408
|
+
def decode_horizontal_text(array)
  state = graphics_state
  font = state.font
  char_spacing = state.scaled_character_spacing
  word_spacing = state.scaled_word_spacing
  font_scale = state.scaled_font_size

  # Vertical extent of every glyph box, taken from the font's bounding box and shifted by the
  # text rise. NOTE(review): presumably the scaled font size already includes the horizontal
  # scaling, which would be why it is divided out here - confirm against GraphicsState.
  descent = font.bounding_box[1] * font_scale / state.scaled_horizontal_scaling + state.text_rise
  ascent = font.bounding_box[3] * font_scale / state.scaled_horizontal_scaling + state.text_rise

  composite = array.each_with_object(CompositeBox.new) do |item, boxes|
    if item.kind_of?(Numeric)
      # A number is a positioning adjustment; it moves the text matrix against the writing
      # direction and produces no glyph box.
      state.tm.translate(-item * font_scale, 0)
      next
    end

    font.decode(item).each do |code_point|
      char = font.to_utf8(code_point)
      glyph_width = font.width(code_point) * font_scale
      # A copy of the CTM is combined with the current text matrix, so the corners below are
      # evaluated through both matrices.
      matrix = state.ctm.dup.premultiply(*state.tm)
      boxes << GlyphBox.new(code_point, char,
                            *matrix.evaluate(0, descent),
                            *matrix.evaluate(glyph_width, descent),
                            *matrix.evaluate(0, ascent))
      # Advance to the origin of the next glyph.
      # NOTE(review): word spacing is keyed on the decoded character being a space, the PDF
      # specification keys it on the single-byte character code 32 - confirm this is intended.
      advance = glyph_width + char_spacing + (char == ' ' ? word_spacing : 0)
      state.tm.translate(advance, 0)
    end
  end

  composite.freeze
end
|
441
|
+
|
442
|
+
# Decodes the given array containing text and positioning information while assuming that the
|
443
|
+
# writing direction is vertical.
|
444
|
+
def decode_vertical_text(_data)
  # There is no implementation for vertical writing yet, so fail loudly with a message that
  # tells the user what exactly is missing instead of raising the bare exception class.
  raise NotImplementedError, "Decoding text in vertical writing mode is not supported"
end
|
447
|
+
|
448
|
+
end
|
449
|
+
|
450
|
+
end
|
451
|
+
end
|