hexapdf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTERS +3 -0
- data/LICENSE +26 -0
- data/README.md +88 -0
- data/Rakefile +121 -0
- data/VERSION +1 -0
- data/agpl-3.0.txt +661 -0
- data/bin/hexapdf +6 -0
- data/data/hexapdf/afm/Courier-Bold.afm +342 -0
- data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
- data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
- data/data/hexapdf/afm/Courier.afm +342 -0
- data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
- data/data/hexapdf/afm/Helvetica.afm +3051 -0
- data/data/hexapdf/afm/MustRead.html +1 -0
- data/data/hexapdf/afm/Symbol.afm +213 -0
- data/data/hexapdf/afm/Times-Bold.afm +2588 -0
- data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
- data/data/hexapdf/afm/Times-Italic.afm +2667 -0
- data/data/hexapdf/afm/Times-Roman.afm +2419 -0
- data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
- data/data/hexapdf/encoding/glyphlist.txt +4305 -0
- data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
- data/examples/arc.rb +50 -0
- data/examples/graphics.rb +274 -0
- data/examples/hello_world.rb +16 -0
- data/examples/machupicchu.jpg +0 -0
- data/examples/merging.rb +24 -0
- data/examples/optimizing.rb +20 -0
- data/examples/show_char_bboxes.rb +55 -0
- data/examples/standard_pdf_fonts.rb +72 -0
- data/examples/truetype.rb +45 -0
- data/lib/hexapdf/cli/extract.rb +128 -0
- data/lib/hexapdf/cli/info.rb +121 -0
- data/lib/hexapdf/cli/inspect.rb +157 -0
- data/lib/hexapdf/cli/modify.rb +218 -0
- data/lib/hexapdf/cli.rb +121 -0
- data/lib/hexapdf/configuration.rb +392 -0
- data/lib/hexapdf/content/canvas.rb +1974 -0
- data/lib/hexapdf/content/color_space.rb +364 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
- data/lib/hexapdf/content/graphic_object.rb +81 -0
- data/lib/hexapdf/content/graphics_state.rb +579 -0
- data/lib/hexapdf/content/operator.rb +1072 -0
- data/lib/hexapdf/content/parser.rb +204 -0
- data/lib/hexapdf/content/processor.rb +451 -0
- data/lib/hexapdf/content/transformation_matrix.rb +172 -0
- data/lib/hexapdf/content.rb +47 -0
- data/lib/hexapdf/data_dir.rb +51 -0
- data/lib/hexapdf/dictionary.rb +303 -0
- data/lib/hexapdf/dictionary_fields.rb +382 -0
- data/lib/hexapdf/document.rb +589 -0
- data/lib/hexapdf/document_utils.rb +209 -0
- data/lib/hexapdf/encryption/aes.rb +206 -0
- data/lib/hexapdf/encryption/arc4.rb +93 -0
- data/lib/hexapdf/encryption/fast_aes.rb +79 -0
- data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
- data/lib/hexapdf/encryption/identity.rb +63 -0
- data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
- data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
- data/lib/hexapdf/encryption/security_handler.rb +494 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
- data/lib/hexapdf/encryption.rb +94 -0
- data/lib/hexapdf/error.rb +73 -0
- data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
- data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
- data/lib/hexapdf/filter/dct_decode.rb +57 -0
- data/lib/hexapdf/filter/encryption.rb +59 -0
- data/lib/hexapdf/filter/flate_decode.rb +93 -0
- data/lib/hexapdf/filter/jpx_decode.rb +56 -0
- data/lib/hexapdf/filter/lzw_decode.rb +191 -0
- data/lib/hexapdf/filter/predictor.rb +266 -0
- data/lib/hexapdf/filter/run_length_decode.rb +108 -0
- data/lib/hexapdf/filter.rb +176 -0
- data/lib/hexapdf/font/cmap/parser.rb +146 -0
- data/lib/hexapdf/font/cmap/writer.rb +176 -0
- data/lib/hexapdf/font/cmap.rb +90 -0
- data/lib/hexapdf/font/encoding/base.rb +77 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
- data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
- data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
- data/lib/hexapdf/font/encoding.rb +68 -0
- data/lib/hexapdf/font/true_type/font.rb +179 -0
- data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
- data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
- data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
- data/lib/hexapdf/font/true_type/table/head.rb +143 -0
- data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
- data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
- data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
- data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
- data/lib/hexapdf/font/true_type/table/name.rb +218 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
- data/lib/hexapdf/font/true_type/table/post.rb +230 -0
- data/lib/hexapdf/font/true_type/table.rb +155 -0
- data/lib/hexapdf/font/true_type.rb +48 -0
- data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
- data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
- data/lib/hexapdf/font/type1/font.rb +123 -0
- data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
- data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
- data/lib/hexapdf/font/type1.rb +52 -0
- data/lib/hexapdf/font/type1_wrapper.rb +193 -0
- data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
- data/lib/hexapdf/font_loader/standard14.rb +98 -0
- data/lib/hexapdf/font_loader.rb +85 -0
- data/lib/hexapdf/font_utils.rb +89 -0
- data/lib/hexapdf/image_loader/jpeg.rb +166 -0
- data/lib/hexapdf/image_loader/pdf.rb +89 -0
- data/lib/hexapdf/image_loader/png.rb +410 -0
- data/lib/hexapdf/image_loader.rb +68 -0
- data/lib/hexapdf/importer.rb +139 -0
- data/lib/hexapdf/name_tree_node.rb +78 -0
- data/lib/hexapdf/number_tree_node.rb +67 -0
- data/lib/hexapdf/object.rb +363 -0
- data/lib/hexapdf/parser.rb +349 -0
- data/lib/hexapdf/rectangle.rb +99 -0
- data/lib/hexapdf/reference.rb +98 -0
- data/lib/hexapdf/revision.rb +206 -0
- data/lib/hexapdf/revisions.rb +194 -0
- data/lib/hexapdf/serializer.rb +326 -0
- data/lib/hexapdf/stream.rb +279 -0
- data/lib/hexapdf/task/dereference.rb +109 -0
- data/lib/hexapdf/task/optimize.rb +230 -0
- data/lib/hexapdf/task.rb +68 -0
- data/lib/hexapdf/tokenizer.rb +406 -0
- data/lib/hexapdf/type/catalog.rb +107 -0
- data/lib/hexapdf/type/embedded_file.rb +87 -0
- data/lib/hexapdf/type/file_specification.rb +232 -0
- data/lib/hexapdf/type/font.rb +81 -0
- data/lib/hexapdf/type/font_descriptor.rb +109 -0
- data/lib/hexapdf/type/font_simple.rb +190 -0
- data/lib/hexapdf/type/font_true_type.rb +47 -0
- data/lib/hexapdf/type/font_type1.rb +162 -0
- data/lib/hexapdf/type/form.rb +103 -0
- data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
- data/lib/hexapdf/type/image.rb +73 -0
- data/lib/hexapdf/type/info.rb +70 -0
- data/lib/hexapdf/type/names.rb +69 -0
- data/lib/hexapdf/type/object_stream.rb +224 -0
- data/lib/hexapdf/type/page.rb +355 -0
- data/lib/hexapdf/type/page_tree_node.rb +269 -0
- data/lib/hexapdf/type/resources.rb +212 -0
- data/lib/hexapdf/type/trailer.rb +128 -0
- data/lib/hexapdf/type/viewer_preferences.rb +73 -0
- data/lib/hexapdf/type/xref_stream.rb +204 -0
- data/lib/hexapdf/type.rb +67 -0
- data/lib/hexapdf/utils/bit_field.rb +87 -0
- data/lib/hexapdf/utils/bit_stream.rb +148 -0
- data/lib/hexapdf/utils/lru_cache.rb +65 -0
- data/lib/hexapdf/utils/math_helpers.rb +55 -0
- data/lib/hexapdf/utils/object_hash.rb +130 -0
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
- data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
- data/lib/hexapdf/version.rb +39 -0
- data/lib/hexapdf/writer.rb +199 -0
- data/lib/hexapdf/xref_section.rb +152 -0
- data/lib/hexapdf.rb +34 -0
- data/man/man1/hexapdf.1 +249 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
- data/test/data/fonts/Ubuntu-Title.ttf +0 -0
- data/test/data/images/cmyk.jpg +0 -0
- data/test/data/images/fillbytes.jpg +0 -0
- data/test/data/images/gray.jpg +0 -0
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-4bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/greyscale-alpha-8bit.png +0 -0
- data/test/data/images/greyscale-trns-8bit.png +0 -0
- data/test/data/images/greyscale-with-gamma1.0.png +0 -0
- data/test/data/images/greyscale-with-gamma1.5.png +0 -0
- data/test/data/images/indexed-1bit.png +0 -0
- data/test/data/images/indexed-2bit.png +0 -0
- data/test/data/images/indexed-4bit.png +0 -0
- data/test/data/images/indexed-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/indexed-alpha-8bit.png +0 -0
- data/test/data/images/rgb.jpg +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/data/images/truecolour-alpha-8bit.png +0 -0
- data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
- data/test/data/images/truecolour-srgb-8bit.png +0 -0
- data/test/data/minimal.pdf +44 -0
- data/test/data/standard-security-handler/README +9 -0
- data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
- data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
- data/test/hexapdf/common_tokenizer_tests.rb +204 -0
- data/test/hexapdf/content/common.rb +31 -0
- data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
- data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
- data/test/hexapdf/content/test_canvas.rb +1113 -0
- data/test/hexapdf/content/test_color_space.rb +97 -0
- data/test/hexapdf/content/test_graphics_state.rb +138 -0
- data/test/hexapdf/content/test_operator.rb +619 -0
- data/test/hexapdf/content/test_parser.rb +66 -0
- data/test/hexapdf/content/test_processor.rb +156 -0
- data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
- data/test/hexapdf/encryption/common.rb +87 -0
- data/test/hexapdf/encryption/test_aes.rb +121 -0
- data/test/hexapdf/encryption/test_arc4.rb +39 -0
- data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
- data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
- data/test/hexapdf/encryption/test_identity.rb +21 -0
- data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
- data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
- data/test/hexapdf/encryption/test_security_handler.rb +356 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
- data/test/hexapdf/filter/common.rb +53 -0
- data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
- data/test/hexapdf/filter/test_encryption.rb +24 -0
- data/test/hexapdf/filter/test_flate_decode.rb +35 -0
- data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
- data/test/hexapdf/filter/test_predictor.rb +183 -0
- data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
- data/test/hexapdf/font/cmap/test_parser.rb +67 -0
- data/test/hexapdf/font/cmap/test_writer.rb +58 -0
- data/test/hexapdf/font/encoding/test_base.rb +35 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
- data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
- data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
- data/test/hexapdf/font/test_encoding.rb +27 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
- data/test/hexapdf/font/true_type/common.rb +19 -0
- data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
- data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
- data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
- data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
- data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
- data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
- data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
- data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
- data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
- data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
- data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
- data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
- data/test/hexapdf/font/true_type/test_font.rb +120 -0
- data/test/hexapdf/font/true_type/test_table.rb +41 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
- data/test/hexapdf/font/type1/test_font.rb +68 -0
- data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
- data/test/hexapdf/font_loader/test_standard14.rb +22 -0
- data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
- data/test/hexapdf/image_loader/test_pdf.rb +47 -0
- data/test/hexapdf/image_loader/test_png.rb +258 -0
- data/test/hexapdf/task/test_dereference.rb +46 -0
- data/test/hexapdf/task/test_optimize.rb +137 -0
- data/test/hexapdf/test_configuration.rb +82 -0
- data/test/hexapdf/test_data_dir.rb +32 -0
- data/test/hexapdf/test_dictionary.rb +284 -0
- data/test/hexapdf/test_dictionary_fields.rb +185 -0
- data/test/hexapdf/test_document.rb +574 -0
- data/test/hexapdf/test_document_utils.rb +144 -0
- data/test/hexapdf/test_filter.rb +96 -0
- data/test/hexapdf/test_font_utils.rb +47 -0
- data/test/hexapdf/test_importer.rb +78 -0
- data/test/hexapdf/test_object.rb +177 -0
- data/test/hexapdf/test_parser.rb +394 -0
- data/test/hexapdf/test_rectangle.rb +36 -0
- data/test/hexapdf/test_reference.rb +41 -0
- data/test/hexapdf/test_revision.rb +139 -0
- data/test/hexapdf/test_revisions.rb +93 -0
- data/test/hexapdf/test_serializer.rb +169 -0
- data/test/hexapdf/test_stream.rb +262 -0
- data/test/hexapdf/test_tokenizer.rb +30 -0
- data/test/hexapdf/test_writer.rb +120 -0
- data/test/hexapdf/test_xref_section.rb +35 -0
- data/test/hexapdf/type/test_catalog.rb +30 -0
- data/test/hexapdf/type/test_embedded_file.rb +16 -0
- data/test/hexapdf/type/test_file_specification.rb +148 -0
- data/test/hexapdf/type/test_font.rb +35 -0
- data/test/hexapdf/type/test_font_descriptor.rb +51 -0
- data/test/hexapdf/type/test_font_simple.rb +190 -0
- data/test/hexapdf/type/test_font_type1.rb +128 -0
- data/test/hexapdf/type/test_form.rb +60 -0
- data/test/hexapdf/type/test_info.rb +14 -0
- data/test/hexapdf/type/test_names.rb +9 -0
- data/test/hexapdf/type/test_object_stream.rb +84 -0
- data/test/hexapdf/type/test_page.rb +260 -0
- data/test/hexapdf/type/test_page_tree_node.rb +255 -0
- data/test/hexapdf/type/test_resources.rb +167 -0
- data/test/hexapdf/type/test_trailer.rb +109 -0
- data/test/hexapdf/type/test_xref_stream.rb +131 -0
- data/test/hexapdf/utils/test_bit_field.rb +47 -0
- data/test/hexapdf/utils/test_lru_cache.rb +22 -0
- data/test/hexapdf/utils/test_object_hash.rb +115 -0
- data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
- data/test/test_helper.rb +56 -0
- metadata +427 -0
@@ -0,0 +1,204 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'stringio'
|
35
|
+
require 'hexapdf/tokenizer'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
module Content
|
39
|
+
|
40
|
+
# More efficient tokenizer for content streams. This tokenizer class works directly on a
|
41
|
+
# string and not on an IO.
|
42
|
+
#
|
43
|
+
# Note: Indirect object references are *not* supported by this tokenizer!
|
44
|
+
#
|
45
|
+
# See: PDF1.7 s7.2
|
46
|
+
class Tokenizer < HexaPDF::Tokenizer #:nodoc:

  # Creates a new tokenizer that scans the given content stream +string+ directly.
  #
  # Note that +super+ is not called; the only state set up here is the StringScanner.
  def initialize(string)
    @ss = StringScanner.new(string)
  end

  # Returns the current byte position of the scanner.
  #
  # See: HexaPDF::Tokenizer#pos
  def pos
    @ss.pos
  end

  # Sets the byte position of the scanner to +pos+.
  #
  # See: HexaPDF::Tokenizer#pos=
  def pos=(pos)
    @ss.pos = pos
  end

  # Scans forward until +re+ matches and returns the scanned string (including the match), or
  # +nil+ if +re+ does not match anywhere after the current position.
  #
  # See: HexaPDF::Tokenizer#scan_until
  def scan_until(re)
    @ss.scan_until(re)
  end

  # Skips leading whitespace and returns the next token, dispatching on the first byte of the
  # token. Returns NO_MORE_TOKENS when the end of the string is reached.
  #
  # Raises HexaPDF::MalformedPDFError if a single '>' that is not part of '>>' is found.
  #
  # See: HexaPDF::Tokenizer#next_token
  def next_token
    @ss.skip(WHITESPACE_MULTI_RE)
    case (@ss.eos? ? -1 : @ss.string.getbyte(@ss.pos))
    when 43, 45, 46, 48..57 # + - . 0..9
      parse_number
    when 65..90, 97..122 # A..Z a..z (fast path; all other bytes end up in the else branch)
      parse_keyword
    when 47 # /
      parse_name
    when 40 # (
      parse_literal_string
    when 60 # <
      # A second '<' means dictionary start, otherwise this is a hex string
      if @ss.string.getbyte(@ss.pos + 1) != 60
        parse_hex_string
      else
        @ss.pos += 2
        TOKEN_DICT_START
      end
    when 62 # >
      unless @ss.string.getbyte(@ss.pos + 1) == 62
        raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
      end
      @ss.pos += 2
      TOKEN_DICT_END
    when 91 # [
      @ss.pos += 1
      TOKEN_ARRAY_START
    when 93 # ]
      @ss.pos += 1
      TOKEN_ARRAY_END
    when 123, 125 # { }
      Token.new(@ss.get_byte)
    when 37 # %
      # Skip the comment up to (but not including) the end-of-line marker and try again
      return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
      next_token
    when -1
      NO_MORE_TOKENS
    else
      parse_keyword
    end
  end

  private

  # Parses the number at the current position and returns an Integer or a Float.
  #
  # If the characters at the current position only look like the start of a number (e.g. a
  # lone '+', '-' or '.'), they are parsed as keyword instead of failing on a +nil+ scan
  # result.
  #
  # See: HexaPDF::Tokenizer#parse_number
  def parse_number
    if (val = @ss.scan(/[+-]?\d++(?!\.)/))
      val.to_i
    elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
      # Float() does not accept a trailing dot, so "12." is turned into "12.0"
      val << '0'.freeze if val.getbyte(-1) == 46 # dot '.'
      Float(val)
    else
      parse_keyword
    end
  end

  # Stub implementation to prevent errors for not-overridden methods.
  def prepare_string_scanner(*)
  end

end
|
129
|
+
|
130
|
+
|
131
|
+
# This class knows how to correctly parse a content stream.
|
132
|
+
#
|
133
|
+
# == Overview
|
134
|
+
#
|
135
|
+
# A content stream is mostly just a stream of PDF objects. However, there is one exception:
|
136
|
+
# inline images.
|
137
|
+
#
|
138
|
+
# Since inline images don't follow the normal PDF object parsing rules, they need to be
|
139
|
+
# handled specially and this is the reason for this class. Therefore only the BI operator is
|
140
|
+
# ever called for inline images because the ID and EI operators are handled by the parser.
|
141
|
+
#
|
142
|
+
# To parse some contents the #parse method needs to be called with the contents to be parsed
|
143
|
+
# and a Processor object which is used for processing the parsed operators.
|
144
|
+
class Parser

  # Creates a new Parser object and calls #parse.
  def self.parse(contents, processor)
    new.parse(contents, processor)
  end

  # Parses the contents and calls the processor object for each parsed operator.
  #
  # Operands are collected until an operator token is found; then +processor.process+ is
  # called with the operator name (as Symbol) and the operands array. The same array is
  # cleared right after the call, so the processor has to copy it if it wants to keep the
  # operands.
  #
  # For the 'BI' operator the operands are the inline image dictionary and the image data.
  def parse(contents, processor)
    tokenizer = Tokenizer.new(contents)
    params = []
    while (obj = tokenizer.next_object(allow_keyword: true)) != Tokenizer::NO_MORE_TOKENS
      if obj.kind_of?(Tokenizer::Token)
        if obj == 'BI'.freeze
          params = parse_inline_image(tokenizer)
        end
        processor.process(obj.to_sym, params)
        params.clear
      else
        params << obj
      end
    end
  end

  private

  # Parses the inline image at the current position and returns the array
  # [image dictionary, image data].
  #
  # Raises HexaPDF::Error if the stream ends prematurely, if a dictionary key is not a PDF
  # name object or if the EI operator cannot be found.
  def parse_inline_image(tokenizer)
    # BI has already been read, so read the image dictionary
    dict = {}
    while (key = tokenizer.next_object(allow_keyword: true))
      if key == 'ID'.freeze
        break
      elsif key == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary key for inline image"
      elsif !key.kind_of?(Symbol)
        raise HexaPDF::Error, "Inline image dictionary keys must be PDF name objects"
      end
      value = tokenizer.next_object
      if value == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary value for inline image"
      end
      dict[key] = value
    end

    # one whitespace character after ID
    tokenizer.next_byte

    # find the EI operator which is either followed by whitespace or is the last token of
    # the content stream
    data = tokenizer.scan_until(/(?=EI(?:[#{Tokenizer::WHITESPACE}]|\z))/o)
    if data.nil?
      raise HexaPDF::Error, "End inline image marker EI not found"
    end

    # The tokenizer is now positioned directly at EI; consume it together with the single
    # whitespace character following it, if there is one (there is none at the end of the
    # stream and moving the position past the end is not allowed).
    tokenizer.scan_until(/EI[#{Tokenizer::WHITESPACE}]?/o)
    [dict, data]
  end

end
|
202
|
+
|
203
|
+
end
|
204
|
+
end
|
@@ -0,0 +1,451 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/content/operator'
|
35
|
+
require 'hexapdf/content/graphics_state'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
module Content
|
39
|
+
|
40
|
+
# This class is used for processing content operators extracted from a content stream.
|
41
|
+
#
|
42
|
+
# == General Information
|
43
|
+
#
|
44
|
+
# When a content stream is read, operators and their operands are extracted. After extraction,
|
45
|
+
# these operators are normally processed with a Processor instance that ensures that the needed
|
46
|
+
# setup (like modifying the graphics state) is done before further processing.
|
47
|
+
#
|
48
|
+
# == How Processing Works
|
49
|
+
#
|
50
|
+
# The operator implementations (see the Operator module) are called first and they ensure that
|
51
|
+
# the processing state is consistent. For example, operators that modify the graphics state do
|
52
|
+
# actually modify the #graphics_state object. However, operator implementations are *only* used
|
53
|
+
# for this task and not more, so they are very specific and normally don't need to be changed.
|
54
|
+
#
|
55
|
+
# After that methods corresponding to the operator names are invoked on the processor object (if
|
56
|
+
# they exist). Each PDF operator name is mapped to a nicer message name via the
|
57
|
+
# OPERATOR_MESSAGE_NAME_MAP constant. For example, the operator 'q' is mapped to
|
58
|
+
# 'save_graphics_state'.
|
59
|
+
#
|
60
|
+
# The task of these methods is to do something useful with the content itself, it doesn't need
|
61
|
+
# to concern itself with ensuring the consistency of the processing state. For example, the
|
62
|
+
# processor could use the processing state to extract the text. Or paint the content on a
|
63
|
+
# canvas.
|
64
|
+
#
|
65
|
+
# For inline images only the 'BI' operator mapped to 'inline_image' is used. Although also the
|
66
|
+
# operators 'ID' and 'EI' exist for inline images, they are not used because they are consumed
|
67
|
+
# while parsing inline images and do not reflect separate operators.
|
68
|
+
#
|
69
|
+
# == Text Processing
|
70
|
+
#
|
71
|
+
# Two utility methods #decode_text and #decode_text_with_positioning for extracting text are
|
72
|
+
# provided. Both can directly be invoked from the 'show_text' and 'show_text_with_positioning'
|
73
|
+
# methods.
|
74
|
+
#
|
75
|
+
class Processor
|
76
|
+
|
77
|
+
# Represents an (immutable) glyph box with positioning information.
|
78
|
+
#
|
79
|
+
# Since the glyph may have been transformed by an affine matrix, the bounding box may not be a
|
80
|
+
# rectangle in all cases but it is always a parallelogram.
|
81
|
+
class GlyphBox

  # The code point representing the glyph.
  attr_reader :code_point

  # The Unicode value of the code point.
  attr_reader :string

  # Creates a new glyph box for the given code point/Unicode value pair.
  #
  # The box is described by three corners: lower left [llx, lly], lower right [lrx, lry] and
  # upper left [ulx, uly]. The given string as well as the new glyph box itself are frozen.
  def initialize(code_point, string, llx, lly, lrx, lry, ulx, uly)
    @code_point = code_point
    @string = string.freeze
    @llx, @lly = llx, lly
    @lrx, @lry = lrx, lry
    @ulx, @uly = ulx, uly
    freeze
  end

  # :call-seq:
  #   glyph_box.lower_left    -> [llx, lly]
  #
  # Returns the lower left corner as a new two-element array.
  def lower_left
    [@llx, @lly]
  end

  # :call-seq:
  #   glyph_box.lower_right    -> [lrx, lry]
  #
  # Returns the lower right corner as a new two-element array.
  def lower_right
    [@lrx, @lry]
  end

  # :call-seq:
  #   glyph_box.upper_left    -> [ulx, uly]
  #
  # Returns the upper left corner as a new two-element array.
  def upper_left
    [@ulx, @uly]
  end

  # :call-seq:
  #   glyph_box.upper_right    -> [urx, ury]
  #
  # Returns the upper right corner. It is not stored but derived by adding the vector of the
  # bottom edge (lower left to lower right) to the upper left corner.
  def upper_right
    bottom_dx = @lrx - @llx
    bottom_dy = @lry - @lly
    [@ulx + bottom_dx, @uly + bottom_dy]
  end

  # :call-seq:
  #   glyph_box.points    -> [llx, lly, lrx, lry, urx, ury, ulx, uly]
  #
  # Returns the coordinates of all four corners as one flat array, starting with the lower
  # left corner and going counterclockwise.
  def points
    [*lower_left, *lower_right, *upper_right, *upper_left]
  end

end
|
147
|
+
|
148
|
+
|
149
|
+
# Represents a box composed of GlyphBox objects.
#
# The bounding box methods #lower_left, #lower_right, #upper_left, #upper_right are computed
# by just using the first and last boxes, assuming the boxes are arranged from left to right
# in a straight line. As long as no box has been added, these methods return +nil+.
#
# Enumerable is mixed in on top of #each, so the contained boxes can be mapped, selected, etc.
class CompositeBox

  include Enumerable

  # The glyph boxes contained in this composite box, in insertion order.
  attr_reader :boxes

  # Creates an empty object.
  def initialize
    @boxes = []
  end

  # Appends the given glyph box and returns +self+ so that calls can be chained.
  def <<(glyph_box)
    @boxes << glyph_box
    self
  end

  # Returns the glyph box at the given index, or +nil+ if the index is out of range.
  def [](index)
    @boxes[index]
  end

  # :call-seq:
  #   composite.each {|glyph_box| block}    -> composite
  #   composite.each                        -> Enumerator
  #
  # Iterates over all contained glyph boxes.
  def each(&block)
    return to_enum(__method__) unless block_given?
    @boxes.each(&block)
    self
  end

  # Returns the concatenated text of the boxes (an empty string if there are no boxes).
  def string
    @boxes.map(&:string).join('')
  end

  # :call-seq:
  #   composite.lower_left    -> [llx, lly] or nil
  #
  # Returns the lower left coordinate of the first box, or +nil+ if there are no boxes.
  def lower_left
    @boxes.first.lower_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.lower_right    -> [lrx, lry] or nil
  #
  # Returns the lower right coordinate of the last box, or +nil+ if there are no boxes.
  def lower_right
    @boxes.last.lower_right unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_left    -> [ulx, uly] or nil
  #
  # Returns the upper left coordinate of the first box, or +nil+ if there are no boxes.
  def upper_left
    @boxes.first.upper_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_right    -> [urx, ury] or nil
  #
  # Returns the upper right coordinate of the last box, or +nil+ if there are no boxes.
  def upper_right
    @boxes.last.upper_right unless @boxes.empty?
  end

end
|
224
|
+
|
225
|
+
# Mapping of PDF operator names to message names that are sent to renderer implementations.
#
# Several operators intentionally share one message name (e.g. +SC+/+SCN+ and +f+/+F+). The
# hash is frozen because it is a shared constant that is only meant to be read.
OPERATOR_MESSAGE_NAME_MAP = {
  q: :save_graphics_state,
  Q: :restore_graphics_state,
  cm: :concatenate_matrix,
  w: :set_line_width,
  J: :set_line_cap_style,
  j: :set_line_join_style,
  M: :set_miter_limit,
  d: :set_line_dash_pattern,
  ri: :set_rendering_intent,
  i: :set_flatness_tolerance,
  gs: :set_graphics_state_parameters,
  CS: :set_stroking_color_space,
  cs: :set_non_stroking_color_space,
  SC: :set_stroking_color,
  SCN: :set_stroking_color,
  sc: :set_non_stroking_color,
  scn: :set_non_stroking_color,
  G: :set_device_gray_stroking_color,
  g: :set_device_gray_non_stroking_color,
  RG: :set_device_rgb_stroking_color,
  rg: :set_device_rgb_non_stroking_color,
  K: :set_device_cmyk_stroking_color,
  k: :set_device_cmyk_non_stroking_color,
  m: :move_to,
  l: :line_to,
  c: :curve_to,
  v: :curve_to_no_first_control_point,
  y: :curve_to_no_second_control_point,
  h: :close_subpath,
  re: :append_rectangle,
  S: :stroke_path,
  s: :close_and_stroke_path,
  f: :fill_path_non_zero,
  F: :fill_path_non_zero,
  :'f*' => :fill_path_even_odd,
  B: :fill_and_stroke_path_non_zero,
  :'B*' => :fill_and_stroke_path_even_odd,
  b: :close_fill_and_stroke_path_non_zero,
  :'b*' => :close_fill_and_stroke_path_even_odd,
  n: :end_path,
  W: :clip_path_non_zero,
  :'W*' => :clip_path_even_odd,
  BT: :begin_text,
  ET: :end_text,
  Tc: :set_character_spacing,
  Tw: :set_word_spacing,
  Tz: :set_horizontal_scaling,
  TL: :set_leading,
  Tf: :set_font_and_size,
  Tr: :set_text_rendering_mode,
  Ts: :set_text_rise,
  Td: :move_text,
  TD: :move_text_and_set_leading,
  Tm: :set_text_matrix,
  :'T*' => :move_text_next_line,
  Tj: :show_text,
  :"'" => :move_text_next_line_and_show_text,
  :'"' => :set_spacing_move_text_next_line_and_show_text,
  TJ: :show_text_with_positioning,
  d0: :set_glyph_width, # only for Type 3 fonts
  d1: :set_glyph_width_and_bounding_box, # only for Type 3 fonts
  sh: :paint_shading,
  BI: :inline_image, # ID and EI are not sent because the complete image has been read
  Do: :paint_xobject,
  MP: :designate_marked_content_point,
  DP: :designate_marked_content_point_with_property_list,
  BMC: :begin_marked_content,
  BDC: :begin_marked_content_with_property_list,
  EMC: :end_marked_content,
  BX: :begin_compatibility_section,
  EX: :end_compatibility_section,
}.freeze
|
299
|
+
|
300
|
+
# Mapping from operator name (Symbol) to a callable object.
|
301
|
+
#
|
302
|
+
# This hash is prepopulated with the default operator implementations (see
|
303
|
+
# Operator::DEFAULT_OPERATORS). If a default operator implementation is not satisfactory, it
|
304
|
+
# can easily be changed by modifying this hash.
|
305
|
+
attr_reader :operators
|
306
|
+
|
307
|
+
# The resources dictionary used during processing.
|
308
|
+
attr_accessor :resources
|
309
|
+
|
310
|
+
# The GraphicsState object containing the current graphics state.
|
311
|
+
#
|
312
|
+
# It is not advised to change this attribute manually because it is automatically adjusted
|
313
|
+
# according to the processed operators!
|
314
|
+
attr_reader :graphics_state
|
315
|
+
|
316
|
+
# The current graphics object.
|
317
|
+
#
|
318
|
+
# It is not advised to change this attribute manually because it is automatically adjusted
|
319
|
+
# according to the processed operators!
|
320
|
+
#
|
321
|
+
# This attribute can have the following values:
|
322
|
+
#
|
323
|
+
# :none:: No current graphics object, i.e. the processor is at the page description level.
|
324
|
+
# :path:: The current graphics object is a path.
|
325
|
+
# :clipping_path:: The current graphics object is a clipping path.
|
326
|
+
# :text:: The current graphics object is text.
|
327
|
+
#
|
328
|
+
# See: PDF1.7 s8.2
|
329
|
+
attr_accessor :graphics_object
|
330
|
+
|
331
|
+
# Creates a processor that resolves resources through the given resources PDF dictionary
# while processing operators.
#
# The resources dictionary may be omitted here (it defaults to +nil+), but it has to be
# assigned via #resources= before any operator is processed.
#
# The operator table starts out as a copy of Operator::DEFAULT_OPERATORS, the graphics state
# is a fresh GraphicsState object and the current graphics object is +:none+.
def initialize(resources = nil)
  @resources = resources
  @operators = Operator::DEFAULT_OPERATORS.dup
  @graphics_state = GraphicsState.new
  @graphics_object = :none
end
|
342
|
+
|
343
|
+
# Processes the operator with the given operands.
#
# If an implementation is registered for the operator in #operators, it is invoked first with
# this processor and the operands. Afterwards the message name for the operator is looked up
# in OPERATOR_MESSAGE_NAME_MAP and, if this object responds to it (private and protected
# methods included), that method is called with the operands.
#
# Returns the result of the invoked method, or +nil+ if no method was invoked.
def process(operator, operands = [])
  if @operators.key?(operator)
    implementation = @operators[operator]
    implementation.invoke(self, *operands)
  end

  message = OPERATOR_MESSAGE_NAME_MAP[operator]
  return unless message && respond_to?(message, true)
  send(message, *operands)
end
|
352
|
+
|
353
|
+
protected
|
354
|
+
|
355
|
+
# Provides a default implementation for the 'Do' operator.
#
# It checks if the XObject is a Form XObject and if so, processes the contents of the Form
# XObject. Nothing is done (and +nil+ is returned) for any other XObject subtype.
#
# The graphics state is saved before and restored after processing the form contents, and the
# resources that were active on entry are reassigned afterwards. Both restoration steps run in
# an +ensure+ clause so that the processor is left in its previous state even if processing
# the form contents raises an error.
#
# Returns the resources that were active when the method was called.
def paint_xobject(name)
  xobject = resources.xobject(name)
  return unless xobject[:Subtype] == :Form

  saved_resources = resources
  graphics_state.save
  begin
    graphics_state.ctm.premultiply(*xobject[:Matrix]) if xobject.key?(:Matrix)
    xobject.process_contents(self)
  ensure
    # Always undo the state changes, otherwise a failing form would leak its graphics state
    # and resources into everything processed afterwards.
    graphics_state.restore
    self.resources = saved_resources
  end
  saved_resources
end
|
372
|
+
|
373
|
+
# Decodes the given text object and returns it as UTF-8 string.
#
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
# text strings together with positioning information (+TJ+ operator). For an array, only the
# String items are used; they are concatenated and everything else is ignored.
#
# The current font of the graphics state decodes the data into code points and maps each code
# point to its string representation.
def decode_text(data)
  if data.kind_of?(Array)
    buffer = ''.b
    data.each {|item| buffer << item if item.kind_of?(String)}
    data = buffer
  end

  current_font = graphics_state.font
  code_points = current_font.decode(data)
  code_points.map {|code_point| current_font.to_utf8(code_point)}.join('')
end
|
384
|
+
|
385
|
+
# Decodes the given text object and returns it as a CompositeBox object.
#
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
# text strings together with positioning information (+TJ+ operator); it is always converted
# to an array before being decoded.
#
# For each glyph a GlyphBox object is computed. For horizontal fonts the width is
# predetermined but not the height. The latter is chosen to be the height and offset of the
# font's bounding box.
#
# Fonts whose writing mode is not +:horizontal+ are handed to the vertical text decoder.
def decode_text_with_positioning(data)
  items = Array(data)
  horizontal = (graphics_state.font.writing_mode == :horizontal)
  horizontal ? decode_horizontal_text(items) : decode_vertical_text(items)
end
|
401
|
+
|
402
|
+
private
|
403
|
+
|
404
|
+
# Decodes the given array containing text and positioning information while assuming that the
# writing direction is horizontal.
#
# Numeric items move the text matrix horizontally by the negated value multiplied with the
# scaled font size. String items are decoded with the current font; for every code point a
# GlyphBox is created from the combined current transformation and text matrices, and the text
# matrix is then advanced by the glyph width plus the scaled character spacing.
#
# The scaled word spacing is added after character code 32 only. It is tied to the character
# code and not to the Unicode mapping of the glyph, because the font's encoding may map other
# codes to a space or code 32 to a different character.
#
# Note that the text matrix of the graphics state is modified while decoding.
#
# Returns the frozen CompositeBox containing all glyph boxes.
#
# See: PDF1.7 s9.4.4, s9.3.3
def decode_horizontal_text(array)
  font = graphics_state.font
  scaled_char_space = graphics_state.scaled_character_spacing
  scaled_word_space = graphics_state.scaled_word_spacing
  scaled_font_size = graphics_state.scaled_font_size

  # Vertical extent of every glyph box, taken from the font's bounding box.
  below_baseline = font.bounding_box[1] * scaled_font_size / \
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise
  above_baseline = font.bounding_box[3] * scaled_font_size / \
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise

  text = CompositeBox.new
  array.each do |item|
    if item.kind_of?(Numeric)
      graphics_state.tm.translate(-item * scaled_font_size, 0)
    else
      font.decode(item).each do |code_point|
        char = font.to_utf8(code_point)
        width = font.width(code_point) * scaled_font_size
        matrix = graphics_state.ctm.dup.premultiply(*graphics_state.tm)
        fragment = GlyphBox.new(code_point, char,
                                *matrix.evaluate(0, below_baseline),
                                *matrix.evaluate(width, below_baseline),
                                *matrix.evaluate(0, above_baseline))
        text << fragment
        graphics_state.tm.translate(width + scaled_char_space + \
                                    (code_point == 32 ? scaled_word_space : 0), 0)
      end
    end
  end

  text.freeze
end
|
441
|
+
|
442
|
+
# Decodes the given array containing text and positioning information while assuming that the
# writing direction is vertical.
#
# Vertical writing is not implemented, so this method always raises NotImplementedError.
def decode_vertical_text(_data)
  raise NotImplementedError, "Decoding text with a vertical writing mode is not supported"
end
|
447
|
+
|
448
|
+
end
|
449
|
+
|
450
|
+
end
|
451
|
+
end
|