hexapdf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTERS +3 -0
- data/LICENSE +26 -0
- data/README.md +88 -0
- data/Rakefile +121 -0
- data/VERSION +1 -0
- data/agpl-3.0.txt +661 -0
- data/bin/hexapdf +6 -0
- data/data/hexapdf/afm/Courier-Bold.afm +342 -0
- data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
- data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
- data/data/hexapdf/afm/Courier.afm +342 -0
- data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
- data/data/hexapdf/afm/Helvetica.afm +3051 -0
- data/data/hexapdf/afm/MustRead.html +1 -0
- data/data/hexapdf/afm/Symbol.afm +213 -0
- data/data/hexapdf/afm/Times-Bold.afm +2588 -0
- data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
- data/data/hexapdf/afm/Times-Italic.afm +2667 -0
- data/data/hexapdf/afm/Times-Roman.afm +2419 -0
- data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
- data/data/hexapdf/encoding/glyphlist.txt +4305 -0
- data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
- data/examples/arc.rb +50 -0
- data/examples/graphics.rb +274 -0
- data/examples/hello_world.rb +16 -0
- data/examples/machupicchu.jpg +0 -0
- data/examples/merging.rb +24 -0
- data/examples/optimizing.rb +20 -0
- data/examples/show_char_bboxes.rb +55 -0
- data/examples/standard_pdf_fonts.rb +72 -0
- data/examples/truetype.rb +45 -0
- data/lib/hexapdf/cli/extract.rb +128 -0
- data/lib/hexapdf/cli/info.rb +121 -0
- data/lib/hexapdf/cli/inspect.rb +157 -0
- data/lib/hexapdf/cli/modify.rb +218 -0
- data/lib/hexapdf/cli.rb +121 -0
- data/lib/hexapdf/configuration.rb +392 -0
- data/lib/hexapdf/content/canvas.rb +1974 -0
- data/lib/hexapdf/content/color_space.rb +364 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
- data/lib/hexapdf/content/graphic_object.rb +81 -0
- data/lib/hexapdf/content/graphics_state.rb +579 -0
- data/lib/hexapdf/content/operator.rb +1072 -0
- data/lib/hexapdf/content/parser.rb +204 -0
- data/lib/hexapdf/content/processor.rb +451 -0
- data/lib/hexapdf/content/transformation_matrix.rb +172 -0
- data/lib/hexapdf/content.rb +47 -0
- data/lib/hexapdf/data_dir.rb +51 -0
- data/lib/hexapdf/dictionary.rb +303 -0
- data/lib/hexapdf/dictionary_fields.rb +382 -0
- data/lib/hexapdf/document.rb +589 -0
- data/lib/hexapdf/document_utils.rb +209 -0
- data/lib/hexapdf/encryption/aes.rb +206 -0
- data/lib/hexapdf/encryption/arc4.rb +93 -0
- data/lib/hexapdf/encryption/fast_aes.rb +79 -0
- data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
- data/lib/hexapdf/encryption/identity.rb +63 -0
- data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
- data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
- data/lib/hexapdf/encryption/security_handler.rb +494 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
- data/lib/hexapdf/encryption.rb +94 -0
- data/lib/hexapdf/error.rb +73 -0
- data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
- data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
- data/lib/hexapdf/filter/dct_decode.rb +57 -0
- data/lib/hexapdf/filter/encryption.rb +59 -0
- data/lib/hexapdf/filter/flate_decode.rb +93 -0
- data/lib/hexapdf/filter/jpx_decode.rb +56 -0
- data/lib/hexapdf/filter/lzw_decode.rb +191 -0
- data/lib/hexapdf/filter/predictor.rb +266 -0
- data/lib/hexapdf/filter/run_length_decode.rb +108 -0
- data/lib/hexapdf/filter.rb +176 -0
- data/lib/hexapdf/font/cmap/parser.rb +146 -0
- data/lib/hexapdf/font/cmap/writer.rb +176 -0
- data/lib/hexapdf/font/cmap.rb +90 -0
- data/lib/hexapdf/font/encoding/base.rb +77 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
- data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
- data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
- data/lib/hexapdf/font/encoding.rb +68 -0
- data/lib/hexapdf/font/true_type/font.rb +179 -0
- data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
- data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
- data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
- data/lib/hexapdf/font/true_type/table/head.rb +143 -0
- data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
- data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
- data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
- data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
- data/lib/hexapdf/font/true_type/table/name.rb +218 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
- data/lib/hexapdf/font/true_type/table/post.rb +230 -0
- data/lib/hexapdf/font/true_type/table.rb +155 -0
- data/lib/hexapdf/font/true_type.rb +48 -0
- data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
- data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
- data/lib/hexapdf/font/type1/font.rb +123 -0
- data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
- data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
- data/lib/hexapdf/font/type1.rb +52 -0
- data/lib/hexapdf/font/type1_wrapper.rb +193 -0
- data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
- data/lib/hexapdf/font_loader/standard14.rb +98 -0
- data/lib/hexapdf/font_loader.rb +85 -0
- data/lib/hexapdf/font_utils.rb +89 -0
- data/lib/hexapdf/image_loader/jpeg.rb +166 -0
- data/lib/hexapdf/image_loader/pdf.rb +89 -0
- data/lib/hexapdf/image_loader/png.rb +410 -0
- data/lib/hexapdf/image_loader.rb +68 -0
- data/lib/hexapdf/importer.rb +139 -0
- data/lib/hexapdf/name_tree_node.rb +78 -0
- data/lib/hexapdf/number_tree_node.rb +67 -0
- data/lib/hexapdf/object.rb +363 -0
- data/lib/hexapdf/parser.rb +349 -0
- data/lib/hexapdf/rectangle.rb +99 -0
- data/lib/hexapdf/reference.rb +98 -0
- data/lib/hexapdf/revision.rb +206 -0
- data/lib/hexapdf/revisions.rb +194 -0
- data/lib/hexapdf/serializer.rb +326 -0
- data/lib/hexapdf/stream.rb +279 -0
- data/lib/hexapdf/task/dereference.rb +109 -0
- data/lib/hexapdf/task/optimize.rb +230 -0
- data/lib/hexapdf/task.rb +68 -0
- data/lib/hexapdf/tokenizer.rb +406 -0
- data/lib/hexapdf/type/catalog.rb +107 -0
- data/lib/hexapdf/type/embedded_file.rb +87 -0
- data/lib/hexapdf/type/file_specification.rb +232 -0
- data/lib/hexapdf/type/font.rb +81 -0
- data/lib/hexapdf/type/font_descriptor.rb +109 -0
- data/lib/hexapdf/type/font_simple.rb +190 -0
- data/lib/hexapdf/type/font_true_type.rb +47 -0
- data/lib/hexapdf/type/font_type1.rb +162 -0
- data/lib/hexapdf/type/form.rb +103 -0
- data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
- data/lib/hexapdf/type/image.rb +73 -0
- data/lib/hexapdf/type/info.rb +70 -0
- data/lib/hexapdf/type/names.rb +69 -0
- data/lib/hexapdf/type/object_stream.rb +224 -0
- data/lib/hexapdf/type/page.rb +355 -0
- data/lib/hexapdf/type/page_tree_node.rb +269 -0
- data/lib/hexapdf/type/resources.rb +212 -0
- data/lib/hexapdf/type/trailer.rb +128 -0
- data/lib/hexapdf/type/viewer_preferences.rb +73 -0
- data/lib/hexapdf/type/xref_stream.rb +204 -0
- data/lib/hexapdf/type.rb +67 -0
- data/lib/hexapdf/utils/bit_field.rb +87 -0
- data/lib/hexapdf/utils/bit_stream.rb +148 -0
- data/lib/hexapdf/utils/lru_cache.rb +65 -0
- data/lib/hexapdf/utils/math_helpers.rb +55 -0
- data/lib/hexapdf/utils/object_hash.rb +130 -0
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
- data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
- data/lib/hexapdf/version.rb +39 -0
- data/lib/hexapdf/writer.rb +199 -0
- data/lib/hexapdf/xref_section.rb +152 -0
- data/lib/hexapdf.rb +34 -0
- data/man/man1/hexapdf.1 +249 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
- data/test/data/fonts/Ubuntu-Title.ttf +0 -0
- data/test/data/images/cmyk.jpg +0 -0
- data/test/data/images/fillbytes.jpg +0 -0
- data/test/data/images/gray.jpg +0 -0
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-4bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/greyscale-alpha-8bit.png +0 -0
- data/test/data/images/greyscale-trns-8bit.png +0 -0
- data/test/data/images/greyscale-with-gamma1.0.png +0 -0
- data/test/data/images/greyscale-with-gamma1.5.png +0 -0
- data/test/data/images/indexed-1bit.png +0 -0
- data/test/data/images/indexed-2bit.png +0 -0
- data/test/data/images/indexed-4bit.png +0 -0
- data/test/data/images/indexed-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/indexed-alpha-8bit.png +0 -0
- data/test/data/images/rgb.jpg +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/data/images/truecolour-alpha-8bit.png +0 -0
- data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
- data/test/data/images/truecolour-srgb-8bit.png +0 -0
- data/test/data/minimal.pdf +44 -0
- data/test/data/standard-security-handler/README +9 -0
- data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
- data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
- data/test/hexapdf/common_tokenizer_tests.rb +204 -0
- data/test/hexapdf/content/common.rb +31 -0
- data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
- data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
- data/test/hexapdf/content/test_canvas.rb +1113 -0
- data/test/hexapdf/content/test_color_space.rb +97 -0
- data/test/hexapdf/content/test_graphics_state.rb +138 -0
- data/test/hexapdf/content/test_operator.rb +619 -0
- data/test/hexapdf/content/test_parser.rb +66 -0
- data/test/hexapdf/content/test_processor.rb +156 -0
- data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
- data/test/hexapdf/encryption/common.rb +87 -0
- data/test/hexapdf/encryption/test_aes.rb +121 -0
- data/test/hexapdf/encryption/test_arc4.rb +39 -0
- data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
- data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
- data/test/hexapdf/encryption/test_identity.rb +21 -0
- data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
- data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
- data/test/hexapdf/encryption/test_security_handler.rb +356 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
- data/test/hexapdf/filter/common.rb +53 -0
- data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
- data/test/hexapdf/filter/test_encryption.rb +24 -0
- data/test/hexapdf/filter/test_flate_decode.rb +35 -0
- data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
- data/test/hexapdf/filter/test_predictor.rb +183 -0
- data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
- data/test/hexapdf/font/cmap/test_parser.rb +67 -0
- data/test/hexapdf/font/cmap/test_writer.rb +58 -0
- data/test/hexapdf/font/encoding/test_base.rb +35 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
- data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
- data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
- data/test/hexapdf/font/test_encoding.rb +27 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
- data/test/hexapdf/font/true_type/common.rb +19 -0
- data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
- data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
- data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
- data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
- data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
- data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
- data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
- data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
- data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
- data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
- data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
- data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
- data/test/hexapdf/font/true_type/test_font.rb +120 -0
- data/test/hexapdf/font/true_type/test_table.rb +41 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
- data/test/hexapdf/font/type1/test_font.rb +68 -0
- data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
- data/test/hexapdf/font_loader/test_standard14.rb +22 -0
- data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
- data/test/hexapdf/image_loader/test_pdf.rb +47 -0
- data/test/hexapdf/image_loader/test_png.rb +258 -0
- data/test/hexapdf/task/test_dereference.rb +46 -0
- data/test/hexapdf/task/test_optimize.rb +137 -0
- data/test/hexapdf/test_configuration.rb +82 -0
- data/test/hexapdf/test_data_dir.rb +32 -0
- data/test/hexapdf/test_dictionary.rb +284 -0
- data/test/hexapdf/test_dictionary_fields.rb +185 -0
- data/test/hexapdf/test_document.rb +574 -0
- data/test/hexapdf/test_document_utils.rb +144 -0
- data/test/hexapdf/test_filter.rb +96 -0
- data/test/hexapdf/test_font_utils.rb +47 -0
- data/test/hexapdf/test_importer.rb +78 -0
- data/test/hexapdf/test_object.rb +177 -0
- data/test/hexapdf/test_parser.rb +394 -0
- data/test/hexapdf/test_rectangle.rb +36 -0
- data/test/hexapdf/test_reference.rb +41 -0
- data/test/hexapdf/test_revision.rb +139 -0
- data/test/hexapdf/test_revisions.rb +93 -0
- data/test/hexapdf/test_serializer.rb +169 -0
- data/test/hexapdf/test_stream.rb +262 -0
- data/test/hexapdf/test_tokenizer.rb +30 -0
- data/test/hexapdf/test_writer.rb +120 -0
- data/test/hexapdf/test_xref_section.rb +35 -0
- data/test/hexapdf/type/test_catalog.rb +30 -0
- data/test/hexapdf/type/test_embedded_file.rb +16 -0
- data/test/hexapdf/type/test_file_specification.rb +148 -0
- data/test/hexapdf/type/test_font.rb +35 -0
- data/test/hexapdf/type/test_font_descriptor.rb +51 -0
- data/test/hexapdf/type/test_font_simple.rb +190 -0
- data/test/hexapdf/type/test_font_type1.rb +128 -0
- data/test/hexapdf/type/test_form.rb +60 -0
- data/test/hexapdf/type/test_info.rb +14 -0
- data/test/hexapdf/type/test_names.rb +9 -0
- data/test/hexapdf/type/test_object_stream.rb +84 -0
- data/test/hexapdf/type/test_page.rb +260 -0
- data/test/hexapdf/type/test_page_tree_node.rb +255 -0
- data/test/hexapdf/type/test_resources.rb +167 -0
- data/test/hexapdf/type/test_trailer.rb +109 -0
- data/test/hexapdf/type/test_xref_stream.rb +131 -0
- data/test/hexapdf/utils/test_bit_field.rb +47 -0
- data/test/hexapdf/utils/test_lru_cache.rb +22 -0
- data/test/hexapdf/utils/test_object_hash.rb +115 -0
- data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
- data/test/test_helper.rb +56 -0
- metadata +427 -0
@@ -0,0 +1,176 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'fiber'
|
35
|
+
require 'hexapdf/error'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
|
39
|
+
# This special Fiber class should be used when the total length of the data yielded by the fiber
|
40
|
+
# is known beforehand. HexaPDF uses this information to avoid unnecessary memory usage.
|
41
|
+
class FiberWithLength < Fiber

  # Number of bytes this fiber is going to yield in total. A negative value signals that the
  # total is *not* known in advance.
  attr_reader :length

  # Creates the fiber from the given block and remembers +length+; when no length is supplied
  # (+nil+), -1 is stored instead.
  def initialize(length, &block)
    super(&block)
    @length = (length ? length : -1)
  end

end
|
54
|
+
|
55
|
+
|
56
|
+
# == Overview
|
57
|
+
#
|
58
|
+
# A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
|
59
|
+
# to reverse this process. Some filters can be used for any content, like FlateDecode, others
|
60
|
+
# are specifically designed for image streams, like DCTDecode.
|
61
|
+
#
|
62
|
+
# Each filter is implemented via fibers. This allows HexaPDF to easily process either small
|
63
|
+
# chunks or a whole stream at once, depending on the memory restrictions and to create flexible
|
64
|
+
# filter pipelines.
|
65
|
+
#
|
66
|
+
# It also allows the easy re-processing of a stream without first decoding and then encoding it.
|
67
|
+
# Such functionality is useful, for example, when a PDF file should be decrypted and streams
|
68
|
+
# compressed in one step.
|
69
|
+
#
|
70
|
+
#
|
71
|
+
# == Implementation of a Filter Object
|
72
|
+
#
|
73
|
+
# Each filter is an object (normally a module) that responds to two methods: \#encoder and
|
74
|
+
# \#decoder. Both of these methods are given a *source* (a Fiber) and *options* (a Hash) and have
|
75
|
+
# to return a Fiber object.
|
76
|
+
#
|
77
|
+
# The returned fiber should resume the *source* fiber to get the next chunk of binary data
|
78
|
+
# (possibly only one byte of data, so this situation should be handled gracefully). Once the
|
79
|
+
# fiber has processed this chunk, it should yield the processed chunk as binary string. This
|
80
|
+
# should be done as long as the source fiber is #alive? and doesn't return +nil+ when resumed.
|
81
|
+
#
|
82
|
+
# Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
|
83
|
+
#
|
84
|
+
# See: PDF1.7 s7.4
|
85
|
+
module Filter
|
86
|
+
|
87
|
+
autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
|
88
|
+
autoload(:ASCIIHexDecode, 'hexapdf/filter/ascii_hex_decode')
|
89
|
+
autoload(:DCTDecode, 'hexapdf/filter/dct_decode')
|
90
|
+
autoload(:FlateDecode, 'hexapdf/filter/flate_decode')
|
91
|
+
autoload(:JPXDecode, 'hexapdf/filter/jpx_decode')
|
92
|
+
autoload(:LZWDecode, 'hexapdf/filter/lzw_decode')
|
93
|
+
autoload(:RunLengthDecode, 'hexapdf/filter/run_length_decode')
|
94
|
+
|
95
|
+
autoload(:Predictor, 'hexapdf/filter/predictor')
|
96
|
+
|
97
|
+
autoload(:Encryption, 'hexapdf/filter/encryption')
|
98
|
+
|
99
|
+
# Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
|
100
|
+
# String object.
|
101
|
+
def self.source_from_string(str)
  # The advertised length must be the number of *bytes* that are yielded. String#length counts
  # characters and would under-report strings containing multi-byte characters, so the byte
  # size is used. A copy of the string is yielded so that the consumer cannot modify +str+.
  FiberWithLength.new(str.bytesize) { str.dup }
end
|
104
|
+
|
105
|
+
# Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
|
106
|
+
# data from an IO object.
|
107
|
+
#
|
108
|
+
# Each time a chunk is read, the position pointer of the IO is adjusted. This should be taken
|
109
|
+
# into account when working with the IO object.
|
110
|
+
#
|
111
|
+
# Options:
|
112
|
+
#
|
113
|
+
# :pos:: The position from where the reading should start. A negative position is treated as
|
114
|
+
# zero. Default: 0.
|
115
|
+
#
|
116
|
+
# :length:: The length indicating the number of bytes to read. An error is raised if not all
|
117
|
+
# specified bytes could be read. A negative length means reading until the end of
|
118
|
+
# the IO stream. Default: -1.
|
119
|
+
#
|
120
|
+
# :chunk_size:: The size of the chunks that should be returned in each iteration. A chunk size
|
121
|
+
# of less than or equal to 0 means using the biggest chunk size available (can
|
122
|
+
# change between versions!). Default: 0.
|
123
|
+
def self.source_from_io(io, pos: 0, length: -1, chunk_size: 0)
  requested = length
  bounded = (length >= 0)

  # Fall back to the built-in chunk size and never read more than was requested.
  chunk_size = 2**20 unless chunk_size > 0
  chunk_size = length if bounded && chunk_size > length

  # An unbounded read is modelled as a huge number of remaining bytes.
  remaining = (bounded ? length : 2**61)
  offset = (pos < 0 ? 0 : pos)

  FiberWithLength.new(requested) do
    while remaining > 0
      # The IO is re-positioned before every read because its position pointer may have been
      # changed while this fiber was suspended.
      io.pos = offset
      chunk = io.read(chunk_size)
      break unless chunk

      offset = io.pos
      remaining -= chunk.size
      chunk_size = remaining if chunk_size > remaining
      Fiber.yield(chunk)
    end

    if bounded && remaining > 0
      raise FilterError, "Couldn't read all requested bytes before encountering EOF"
    end
  end
end
|
142
|
+
|
143
|
+
# Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks
|
144
|
+
# from a file.
|
145
|
+
#
|
146
|
+
# Note that there will be a problem if the size of the file changes between the invocation of
|
147
|
+
# this method and the actual consumption of the file!
|
148
|
+
#
|
149
|
+
# See ::source_from_io for a description of the available options.
|
150
|
+
def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
  # ::source_from_io treats a negative position as zero. The same normalization has to be done
  # here, otherwise the advertised length (file size minus position) would be larger than the
  # data that is actually read.
  pos = 0 if pos < 0

  # When reading until the end of the file, the length is what remains after +pos+. A position
  # at or behind the end of the file yields no data, so the length is clamped to zero instead
  # of becoming negative (which would mean "unknown length").
  fib_length = (length < 0 ? [File.stat(filename).size - pos, 0].max : length)

  FiberWithLength.new(fib_length) do
    # The file is only opened once the fiber is resumed for the first time and it is closed by
    # the block form of File.open when the fiber has finished.
    File.open(filename, 'rb') do |file|
      source = source_from_io(file, pos: pos, length: length, chunk_size: chunk_size)
      while source.alive? && (io_data = source.resume)
        Fiber.yield(io_data)
      end
    end
  end
end
|
161
|
+
|
162
|
+
# Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
|
163
|
+
# is dead.
|
164
|
+
#
|
165
|
+
# The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding.
|
166
|
+
def self.string_from_source(source)
  str = ''.b
  while source.alive? && (data = source.resume)
    # Appending a non-binary string with non-ASCII characters to an empty binary string would
    # switch the result to that string's encoding, and a later binary chunk would then raise an
    # Encoding::CompatibilityError. Therefore every chunk is appended as binary data; String#b
    # returns a copy, so the chunk of the source is left untouched.
    str << (data.encoding == Encoding::BINARY ? data : data.b)
  end
  str
end
|
173
|
+
|
174
|
+
end
|
175
|
+
|
176
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/error'
|
35
|
+
require 'hexapdf/font/cmap'
|
36
|
+
require 'hexapdf/content/parser'
|
37
|
+
|
38
|
+
module HexaPDF
|
39
|
+
module Font
|
40
|
+
class CMap
|
41
|
+
|
42
|
+
# Parses CMap files.
|
43
|
+
#
|
44
|
+
# Currently only ToUnicode CMaps are supported.
|
45
|
+
class Parser
|
46
|
+
|
47
|
+
# Parses the given string and returns a CMap object.
|
48
|
+
def parse(string)
  tokenizer = HexaPDF::Content::Tokenizer.new(string)
  cmap = CMap.new

  while (token = tokenizer.next_token) != HexaPDF::Tokenizer::NO_MORE_TOKENS
    case token
    when HexaPDF::Tokenizer::Token
      # Operator tokens: only the bfchar/bfrange sections and the end marker are of interest,
      # every other operator is skipped.
      case token
      when 'beginbfchar'.freeze then parse_bf_char(tokenizer, cmap)
      when 'beginbfrange'.freeze then parse_bf_range(tokenizer, cmap)
      when 'endcmap' then break
      end
    when Symbol
      # A name token is handled as the key of a possible key/value pair.
      parse_dict_mapping(tokenizer, cmap, token)
    end
  end

  cmap
rescue => e
  # Every failure is reported as a HexaPDF::Error that keeps the original backtrace.
  raise HexaPDF::Error, "Error parsing CMap: #{e.message}", e.backtrace
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
# Parses a single mapping of a dictionary pair. The +name+ of the mapping has already been
|
72
|
+
# parsed.
|
73
|
+
def parse_dict_mapping(tokenizer, cmap, name)
  value = tokenizer.next_token
  # Nothing is stored when the token following the name is an operator token.
  return if value.kind_of?(HexaPDF::Tokenizer::Token)

  # Each known name is only accepted together with a value of the matching type.
  case value
  when String
    if name == :Registry
      cmap.registry = value
    elsif name == :Ordering
      cmap.ordering = value
    end
  when Integer
    cmap.supplement = value if name == :Supplement
  when Symbol
    cmap.name = value.to_s if name == :CMapName
  end
end
|
84
|
+
|
85
|
+
# Parses the "bfchar" operator at the current position.
|
86
|
+
def parse_bf_char(tokenizer, cmap)
  # The section consists of source code/destination string pairs and lasts until the next
  # operator token is encountered.
  code = tokenizer.next_token
  until code.kind_of?(HexaPDF::Tokenizer::Token)
    # The destination is transcoded in place from UTF-16BE to UTF-8.
    unicode = tokenizer.next_token.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
    cmap.unicode_mapping[bytes_to_int(code)] = unicode
    code = tokenizer.next_token
  end
end
|
92
|
+
|
93
|
+
# Parses the "bfrange" operator at the current position.
|
94
|
+
#
|
95
|
+
#--
|
96
|
+
# PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
|
97
|
+
# operators of the form "startCode endCode codePoint" should be handled.
|
98
|
+
#
|
99
|
+
# PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
|
100
|
+
# of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
|
101
|
+
# this.
|
102
|
+
#
|
103
|
+
# Additionally, #5411 mentions in section 1.4.1 that the first byte of "startCode" and
|
104
|
+
# "endCode" have to be the same. So it seems that this is a mistake in the PDF reference.
|
105
|
+
#++
|
106
|
+
def parse_bf_range(tokenizer, cmap)
  # Every entry consists of a start code, an end code and a destination; the section lasts
  # until the next operator token is encountered.
  until (first = tokenizer.next_token).kind_of?(HexaPDF::Tokenizer::Token)
    range_start = bytes_to_int(first)
    range_end = bytes_to_int(tokenizer.next_token)
    target = tokenizer.next_object

    case target
    when String
      # A single destination string: its first character is the code point for the start
      # code, each following code is mapped to the next higher code point.
      base = target.force_encoding(::Encoding::UTF_16BE).ord
      (range_start..range_end).each do |code|
        cmap.unicode_mapping[code] = '' << (base + (code - range_start))
      end
    when Array
      # An array of destinations: one UTF-16BE string per code, taken in order and transcoded
      # in place to UTF-8.
      (range_start..range_end).each_with_index do |code, offset|
        cmap.unicode_mapping[code] =
          target[offset].encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
      end
    else
      raise HexaPDF::Error, "Invalid bfrange operator in CMap"
    end
  end
end
|
128
|
+
|
129
|
+
# Treats the string as an array of bytes and converts it to an integer.
#
# The bytes are converted in the big-endian way, i.e. the first byte of the string ends up as
# the most significant byte of the result. An empty string yields 0.
def bytes_to_int(string)
  # Iterate over the bytes and not over the characters so that the result doesn't depend on
  # the encoding that happens to be set on the string.
  string.each_byte.inject(0) {|result, byte| (result << 8) | byte}
end
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/font/cmap'
|
35
|
+
|
36
|
+
module HexaPDF
|
37
|
+
module Font
|
38
|
+
class CMap
|
39
|
+
|
40
|
+
# Creates a CMap file.
#
# Currently only ToUnicode CMaps are supported.
class Writer

  # Maximum number of entries in one section.
  MAX_ENTRIES_IN_SECTION = 100

  # Returns a ToUnicode CMap for the given input code to Unicode codepoint mapping which needs
  # to be sorted by input codes.
  #
  # The +mapping+ is an array of [code, codepoint] pairs. Single mappings are written as
  # "bfchar" entries, runs of consecutive mappings as "bfrange" entries.
  #
  # Note that the returned CMap always uses a 16-bit input code space!
  def create_to_unicode_cmap(mapping)
    return to_unicode_template % '' if mapping.empty?

    chars, ranges = compute_section_entries(mapping)

    char_sections = create_sections("bfchar", chars.size / 2) do |index|
      code, value = chars[index * 2, 2]
      sprintf("<%04X><%s>\n", code, utf16be_hex(value))
    end

    range_sections = create_sections("bfrange", ranges.size / 3) do |index|
      first_code, last_code, value = ranges[index * 3, 3]
      sprintf("<%04X><%04X><%s>\n", first_code, last_code, utf16be_hex(value))
    end

    # The trailing newline is removed because the template provides its own line break.
    to_unicode_template % (char_sections + range_sections).chomp
  end

  private

  # Computes the entries for the "char" and "range" sections based on the given mapping.
  #
  # Returns two arrays +char_mappings+ and +range_mappings+ where +char_mappings+ is an array
  # of the form
  #
  #   [code1, value1, code2, value2, ...]
  #
  # and +range_mappings+ an array of the form
  #
  #   [start1, end1, value1, start2, end2, value2, ...]
  #
  # A range is never continued across a multiple of 256, neither for the codes nor for the
  # values. This way all codes of a range share the same first byte and the last byte of the
  # destination string never has to be incremented past 255.
  def compute_section_entries(mapping)
    chars = []
    ranges = []

    last_code, last_value = *mapping[0]
    is_range = false
    mapping.slice(1..-1).each do |code, value|
      consecutive = (last_code + 1 == code && last_value + 1 == value)
      if consecutive && code % 256 != 0 && value % 256 != 0
        # The end code is not known yet, it is filled in once the range is finished.
        ranges << last_code << nil << last_value unless is_range
        is_range = true
      elsif is_range
        ranges[-2] = last_code
        is_range = false
      else
        chars << last_code << last_value
      end
      last_code = code
      last_value = value
    end

    # Handle last remaining mapping
    if is_range
      ranges[-2] = last_code
    else
      chars << last_code << last_value
    end

    [chars, ranges]
  end

  # Creates one or more sections of a CMap file and returns the resulting string.
  #
  # +type+::
  #   The name of the section, e.g. "bfchar" or "bfrange".
  #
  # +size+::
  #   The maximum number of elements of this type. Used for determining when to start a new
  #   section.
  #
  # The method makes sure that no section has more than the maximum number of allowed entries.
  #
  # Numbers from 0 up to size - 1 are yielded, indicating the current entry that should be
  # processed and for which an appropriate section line should be returned from the block.
  #
  # An empty string is returned if +size+ is zero.
  def create_sections(type, size)
    result = ''.dup
    0.step(size - 1, MAX_ENTRIES_IN_SECTION) do |start|
      count = [MAX_ENTRIES_IN_SECTION, size - start].min
      result << "#{count} begin#{type}\n"
      start.upto(start + count - 1) {|i| result << yield(i)}
      result << "end#{type}\n"
    end
    result
  end

  # Returns the lowercase hexadecimal representation of the UTF-16BE encoding of the given
  # Unicode codepoint.
  def utf16be_hex(codepoint)
    (''.dup.force_encoding(::Encoding::UTF_16BE) << codepoint).unpack('H*').first
  end

  # Returns the CMap file template for a ToUnicode CMap.
  #
  # The template contains a single "%s" placeholder for the bfchar/bfrange sections.
  def to_unicode_template
    <<-TEMPLATE
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
%s
endcmap
CMapName currentdict /CMap defineresource pop
end
end
    TEMPLATE
  end

end
|
173
|
+
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
module HexaPDF
|
35
|
+
module Font
|
36
|
+
|
37
|
+
# Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
# value.
#
# Currently, only the mapping to the Unicode values is supported.
#
# See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Note #5411
class CMap

  autoload(:Parser, 'hexapdf/font/cmap/parser')
  autoload(:Writer, 'hexapdf/font/cmap/writer')

  # Parses the given string, which needs to contain a valid CMap file, with a new Parser
  # instance and returns the result.
  def self.parse(string)
    parser = Parser.new
    parser.parse(string)
  end

  # Builds a string containing a ToUnicode CMap for the given code to Unicode codepoint
  # mapping by using a new Writer instance.
  #
  # See: Writer#create_to_unicode_cmap
  def self.create_to_unicode_cmap(mapping)
    writer = Writer.new
    writer.create_to_unicode_cmap(mapping)
  end

  # The registry part of the CMap version.
  attr_accessor :registry

  # The ordering part of the CMap version.
  attr_accessor :ordering

  # The supplement part of the CMap version.
  attr_accessor :supplement

  # The name of the CMap.
  attr_accessor :name

  # The hash that maps character codes to Unicode values.
  attr_accessor :unicode_mapping

  # Creates a new CMap object without any mappings.
  def initialize
    # The frozen empty string is the shared default value for all unmapped codes.
    @unicode_mapping = Hash.new(''.freeze)
  end

  # Looks up the given character code in the Unicode mapping and returns the mapped string, or
  # an empty string if no mapping was found.
  def to_unicode(code)
    unicode_mapping[code]
  end

end
|
88
|
+
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/font/encoding/glyph_list'
|
35
|
+
|
36
|
+
module HexaPDF
|
37
|
+
module Font
|
38
|
+
module Encoding
|
39
|
+
|
40
|
+
# Base for encoding classes that are used for mapping codes in the range of 0 to 255 to glyph
# names.
class Base

  # The name of the encoding or +nil+ if the encoding has not been assigned a name.
  attr_reader :encoding_name

  # The hash mapping codes to names.
  attr_reader :code_to_name

  # Creates a new encoding object that has no name and contains no default mappings.
  def initialize
    @encoding_name = nil
    @code_to_name = {}
    @unicode_cache = {}
  end

  # Returns the glyph name stored for the given code, falling back to .notdef if no glyph is
  # defined for the code.
  #
  # The returned value is always a Symbol object!
  def name(code)
    @code_to_name.fetch(code) { :'.notdef' }
  end

  # Returns the Unicode value in UTF-8 for the given code, or an empty string if the code
  # cannot be mapped.
  #
  # The value is determined via GlyphList.name_to_unicode using the glyph name of the code.
  #
  # Note that this method caches the result of the Unicode mapping and therefore should only
  # be called after all codes have been defined.
  def unicode(code)
    @unicode_cache[code] ||= GlyphList.name_to_unicode(name(code))
  end

end
|
74
|
+
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|