hexapdf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CONTRIBUTERS +3 -0
- data/LICENSE +26 -0
- data/README.md +88 -0
- data/Rakefile +121 -0
- data/VERSION +1 -0
- data/agpl-3.0.txt +661 -0
- data/bin/hexapdf +6 -0
- data/data/hexapdf/afm/Courier-Bold.afm +342 -0
- data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
- data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
- data/data/hexapdf/afm/Courier.afm +342 -0
- data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
- data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
- data/data/hexapdf/afm/Helvetica.afm +3051 -0
- data/data/hexapdf/afm/MustRead.html +1 -0
- data/data/hexapdf/afm/Symbol.afm +213 -0
- data/data/hexapdf/afm/Times-Bold.afm +2588 -0
- data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
- data/data/hexapdf/afm/Times-Italic.afm +2667 -0
- data/data/hexapdf/afm/Times-Roman.afm +2419 -0
- data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
- data/data/hexapdf/encoding/glyphlist.txt +4305 -0
- data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
- data/examples/arc.rb +50 -0
- data/examples/graphics.rb +274 -0
- data/examples/hello_world.rb +16 -0
- data/examples/machupicchu.jpg +0 -0
- data/examples/merging.rb +24 -0
- data/examples/optimizing.rb +20 -0
- data/examples/show_char_bboxes.rb +55 -0
- data/examples/standard_pdf_fonts.rb +72 -0
- data/examples/truetype.rb +45 -0
- data/lib/hexapdf/cli/extract.rb +128 -0
- data/lib/hexapdf/cli/info.rb +121 -0
- data/lib/hexapdf/cli/inspect.rb +157 -0
- data/lib/hexapdf/cli/modify.rb +218 -0
- data/lib/hexapdf/cli.rb +121 -0
- data/lib/hexapdf/configuration.rb +392 -0
- data/lib/hexapdf/content/canvas.rb +1974 -0
- data/lib/hexapdf/content/color_space.rb +364 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
- data/lib/hexapdf/content/graphic_object.rb +81 -0
- data/lib/hexapdf/content/graphics_state.rb +579 -0
- data/lib/hexapdf/content/operator.rb +1072 -0
- data/lib/hexapdf/content/parser.rb +204 -0
- data/lib/hexapdf/content/processor.rb +451 -0
- data/lib/hexapdf/content/transformation_matrix.rb +172 -0
- data/lib/hexapdf/content.rb +47 -0
- data/lib/hexapdf/data_dir.rb +51 -0
- data/lib/hexapdf/dictionary.rb +303 -0
- data/lib/hexapdf/dictionary_fields.rb +382 -0
- data/lib/hexapdf/document.rb +589 -0
- data/lib/hexapdf/document_utils.rb +209 -0
- data/lib/hexapdf/encryption/aes.rb +206 -0
- data/lib/hexapdf/encryption/arc4.rb +93 -0
- data/lib/hexapdf/encryption/fast_aes.rb +79 -0
- data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
- data/lib/hexapdf/encryption/identity.rb +63 -0
- data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
- data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
- data/lib/hexapdf/encryption/security_handler.rb +494 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
- data/lib/hexapdf/encryption.rb +94 -0
- data/lib/hexapdf/error.rb +73 -0
- data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
- data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
- data/lib/hexapdf/filter/dct_decode.rb +57 -0
- data/lib/hexapdf/filter/encryption.rb +59 -0
- data/lib/hexapdf/filter/flate_decode.rb +93 -0
- data/lib/hexapdf/filter/jpx_decode.rb +56 -0
- data/lib/hexapdf/filter/lzw_decode.rb +191 -0
- data/lib/hexapdf/filter/predictor.rb +266 -0
- data/lib/hexapdf/filter/run_length_decode.rb +108 -0
- data/lib/hexapdf/filter.rb +176 -0
- data/lib/hexapdf/font/cmap/parser.rb +146 -0
- data/lib/hexapdf/font/cmap/writer.rb +176 -0
- data/lib/hexapdf/font/cmap.rb +90 -0
- data/lib/hexapdf/font/encoding/base.rb +77 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
- data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
- data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
- data/lib/hexapdf/font/encoding.rb +68 -0
- data/lib/hexapdf/font/true_type/font.rb +179 -0
- data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
- data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
- data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
- data/lib/hexapdf/font/true_type/table/head.rb +143 -0
- data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
- data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
- data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
- data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
- data/lib/hexapdf/font/true_type/table/name.rb +218 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
- data/lib/hexapdf/font/true_type/table/post.rb +230 -0
- data/lib/hexapdf/font/true_type/table.rb +155 -0
- data/lib/hexapdf/font/true_type.rb +48 -0
- data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
- data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
- data/lib/hexapdf/font/type1/font.rb +123 -0
- data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
- data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
- data/lib/hexapdf/font/type1.rb +52 -0
- data/lib/hexapdf/font/type1_wrapper.rb +193 -0
- data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
- data/lib/hexapdf/font_loader/standard14.rb +98 -0
- data/lib/hexapdf/font_loader.rb +85 -0
- data/lib/hexapdf/font_utils.rb +89 -0
- data/lib/hexapdf/image_loader/jpeg.rb +166 -0
- data/lib/hexapdf/image_loader/pdf.rb +89 -0
- data/lib/hexapdf/image_loader/png.rb +410 -0
- data/lib/hexapdf/image_loader.rb +68 -0
- data/lib/hexapdf/importer.rb +139 -0
- data/lib/hexapdf/name_tree_node.rb +78 -0
- data/lib/hexapdf/number_tree_node.rb +67 -0
- data/lib/hexapdf/object.rb +363 -0
- data/lib/hexapdf/parser.rb +349 -0
- data/lib/hexapdf/rectangle.rb +99 -0
- data/lib/hexapdf/reference.rb +98 -0
- data/lib/hexapdf/revision.rb +206 -0
- data/lib/hexapdf/revisions.rb +194 -0
- data/lib/hexapdf/serializer.rb +326 -0
- data/lib/hexapdf/stream.rb +279 -0
- data/lib/hexapdf/task/dereference.rb +109 -0
- data/lib/hexapdf/task/optimize.rb +230 -0
- data/lib/hexapdf/task.rb +68 -0
- data/lib/hexapdf/tokenizer.rb +406 -0
- data/lib/hexapdf/type/catalog.rb +107 -0
- data/lib/hexapdf/type/embedded_file.rb +87 -0
- data/lib/hexapdf/type/file_specification.rb +232 -0
- data/lib/hexapdf/type/font.rb +81 -0
- data/lib/hexapdf/type/font_descriptor.rb +109 -0
- data/lib/hexapdf/type/font_simple.rb +190 -0
- data/lib/hexapdf/type/font_true_type.rb +47 -0
- data/lib/hexapdf/type/font_type1.rb +162 -0
- data/lib/hexapdf/type/form.rb +103 -0
- data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
- data/lib/hexapdf/type/image.rb +73 -0
- data/lib/hexapdf/type/info.rb +70 -0
- data/lib/hexapdf/type/names.rb +69 -0
- data/lib/hexapdf/type/object_stream.rb +224 -0
- data/lib/hexapdf/type/page.rb +355 -0
- data/lib/hexapdf/type/page_tree_node.rb +269 -0
- data/lib/hexapdf/type/resources.rb +212 -0
- data/lib/hexapdf/type/trailer.rb +128 -0
- data/lib/hexapdf/type/viewer_preferences.rb +73 -0
- data/lib/hexapdf/type/xref_stream.rb +204 -0
- data/lib/hexapdf/type.rb +67 -0
- data/lib/hexapdf/utils/bit_field.rb +87 -0
- data/lib/hexapdf/utils/bit_stream.rb +148 -0
- data/lib/hexapdf/utils/lru_cache.rb +65 -0
- data/lib/hexapdf/utils/math_helpers.rb +55 -0
- data/lib/hexapdf/utils/object_hash.rb +130 -0
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
- data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
- data/lib/hexapdf/version.rb +39 -0
- data/lib/hexapdf/writer.rb +199 -0
- data/lib/hexapdf/xref_section.rb +152 -0
- data/lib/hexapdf.rb +34 -0
- data/man/man1/hexapdf.1 +249 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
- data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
- data/test/data/fonts/Ubuntu-Title.ttf +0 -0
- data/test/data/images/cmyk.jpg +0 -0
- data/test/data/images/fillbytes.jpg +0 -0
- data/test/data/images/gray.jpg +0 -0
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-4bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/greyscale-alpha-8bit.png +0 -0
- data/test/data/images/greyscale-trns-8bit.png +0 -0
- data/test/data/images/greyscale-with-gamma1.0.png +0 -0
- data/test/data/images/greyscale-with-gamma1.5.png +0 -0
- data/test/data/images/indexed-1bit.png +0 -0
- data/test/data/images/indexed-2bit.png +0 -0
- data/test/data/images/indexed-4bit.png +0 -0
- data/test/data/images/indexed-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/indexed-alpha-8bit.png +0 -0
- data/test/data/images/rgb.jpg +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/data/images/truecolour-alpha-8bit.png +0 -0
- data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
- data/test/data/images/truecolour-srgb-8bit.png +0 -0
- data/test/data/minimal.pdf +44 -0
- data/test/data/standard-security-handler/README +9 -0
- data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
- data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
- data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
- data/test/hexapdf/common_tokenizer_tests.rb +204 -0
- data/test/hexapdf/content/common.rb +31 -0
- data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
- data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
- data/test/hexapdf/content/test_canvas.rb +1113 -0
- data/test/hexapdf/content/test_color_space.rb +97 -0
- data/test/hexapdf/content/test_graphics_state.rb +138 -0
- data/test/hexapdf/content/test_operator.rb +619 -0
- data/test/hexapdf/content/test_parser.rb +66 -0
- data/test/hexapdf/content/test_processor.rb +156 -0
- data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
- data/test/hexapdf/encryption/common.rb +87 -0
- data/test/hexapdf/encryption/test_aes.rb +121 -0
- data/test/hexapdf/encryption/test_arc4.rb +39 -0
- data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
- data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
- data/test/hexapdf/encryption/test_identity.rb +21 -0
- data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
- data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
- data/test/hexapdf/encryption/test_security_handler.rb +356 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
- data/test/hexapdf/filter/common.rb +53 -0
- data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
- data/test/hexapdf/filter/test_encryption.rb +24 -0
- data/test/hexapdf/filter/test_flate_decode.rb +35 -0
- data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
- data/test/hexapdf/filter/test_predictor.rb +183 -0
- data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
- data/test/hexapdf/font/cmap/test_parser.rb +67 -0
- data/test/hexapdf/font/cmap/test_writer.rb +58 -0
- data/test/hexapdf/font/encoding/test_base.rb +35 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
- data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
- data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
- data/test/hexapdf/font/test_encoding.rb +27 -0
- data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
- data/test/hexapdf/font/true_type/common.rb +19 -0
- data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
- data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
- data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
- data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
- data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
- data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
- data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
- data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
- data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
- data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
- data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
- data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
- data/test/hexapdf/font/true_type/test_font.rb +120 -0
- data/test/hexapdf/font/true_type/test_table.rb +41 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
- data/test/hexapdf/font/type1/test_font.rb +68 -0
- data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
- data/test/hexapdf/font_loader/test_standard14.rb +22 -0
- data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
- data/test/hexapdf/image_loader/test_pdf.rb +47 -0
- data/test/hexapdf/image_loader/test_png.rb +258 -0
- data/test/hexapdf/task/test_dereference.rb +46 -0
- data/test/hexapdf/task/test_optimize.rb +137 -0
- data/test/hexapdf/test_configuration.rb +82 -0
- data/test/hexapdf/test_data_dir.rb +32 -0
- data/test/hexapdf/test_dictionary.rb +284 -0
- data/test/hexapdf/test_dictionary_fields.rb +185 -0
- data/test/hexapdf/test_document.rb +574 -0
- data/test/hexapdf/test_document_utils.rb +144 -0
- data/test/hexapdf/test_filter.rb +96 -0
- data/test/hexapdf/test_font_utils.rb +47 -0
- data/test/hexapdf/test_importer.rb +78 -0
- data/test/hexapdf/test_object.rb +177 -0
- data/test/hexapdf/test_parser.rb +394 -0
- data/test/hexapdf/test_rectangle.rb +36 -0
- data/test/hexapdf/test_reference.rb +41 -0
- data/test/hexapdf/test_revision.rb +139 -0
- data/test/hexapdf/test_revisions.rb +93 -0
- data/test/hexapdf/test_serializer.rb +169 -0
- data/test/hexapdf/test_stream.rb +262 -0
- data/test/hexapdf/test_tokenizer.rb +30 -0
- data/test/hexapdf/test_writer.rb +120 -0
- data/test/hexapdf/test_xref_section.rb +35 -0
- data/test/hexapdf/type/test_catalog.rb +30 -0
- data/test/hexapdf/type/test_embedded_file.rb +16 -0
- data/test/hexapdf/type/test_file_specification.rb +148 -0
- data/test/hexapdf/type/test_font.rb +35 -0
- data/test/hexapdf/type/test_font_descriptor.rb +51 -0
- data/test/hexapdf/type/test_font_simple.rb +190 -0
- data/test/hexapdf/type/test_font_type1.rb +128 -0
- data/test/hexapdf/type/test_form.rb +60 -0
- data/test/hexapdf/type/test_info.rb +14 -0
- data/test/hexapdf/type/test_names.rb +9 -0
- data/test/hexapdf/type/test_object_stream.rb +84 -0
- data/test/hexapdf/type/test_page.rb +260 -0
- data/test/hexapdf/type/test_page_tree_node.rb +255 -0
- data/test/hexapdf/type/test_resources.rb +167 -0
- data/test/hexapdf/type/test_trailer.rb +109 -0
- data/test/hexapdf/type/test_xref_stream.rb +131 -0
- data/test/hexapdf/utils/test_bit_field.rb +47 -0
- data/test/hexapdf/utils/test_lru_cache.rb +22 -0
- data/test/hexapdf/utils/test_object_hash.rb +115 -0
- data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
- data/test/test_helper.rb +56 -0
- metadata +427 -0
@@ -0,0 +1,176 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'fiber'
|
35
|
+
require 'hexapdf/error'
|
36
|
+
|
37
|
+
module HexaPDF
|
38
|
+
|
39
|
+
# This special Fiber class should be used when the total length of the data yielded by the fiber
|
40
|
+
# is known beforehand. HexaPDF uses this information to avoid unnecessary memory usage.
|
41
|
+
class FiberWithLength < Fiber

  # Total length of the data this fiber is going to yield; a negative value signals that the
  # total length is *not* known.
  attr_reader :length

  # Creates the fiber from the given block and remembers +length+ as its total data length.
  #
  # A +nil+ (or +false+) +length+ is stored as -1, i.e. "length unknown".
  def initialize(length, &block)
    super(&block)
    @length = (length ? length : -1)
  end

end
|
54
|
+
|
55
|
+
|
56
|
+
# == Overview
|
57
|
+
#
|
58
|
+
# A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
|
59
|
+
# to reverse this process. Some filters can be used for any content, like FlateDecode, others
|
60
|
+
# are specifically designed for image streams, like DCTDecode.
|
61
|
+
#
|
62
|
+
# Each filter is implemented via fibers. This allows HexaPDF to easily process either small
|
63
|
+
# chunks or a whole stream at once, depending on the memory restrictions and to create flexible
|
64
|
+
# filter pipelines.
|
65
|
+
#
|
66
|
+
# It also allows the easy re-processing of a stream without first decoding and then encoding it.
|
67
|
+
# Such functionality is useful, for example, when a PDF file should be decrypted and streams
|
68
|
+
# compressed in one step.
|
69
|
+
#
|
70
|
+
#
|
71
|
+
# == Implementation of a Filter Object
|
72
|
+
#
|
73
|
+
# Each filter is an object (normally a module) that responds to two methods: \#encoder and
|
74
|
+
# \#decoder. Both of these methods are given a *source* (a Fiber) and *options* (a Hash) and have
|
75
|
+
# to return a Fiber object.
|
76
|
+
#
|
77
|
+
# The returned fiber should resume the *source* fiber to get the next chunk of binary data
|
78
|
+
# (possibly only one byte of data, so this situation should be handled gracefully). Once the
|
79
|
+
# fiber has processed this chunk, it should yield the processed chunk as binary string. This
|
80
|
+
# should be done as long as the source fiber is #alive? and doesn't return +nil+ when resumed.
|
81
|
+
#
|
82
|
+
# Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
|
83
|
+
#
|
84
|
+
# See: PDF1.7 s7.4
|
85
|
+
module Filter

  autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
  autoload(:ASCIIHexDecode, 'hexapdf/filter/ascii_hex_decode')
  autoload(:DCTDecode, 'hexapdf/filter/dct_decode')
  autoload(:FlateDecode, 'hexapdf/filter/flate_decode')
  autoload(:JPXDecode, 'hexapdf/filter/jpx_decode')
  autoload(:LZWDecode, 'hexapdf/filter/lzw_decode')
  autoload(:RunLengthDecode, 'hexapdf/filter/run_length_decode')

  autoload(:Predictor, 'hexapdf/filter/predictor')

  autoload(:Encryption, 'hexapdf/filter/encryption')

  # Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
  # String object.
  #
  # The fiber yields a copy of +str+ in a single chunk. Its declared length is the number of
  # *bytes* of the string (not the number of characters) because filters work on binary data.
  def self.source_from_string(str)
    FiberWithLength.new(str.bytesize) { str.dup }
  end

  # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
  # data from an IO object.
  #
  # Each time a chunk is read, the position pointer of the IO is adjusted. This should be taken
  # into account when working with the IO object.
  #
  # Options:
  #
  # :pos:: The position from where the reading should start. A negative position is treated as
  #        zero. Default: 0.
  #
  # :length:: The length indicating the number of bytes to read. An error is raised if not all
  #           specified bytes could be read. A negative length means reading until the end of
  #           the IO stream. Default: -1.
  #
  # :chunk_size:: The size of the chunks that should be returned in each iteration. A chunk size
  #               of less than or equal to 0 means using the biggest chunk size available (can
  #               change between versions!). Default: 0.
  #
  # Raises FilterError (when the fiber is resumed) if +length+ is non-negative and the IO ends
  # before that many bytes could be read.
  def self.source_from_io(io, pos: 0, length: -1, chunk_size: 0)
    orig_length = length
    chunk_size = 2**20 if chunk_size <= 0
    chunk_size = length if length >= 0 && chunk_size > length
    # A negative length means "until EOF"; use a limit that is never reached in practice.
    length = 2**61 if length < 0
    pos = 0 if pos < 0

    FiberWithLength.new(orig_length) do
      # The IO position is set before every read because the IO object may have been used by
      # other code while this fiber was suspended.
      while length > 0 && (io.pos = pos) && (data = io.read(chunk_size))
        pos = io.pos
        length -= data.bytesize
        # Never read past the requested number of bytes.
        chunk_size = length if chunk_size > length
        Fiber.yield(data)
      end
      if length > 0 && orig_length >= 0
        raise FilterError, "Couldn't read all requested bytes before encountering EOF"
      end
    end
  end

  # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks
  # from a file.
  #
  # Note that there will be a problem if the size of the file changes between the invocation of
  # this method and the actual consumption of the file!
  #
  # A negative +pos+ is treated as zero, exactly as ::source_from_io does, so that the declared
  # length of the returned fiber matches the data that is actually read. If +pos+ lies beyond
  # the end of the file, the declared length is zero.
  #
  # See ::source_from_io for a description of the available options.
  def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
    pos = 0 if pos < 0
    fib_length = (length < 0 ? [File.stat(filename).size - pos, 0].max : length)
    FiberWithLength.new(fib_length) do
      # The file is only opened once the fiber is resumed and is closed when the block ends.
      File.open(filename, 'rb') do |file|
        source = source_from_io(file, pos: pos, length: length, chunk_size: chunk_size)
        while source.alive? && (io_data = source.resume)
          Fiber.yield(io_data)
        end
      end
    end
  end

  # Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
  # is dead.
  #
  # The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding, regardless
  # of the encodings of the chunks yielded by the source.
  def self.string_from_source(source)
    str = ''.b
    while source.alive? && (data = source.resume)
      # Appending a non-binary chunk containing non-ASCII bytes would either change the encoding
      # of the buffer or raise Encoding::CompatibilityError, so such chunks are appended as
      # binary copies.
      data = data.b unless data.encoding == ::Encoding::BINARY
      str << data
    end
    str
  end

end
|
175
|
+
|
176
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/error'
|
35
|
+
require 'hexapdf/font/cmap'
|
36
|
+
require 'hexapdf/content/parser'
|
37
|
+
|
38
|
+
module HexaPDF
|
39
|
+
module Font
|
40
|
+
class CMap
|
41
|
+
|
42
|
+
# Parses CMap files.
|
43
|
+
#
|
44
|
+
# Currently only ToUnicode CMaps are supported.
|
45
|
+
class Parser

  # Parses the given string and returns a CMap object.
  #
  # Only the "bfchar" and "bfrange" operators as well as the dictionary entries /Registry,
  # /Ordering, /Supplement and /CMapName are evaluated; everything else is skipped. Parsing
  # stops at the "endcmap" operator or when there are no more tokens.
  #
  # Any StandardError that occurs during parsing is re-raised as HexaPDF::Error.
  def parse(string)
    tokenizer = HexaPDF::Content::Tokenizer.new(string)
    cmap = CMap.new

    while (token = tokenizer.next_token) != HexaPDF::Tokenizer::NO_MORE_TOKENS
      if token.kind_of?(HexaPDF::Tokenizer::Token)
        case token
        when 'beginbfchar'.freeze then parse_bf_char(tokenizer, cmap)
        when 'beginbfrange'.freeze then parse_bf_range(tokenizer, cmap)
        when 'endcmap' then break
        end
      elsif token.kind_of?(Symbol)
        parse_dict_mapping(tokenizer, cmap, token)
      end
    end

    cmap
  rescue => e
    raise HexaPDF::Error, "Error parsing CMap: #{e.message}", e.backtrace
  end

  private

  # Parses a single mapping of a dictionary pair. The +name+ of the mapping has already been
  # parsed.
  #
  # The value is only stored on the +cmap+ if it has the type expected for the given +name+.
  def parse_dict_mapping(tokenizer, cmap, name)
    value = tokenizer.next_token
    return if value.kind_of?(HexaPDF::Tokenizer::Token)

    case name
    when :Registry then cmap.registry = value if value.kind_of?(String)
    when :Ordering then cmap.ordering = value if value.kind_of?(String)
    when :Supplement then cmap.supplement = value if value.kind_of?(Integer)
    when :CMapName then cmap.name = value.to_s if value.kind_of?(Symbol)
    end
  end

  # Parses the "bfchar" operator at the current position.
  #
  # Code/destination pairs are read until the next keyword token (normally "endbfchar") is
  # encountered. The destination strings are converted from UTF-16BE to UTF-8.
  def parse_bf_char(tokenizer, cmap)
    until (code = tokenizer.next_token).kind_of?(HexaPDF::Tokenizer::Token)
      str = tokenizer.next_token.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
      cmap.unicode_mapping[bytes_to_int(code)] = str
    end
  end

  # Parses the "bfrange" operator at the current position.
  #
  # If the destination is a string, the first code is mapped to the whole (UTF-16BE decoded)
  # string and each following code to the same string with its last code point incremented by
  # one. If the destination is an array, it has to contain one string for each code of the
  # range.
  #
  # Raises HexaPDF::Error if the destination is neither a string nor an array, if a string
  # destination is empty or if an array destination doesn't provide a string for each code.
  #
  #--
  # PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
  # operators of the form "startCode endCode codePoint" should be handled.
  #
  # PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
  # of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
  # this.
  #
  # Additionally, #5411 mentions in section 1.4.1 that the first byte of "startCode" and
  # "endCode" have to be the same. So it seems that this is a mistake in the PDF reference.
  #++
  def parse_bf_range(tokenizer, cmap)
    until (code1 = tokenizer.next_token).kind_of?(HexaPDF::Tokenizer::Token)
      code1 = bytes_to_int(code1)
      code2 = bytes_to_int(tokenizer.next_token)
      dest = tokenizer.next_object

      if dest.kind_of?(String)
        text = dest.encode(::Encoding::UTF_8, ::Encoding::UTF_16BE)
        raise HexaPDF::Error, "Invalid bfrange operator in CMap" if text.empty?

        # Only the last code point is incremented; all preceding characters of a
        # multi-character destination are kept as is.
        prefix = text[0...-1]
        codepoint = text[-1].ord
        code1.upto(code2) do |code|
          cmap.unicode_mapping[code] = prefix.dup << codepoint
          codepoint += 1
        end
      elsif dest.kind_of?(Array)
        code1.upto(code2) do |code|
          target = dest[code - code1]
          unless target.kind_of?(String)
            raise HexaPDF::Error, "Invalid bfrange operator in CMap"
          end
          cmap.unicode_mapping[code] =
            target.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
        end
      else
        raise HexaPDF::Error, "Invalid bfrange operator in CMap"
      end
    end
  end

  # Treats the string as an array of bytes and converts it to an integer.
  #
  # The bytes are converted in the big-endian way. An empty string yields 0.
  def bytes_to_int(string)
    string.each_byte.inject(0) {|result, byte| (result << 8) | byte }
  end

end
|
143
|
+
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/font/cmap'
|
35
|
+
|
36
|
+
module HexaPDF
|
37
|
+
module Font
|
38
|
+
class CMap
|
39
|
+
|
40
|
+
# Serializes character code mappings into the textual CMap file format.
#
# At the moment only ToUnicode CMaps can be created.
class Writer

  # Upper bound for the number of entries that are put into a single section.
  MAX_ENTRIES_IN_SECTION = 100

  # Builds a ToUnicode CMap for +mapping+, a list of [input code, Unicode codepoint] pairs that
  # needs to be sorted by the input codes, and returns it as a string.
  #
  # Single mappings are written to "bfchar" sections, runs of consecutive mappings to
  # "bfrange" sections. An empty mapping yields a CMap without any such section.
  #
  # Note that the returned CMap always uses a 16-bit input code space!
  def create_to_unicode_cmap(mapping)
    return to_unicode_template % '' if mapping.empty?

    chars, ranges = compute_section_entries(mapping)

    sections = create_sections("bfchar", chars.size / 2) do |entry|
      code, value = chars[entry * 2, 2]
      line = sprintf("<%04X>", code)
      line << "<" << utf16be_hex(value) << ">\n"
    end

    range_sections = create_sections("bfrange", ranges.size / 3) do |entry|
      first, last, value = ranges[entry * 3, 3]
      line = sprintf("<%04X><%04X>", first, last)
      line << "<" << utf16be_hex(value) << ">\n"
    end
    sections << range_sections

    # The newline after the last section is dropped before filling in the template.
    to_unicode_template % sections.chop
  end

  private

  # Returns the hexadecimal notation of the UTF-16BE encoded form of +codepoint+.
  def utf16be_hex(codepoint)
    (''.dup.force_encoding(::Encoding::UTF_16BE) << codepoint).unpack('H*').first
  end

  # Splits the given mapping into the entries for the "char" and the "range" sections.
  #
  # The return value consists of the two flat arrays +char_mappings+ and +range_mappings+.
  # The first one has the form
  #
  #   [code1, value1, code2, value2, ...]
  #
  # and the second one the form
  #
  #   [start1, end1, value1, start2, end2, value2, ...]
  def compute_section_entries(mapping)
    singles = []
    runs = []

    prev_code, prev_value = *mapping[0]
    in_run = false

    # Files the pending entry: either completes the currently open run by setting its end
    # code or records the entry as single character mapping.
    flush_pending = lambda do
      if in_run
        runs[-2] = prev_code
      else
        singles << prev_code << prev_value
      end
    end

    mapping[1..-1].each do |code, value|
      # A run is only continued if code and value both advance by one and the code's low byte
      # is not zero, i.e. a run never extends across a multiple of 256 (presumably so that all
      # codes of a range share their leading byte).
      if prev_code + 1 == code && prev_value + 1 == value && code % 256 != 0
        # The end code (nil) is filled in once the run is complete.
        runs.push(prev_code, nil, prev_value) unless in_run
        in_run = true
      else
        flush_pending.call
        in_run = false
      end
      prev_code = code
      prev_value = value
    end

    # The last mapping is still pending after the loop.
    flush_pending.call

    [singles, runs]
  end

  # Assembles one or more sections of a CMap file and returns them as a single string.
  #
  # +type+::
  #   The section name, e.g. "bfchar" or "bfrange".
  #
  # +size+::
  #   The total number of entries of this type. Determines how many sections are needed.
  #
  # No section ever contains more than MAX_ENTRIES_IN_SECTION entries; if there are more
  # entries, additional sections are started.
  #
  # The block is called with the numbers from 0 up to size - 1, each one denoting the entry
  # to be processed, and has to return the section line for that entry.
  def create_sections(type, size)
    output = ''.dup
    (0...size).each_slice(MAX_ENTRIES_IN_SECTION) do |entries|
      output << "#{entries.size} begin#{type}\n"
      entries.each {|entry| output << yield(entry) }
      output << "end#{type}\n"
    end
    output
  end

  # Returns the format string for a ToUnicode CMap file; its single "%s" placeholder is
  # replaced by the "bfchar" and "bfrange" sections.
  def to_unicode_template
    <<-TEMPLATE
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
%s
endcmap
CMapName currentdict /CMap defineresource pop
end
end
    TEMPLATE
  end

end
|
173
|
+
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
module HexaPDF
|
35
|
+
module Font
|
36
|
+
|
37
|
+
# A CMap maps character codes either to CIDs (character IDs) or to their Unicode value.
#
# For now only the mapping to Unicode values is implemented.
#
# See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Note #5411
class CMap

  autoload(:Parser, 'hexapdf/font/cmap/parser')
  autoload(:Writer, 'hexapdf/font/cmap/writer')

  class << self

    # Parses the given string, which needs to contain a valid CMap file, and returns the
    # result of the parser.
    def parse(string)
      parser = Parser.new
      parser.parse(string)
    end

    # Creates a ToUnicode CMap representing the given code to Unicode codepoint mapping and
    # returns it as a string.
    #
    # See: Writer#create_to_unicode_cmap
    def create_to_unicode_cmap(mapping)
      writer = Writer.new
      writer.create_to_unicode_cmap(mapping)
    end

  end

  # The registry part of the CMap version.
  attr_accessor :registry

  # The ordering part of the CMap version.
  attr_accessor :ordering

  # The supplement part of the CMap version.
  attr_accessor :supplement

  # The name of the CMap.
  attr_accessor :name

  # The hash that maps character codes to Unicode values.
  attr_accessor :unicode_mapping

  # Creates a new CMap object without any mappings.
  def initialize
    # Unknown codes resolve to one shared, frozen empty string.
    @unicode_mapping = Hash.new(''.freeze)
  end

  # Looks up the given character code and returns the Unicode string stored for it, or an
  # empty string if there is no mapping for the code.
  def to_unicode(code)
    unicode_mapping[code]
  end

end
|
88
|
+
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2016 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#++
|
33
|
+
|
34
|
+
require 'hexapdf/font/encoding/glyph_list'
|
35
|
+
|
36
|
+
module HexaPDF
|
37
|
+
module Font
|
38
|
+
module Encoding
|
39
|
+
|
40
|
+
# Common base of the encoding classes which map codes in the range of 0 to 255 to glyph
# names.
class Base

  # The name of the encoding, or +nil+ if no name has been assigned to the encoding.
  attr_reader :encoding_name

  # The hash that maps codes to glyph names.
  attr_reader :code_to_name

  # Creates a new encoding object that has no name and does not contain any mappings.
  def initialize
    @code_to_name = {}
    @unicode_cache = {}
    @encoding_name = nil
  end

  # Returns the glyph name for the given code, or .notdef if no glyph is defined for the
  # code.
  #
  # The returned value is always a Symbol object!
  def name(code)
    @code_to_name.fetch(code) { :'.notdef' }
  end

  # Returns the result of GlyphList.name_to_unicode for the glyph name of the given code.
  #
  # The result is cached per code, so later changes to the code's glyph name are not
  # reflected anymore. Therefore this method should only be called after all codes have been
  # defined.
  def unicode(code)
    cached = @unicode_cache[code]
    return cached if cached

    @unicode_cache[code] = GlyphList.name_to_unicode(name(code))
  end

end
|
74
|
+
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|