hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,176 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'fiber'
35
+ require 'hexapdf/error'
36
+
37
module HexaPDF

  # This special Fiber class should be used when the total length of the data yielded by the fiber
  # is known beforehand. HexaPDF uses this information to avoid unnecessary memory usage.
  class FiberWithLength < Fiber

    # The total length (in bytes) of the data that will be yielded by this fiber. If the return
    # value is negative the total length is *not* known.
    attr_reader :length

    # Initializes the Fiber and sets the +length+.
    #
    # A +length+ of +nil+ is stored as -1, i.e. as "length not known".
    def initialize(length, &block)
      super(&block)
      @length = length || -1
    end

  end


  # == Overview
  #
  # A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
  # to reverse this process. Some filters can be used for any content, like FlateDecode, others
  # are specifically designed for image streams, like DCTDecode.
  #
  # Each filter is implemented via fibers. This allows HexaPDF to easily process either small
  # chunks or a whole stream at once, depending on the memory restrictions and to create flexible
  # filter pipelines.
  #
  # It also allows the easy re-processing of a stream without first decoding and then encoding it.
  # Such functionality is useful, for example, when a PDF file should be decrypted and streams
  # compressed in one step.
  #
  #
  # == Implementation of a Filter Object
  #
  # Each filter is an object (normally a module) that responds to two methods: \#encoder and
  # \#decoder. Both of these methods are given a *source* (a Fiber) and *options* (a Hash) and have
  # to return a Fiber object.
  #
  # The returned fiber should resume the *source* fiber to get the next chunk of binary data
  # (possibly only one byte of data, so this situation should be handled gracefully). Once the
  # fiber has processed this chunk, it should yield the processed chunk as binary string. This
  # should be done as long as the source fiber is #alive? and doesn't return +nil+ when resumed.
  #
  # Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
  #
  # See: PDF1.7 s7.4
  module Filter

    autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
    autoload(:ASCIIHexDecode, 'hexapdf/filter/ascii_hex_decode')
    autoload(:DCTDecode, 'hexapdf/filter/dct_decode')
    autoload(:FlateDecode, 'hexapdf/filter/flate_decode')
    autoload(:JPXDecode, 'hexapdf/filter/jpx_decode')
    autoload(:LZWDecode, 'hexapdf/filter/lzw_decode')
    autoload(:RunLengthDecode, 'hexapdf/filter/run_length_decode')

    autoload(:Predictor, 'hexapdf/filter/predictor')

    autoload(:Encryption, 'hexapdf/filter/encryption')

    # Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
    # String object.
    #
    # The fiber yields a copy of the whole string in one chunk. Its length is the number of
    # *bytes* of the string (not the number of characters) because sources deliver binary data.
    def self.source_from_string(str)
      FiberWithLength.new(str.bytesize) { str.dup }
    end

    # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
    # data from an IO object.
    #
    # Each time a chunk is read, the position pointer of the IO is adjusted. This should be taken
    # into account when working with the IO object.
    #
    # Options:
    #
    # :pos:: The position from where the reading should start. A negative position is treated as
    #        zero. Default: 0.
    #
    # :length:: The length indicating the number of bytes to read. An error is raised if not all
    #           specified bytes could be read. A negative length means reading until the end of
    #           the IO stream. Default: -1.
    #
    # :chunk_size:: The size of the chunks that should be returned in each iteration. A chunk size
    #               of less than or equal to 0 means using the biggest chunk size available (can
    #               change between versions!). Default: 0.
    #
    # Raises a FilterError when the fiber is resumed and fewer than +length+ bytes are available.
    def self.source_from_io(io, pos: 0, length: -1, chunk_size: 0)
      orig_length = length
      chunk_size = 2**20 if chunk_size <= 0
      chunk_size = length if length >= 0 && chunk_size > length
      # A huge sentinel stands in for "read until EOF" so that the loop below needs no special case.
      length = 2**61 if length < 0
      pos = 0 if pos < 0

      FiberWithLength.new(orig_length) do
        # The IO position is re-set before every read because the consumer may have moved it while
        # this fiber was suspended.
        while length > 0 && (io.pos = pos) && (data = io.read(chunk_size))
          pos = io.pos
          length -= data.size
          chunk_size = length if chunk_size > length
          Fiber.yield(data)
        end
        if length > 0 && orig_length >= 0
          raise FilterError, "Couldn't read all requested bytes before encountering EOF"
        end
      end
    end

    # Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks
    # from a file.
    #
    # If no +length+ is given, the length of the returned fiber is the number of bytes between the
    # (clamped) start position and the end of the file, i.e. a negative +pos+ counts as zero and a
    # +pos+ beyond the end of the file results in a length of zero.
    #
    # Note that there will be a problem if the size of the file changes between the invocation of
    # this method and the actual consumption of the file!
    #
    # See ::source_from_io for a description of the available options.
    def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
      fib_length =
        if length < 0
          # Mirror the clamping done by ::source_from_io so that the reported length matches the
          # number of bytes that are actually going to be yielded.
          [File.stat(filename).size - [pos, 0].max, 0].max
        else
          length
        end

      FiberWithLength.new(fib_length) do
        # The file is opened lazily on first resume and closed once the source is exhausted.
        File.open(filename, 'rb') do |file|
          source = source_from_io(file, pos: pos, length: length, chunk_size: chunk_size)
          while source.alive? && (io_data = source.resume)
            Fiber.yield(io_data)
          end
        end
      end
    end

    # Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
    # is dead.
    #
    # The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding.
    def self.string_from_source(source)
      str = ''.b
      while source.alive? && (data = source.resume)
        # Appending a binary view of the chunk keeps the buffer binary; otherwise a non-ASCII chunk
        # in another encoding would change the buffer's encoding or make later appends fail.
        str << data.b
      end
      str
    end

  end

end
@@ -0,0 +1,146 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/error'
35
+ require 'hexapdf/font/cmap'
36
+ require 'hexapdf/content/parser'
37
+
38
module HexaPDF
  module Font
    class CMap

      # Parses CMap files.
      #
      # Currently only ToUnicode CMaps are supported.
      class Parser

        # Parses the given string and returns a CMap object.
        #
        # Only the "beginbfchar" and "beginbfrange" sections as well as the dictionary entries
        # handled by #parse_dict_mapping are evaluated, all other tokens are skipped. Parsing stops
        # at the "endcmap" operator or when there are no more tokens.
        #
        # Any error that occurs during parsing is re-raised as HexaPDF::Error (keeping the original
        # backtrace).
        def parse(string)
          tokenizer = HexaPDF::Content::Tokenizer.new(string)
          cmap = CMap.new

          while (token = tokenizer.next_token) != HexaPDF::Tokenizer::NO_MORE_TOKENS
            if token.kind_of?(HexaPDF::Tokenizer::Token)
              case token
              when 'beginbfchar'.freeze then parse_bf_char(tokenizer, cmap)
              when 'beginbfrange'.freeze then parse_bf_range(tokenizer, cmap)
              when 'endcmap'.freeze then break
              end
            elsif token.kind_of?(Symbol)
              parse_dict_mapping(tokenizer, cmap, token)
            end
          end

          cmap
        rescue => e
          raise HexaPDF::Error, "Error parsing CMap: #{e.message}", e.backtrace
        end

        private

        # Parses a single mapping of a dictionary pair. The +name+ of the mapping has already been
        # parsed.
        #
        # The value is only stored if it has the expected type; a value that is an operator token
        # is ignored.
        def parse_dict_mapping(tokenizer, cmap, name)
          value = tokenizer.next_token
          return if value.kind_of?(HexaPDF::Tokenizer::Token)

          case name
          when :Registry then cmap.registry = value if value.kind_of?(String)
          when :Ordering then cmap.ordering = value if value.kind_of?(String)
          when :Supplement then cmap.supplement = value if value.kind_of?(Integer)
          when :CMapName then cmap.name = value.to_s if value.kind_of?(Symbol)
          end
        end

        # Parses the "bfchar" operator at the current position.
        #
        # Pairs of "code" and "UTF-16BE destination string" are read until the next operator token
        # is encountered; each destination is stored as UTF-8 string.
        def parse_bf_char(tokenizer, cmap)
          until (code = tokenizer.next_token).kind_of?(HexaPDF::Tokenizer::Token)
            str = tokenizer.next_token.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
            cmap.unicode_mapping[bytes_to_int(code)] = str
          end
        end

        # Parses the "bfrange" operator at the current position.
        #
        # Triples of "startCode endCode destination" are read until the next operator token is
        # encountered. If the destination is a string, it is assigned to the start code and its
        # last codepoint is incremented for each following code; any codepoints before the last
        # one (e.g. for ligatures like "ff") are kept as is. If the destination is an array, its
        # entries are assigned to the codes in order.
        #
        #--
        # PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
        # operators of the form "startCode endCode codePoint" should be handled.
        #
        # PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
        # of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
        # this.
        #
        # Additionally, #5411 mentions in section 1.4.1 that the first byte of "startCode" and
        # "endCode" have to be the same. So it seems that this is a mistake in the PDF reference.
        #++
        def parse_bf_range(tokenizer, cmap)
          until (code1 = tokenizer.next_token).kind_of?(HexaPDF::Tokenizer::Token)
            code1 = bytes_to_int(code1)
            code2 = bytes_to_int(tokenizer.next_token)
            dest = tokenizer.next_object

            if dest.kind_of?(String)
              dest.force_encoding(::Encoding::UTF_16BE)
              # Malformed or empty strings fall back to using just the first codepoint (which
              # raises an error if there is none) instead of failing on trailing garbage.
              codepoints =
                if dest.valid_encoding? && !dest.empty?
                  dest.codepoints
                else
                  [dest.ord]
                end
              codepoint = codepoints.pop
              prefix = codepoints.pack('U*') # UTF-8 string containing all but the last codepoint
              code1.upto(code2) do |code|
                cmap.unicode_mapping[code] = prefix.dup << codepoint
                codepoint += 1
              end
            elsif dest.kind_of?(Array)
              code1.upto(code2) do |code|
                cmap.unicode_mapping[code] =
                  dest[code - code1].encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
              end
            else
              raise HexaPDF::Error, "Invalid bfrange operator in CMap"
            end
          end
        end

        # Treats the string as an array of bytes and converts it to an integer.
        #
        # The bytes are converted in the big-endian way. An empty string results in 0.
        def bytes_to_int(string)
          string.each_byte.inject(0) {|result, byte| (result << 8) | byte }
        end

      end

    end
  end
end
@@ -0,0 +1,176 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/cmap'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ class CMap
39
+
40
+ # Creates a CMap file.
41
+ #
42
+ # Currently only ToUnicode CMaps are supported.
43
+ class Writer
44
+
45
+ # Maximum number of entries in one section.
46
+ MAX_ENTRIES_IN_SECTION = 100
47
+
48
+ # Returns a ToUnicode CMap for the given input code to Unicode codepoint mapping which needs
49
+ # to be sorted by input codes.
50
+ #
51
+ # Note that the returned CMap always uses a 16-bit input code space!
52
+ def create_to_unicode_cmap(mapping)
53
+ return to_unicode_template % '' if mapping.length == 0
54
+
55
+ chars, ranges = compute_section_entries(mapping)
56
+
57
+ result = create_sections("bfchar", chars.size / 2) do |index|
58
+ index *= 2
59
+ sprintf("<%04X>", chars[index]) << "<" <<
60
+ (''.force_encoding(::Encoding::UTF_16BE) << chars[index + 1]).unpack('H*').first <<
61
+ ">\n"
62
+ end
63
+
64
+ result << create_sections("bfrange", ranges.size / 3) do |index|
65
+ index *= 3
66
+ sprintf("<%04X><%04X>", ranges[index], ranges[index + 1]) << "<" <<
67
+ (''.force_encoding(::Encoding::UTF_16BE) << ranges[index + 2]).unpack('H*').first <<
68
+ ">\n"
69
+ end
70
+
71
+ to_unicode_template % result.chop!
72
+ end
73
+
74
+ private
75
+
76
+ # Computes the entries for the "char" and "range" sections based on the given mapping.
77
+ #
78
+ # Returns two arrays +char_mappings+ and +range_mappings+ where +char_mappings+ is an array
79
+ # of the form
80
+ #
81
+ # [code1, value1, code2, value2, ...]
82
+ #
83
+ # and +range_mappings+ an array of the form
84
+ #
85
+ # [start1, end1, value1, start2, end2, value2, ...]
86
+ def compute_section_entries(mapping)
87
+ chars = []
88
+ ranges = []
89
+
90
+ last_code, last_value = *mapping[0]
91
+ is_range = false
92
+ mapping.slice(1..-1).each do |code, value|
93
+ if last_code + 1 == code && last_value + 1 == value && code % 256 != 0
94
+ ranges << last_code << nil << last_value unless is_range
95
+ is_range = true
96
+ else
97
+ if is_range
98
+ ranges[-2] = last_code
99
+ is_range = false
100
+ else
101
+ chars << last_code << last_value
102
+ end
103
+ end
104
+ last_code = code
105
+ last_value = value
106
+ end
107
+
108
+ # Handle last remaining mapping
109
+ if is_range
110
+ ranges[-2] = last_code
111
+ else
112
+ chars << last_code << last_value
113
+ end
114
+
115
+ [chars, ranges]
116
+ end
117
+
118
+ # Creates one or more sections of a CMap file and returns the resulting string.
119
+ #
120
+ # +type+::
121
+ # The name of the section, e.g. "bfchar" or "bfrange".
122
+ #
123
+ # +size+::
124
+ # The maximum number of elements of this type. Used for determining when to start a new
125
+ # section.
126
+ #
127
+ # The method makes sure that no section has more than the maximum number of allowed entries.
128
+ #
129
+ # Numbers from 0 up to size - 1 are yielded, indicating the current entry that should be
130
+ # processed and for which an appropriate section line should be returned from the block.
131
+ def create_sections(type, size)
132
+ return '' if size == 0
133
+
134
+ result = ""
135
+ index = 0
136
+ while size > 0
137
+ count = [MAX_ENTRIES_IN_SECTION, size].min
138
+ result << "#{count} begin#{type}\n"
139
+ index.upto(index + count - 1) {|i| result << yield(i)}
140
+ result << "end#{type}\n"
141
+ index += count
142
+ size -= count
143
+ end
144
+
145
+ result
146
+ end
147
+
148
+ # Returns the CMap file template for a ToUnicode CMap.
149
+ def to_unicode_template
150
+ <<-TEMPLATE
151
+ /CIDInit /ProcSet findresource begin
152
+ 12 dict begin
153
+ begincmap
154
+ /CIDSystemInfo
155
+ << /Registry (Adobe)
156
+ /Ordering (UCS)
157
+ /Supplement 0
158
+ >> def
159
+ /CMapName /Adobe-Identity-UCS def
160
+ /CMapType 2 def
161
+ 1 begincodespacerange
162
+ <0000> <FFFF>
163
+ endcodespacerange
164
+ %s
165
+ endcmap
166
+ CMapName currentdict /CMap defineresource pop
167
+ end
168
+ end
169
+ TEMPLATE
170
+ end
171
+
172
+ end
173
+
174
+ end
175
+ end
176
+ end
@@ -0,0 +1,90 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ module HexaPDF
35
+ module Font
36
+
37
+ # Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode
38
+ # value.
39
+ #
40
+ # Currently, only the mapping to the Unicode values is supported.
41
+ #
42
+ # See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Note #5411
43
class CMap

  autoload(:Parser, 'hexapdf/font/cmap/parser')
  autoload(:Writer, 'hexapdf/font/cmap/writer')

  class << self

    # Parses the given string, which needs to contain a valid CMap file, with a new Parser
    # instance and returns the result.
    def parse(string)
      parser = Parser.new
      parser.parse(string)
    end

    # Builds a ToUnicode CMap string for the given code to Unicode codepoint mapping by
    # delegating to a new Writer instance.
    #
    # See: Writer#create_to_unicode_cmap
    def create_to_unicode_cmap(mapping)
      writer = Writer.new
      writer.create_to_unicode_cmap(mapping)
    end

  end

  # Registry part of the CMap version.
  attr_accessor :registry

  # Ordering part of the CMap version.
  attr_accessor :ordering

  # Supplement part of the CMap version.
  attr_accessor :supplement

  # Name of the CMap.
  attr_accessor :name

  # Hash that maps character codes to their Unicode values.
  attr_accessor :unicode_mapping

  # Creates a new CMap object without any mappings.
  def initialize
    # Lookups of unmapped codes return this single frozen empty string.
    fallback = "".freeze
    @unicode_mapping = Hash.new(fallback)
  end

  # Looks up the given character code in the Unicode mapping and returns the stored string.
  # With the default mapping hash an empty string is returned for codes without a mapping.
  def to_unicode(code)
    unicode_mapping[code]
  end

end
88
+
89
+ end
90
+ end
@@ -0,0 +1,77 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/encoding/glyph_list'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module Encoding
39
+
40
+ # Base for encoding classes that are used for mapping codes in the range of 0 to 255 to glyph
41
+ # names.
42
+ class Base
43
+
44
+ # The name of the encoding or +nil+ if the encoding has not been assigned a name.
45
+ attr_reader :encoding_name
46
+
47
+ # The hash mapping codes to names.
48
+ attr_reader :code_to_name
49
+
50
+ # Creates a new encoding object containing no default mappings.
51
+ def initialize
52
+ @code_to_name = {}
53
+ @unicode_cache = {}
54
+ @encoding_name = nil
55
+ end
56
+
57
+ # Returns the name for the given code, or .notdef if no glyph for the code is defined.
58
+ #
59
+ # The returned value is always a Symbol object!
60
+ def name(code)
61
+ @code_to_name.fetch(code, :'.notdef')
62
+ end
63
+
64
+ # Returns the Unicode value in UTF-8 for the given code, or an empty string if the code
65
+ # cannot be mapped.
66
+ #
67
+ # Note that this method caches the result of the Unicode mapping and therefore should only
68
+ # be called after all codes have been defined.
69
+ def unicode(code)
70
+ @unicode_cache[code] ||= GlyphList.name_to_unicode(name(code))
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+ end
77
+ end