hexapdf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,176 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'fiber'
35
+ require 'hexapdf/error'
36
+
37
+ module HexaPDF
38
+
39
# A Fiber subclass for data sources whose total output size is known beforehand. HexaPDF uses
# this information to avoid unnecessary memory usage.
class FiberWithLength < Fiber

  # The total length of the data this fiber is going to yield. A negative value signals that
  # the total length is *not* known.
  attr_reader :length

  # Creates the fiber from the given block and records +length+.
  #
  # A +nil+ (or +false+) +length+ is stored as -1, i.e. "length not known".
  def initialize(length, &block)
    super(&block)
    @length = (length ? length : -1)
  end

end
54
+
55
+
56
+ # == Overview
57
+ #
58
+ # A stream filter is used to compress a stream or to encode it in an ASCII compatible way; or
59
+ # to reverse this process. Some filters can be used for any content, like FlateDecode, others
60
+ # are specifically designed for image streams, like DCTDecode.
61
+ #
62
+ # Each filter is implemented via fibers. This allows HexaPDF to easily process either small
63
+ # chunks or a whole stream at once, depending on the memory restrictions and to create flexible
64
+ # filter pipelines.
65
+ #
66
+ # It also allows the easy re-processing of a stream without first decoding and then encoding it.
67
+ # Such functionality is useful, for example, when a PDF file should be decrypted and streams
68
+ # compressed in one step.
69
+ #
70
+ #
71
+ # == Implementation of a Filter Object
72
+ #
73
+ # Each filter is an object (normally a module) that responds to two methods: \#encoder and
74
+ # \#decoder. Both of these methods are given a *source* (a Fiber) and *options* (a Hash) and have
75
+ # to return a Fiber object.
76
+ #
77
+ # The returned fiber should resume the *source* fiber to get the next chunk of binary data
78
+ # (possibly only one byte of data, so this situation should be handled gracefully). Once the
79
+ # fiber has processed this chunk, it should yield the processed chunk as binary string. This
80
+ # should be done as long as the source fiber is #alive? and doesn't return +nil+ when resumed.
81
+ #
82
+ # Such a fiber should *not* return +nil+ unless this signifies that no more data is coming!
83
+ #
84
+ # See: PDF1.7 s7.4
85
+ module Filter
86
+
87
+ autoload(:ASCII85Decode, 'hexapdf/filter/ascii85_decode')
88
+ autoload(:ASCIIHexDecode, 'hexapdf/filter/ascii_hex_decode')
89
+ autoload(:DCTDecode, 'hexapdf/filter/dct_decode')
90
+ autoload(:FlateDecode, 'hexapdf/filter/flate_decode')
91
+ autoload(:JPXDecode, 'hexapdf/filter/jpx_decode')
92
+ autoload(:LZWDecode, 'hexapdf/filter/lzw_decode')
93
+ autoload(:RunLengthDecode, 'hexapdf/filter/run_length_decode')
94
+
95
+ autoload(:Predictor, 'hexapdf/filter/predictor')
96
+
97
+ autoload(:Encryption, 'hexapdf/filter/encryption')
98
+
99
# Returns a Fiber that can be used as a source for decoders/encoders and that is based on a
# String object.
#
# The fiber returns a copy of +str+ when it is resumed, so the caller's string is never handed
# out directly. The advertised length of the fiber is the size of +str+ in *bytes* (not in
# characters) because the string is consumed as data; both values only differ for strings
# that contain multibyte characters.
def self.source_from_string(str)
  FiberWithLength.new(str.bytesize) { str.dup }
end
104
+
105
# Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks of
# data from an IO object.
#
# The position pointer of the IO is set before every read and therefore changes while the
# fiber is consumed. This should be taken into account when working with the IO object.
#
# Options:
#
# :pos:: The position from where the reading should start. A negative position is treated as
#        zero. Default: 0.
#
# :length:: The length indicating the number of bytes to read. An error is raised if not all
#           specified bytes could be read. A negative length means reading until the end of
#           the IO stream. Default: -1.
#
# :chunk_size:: The size of the chunks that should be returned in each iteration. A chunk size
#               of less than or equal to 0 means using the biggest chunk size available (can
#               change between versions!). Default: 0.
def self.source_from_io(io, pos: 0, length: -1, chunk_size: 0)
  step = (chunk_size <= 0 ? 2**20 : chunk_size)
  bounded = (length >= 0)
  # Never request more than the total number of bytes that should be read.
  step = length if bounded && step > length
  # An "unbounded" read is modelled by a remaining count that is never reached in practice.
  remaining = (bounded ? length : 2**61)
  offset = (pos < 0 ? 0 : pos)

  FiberWithLength.new(length) do
    while remaining > 0
      # Re-seek before every read since the IO may have been used elsewhere in the meantime.
      io.pos = offset
      chunk = io.read(step)
      break unless chunk

      offset = io.pos
      remaining -= chunk.size
      step = remaining if step > remaining
      Fiber.yield(chunk)
    end

    if bounded && remaining > 0
      raise FilterError, "Couldn't read all requested bytes before encountering EOF"
    end
  end
end
142
+
143
# Returns a Fiber that can be used as a source for decoders/encoders and that reads chunks
# from a file.
#
# If +length+ is negative, the advertised length of the returned fiber is the number of bytes
# between the start position and the end of the file, determined when this method is called.
# A negative +pos+ counts as zero for this calculation (it is also read from offset zero), and
# a +pos+ at or beyond the end of the file results in a length of zero.
#
# Note that there will be a problem if the size of the file changes between the invocation of
# this method and the actual consumption of the file!
#
# See ::source_from_io for a description of the available options.
def self.source_from_file(filename, pos: 0, length: -1, chunk_size: 0)
  fib_length = length
  if fib_length < 0
    # Clamp on both ends: a negative start must not inflate the length and a start behind the
    # end of the file must not produce a negative (i.e. "unknown") length.
    start = (pos < 0 ? 0 : pos)
    fib_length = [File.stat(filename).size - start, 0].max
  end

  FiberWithLength.new(fib_length) do
    # The file is opened lazily, i.e. on the first resume, and closed once all data was read.
    File.open(filename, 'rb') do |file|
      source = source_from_io(file, pos: pos, length: length, chunk_size: chunk_size)
      while source.alive? && (io_data = source.resume)
        Fiber.yield(io_data)
      end
    end
  end
end
161
+
162
# Returns the concatenated string chunks retrieved by resuming the given source Fiber until it
# is dead or returns +nil+.
#
# The returned string is always a string with +BINARY+ (= +ASCII-8BIT+) encoding, regardless
# of the encodings of the individual chunks.
def self.string_from_source(source)
  str = ''.b
  while source.alive? && (data = source.resume)
    # Appending a non-binary chunk could switch the encoding of the buffer (or raise an error
    # once chunks with incompatible encodings are mixed), so such chunks are appended as a
    # binary copy instead.
    str << (data.encoding == ::Encoding::BINARY ? data : data.b)
  end
  str
end
173
+
174
+ end
175
+
176
+ end
@@ -0,0 +1,146 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/error'
35
+ require 'hexapdf/font/cmap'
36
+ require 'hexapdf/content/parser'
37
+
38
+ module HexaPDF
39
+ module Font
40
+ class CMap
41
+
42
+ # Parses CMap files.
43
+ #
44
+ # Currently only ToUnicode CMaps are supported.
45
+ class Parser
46
+
47
# Parses the given string and returns a CMap object.
#
# Only the "beginbfchar" and "beginbfrange" operators as well as name/value pairs are
# evaluated; parsing stops at the "endcmap" operator or when no more tokens are available.
#
# Any error that occurs during parsing is re-raised as HexaPDF::Error.
def parse(string)
  tokenizer = HexaPDF::Content::Tokenizer.new(string)
  cmap = CMap.new

  until (token = tokenizer.next_token) == HexaPDF::Tokenizer::NO_MORE_TOKENS
    case token
    when HexaPDF::Tokenizer::Token
      # Operators/keywords
      if 'beginbfchar' == token
        parse_bf_char(tokenizer, cmap)
      elsif 'beginbfrange' == token
        parse_bf_range(tokenizer, cmap)
      elsif 'endcmap' == token
        break
      end
    when Symbol
      # A name, possibly the key of one of the supported dictionary entries
      parse_dict_mapping(tokenizer, cmap, token)
    end
  end

  cmap
rescue => e
  raise HexaPDF::Error, "Error parsing CMap: #{e.message}", e.backtrace
end
68
+
69
+ private
70
+
71
# Parses a single mapping of a dictionary pair. The +name+ of the mapping has already been
# parsed.
#
# The next token is taken as value. It is ignored if it is an operator token, if +name+ is
# not one of :Registry, :Ordering, :Supplement or :CMapName, or if it doesn't have the type
# expected for the entry.
def parse_dict_mapping(tokenizer, cmap, name)
  value = tokenizer.next_token
  return if value.kind_of?(HexaPDF::Tokenizer::Token)

  if name == :Registry
    cmap.registry = value if value.kind_of?(String)
  elsif name == :Ordering
    cmap.ordering = value if value.kind_of?(String)
  elsif name == :Supplement
    cmap.supplement = value if value.kind_of?(Integer)
  elsif name == :CMapName
    cmap.name = value.to_s if value.kind_of?(Symbol)
  end
end
84
+
85
# Parses the "bfchar" operator at the current position.
#
# Pairs of source code and destination string are read until an operator token is found. The
# destination string is converted in-place from UTF-16BE to UTF-8 and stored in the Unicode
# mapping of +cmap+ under the integer value of the source code.
def parse_bf_char(tokenizer, cmap)
  while true
    code = tokenizer.next_token
    break if code.kind_of?(HexaPDF::Tokenizer::Token)

    unicode = tokenizer.next_token
    unicode.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
    cmap.unicode_mapping[bytes_to_int(code)] = unicode
  end
end
92
+
93
# Parses the "bfrange" operator at the current position.
#
# Entries are read until an operator token is found. Each entry consists of a start code, an
# end code and a destination which is either
#
# * a string: its first UTF-16BE code point is assigned to the start code and the following
#   codes get the consecutively following code points, or
# * an array of strings: the n-th code of the range is mapped to the n-th string (converted
#   in-place from UTF-16BE to UTF-8).
#
# Any other kind of destination leads to a HexaPDF::Error.
#
#--
# PDF1.7 s9.10.3 and Adobe Technical Note #5411 have different views as to how "bfrange"
# operators of the form "startCode endCode codePoint" should be handled.
#
# PDF1.7 mentions that the last byte of "codePoint" should be incremented, up to a maximum
# of 255. However #5411 has the range "<1379> <137B> <90FE>" as example which contradicts
# this.
#
# Additionally, #5411 mentions in section 1.4.1 that the first byte of "startCode" and
# "endCode" have to be the same. So it seems that this is a mistake in the PDF reference.
#++
def parse_bf_range(tokenizer, cmap)
  while true
    token = tokenizer.next_token
    break if token.kind_of?(HexaPDF::Tokenizer::Token)

    first = bytes_to_int(token)
    last = bytes_to_int(tokenizer.next_token)
    dest = tokenizer.next_object

    case dest
    when String
      base = dest.force_encoding(::Encoding::UTF_16BE).ord
      (first..last).each do |code|
        cmap.unicode_mapping[code] = '' << (base + (code - first))
      end
    when Array
      (first..last).each do |code|
        entry = dest[code - first]
        cmap.unicode_mapping[code] = entry.encode!(::Encoding::UTF_8, ::Encoding::UTF_16BE)
      end
    else
      raise HexaPDF::Error, "Invalid bfrange operator in CMap"
    end
  end
end
128
+
129
# Treats the string as an array of bytes and converts it to an integer.
#
# The bytes are converted in the big-endian way, i.e. the first byte of the string is the most
# significant one. All bytes are used regardless of the encoding of the string. An empty
# string results in 0.
def bytes_to_int(string)
  # Iterating with each_byte (instead of indexing up to String#length, which counts
  # characters) makes sure that no trailing bytes of multibyte characters are dropped.
  string.each_byte.inject(0) { |result, byte| (result << 8) | byte }
end
141
+
142
+ end
143
+
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,176 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/cmap'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ class CMap
39
+
40
# Generates the textual contents of a CMap file.
#
# For now only ToUnicode CMaps can be generated.
class Writer

  # Upper limit for the number of entries a single section may contain.
  MAX_ENTRIES_IN_SECTION = 100

  # Builds and returns a ToUnicode CMap for the given mapping from input codes to Unicode
  # codepoints. The mapping has to be sorted by input code.
  #
  # Note that the returned CMap always uses a 16-bit input code space!
  def create_to_unicode_cmap(mapping)
    return to_unicode_template % '' if mapping.empty?

    singles, spans = compute_section_entries(mapping)

    body = create_sections("bfchar", singles.size / 2) do |entry|
      code, value = singles[entry * 2, 2]
      format("<%04X><%s>\n", code, utf16be_hex(value))
    end

    range_sections = create_sections("bfrange", spans.size / 3) do |entry|
      first, last, value = spans[entry * 3, 3]
      format("<%04X><%04X><%s>\n", first, last, utf16be_hex(value))
    end
    body << range_sections

    # The trailing newline is dropped because the template supplies its own.
    to_unicode_template % body.chop
  end

  private

  # Returns the hex representation of the UTF-16BE encoding of the given codepoint.
  def utf16be_hex(codepoint)
    (''.force_encoding(::Encoding::UTF_16BE) << codepoint).unpack('H*').first
  end

  # Splits the mapping into the entries for the "char" and the "range" sections.
  #
  # The result consists of two flat arrays. The first one holds the single mappings in the form
  #
  #   [code1, value1, code2, value2, ...]
  #
  # and the second one the range mappings in the form
  #
  #   [start1, end1, value1, start2, end2, value2, ...]
  #
  # Consecutive codes with consecutive values are combined into a range; a range never
  # continues onto a code that is a multiple of 256.
  def compute_section_entries(mapping)
    singles = []
    spans = []

    prev_code, prev_value = *mapping[0]
    in_span = false
    mapping[1..-1].each do |code, value|
      if prev_code + 1 == code && prev_value + 1 == value && code % 256 != 0
        # The end code is not known yet, it is filled in once the range is finished.
        spans.push(prev_code, nil, prev_value) unless in_span
        in_span = true
      elsif in_span
        spans[-2] = prev_code
        in_span = false
      else
        singles.push(prev_code, prev_value)
      end
      prev_code = code
      prev_value = value
    end

    # The final mapping is still pending after the loop.
    if in_span
      spans[-2] = prev_code
    else
      singles.push(prev_code, prev_value)
    end

    [singles, spans]
  end

  # Returns a string containing one or more sections of a CMap file.
  #
  # +type+::
  #   The section name, e.g. "bfchar" or "bfrange".
  #
  # +size+::
  #   The total number of entries of this type. A new section is started whenever the current
  #   one has reached MAX_ENTRIES_IN_SECTION entries.
  #
  # The numbers 0 up to size - 1 are yielded one after another; the block has to return the
  # section line for the entry with the yielded index.
  def create_sections(type, size)
    output = ""
    (0...size).each_slice(MAX_ENTRIES_IN_SECTION) do |indices|
      output << "#{indices.size} begin#{type}\n"
      indices.each { |entry| output << yield(entry) }
      output << "end#{type}\n"
    end
    output
  end

  # Returns the format string for a ToUnicode CMap file; the single "%s" placeholder takes the
  # bfchar/bfrange sections.
  def to_unicode_template
    <<-TEMPLATE
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
%s
endcmap
CMapName currentdict /CMap defineresource pop
end
end
    TEMPLATE
  end

end
173
+
174
+ end
175
+ end
176
+ end
@@ -0,0 +1,90 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ module HexaPDF
35
+ module Font
36
+
37
# A CMap maps character codes either to CIDs (character IDs) or to their Unicode value.
#
# At the moment only the mapping to Unicode values is implemented.
#
# See: PDF1.7 s9.7.5, s9.10.3; Adobe Technical Note #5411
class CMap

  autoload(:Parser, 'hexapdf/font/cmap/parser')
  autoload(:Writer, 'hexapdf/font/cmap/writer')

  class << self

    # Parses the given string, which has to contain a valid CMap file, and returns the result
    # of the parser.
    def parse(string)
      parser = Parser.new
      parser.parse(string)
    end

    # Returns the ToUnicode CMap, as string, for the given mapping from codes to Unicode
    # codepoints.
    #
    # See: Writer#create_to_unicode_cmap
    def create_to_unicode_cmap(mapping)
      writer = Writer.new
      writer.create_to_unicode_cmap(mapping)
    end

  end

  # Registry part of the CMap version.
  attr_accessor :registry

  # Ordering part of the CMap version.
  attr_accessor :ordering

  # Supplement part of the CMap version.
  attr_accessor :supplement

  # Name of the CMap.
  attr_accessor :name

  # Hash that maps character codes to Unicode values; unknown codes yield a frozen empty string.
  attr_accessor :unicode_mapping

  # Creates a new CMap object without any mappings.
  def initialize
    @unicode_mapping = Hash.new(''.freeze)
  end

  # Looks up the given character code in the Unicode mapping and returns the stored value, or
  # an empty string if there is no entry for the code.
  def to_unicode(code)
    unicode_mapping[code]
  end

end
88
+
89
+ end
90
+ end
@@ -0,0 +1,77 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/encoding/glyph_list'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module Encoding
39
+
40
# Common superclass of the encoding classes, which map codes in the range of 0 to 255 to glyph
# names.
class Base

  # Name of the encoding, or +nil+ if no name has been assigned to the encoding.
  attr_reader :encoding_name

  # Hash holding the mapping from codes to glyph names.
  attr_reader :code_to_name

  # Creates an encoding object that has no name and doesn't contain any mappings.
  def initialize
    @code_to_name = {}
    @unicode_cache = {}
    @encoding_name = nil
  end

  # Returns the glyph name stored for the given code, or .notdef if there is no entry for the
  # code.
  #
  # The stored names are expected to be Symbol objects; the fallback value is a Symbol.
  def name(code)
    @code_to_name.fetch(code) { :'.notdef' }
  end

  # Returns the value GlyphList.name_to_unicode yields for the glyph name of the given code.
  #
  # The result is cached per code, so this method should only be called once all codes have
  # been defined.
  def unicode(code)
    cached = @unicode_cache[code]
    return cached if cached

    @unicode_cache[code] = GlyphList.name_to_unicode(name(code))
  end

end
74
+
75
+ end
76
+ end
77
+ end