hexapdf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,230 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/true_type/table'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module TrueType
39
+ class Table
40
+
41
+ # The 'post' table contains information for using a font on a PostScript printer.
42
+ #
43
+ # post format 2.5 is currently not implemented because use of the format is deprecated since
44
+ # 2000 in the specification and no font with a format 2.5 post subtable was available for
45
+ # testing.
46
+ #
47
+ # See: https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6post.html
48
+ class Post < Table
49
+
50
+ # The format of the table (a Rational).
51
+ attr_accessor :format
52
+
53
+ # The italic angle (a Rational).
54
+ attr_accessor :italic_angle
55
+
56
+ # The suggested distance of the top of the underline from the baseline (negative values
57
+ # indicate underlines below the baseline).
58
+ attr_accessor :underline_position
59
+
60
+ # The suggested thickness for underlines.
61
+ attr_accessor :underline_thickness
62
+
63
+ # Specifies whether the font is proportional (value is 0) or monospaced (value is not 0).
64
+ attr_accessor :is_fixed_pitch
65
+
66
+ # Returns +true+ if the font is monospaced.
67
+ #
68
+ # See: #is_fixed_pitch
69
+ def is_fixed_pitch?
70
+ @is_fixed_pitch != 0
71
+ end
72
+
73
+ # Minimum memory usage when a font is downloaded.
74
+ attr_accessor :min_mem_type42
75
+
76
+ # Maximum memory usage when a font is downloaded.
77
+ attr_accessor :max_mem_type42
78
+
79
+ # Minimum memory usage when a Type1 font is downloaded.
80
+ attr_accessor :min_mem_type1
81
+
82
+ # Maximum memory usage when a Type1 font is downloaded.
83
+ attr_accessor :max_mem_type1
84
+
85
+ # Returns the name for the given glyph id or ".notdef" if the given glyph id has no name.
86
+ def [](glyph_id)
87
+ @glyph_names[glyph_id] || '.notdef'.freeze
88
+ end
89
+
90
+ private
91
+
92
# Parses the fixed 32-byte header of the 'post' table and then hands the remaining bytes to the
# parser module matching the table format to obtain the glyph name lookup object.
#
# Raises HexaPDF::Error if the format is not one of 1, 2, 3 or 4.
def parse_table #:nodoc:
  @format = read_fixed
  @italic_angle = read_fixed

  # Two signed 16-bit values followed by five unsigned 32-bit values (24 bytes in total).
  header_fields = read_formatted(24, 's>2N5')
  @underline_position, @underline_thickness = header_fields[0, 2]
  @is_fixed_pitch = header_fields[2]
  @min_mem_type42, @max_mem_type42 = header_fields[3, 2]
  @min_mem_type1, @max_mem_type1 = header_fields[5, 2]

  # The two fixed-point values (8 bytes) plus the 24 bytes above form the 32-byte header.
  sub_table_length = directory_entry.length - 32

  # @format is a Rational, so the comparison has to go through #=== (numeric equality) and
  # cannot be replaced by a Hash lookup with Integer keys.
  format_parser = case @format
                  when 1 then Format1
                  when 2 then Format2
                  when 3 then Format3
                  when 4 then Format4
                  else
                    raise HexaPDF::Error, "Unsupported post table format: #{@format}"
                  end
  @glyph_names = format_parser.parse(io, sub_table_length)
end
108
+
109
# Populates the table with default values: format 1, an italic angle of zero, all numeric
# header fields set to zero and an empty glyph name map.
def load_default #:nodoc:
  @format = Rational(1)
  @italic_angle = Rational(0)
  @underline_position = 0
  @underline_thickness = 0
  @is_fixed_pitch = 0
  @min_mem_type42 = 0
  @max_mem_type42 = 0
  @min_mem_type1 = 0
  @max_mem_type1 = 0
  @glyph_names = {}
end
116
+
117
+
118
+ # 'post' table format 1
119
+ module Format1
120
+
121
+ # The 258 predefined glyph names in the standard Macintosh ordering.
122
+ GLYPH_NAMES = %w[
123
+ .notdef .null nonmarkingreturn space exclam quotedbl numbersign dollar percent
124
+ ampersand quotesingle parenleft parenright asterisk plus comma hyphen period slash
125
+ zero one two three four five six seven eight nine colon semicolon less equal greater
126
+ question at A B C D E F G H I J K L M N O P Q R S T U V W X Y Z bracketleft backslash
127
+ bracketright asciicircum underscore grave a b c d e f g h i j k l m n o p q r s t u v
128
+ w x y z braceleft bar braceright asciitilde Adieresis Aring Ccedilla Eacute Ntilde
129
+ Odieresis Udieresis aacute agrave acircumflex adieresis atilde aring ccedilla eacute
130
+ egrave ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute ograve
131
+ ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis dagger degree cent
132
+ sterling section bullet paragraph germandbls registered copyright trademark acute
133
+ dieresis notequal AE Oslash infinity plusminus lessequal greaterequal yen mu
134
+ partialdiff summation product pi integral ordfeminine ordmasculine Omega ae oslash
135
+ questiondown exclamdown logicalnot radical florin approxequal Delta guillemotleft
136
+ guillemotright ellipsis nonbreakingspace Agrave Atilde Otilde OE oe endash emdash
137
+ quotedblleft quotedblright quoteleft quoteright divide lozenge ydieresis Ydieresis
138
+ fraction currency guilsinglleft guilsinglright fi fl daggerdbl periodcentered
139
+ quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute Edieresis
140
+ Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex apple Ograve Uacute
141
+ Ucircumflex Ugrave dotlessi circumflex tilde macron breve dotaccent ring cedilla
142
+ hungarumlaut ogonek caron Lslash lslash Scaron scaron Zcaron zcaron brokenbar Eth eth
143
+ Yacute yacute Thorn thorn minus multiply onesuperior twosuperior threesuperior onehalf
144
+ onequarter threequarters franc Gbreve gbreve Idotaccent Scedilla scedilla Cacute
145
+ cacute Ccaron ccaron dcroat
146
+ ].freeze
147
+
148
+ # :call-seq:
149
+ # Format1.parse(io, length) -> glyph_names
150
+ #
151
+ # Returns the array containing the 258 predefined glyph names.
152
+ def self.parse(_io, _length)
153
+ GLYPH_NAMES
154
+ end
155
+
156
+ end
157
+
158
+
159
# 'post' table format 2
module Format2

  # :call-seq:
  #   Format2.parse(io, length)    -> glyph_names
  #
  # Parses the format 2 post subtable from the given IO at the current position and
  # returns the contained glyph name map (a lambda mapping a glyph id to its name or +nil+).
  #
  # The subtable is read as: the number of glyphs (16-bit), one 16-bit name index per glyph
  # and then length-prefixed strings up to +length+ bytes after the starting position.
  #
  # Reading the strings stops early if the IO ends before +length+ bytes were consumed, so
  # a truncated file or an overlong declared length cannot cause an endless loop.
  def self.parse(io, length)
    end_pos = io.pos + length
    num_glyphs = io.read(2).unpack('n').first
    glyph_name_index = io.read(2 * num_glyphs).unpack('n*')
    names = []
    while io.pos < end_pos
      name_length = io.getbyte
      break unless name_length # EOF reached before the declared end of the subtable
      name = io.read(name_length)
      break unless name # no data left for the announced string
      names << name.force_encoding(::Encoding::UTF_8)
    end
    mapper(glyph_name_index, names)
  end

  # Builds the lambda that resolves a glyph id to a name: indices up to 257 refer to the
  # predefined names of Format1, higher indices (offset by 258) to the names read from the
  # subtable. Glyph ids without a name index yield +nil+.
  def self.mapper(glyph_name_index, names) #:nodoc:
    lambda do |glyph_id|
      name_index = glyph_name_index[glyph_id]
      next nil unless name_index

      if name_index <= 257
        Format1::GLYPH_NAMES[name_index]
      else
        names[name_index - 258]
      end
    end
  end

end
190
+
191
+
192
+ # 'post' table format 3
193
+ module Format3
194
+
195
+ # :call-seq:
196
+ # Format3.parse(io, length) -> glyph_names
197
+ #
198
+ # Since the post table format 3 does not contain any valid glyph names, an empty array
199
+ # is returned.
200
+ def self.parse(_io, _length)
201
+ [].freeze
202
+ end
203
+
204
+ end
205
+
206
+
207
# 'post' table format 4
module Format4

  # :call-seq:
  #   Format4.parse(io, length)     -> glyph_names
  #
  # Reads +length+ bytes of the format 4 post subtable from the given IO at the current
  # position, interprets them as big-endian unsigned 16-bit integers and returns a lambda
  # mapping the glyph id to a character code.
  def self.parse(io, length)
    raw_data = io.read(length)
    mapper(raw_data.unpack('n*'))
  end

  # Wraps the character code array in a lambda; glyph ids without an entry map to 0xFFFF.
  def self.mapper(char_codes) #:nodoc:
    ->(glyph_id) { char_codes[glyph_id] || 0xFFFF }
  end

end
224
+
225
+ end
226
+
227
+ end
228
+ end
229
+ end
230
+ end
@@ -0,0 +1,155 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/error'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module TrueType
39
+
40
+ # Implementation of a generic table inside a sfnt-formatted font file.
41
+ #
42
+ # See: https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6.html
43
+ class Table
44
+
45
+ autoload(:Directory, 'hexapdf/font/true_type/table/directory')
46
+ autoload(:Head, 'hexapdf/font/true_type/table/head')
47
+ autoload(:Cmap, 'hexapdf/font/true_type/table/cmap')
48
+ autoload(:Hhea, 'hexapdf/font/true_type/table/hhea')
49
+ autoload(:Hmtx, 'hexapdf/font/true_type/table/hmtx')
50
+ autoload(:Loca, 'hexapdf/font/true_type/table/loca')
51
+ autoload(:Maxp, 'hexapdf/font/true_type/table/maxp')
52
+ autoload(:Name, 'hexapdf/font/true_type/table/name')
53
+ autoload(:Post, 'hexapdf/font/true_type/table/post')
54
+ autoload(:Glyf, 'hexapdf/font/true_type/table/glyf')
55
+ autoload(:OS2, 'hexapdf/font/true_type/table/os2')
56
+
57
+
58
+ # The time Epoch used in sfnt-formatted font files.
59
+ TIME_EPOCH = Time.new(1904, 1, 1)
60
+
61
# Calculates the checksum for the given data.
#
# The data is interpreted as a sequence of big-endian unsigned 32-bit integers that are
# summed up modulo 2**32. If the length of the data is not a multiple of four, it is treated
# as if it were padded with zero bytes to the next four-byte boundary so that the trailing
# bytes are part of the sum instead of being silently ignored.
#
# The given string is not modified.
def self.calculate_checksum(data)
  padding = (4 - data.bytesize % 4) % 4
  data += "\0" * padding unless padding == 0
  data.unpack('N*').inject(0) {|sum, long| sum + long } % 2**32
end
65
+
66
+
67
+ # The TrueType font object associated with this table.
68
+ attr_reader :font
69
+
70
# Creates a new Table object for the given font.
#
# If a directory +entry+ is given, the table is initialized by reading its data from the
# font's associated IO stream; otherwise default values are used.
#
# See: #parse_table, #load_default
def initialize(font, entry = nil)
  @font = font
  @directory_entry = entry
  if entry
    load_from_io
  else
    load_default
  end
end
79
+
80
+ # Returns the directory entry for this table.
81
+ #
82
+ # See: Directory
83
+ def directory_entry
84
+ @directory_entry
85
+ end
86
+
87
# Returns +true+ if the checksum stored in the directory entry of the table matches the
# checksum calculated over the table's data.
#
# Raises HexaPDF::Error if the table has no directory entry.
def checksum_valid?
  entry = directory_entry
  raise HexaPDF::Error, "Can't verify the checksum, no directory entry available" unless entry

  table_data = with_io_pos(entry.offset) { io.read(entry.length) }
  calculated = self.class.calculate_checksum(table_data)
  entry.checksum == calculated
end
97
+
98
+ private
99
+
100
+ # The IO stream of the associated font object.
101
+ def io
102
+ @font.io
103
+ end
104
+
105
+ # Loads the data for this table from the IO stream of the associated font object into this
106
+ # object.
107
+ #
108
+ # See #parse_table for more information.
109
+ def load_from_io
110
+ with_io_pos(directory_entry.offset) { parse_table }
111
+ end
112
+
113
+ # Parses the table with the IO position already at the correct offset.
114
+ #
115
+ # This method does the actual work of parsing a table entry and must be implemented by
116
+ # subclasses.
117
+ #
118
+ # See: #load_from_io
119
+ def parse_table
120
+ # noop for unsupported tables
121
+ end
122
+
123
+ # Uses default values to populate the table.
124
+ #
125
+ # This method must be implemented by subclasses.
126
+ def load_default
127
+ # noop for unsupported tables
128
+ end
129
+
130
# Sets the IO cursor to the given position while yielding to the block and returns the
# block's return value.
#
# The previous cursor position is restored afterwards, also when the block raises an
# exception. If the current position cannot be determined in the first place, that error is
# propagated as is and no attempt is made to restore a position.
def with_io_pos(pos)
  old_pos = io.pos
  begin
    io.pos = pos
    yield
  ensure
    io.pos = old_pos
  end
end
139
+
140
# Reads +count+ bytes from the current position of the font's associated IO stream and
# returns the array of values obtained by unpacking them with the given +format+ specifier.
def read_formatted(count, format)
  raw_bytes = io.read(count)
  raw_bytes.unpack(format)
end
145
+
146
# Reads a 16.16-bit signed fixed-point integer from the current position of the font's
# associated IO stream and returns a Rational as result.
#
# The four bytes are decoded as a big-endian *signed* 32-bit integer so that negative values
# (for example a negative italic angle) keep their sign.
def read_fixed
  Rational(io.read(4).unpack('l>').first, 65536)
end
150
+
151
+ end
152
+
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,48 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
module HexaPDF
  module Font

    # Namespace for the classes that handle TrueType fonts.
    #
    # Note that currently not all parts of the file format are supported, only those needed for
    # using the fonts with PDF.
    module TrueType

      # The Font class is loaded lazily on first access.
      autoload :Font, 'hexapdf/font/true_type/font'

    end

  end
end
@@ -0,0 +1,240 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/true_type'
35
+ require 'hexapdf/font/cmap'
36
+ require 'hexapdf/error'
37
+
38
+ module HexaPDF
39
+ module Font
40
+
41
# This class wraps a generic TrueType font object and provides the methods needed for working
# with the font in a PDF context.
#
# TrueType fonts can be represented in two ways in PDF: As a simple font with Subtype TrueType
# or as a composite font using a Type2 CIDFont. The wrapper only supports the composite font
# case because:
#
# * By using a composite font more than 256 characters can be encoded with one font object.
# * Fonts for vertical writing can potentially be used.
# * The PDF specification recommends using a composite font (see PDF1.7 s9.9 at the end).
#
# Additionally, TrueType fonts are *always* embedded.
class TrueTypeWrapper

  # Represents a single glyph of the wrapped font.
  class Glyph

    # The glyph ID.
    attr_reader :id

    # Creates a new Glyph object for the glyph with the given ID of the given TrueType font.
    def initialize(font, id)
      @font = font
      @id = id
    end

    # Returns the width of the glyph, i.e. its advance width multiplied by 1000 and divided by
    # the font's units per em. The value is computed once and then cached.
    def width
      @width ||= @font[:hmtx][id].advance_width * 1000.0 / @font[:head].units_per_em
    end

    # Returns +true+ if the glyph represents the space character.
    def space?
      # According to http://scripts.sil.org/iws-chapter08 and
      # https://www.microsoft.com/typography/otspec/recom.htm
      @id == 3
    end

  end

  private_constant :Glyph


  # Returns the wrapped TrueType font object.
  attr_reader :wrapped_font

  # Returns the PDF font dictionary representing the wrapped font.
  attr_reader :dict

  # Creates a new object wrapping the TrueType font for the PDF document.
  #
  # The font dictionary is built immediately; the glyph specific parts are added later by a
  # listener that is registered for the document's +:complete_objects+ message.
  #
  # Raises HexaPDF::Error if the font's cmap table provides no preferred mapping table.
  def initialize(document, font)
    @document = document
    @wrapped_font = font

    @cmap = font[:cmap].preferred_table
    if @cmap.nil?
      raise HexaPDF::Error, "No mapping table for Unicode characters found for TTF " \
        "font #{font.full_name}"
    end
    @dict = build_font_dict
    @document.register_listener(:complete_objects, &method(:complete_font_dict))

    @id_to_glyph = {}
    @codepoint_to_glyph = {}
    @encoded_glyphs = {}
  end

  # Returns a Glyph object for the given glyph ID.
  #
  # If the ID is outside the range of glyph IDs of the font, the glyph ID returned by the
  # document's 'font.on_missing_glyph' callback is used instead. The resulting object is
  # cached under the requested ID.
  #
  # Note: Although this method is public, it should normally not be used by application code!
  def glyph(id)
    @id_to_glyph[id] ||=
      begin
        if id < 0 || id >= @wrapped_font[:maxp].num_glyphs
          id = @document.config['font.on_missing_glyph'].call(0xFFFD, @wrapped_font)
        end
        Glyph.new(@wrapped_font, id)
      end
  end

  # Returns an array of glyph objects representing the characters in the UTF-8 encoded string.
  #
  # Codepoints without an entry in the font's mapping table are resolved through the
  # document's 'font.on_missing_glyph' callback.
  def decode_utf8(str)
    str.each_codepoint.map do |c|
      @codepoint_to_glyph[c] ||=
        begin
          gid = @cmap[c] || @document.config['font.on_missing_glyph'].call(c, @wrapped_font)
          glyph(gid)
        end
    end
  end

  # Encodes the glyph and returns the code string, i.e. the glyph ID as 16-bit big-endian
  # value.
  #
  # The glyph is also remembered so that its width and Unicode mapping are written when the
  # font dictionary is completed.
  def encode(glyph)
    @encoded_glyphs[glyph] ||= [glyph.id].pack('n')
  end

  private

  # Builds a Type0 font object representing the TrueType font.
  #
  # The returned font object contains only information available at build time, so no
  # information about glyph specific attributes like width.
  #
  # See: #complete_font_dict
  def build_font_dict
    scaling = 1000.0 / @wrapped_font[:head].units_per_em

    # Note: The dictionaries are always passed to @document.add as explicit hash literals.
    # Since the method also takes keyword arguments (see +stream+ below), a brace-less hash
    # would be treated as keyword arguments on Ruby 3 and the call would fail.
    embedded_font = @document.add({Length1: @wrapped_font.io.size},
                                  stream: HexaPDF::StreamData.new(@wrapped_font.io))
    fd = @document.add({Type: :FontDescriptor,
                        FontName: @wrapped_font.font_name.intern,
                        FontWeight: @wrapped_font.weight,
                        Flags: 0,
                        FontBBox: @wrapped_font.bounding_box.map {|m| m * scaling},
                        ItalicAngle: @wrapped_font.italic_angle || 0,
                        Ascent: @wrapped_font.ascender * scaling,
                        Descent: @wrapped_font.descender * scaling,
                        StemV: @wrapped_font.dominant_vertical_stem_width,
                        FontFile2: embedded_font})
    if @wrapped_font[:'OS/2'].version >= 2
      fd[:CapHeight] = @wrapped_font.cap_height * scaling
      fd[:XHeight] = @wrapped_font.x_height * scaling
    else # estimate values
      # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#ch
      fd[:CapHeight] = if @cmap[0x0048] # H
                         @wrapped_font[:glyf][@cmap[0x0048]].y_max * scaling
                       else
                         @wrapped_font.ascender * 0.8 * scaling
                       end
      # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#xh
      fd[:XHeight] = if @cmap[0x0078] # x
                       @wrapped_font[:glyf][@cmap[0x0078]].y_max * scaling
                     else
                       @wrapped_font.ascender * 0.5 * scaling
                     end
    end

    fd.flag(:fixed_pitch) if @wrapped_font[:post].is_fixed_pitch? ||
      @wrapped_font[:hhea].num_of_long_hor_metrics == 1
    fd.flag(:italic) if @wrapped_font[:'OS/2'].selection_include?(:italic) ||
      @wrapped_font[:'OS/2'].selection_include?(:oblique)
    fd.flag(:symbolic)

    cid_font = @document.add({Type: :Font, Subtype: :CIDFontType2,
                              BaseFont: @wrapped_font.font_name.intern, FontDescriptor: fd,
                              CIDSystemInfo: {Registry: "Adobe", Ordering: "Identity",
                                              Supplement: 0},
                              CIDToGIDMap: :Identity})
    @document.add({Type: :Font, Subtype: :Type0, BaseFont: cid_font[:BaseFont],
                   Encoding: :"Identity-H", DescendantFonts: [cid_font]})
  end

  # Makes sure that the Type0 font object as well as the CIDFont object contain all the needed
  # information.
  #
  # This method is registered as listener for the document's +:complete_objects+ message.
  def complete_font_dict
    complete_width_information
    create_to_unicode_cmap
  end

  # Adds the /DW and /W fields to the CIDFont dictionary.
  #
  # The width of the glyph with ID 3 is used as default width (/DW). All encoded glyphs with
  # a different width are listed in /W, which has the form
  # <tt>[first_gid, [width, width, ...], first_gid, [width, ...], ...]</tt> with one entry
  # pair per run of consecutive glyph IDs. /W is only set if there is at least one such glyph.
  def complete_width_information
    cid_font = @dict[:DescendantFonts].first
    cid_font[:DW] = default_width = glyph(3).width

    glyphs = @encoded_glyphs.keys.reject {|g| g.width == default_width}.sort_by(&:id)
    return if glyphs.empty?

    cid_font[:W] = widths = []
    last_id = -10 # sentinel that can never directly precede a valid glyph ID
    cur_widths = nil
    glyphs.each do |glyph|
      gid = glyph.id
      if last_id + 1 != gid
        # The glyph ID doesn't directly follow the last one, so start a new run.
        cur_widths = []
        widths << gid << cur_widths
      end
      cur_widths << glyph.width
      last_id = gid
    end
  end

  # Creates the /ToUnicode CMap and updates the font dictionary so that text extraction works
  # correctly.
  #
  # The CMap data is created inside the block given to the stream data object, i.e. from the
  # glyphs that are known when that block is run.
  def create_to_unicode_cmap
    stream = HexaPDF::StreamData.new do
      mapping = @encoded_glyphs.keys.sort_by(&:id).map do |glyph|
        # Using 0xFFFD as mentioned in Adobe #5411, last line before section 1.5
        [glyph.id, @cmap.gid_to_code(glyph.id) || 0xFFFD]
      end
      HexaPDF::Font::CMap.create_to_unicode_cmap(mapping)
    end
    stream_obj = @document.add({}, stream: stream)
    stream_obj.set_filter(:FlateDecode)
    @dict[:ToUnicode] = stream_obj
  end

end
238
+
239
+ end
240
+ end