hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,230 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/true_type/table'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module TrueType
39
+ class Table
40
+
41
+ # The 'post' table contains information for using a font on a PostScript printer.
42
+ #
43
+ # post format 2.5 is currently not implemented because use of the format is deprecated since
44
+ # 2000 in the specification and no font with a format 2.5 post subtable was available for
45
+ # testing.
46
+ #
47
+ # See: https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6post.html
48
+ class Post < Table
49
+
50
+ # The format of the table (a Rational).
51
+ attr_accessor :format
52
+
53
+ # The italic angle (a Rational).
54
+ attr_accessor :italic_angle
55
+
56
+ # The suggested distance of the top of the underline from the baseline (negative values
57
+ # indicate underlines below the baseline).
58
+ attr_accessor :underline_position
59
+
60
+ # The suggested thickness for underlines.
61
+ attr_accessor :underline_thickness
62
+
63
+ # Specifies whether the font is proportional (value is 0) or monospaced (value is not 0).
64
+ attr_accessor :is_fixed_pitch
65
+
66
+ # Returns +true+ if the font is monospaced.
67
+ #
68
+ # See: #is_fixed_pitch
69
+ def is_fixed_pitch?
70
+ @is_fixed_pitch != 0
71
+ end
72
+
73
+ # Minimum memory usage when a font is downloaded.
74
+ attr_accessor :min_mem_type42
75
+
76
+ # Maximum memory usage when a font is downloaded.
77
+ attr_accessor :max_mem_type42
78
+
79
+ # Minimum memory usage when a Type1 font is downloaded.
80
+ attr_accessor :min_mem_type1
81
+
82
+ # Maximum memory usage when a Type1 font is downloaded.
83
+ attr_accessor :max_mem_type1
84
+
85
+ # Returns the name for the given glyph id or ".notdef" if the given glyph id has no name.
86
+ def [](glyph_id)
87
+ @glyph_names[glyph_id] || '.notdef'.freeze
88
+ end
89
+
90
+ private
91
+
92
+ def parse_table #:nodoc:
93
+ @format = read_fixed
94
+ @italic_angle = read_fixed
95
+ @underline_position, @underline_thickness, @is_fixed_pitch, @min_mem_type42,
96
+ @max_mem_type42, @min_mem_type1, @max_mem_type1 = read_formatted(24, 's>2N5')
97
+
98
+ sub_table_length = directory_entry.length - 32
99
+ @glyph_names = case @format
100
+ when 1 then Format1.parse(io, sub_table_length)
101
+ when 2 then Format2.parse(io, sub_table_length)
102
+ when 3 then Format3.parse(io, sub_table_length)
103
+ when 4 then Format4.parse(io, sub_table_length)
104
+ else
105
+ raise HexaPDF::Error, "Unsupported post table format: #{@format}"
106
+ end
107
+ end
108
+
109
+ def load_default #:nodoc:
110
+ @format = 1.to_r
111
+ @italic_angle = 0.to_r
112
+ @underline_position = @underline_thickness = @is_fixed_pitch = @min_mem_type42 =
113
+ @max_mem_type42 = @min_mem_type1 = @max_mem_type1 = 0
114
+ @glyph_names = {}
115
+ end
116
+
117
+
118
+ # 'post' table format 1
119
+ module Format1
120
+
121
+ # The 258 predefined glyph names in the standard Macintosh ordering.
122
+ GLYPH_NAMES = %w[
123
+ .notdef .null nonmarkingreturn space exclam quotedbl numbersign dollar percent
124
+ ampersand quotesingle parenleft parenright asterisk plus comma hyphen period slash
125
+ zero one two three four five six seven eight nine colon semicolon less equal greater
126
+ question at A B C D E F G H I J K L M N O P Q R S T U V W X Y Z bracketleft backslash
127
+ bracketright asciicircum underscore grave a b c d e f g h i j k l m n o p q r s t u v
128
+ w x y z braceleft bar braceright asciitilde Adieresis Aring Ccedilla Eacute Ntilde
129
+ Odieresis Udieresis aacute agrave acircumflex adieresis atilde aring ccedilla eacute
130
+ egrave ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute ograve
131
+ ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis dagger degree cent
132
+ sterling section bullet paragraph germandbls registered copyright trademark acute
133
+ dieresis notequal AE Oslash infinity plusminus lessequal greaterequal yen mu
134
+ partialdiff summation product pi integral ordfeminine ordmasculine Omega ae oslash
135
+ questiondown exclamdown logicalnot radical florin approxequal Delta guillemotleft
136
+ guillemotright ellipsis nonbreakingspace Agrave Atilde Otilde OE oe endash emdash
137
+ quotedblleft quotedblright quoteleft quoteright divide lozenge ydieresis Ydieresis
138
+ fraction currency guilsinglleft guilsinglright fi fl daggerdbl periodcentered
139
+ quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute Edieresis
140
+ Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex apple Ograve Uacute
141
+ Ucircumflex Ugrave dotlessi circumflex tilde macron breve dotaccent ring cedilla
142
+ hungarumlaut ogonek caron Lslash lslash Scaron scaron Zcaron zcaron brokenbar Eth eth
143
+ Yacute yacute Thorn thorn minus multiply onesuperior twosuperior threesuperior onehalf
144
+ onequarter threequarters franc Gbreve gbreve Idotaccent Scedilla scedilla Cacute
145
+ cacute Ccaron ccaron dcroat
146
+ ].freeze
147
+
148
+ # :call-seq:
149
+ # Format1.parse(io, length) -> glyph_names
150
+ #
151
+ # Returns the array containing the 258 predefined glyph names.
152
+ def self.parse(_io, _length)
153
+ GLYPH_NAMES
154
+ end
155
+
156
+ end
157
+
158
+
159
# 'post' table format 2
module Format2

  # :call-seq:
  #   Format2.parse(io, length)    -> glyph_names
  #
  # Parses the format 2 post subtable from the given IO at the current position and
  # returns the contained glyph name map.
  #
  # +length+ is the number of bytes of the subtable. The glyph names are read until the
  # end of the subtable is reached or, for truncated data, until the IO has no more bytes
  # to offer.
  def self.parse(io, length)
    end_pos = io.pos + length
    num_glyphs = io.read(2).unpack('n').first
    glyph_name_index = io.read(2 * num_glyphs).unpack('n*')
    names = []
    while io.pos < end_pos
      # Each name is stored as a length byte followed by that many bytes. Both reads
      # return nil once the IO is exhausted; without this guard the position would never
      # reach end_pos and the loop would not terminate.
      name_length = io.getbyte
      name = name_length && io.read(name_length)
      break unless name
      names << name.force_encoding(::Encoding::UTF_8)
    end
    mapper(glyph_name_index, names)
  end

  # Returns a lambda that maps a glyph id to its name (or +nil+ if the glyph id is not
  # part of the glyph name index).
  #
  # Name indices up to 257 refer to the predefined names of Format1::GLYPH_NAMES, higher
  # ones refer to the custom +names+ read from the subtable (offset by 258).
  def self.mapper(glyph_name_index, names) #:nodoc:
    lambda do |glyph_id|
      name_index = glyph_name_index[glyph_id]
      if !name_index
        nil
      elsif name_index <= 257
        Format1::GLYPH_NAMES[name_index]
      else
        names[name_index - 258]
      end
    end
  end

end
190
+
191
+
192
+ # 'post' table format 3
193
+ module Format3
194
+
195
+ # :call-seq:
196
+ # Format3.parse(io, length) -> glyph_names
197
+ #
198
+ # Since the post table format 3 does not contain any valid glyph names, an empty array
199
+ # is returned.
200
+ def self.parse(_io, _length)
201
+ [].freeze
202
+ end
203
+
204
+ end
205
+
206
+
207
+ # 'post' table format 4
208
+ module Format4
209
+
210
+ # :call-seq:
211
+ # Format4.parse(io, length) -> glyph_names
212
+ #
213
+ # Parses the format 4 post subtable from the given IO at the current position and
214
+ # returns a lambda mapping the glyph id to a character code.
215
+ def self.parse(io, length)
216
+ mapper(io.read(length).unpack('n*'))
217
+ end
218
+
219
+ def self.mapper(char_codes) #:nodoc:
220
+ lambda {|glyph_id| char_codes[glyph_id] || 0xFFFF }
221
+ end
222
+
223
+ end
224
+
225
+ end
226
+
227
+ end
228
+ end
229
+ end
230
+ end
@@ -0,0 +1,155 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/error'
35
+
36
+ module HexaPDF
37
+ module Font
38
+ module TrueType
39
+
40
+ # Implementation of a generic table inside a sfnt-formatted font file.
41
+ #
42
+ # See: https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6.html
43
+ class Table
44
+
45
+ autoload(:Directory, 'hexapdf/font/true_type/table/directory')
46
+ autoload(:Head, 'hexapdf/font/true_type/table/head')
47
+ autoload(:Cmap, 'hexapdf/font/true_type/table/cmap')
48
+ autoload(:Hhea, 'hexapdf/font/true_type/table/hhea')
49
+ autoload(:Hmtx, 'hexapdf/font/true_type/table/hmtx')
50
+ autoload(:Loca, 'hexapdf/font/true_type/table/loca')
51
+ autoload(:Maxp, 'hexapdf/font/true_type/table/maxp')
52
+ autoload(:Name, 'hexapdf/font/true_type/table/name')
53
+ autoload(:Post, 'hexapdf/font/true_type/table/post')
54
+ autoload(:Glyf, 'hexapdf/font/true_type/table/glyf')
55
+ autoload(:OS2, 'hexapdf/font/true_type/table/os2')
56
+
57
+
58
+ # The time Epoch used in sfnt-formatted font files.
59
+ TIME_EPOCH = Time.new(1904, 1, 1)
60
+
61
# Calculates the checksum for the given data.
#
# The checksum is the sum of all big-endian unsigned 32-bit integers of the data, modulo
# 2**32. Data whose length is not a multiple of four is treated as if it were padded with
# zero bytes, so that trailing bytes are included instead of being silently ignored.
def self.calculate_checksum(data)
  remainder = data.bytesize % 4
  # String#unpack('N*') drops an incomplete trailing group, therefore pad explicitly (this
  # creates a new string, the caller's data is not modified).
  data += "\0".b * (4 - remainder) unless remainder.zero?
  data.unpack('N*').inject(0) {|sum, long| sum + long } % 2**32
end
65
+
66
+
67
+ # The TrueType font object associated with this table.
68
+ attr_reader :font
69
+
70
+ # Creates a new Table object for the given font and initializes it by either reading the
71
+ # data from the font's associated IO stream if +entry+ is given or by using default values.
72
+ #
73
+ # See: #parse_table, #load_default
74
+ def initialize(font, entry = nil)
75
+ @font = font
76
+ @directory_entry = entry
77
+ entry ? load_from_io : load_default
78
+ end
79
+
80
+ # Returns the directory entry for this table.
81
+ #
82
+ # See: Directory
83
+ def directory_entry
84
+ @directory_entry
85
+ end
86
+
87
+ # Returns +true+ if the checksum stored in the directory entry of the table matches the
88
+ # table's data.
89
+ def checksum_valid?
90
+ unless directory_entry
91
+ raise HexaPDF::Error, "Can't verify the checksum, no directory entry available"
92
+ end
93
+
94
+ data = with_io_pos(directory_entry.offset) { io.read(directory_entry.length) }
95
+ directory_entry.checksum == self.class.calculate_checksum(data)
96
+ end
97
+
98
+ private
99
+
100
+ # The IO stream of the associated font object.
101
+ def io
102
+ @font.io
103
+ end
104
+
105
+ # Loads the data for this table from the IO stream of the associated font object into this
106
+ # object.
107
+ #
108
+ # See #parse_table for more information.
109
+ def load_from_io
110
+ with_io_pos(directory_entry.offset) { parse_table }
111
+ end
112
+
113
+ # Parses the table with the IO position already at the correct offset.
114
+ #
115
+ # This method does the actual work of parsing a table entry and must be implemented by
116
+ # subclasses.
117
+ #
118
+ # See: #load_from_io
119
+ def parse_table
120
+ # noop for unsupported tables
121
+ end
122
+
123
+ # Uses default values to populate the table.
124
+ #
125
+ # This method must be implemented by subclasses.
126
+ def load_default
127
+ # noop for unsupported tables
128
+ end
129
+
130
# Sets the IO cursor to the given position while yielding to the block and returns the
# block's return value.
#
# The previous position is restored afterwards, even if the block raises an error.
def with_io_pos(pos)
  old_pos = io.pos
  begin
    io.pos = pos
    yield
  ensure
    # Only restore once the old position is known; an error while reading it is
    # propagated as-is instead of being masked by a follow-up error from the restore.
    io.pos = old_pos
  end
end
139
+
140
+ # Reads +count+ bytes from the current position of the font's associated IO stream, unpacks
141
+ # them using the provided format specifier and returns the result.
142
+ def read_formatted(count, format)
143
+ io.read(count).unpack(format)
144
+ end
145
+
146
# Reads a 16.16-bit signed fixed-point integer and returns a Rational as result.
def read_fixed
  # 'l>' unpacks a signed big-endian 32-bit integer; the unsigned 'N' would turn negative
  # values (e.g. a negative italic angle) into large positive ones.
  Rational(io.read(4).unpack('l>').first, 65536)
end
150
+
151
+ end
152
+
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,48 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
module HexaPDF
  module Font

    # Namespace for the classes that handle TrueType fonts.
    #
    # The implementation is currently partial: only those parts of the file format that are
    # needed for using the fonts with PDF are supported.
    module TrueType

      # The font class is loaded lazily on first access.
      autoload :Font, 'hexapdf/font/true_type/font'

    end

  end
end
@@ -0,0 +1,240 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/true_type'
35
+ require 'hexapdf/font/cmap'
36
+ require 'hexapdf/error'
37
+
38
module HexaPDF
  module Font

    # Adapter that makes a generic TrueType font object usable in a PDF context.
    #
    # PDF knows two representations for TrueType fonts: a simple font with Subtype TrueType and
    # a composite font that uses a Type2 CIDFont. This wrapper only implements the composite
    # font case for the following reasons:
    #
    # * One composite font object can encode more than 256 characters.
    # * Fonts for vertical writing can potentially be used.
    # * Using a composite font is what the PDF specification recommends (see PDF1.7 s9.9 at the
    #   end).
    #
    # Furthermore, TrueType fonts are *always* embedded.
    class TrueTypeWrapper

      # A single glyph of the wrapped font.
      class Glyph

        # The ID of the glyph.
        attr_reader :id

        # Creates a new Glyph object for the glyph with the given +id+ of the +font+.
        def initialize(font, id)
          @font = font
          @id = id
        end

        # Returns the width of the glyph, i.e. its advance width multiplied by 1000 and divided
        # by the font's units per em. The value is calculated once and then cached.
        def width
          @width ||=
            begin
              advance_width = @font[:hmtx][id].advance_width
              advance_width * 1000.0 / @font[:head].units_per_em
            end
        end

        # Returns +true+ if the glyph represents the space character.
        #
        # Glyph ID 3 is treated as the space glyph, according to
        # http://scripts.sil.org/iws-chapter08 and
        # https://www.microsoft.com/typography/otspec/recom.htm
        def space?
          @id == 3
        end

      end

      private_constant :Glyph


      # The wrapped TrueType font object.
      attr_reader :wrapped_font

      # The PDF font dictionary that represents the wrapped font.
      attr_reader :dict

      # Creates a new wrapper for using the TrueType +font+ with the PDF +document+.
      #
      # Raises a HexaPDF::Error if the font's cmap table has no preferred mapping table.
      def initialize(document, font)
        @document = document
        @wrapped_font = font

        # Caches: glyph ID -> Glyph, Unicode codepoint -> Glyph and Glyph -> code string.
        @id_to_glyph = {}
        @codepoint_to_glyph = {}
        @encoded_glyphs = {}

        @cmap = font[:cmap].preferred_table
        if @cmap.nil?
          raise HexaPDF::Error, "No mapping table for Unicode characters found for TTF " \
            "font #{font.full_name}"
        end

        @dict = build_font_dict
        # The glyph specific parts of the dictionary are filled in by #complete_font_dict when
        # the document dispatches the :complete_objects message.
        @document.register_listener(:complete_objects, &method(:complete_font_dict))
      end

      # Returns the Glyph object for the given glyph ID.
      #
      # An ID outside of the range of the font's glyphs is replaced by the ID that the
      # 'font.on_missing_glyph' configuration callback returns for the codepoint 0xFFFD.
      #
      # Note: Although this method is public, it should normally not be used by application code!
      def glyph(id)
        cached = @id_to_glyph[id]
        return cached if cached

        gid = id
        if gid < 0 || gid >= @wrapped_font[:maxp].num_glyphs
          gid = @document.config['font.on_missing_glyph'].call(0xFFFD, @wrapped_font)
        end
        @id_to_glyph[id] = Glyph.new(@wrapped_font, gid)
      end

      # Returns an array with one glyph object for each character of the UTF-8 encoded string.
      def decode_utf8(str)
        str.each_codepoint.map {|codepoint| glyph_for_codepoint(codepoint) }
      end

      # Returns the code string for the given glyph, i.e. its ID as 16-bit big-endian integer.
      #
      # The glyph is remembered as encoded; this information is used later for completing the
      # font dictionary.
      def encode(glyph)
        code = @encoded_glyphs[glyph]
        return code if code

        @encoded_glyphs[glyph] = [glyph.id].pack('n')
      end

      private

      # Returns the (cached) Glyph object for the Unicode +codepoint+.
      #
      # If the font's mapping table has no entry for the codepoint, the glyph ID is determined
      # by the 'font.on_missing_glyph' configuration callback.
      def glyph_for_codepoint(codepoint)
        known = @codepoint_to_glyph[codepoint]
        return known if known

        gid = @cmap[codepoint]
        gid ||= @document.config['font.on_missing_glyph'].call(codepoint, @wrapped_font)
        @codepoint_to_glyph[codepoint] = glyph(gid)
      end

      # Builds the Type0 font object that represents the TrueType font and returns it.
      #
      # Only information that is available at build time is stored in the returned object, so
      # glyph specific attributes like the widths are missing.
      #
      # See: #complete_font_dict
      def build_font_dict
        scaling = 1000.0 / @wrapped_font[:head].units_per_em
        font_io = @wrapped_font.io

        font_file = @document.add({Length1: font_io.size},
                                  stream: HexaPDF::StreamData.new(font_io))
        descriptor = @document.add(Type: :FontDescriptor,
                                   FontName: @wrapped_font.font_name.intern,
                                   FontWeight: @wrapped_font.weight,
                                   Flags: 0,
                                   FontBBox: @wrapped_font.bounding_box.map {|v| v * scaling },
                                   ItalicAngle: @wrapped_font.italic_angle || 0,
                                   Ascent: @wrapped_font.ascender * scaling,
                                   Descent: @wrapped_font.descender * scaling,
                                   StemV: @wrapped_font.dominant_vertical_stem_width,
                                   FontFile2: font_file)

        os2 = @wrapped_font[:'OS/2']
        if os2.version >= 2
          descriptor[:CapHeight] = @wrapped_font.cap_height * scaling
          descriptor[:XHeight] = @wrapped_font.x_height * scaling
        else
          # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#ch (glyph "H")
          descriptor[:CapHeight] = estimated_height(0x0048, 0.8, scaling)
          # Estimate as per https://www.microsoft.com/typography/otspec/os2.htm#xh (glyph "x")
          descriptor[:XHeight] = estimated_height(0x0078, 0.5, scaling)
        end

        fixed_pitch = @wrapped_font[:post].is_fixed_pitch? ||
          @wrapped_font[:hhea].num_of_long_hor_metrics == 1
        descriptor.flag(:fixed_pitch) if fixed_pitch
        slanted = os2.selection_include?(:italic) || os2.selection_include?(:oblique)
        descriptor.flag(:italic) if slanted
        descriptor.flag(:symbolic)

        cid_font = @document.add(Type: :Font, Subtype: :CIDFontType2,
                                 BaseFont: @wrapped_font.font_name.intern,
                                 FontDescriptor: descriptor,
                                 CIDSystemInfo: {Registry: "Adobe", Ordering: "Identity",
                                                 Supplement: 0},
                                 CIDToGIDMap: :Identity)
        @document.add(Type: :Font, Subtype: :Type0, BaseFont: cid_font[:BaseFont],
                      Encoding: :"Identity-H", DescendantFonts: [cid_font])
      end

      # Returns the scaled height estimate that is based on the glyph for +codepoint+.
      #
      # The maximum y value of the glyph is used if the mapping table knows the codepoint;
      # otherwise the font's ascender multiplied by +ascender_factor+ serves as fallback.
      def estimated_height(codepoint, ascender_factor, scaling)
        gid = @cmap[codepoint]
        if gid
          @wrapped_font[:glyf][gid].y_max * scaling
        else
          @wrapped_font.ascender * ascender_factor * scaling
        end
      end

      # Adds the information that depends on the encoded glyphs to the Type0 font object and
      # its CIDFont object.
      def complete_font_dict
        complete_width_information
        create_to_unicode_cmap
      end

      # Sets the /DW and /W fields of the CIDFont dictionary.
      #
      # The width of glyph 3 is used as default width. All encoded glyphs with a different width
      # are listed in /W as sequence of "first glyph ID, [widths...]" entries, one entry for each
      # run of consecutive glyph IDs. /W is not set if there are no such glyphs.
      def complete_width_information
        cid_font = @dict[:DescendantFonts].first
        default_width = glyph(3).width
        cid_font[:DW] = default_width

        glyphs = @encoded_glyphs.keys.reject {|g| g.width == default_width }.sort_by(&:id)
        return if glyphs.empty?

        widths = cid_font[:W] = []
        glyphs.slice_when {|prev, cur| prev.id + 1 != cur.id }.each do |run|
          widths << run.first.id << run.map(&:width)
        end
      end

      # Creates the /ToUnicode CMap and stores it in the font dictionary so that text extraction
      # works correctly.
      #
      # The CMap data is created inside the block given to the stream data object and the
      # stream object is set up to use the FlateDecode filter.
      def create_to_unicode_cmap
        cmap_data = HexaPDF::StreamData.new do
          HexaPDF::Font::CMap.create_to_unicode_cmap(to_unicode_mapping)
        end
        to_unicode = @document.add({}, stream: cmap_data)
        to_unicode.set_filter(:FlateDecode)
        @dict[:ToUnicode] = to_unicode
      end

      # Returns the glyph ID to Unicode codepoint pairs of all encoded glyphs, sorted by glyph
      # ID.
      def to_unicode_mapping
        @encoded_glyphs.keys.sort_by(&:id).map do |encoded|
          # Using 0xFFFD as mentioned in Adobe #5411, last line before section 1.5
          [encoded.id, @cmap.gid_to_code(encoded.id) || 0xFFFD]
        end
      end

    end

  end
end