hexapdf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,204 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'stringio'
35
+ require 'hexapdf/tokenizer'
36
+
37
+ module HexaPDF
38
+ module Content
39
+
40
# More efficient tokenizer for content streams. This tokenizer class works directly on a
# string and not on an IO.
#
# Note: Indirect object references are *not* supported by this tokenizer!
#
# See: PDF1.7 s7.2
class Tokenizer < HexaPDF::Tokenizer #:nodoc:

  # Creates a new tokenizer that scans the given content stream +string+.
  def initialize(string)
    @ss = StringScanner.new(string)
  end

  # Returns the current byte position of the scanner.
  #
  # See: HexaPDF::Tokenizer#pos
  def pos
    @ss.pos
  end

  # Sets the current byte position of the scanner.
  #
  # See: HexaPDF::Tokenizer#pos=
  def pos=(pos)
    @ss.pos = pos
  end

  # Scans forward until +re+ matches and returns the scanned string, or +nil+ if there is no
  # match.
  #
  # See: HexaPDF::Tokenizer#scan_until
  def scan_until(re)
    @ss.scan_until(re)
  end

  # Returns the next token, dispatching on the first non-whitespace byte, or NO_MORE_TOKENS
  # when the end of the string is reached.
  #
  # Raises HexaPDF::MalformedPDFError if a single '>' is found.
  #
  # See: HexaPDF::Tokenizer#next_token
  def next_token
    @ss.skip(WHITESPACE_MULTI_RE)
    case (@ss.eos? ? -1 : @ss.string.getbyte(@ss.pos))
    when 43, 45, 46, 48..57 # + - . 0..9
      parse_number
    when 65..90, 96..121 # A..Z and `..y (all other bytes reach parse_keyword via else)
      parse_keyword
    when 47 # /
      parse_name
    when 40 # (
      parse_literal_string
    when 60 # <
      if @ss.string.getbyte(@ss.pos + 1) != 60
        parse_hex_string
      else
        @ss.pos += 2
        TOKEN_DICT_START
      end
    when 62 # >
      unless @ss.string.getbyte(@ss.pos + 1) == 62
        raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
      end
      @ss.pos += 2
      TOKEN_DICT_END
    when 91 # [
      @ss.pos += 1
      TOKEN_ARRAY_START
    when 93 # ]
      @ss.pos += 1
      TOKEN_ARRAY_END
    when 123, 125 # { }
      Token.new(@ss.get_byte)
    when 37 # %
      # Skip the comment up to (but not including) the end of line; a comment that runs to
      # the end of the string means there is nothing left to tokenize.
      return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
      next_token
    when -1
      NO_MORE_TOKENS
    else
      parse_keyword
    end
  end

  private

  # Parses the number at the current position and returns an Integer or a Float.
  #
  # If the bytes at the current position only look like the start of a number (for example
  # a lone '+', '-' or '.'), neither number pattern matches; in that case parsing falls back
  # to #parse_keyword instead of failing on a +nil+ scan result.
  #
  # See: HexaPDF::Tokenizer#parse_number
  def parse_number
    if (val = @ss.scan(/[+-]?\d++(?!\.)/))
      val.to_i
    elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
      val << '0'.freeze if val.getbyte(-1) == 46 # dot '.', Float() rejects a trailing dot
      Float(val)
    else
      parse_keyword
    end
  end

  # Stub implementation to prevent errors for not-overridden methods.
  def prepare_string_scanner(*)
  end

end
129
+
130
+
131
# This class knows how to correctly parse a content stream.
#
# == Overview
#
# A content stream is mostly just a stream of PDF objects. However, there is one exception:
# inline images.
#
# Since inline images don't follow the normal PDF object parsing rules, they need to be
# handled specially and this is the reason for this class. Therefore only the BI operator is
# ever called for inline images because the ID and EI operators are handled by the parser.
#
# To parse some contents the #parse method needs to be called with the contents to be parsed
# and a Processor object which is used for processing the parsed operators.
class Parser

  # Creates a new Parser object and calls #parse.
  def self.parse(contents, processor)
    new.parse(contents, processor)
  end

  # Parses the contents and calls the processor object for each parsed operator.
  #
  # Operands are collected until an operator token is read; then +processor.process+ is
  # invoked with the operator name as symbol and the operand array. The operand array is
  # cleared right after the call, so a processor that wants to keep it must copy it.
  def parse(contents, processor)
    tokenizer = Tokenizer.new(contents)
    params = []
    while (obj = tokenizer.next_object(allow_keyword: true)) != Tokenizer::NO_MORE_TOKENS
      if obj.kind_of?(Tokenizer::Token)
        # For BI the operands are the image dictionary and the raw image data.
        params = parse_inline_image(tokenizer) if obj == 'BI'.freeze
        processor.process(obj.to_sym, params)
        params.clear
      else
        params << obj
      end
    end
  end

  private

  # Parses the inline image at the current position and returns the array
  # [dictionary, image data].
  #
  # Raises HexaPDF::Error if the dictionary is malformed, the stream ends prematurely or the
  # EI marker cannot be found.
  def parse_inline_image(tokenizer)
    # BI has already been read, so read the image dictionary
    dict = {}
    while (key = tokenizer.next_object(allow_keyword: true))
      if key == 'ID'.freeze
        break
      elsif key == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary key for inline image"
      elsif !key.kind_of?(Symbol)
        raise HexaPDF::Error, "Inline image dictionary keys must be PDF name objects"
      end
      value = tokenizer.next_object
      if value == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary value for inline image"
      end
      dict[key] = value
    end

    # one whitespace character after ID
    tokenizer.next_byte

    # find the EI operator; it is terminated either by a whitespace character or by the end
    # of the content stream
    data = tokenizer.scan_until(/(?=EI(?:[#{Tokenizer::WHITESPACE}]|\z))/o)
    if data.nil?
      raise HexaPDF::Error, "End inline image marker EI not found"
    end

    # consume EI and the following whitespace character, if there is one (moving the position
    # by a fixed amount would go past the end when EI terminates the stream)
    tokenizer.scan_until(/EI[#{Tokenizer::WHITESPACE}]?/o)
    [dict, data]
  end

end
202
+
203
+ end
204
+ end
@@ -0,0 +1,451 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/content/operator'
35
+ require 'hexapdf/content/graphics_state'
36
+
37
+ module HexaPDF
38
+ module Content
39
+
40
+ # This class is used for processing content operators extracted from a content stream.
41
+ #
42
+ # == General Information
43
+ #
44
+ # When a content stream is read, operators and their operands are extracted. After extracting
45
+ # these operators are normally processed with a Processor instance that ensures that the needed
46
+ # setup (like modifying the graphics state) is done before further processing.
47
+ #
48
+ # == How Processing Works
49
+ #
50
+ # The operator implementations (see the Operator module) are called first and they ensure that
51
+ # the processing state is consistent. For example, operators that modify the graphics state do
52
+ # actually modify the #graphics_state object. However, operator implementations are *only* used
53
+ # for this task and not more, so they are very specific and normally don't need to be changed.
54
+ #
55
+ # After that methods corresponding to the operator names are invoked on the processor object (if
56
+ # they exist). Each PDF operator name is mapped to a nicer message name via the
57
+ # OPERATOR_MESSAGE_NAME_MAP constant. For example, the operator 'q' is mapped to
58
+ # 'save_graphics_state'.
59
+ #
60
+ # The task of these methods is to do something useful with the content itself, it doesn't need
61
+ # to concern itself with ensuring the consistency of the processing state. For example, the
62
+ # processor could use the processing state to extract the text. Or paint the content on a
63
+ # canvas.
64
+ #
65
+ # For inline images only the 'BI' operator mapped to 'inline_image' is used. Although also the
66
+ # operators 'ID' and 'EI' exist for inline images, they are not used because they are consumed
67
+ # while parsing inline images and do not reflect separate operators.
68
+ #
69
+ # == Text Processing
70
+ #
71
+ # Two utility methods #decode_text and #decode_text_with_positioning for extracting text are
72
+ # provided. Both can directly be invoked from the 'show_text' and 'show_text_with_positioning'
73
+ # methods.
74
+ #
75
+ class Processor
76
+
77
# Represents an (immutable) glyph box with positioning information.
#
# Since the glyph may have been transformed by an affine matrix, the bounding box may not be
# a rectangle in all cases but it is always a parallelogram.
class GlyphBox

  # The code point representing the glyph.
  attr_reader :code_point

  # The Unicode value of the code point (a frozen string).
  attr_reader :string

  # Creates a new glyph box for the given code point/Unicode value pair with the lower left
  # coordinate [llx, lly], the lower right coordinate [lrx, lry], and the upper left
  # coordinate [ulx, uly].
  #
  # The box stores a frozen copy of +string+ so that the object passed in by the caller is
  # not frozen as a side effect.
  def initialize(code_point, string, llx, lly, lrx, lry, ulx, uly)
    @code_point = code_point
    @string = (string.frozen? ? string : string.dup.freeze)
    @llx = llx
    @lly = lly
    @lrx = lrx
    @lry = lry
    @ulx = ulx
    @uly = uly
    freeze
  end

  # :call-seq:
  #   fragment.lower_left    -> [llx, lly]
  #
  # Returns the lower left coordinate
  def lower_left
    [@llx, @lly]
  end

  # :call-seq:
  #   fragment.lower_right    -> [lrx, lry]
  #
  # Returns the lower right coordinate
  def lower_right
    [@lrx, @lry]
  end

  # :call-seq:
  #   fragment.upper_left    -> [ulx, uly]
  #
  # Returns the upper left coordinate
  def upper_left
    [@ulx, @uly]
  end

  # :call-seq:
  #   fragment.upper_right    -> [urx, ury]
  #
  # Returns the upper right coordinate which is computed by using the other three points of
  # the parallelogram (upper left corner plus the vector from lower left to lower right).
  def upper_right
    [@ulx + (@lrx - @llx), @uly + (@lry - @lly)]
  end

  # :call-seq:
  #   fragment.points        -> [llx, lly, lrx, lry, urx, ury, ulx, uly]
  #
  # Returns the four corners of the box as an array of coordinates, starting with the lower
  # left corner and going counterclockwise.
  def points
    [@llx, @lly, @lrx, @lry, *upper_right, @ulx, @uly]
  end

end
147
+
148
+
149
# Represents a box composed of GlyphBox objects.
#
# The bounding box methods #lower_left, #lower_right, #upper_left, #upper_right are computed
# by just using the first and last boxes, assuming the boxes are arranged from left to right
# in a straight line.
#
# A composite box without any glyph boxes has no extent; the bounding box methods return
# +nil+ in this case.
class CompositeBox

  # The glyph boxes contained in this composite box, in the order they were appended.
  attr_reader :boxes

  # Creates an empty object.
  def initialize
    @boxes = []
  end

  # Appends the given glyph box and returns +self+ so that calls can be chained.
  def <<(glyph_box)
    @boxes << glyph_box
    self
  end

  # Returns the glyph box at the given index, or +nil+ if the index is out of range.
  def [](index)
    @boxes[index]
  end

  # :call-seq:
  #   composite.each {|glyph_box| block}    -> composite
  #   composite.each                        -> Enumerator
  #
  # Iterates over all contained glyph boxes.
  def each(&block)
    return to_enum(__method__) unless block_given?
    @boxes.each(&block)
    self
  end

  # Returns the concatenated text of the boxes (an empty string if there are no boxes).
  def string
    @boxes.map(&:string).join('')
  end

  # :call-seq:
  #   composite.lower_left     -> [llx, lly] or nil
  #
  # Returns the lower left coordinate of the first glyph box, or +nil+ if there are no boxes.
  def lower_left
    @boxes[0].lower_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.lower_right    -> [lrx, lry] or nil
  #
  # Returns the lower right coordinate of the last glyph box, or +nil+ if there are no boxes.
  def lower_right
    @boxes[-1].lower_right unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_left     -> [ulx, uly] or nil
  #
  # Returns the upper left coordinate of the first glyph box, or +nil+ if there are no boxes.
  def upper_left
    @boxes[0].upper_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_right    -> [urx, ury] or nil
  #
  # Returns the upper right coordinate of the last glyph box, or +nil+ if there are no boxes.
  def upper_right
    @boxes[-1].upper_right unless @boxes.empty?
  end

end
224
+
225
# Mapping of PDF operator names to message names that are sent to renderer implementations.
#
# The mapping is frozen because it is shared by all processor instances. Operator behaviour
# is customized per instance through the operators hash and not by changing this constant.
OPERATOR_MESSAGE_NAME_MAP = {
  q: :save_graphics_state,
  Q: :restore_graphics_state,
  cm: :concatenate_matrix,
  w: :set_line_width,
  J: :set_line_cap_style,
  j: :set_line_join_style,
  M: :set_miter_limit,
  d: :set_line_dash_pattern,
  ri: :set_rendering_intent,
  i: :set_flatness_tolerance,
  gs: :set_graphics_state_parameters,
  CS: :set_stroking_color_space,
  cs: :set_non_stroking_color_space,
  SC: :set_stroking_color,
  SCN: :set_stroking_color,
  sc: :set_non_stroking_color,
  scn: :set_non_stroking_color,
  G: :set_device_gray_stroking_color,
  g: :set_device_gray_non_stroking_color,
  RG: :set_device_rgb_stroking_color,
  rg: :set_device_rgb_non_stroking_color,
  K: :set_device_cmyk_stroking_color,
  k: :set_device_cmyk_non_stroking_color,
  m: :move_to,
  l: :line_to,
  c: :curve_to,
  v: :curve_to_no_first_control_point,
  y: :curve_to_no_second_control_point,
  h: :close_subpath,
  re: :append_rectangle,
  S: :stroke_path,
  s: :close_and_stroke_path,
  f: :fill_path_non_zero,
  F: :fill_path_non_zero,
  :'f*' => :fill_path_even_odd,
  B: :fill_and_stroke_path_non_zero,
  :'B*' => :fill_and_stroke_path_even_odd,
  b: :close_fill_and_stroke_path_non_zero,
  :'b*' => :close_fill_and_stroke_path_even_odd,
  n: :end_path,
  W: :clip_path_non_zero,
  :'W*' => :clip_path_even_odd,
  BT: :begin_text,
  ET: :end_text,
  Tc: :set_character_spacing,
  Tw: :set_word_spacing,
  Tz: :set_horizontal_scaling,
  TL: :set_leading,
  Tf: :set_font_and_size,
  Tr: :set_text_rendering_mode,
  Ts: :set_text_rise,
  Td: :move_text,
  TD: :move_text_and_set_leading,
  Tm: :set_text_matrix,
  :'T*' => :move_text_next_line,
  Tj: :show_text,
  :"'" => :move_text_next_line_and_show_text,
  :'"' => :set_spacing_move_text_next_line_and_show_text,
  TJ: :show_text_with_positioning,
  d0: :set_glyph_width, # only for Type 3 fonts
  d1: :set_glyph_width_and_bounding_box, # only for Type 3 fonts
  sh: :paint_shading,
  BI: :inline_image, # ID and EI are not sent because the complete image has been read
  Do: :paint_xobject,
  MP: :designate_marked_content_point,
  DP: :designate_marked_content_point_with_property_list,
  BMC: :begin_marked_content,
  BDC: :begin_marked_content_with_property_list,
  EMC: :end_marked_content,
  BX: :begin_compatibility_section,
  EX: :end_compatibility_section,
}.freeze
299
+
300
+ # Mapping from operator name (Symbol) to an operator object responding to +invoke+.
301
+ #
302
+ # This hash is prepopulated with a copy of the default operator implementations (see
303
+ # Operator::DEFAULT_OPERATORS). If a default operator implementation is not satisfactory, it
304
+ # can easily be changed by modifying this hash.
305
+ attr_reader :operators
306
+
307
+ # The resources dictionary used for resolving resources during processing.
308
+ attr_accessor :resources
309
+
310
+ # The GraphicsState object containing the current graphics state.
311
+ #
312
+ # Changing this object manually is not advised; it is automatically adjusted
313
+ # according to the processed operators!
314
+ attr_reader :graphics_state
315
+
316
+ # The current graphics object.
317
+ #
318
+ # Changing this attribute manually is not advised; it is automatically adjusted
319
+ # according to the processed operators!
320
+ #
321
+ # This attribute can have the following values:
322
+ #
323
+ # :none:: No current graphics object, i.e. the processor is at the page description level.
324
+ # :path:: The current graphics object is a path.
325
+ # :clipping_path:: The current graphics object is a clipping path.
326
+ # :text:: The current graphics object is text.
327
+ #
328
+ # See: PDF1.7 s8.2
329
+ attr_accessor :graphics_object
330
+
331
# Creates a new processor.
#
# The optional +resources+ argument is the PDF resources dictionary that is used for
# resolving resources while operators are processed. It may be omitted here but then it has
# to be assigned (see #resources=) before any operator is processed!
#
# The processor starts with its own copy of the default operators, a fresh graphics state and
# no current graphics object.
def initialize(resources = nil)
  @resources = resources
  @operators, @graphics_state = Operator::DEFAULT_OPERATORS.dup, GraphicsState.new
  @graphics_object = :none
end
342
+
343
# Processes a single operator together with its operands.
#
# Processing happens in two steps:
#
# 1. If an operator implementation is registered for the operator, it is invoked first so
#    that the internal state (e.g. the graphics state) is correct.
# 2. If the operator has a message name and this object responds to it (private and protected
#    methods included), the message is sent with the operands as arguments.
#
# Returns the result of the sent message or +nil+ if no message was sent.
def process(operator, operands = [])
  if @operators.key?(operator)
    implementation = @operators[operator]
    implementation.invoke(self, *operands)
  end

  message = OPERATOR_MESSAGE_NAME_MAP[operator]
  return unless message && respond_to?(message, true)
  send(message, *operands)
end
352
+
353
+ protected
354
+
355
# Provides a default implementation for the 'Do' operator.
#
# It checks if the XObject is a Form XObject and if so, processes the contents of the Form
# XObject. Other XObjects are ignored (+nil+ is returned).
#
# While the contents are processed, the graphics state is saved and, if the Form XObject has
# a /Matrix entry, the current transformation matrix is premultiplied with it. Afterwards the
# graphics state and the resources that were active before the call are restored. This
# also happens if processing the contents raises an error, so that the processor is never
# left with an unbalanced graphics state stack or with the wrong resources.
def paint_xobject(name)
  xobject = resources.xobject(name)
  return unless xobject[:Subtype] == :Form

  saved_resources = resources
  graphics_state.save

  begin
    graphics_state.ctm.premultiply(*xobject[:Matrix]) if xobject.key?(:Matrix)
    xobject.process_contents(self)
  ensure
    graphics_state.restore
    self.resources = saved_resources
  end

  # Same return value as before the restore logic was moved into the ensure clause.
  saved_resources
end
372
+
373
# Decodes the given text object with the current font and returns the text as one string.
#
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
# text strings together with positioning information (+TJ+ operator). In the latter case only
# the strings are used, everything else in the array is skipped.
def decode_text(data)
  if data.kind_of?(Array)
    buffer = ''.b
    data.each {|item| buffer << item if item.kind_of?(String)}
    data = buffer
  end

  font = graphics_state.font
  code_points = font.decode(data)
  code_points.map {|code_point| font.to_utf8(code_point)}.join('')
end
384
+
385
# Decodes the given text object and returns it as a CompositeBox object.
#
# The argument may either be a simple text string (+Tj+ operator) or an array that contains
# text strings together with positioning information (+TJ+ operator); a single string is
# treated like an array with one element.
#
# For each glyph a GlyphBox object is computed. For horizontal fonts the width is
# predetermined but not the height. The latter is chosen to be the height and offset of the
# font's bounding box.
#
# The actual work is delegated depending on the writing mode of the current font.
def decode_text_with_positioning(data)
  items = Array(data)
  horizontal = (graphics_state.font.writing_mode == :horizontal)
  horizontal ? decode_horizontal_text(items) : decode_vertical_text(items)
end
401
+
402
+ private
403
+
404
# Decodes the given array containing text and positioning information while assuming that the
# writing direction is horizontal.
#
# The array may contain strings, which are decoded with the current font, and numbers, which
# shift the text position. A frozen CompositeBox containing one GlyphBox per decoded code
# point is returned.
#
# Note that the text matrix of the graphics state is advanced while decoding!
#
# See: PDF1.7 s9.4.4
def decode_horizontal_text(array)
  font = graphics_state.font
  scaled_char_space = graphics_state.scaled_character_spacing
  scaled_word_space = graphics_state.scaled_word_spacing
  scaled_font_size = graphics_state.scaled_font_size

  # The vertical extent of every glyph box is taken from the font's bounding box. The
  # horizontal scaling contained in the scaled font size is removed again since it doesn't
  # apply to the vertical direction.
  below_baseline = font.bounding_box[1] * scaled_font_size /
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise
  above_baseline = font.bounding_box[3] * scaled_font_size /
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise

  text = CompositeBox.new
  array.each do |item|
    if item.kind_of?(Numeric)
      # Positioning information: the text position is moved against the writing direction.
      graphics_state.tm.translate(-item * scaled_font_size, 0)
    else
      font.decode(item).each do |code_point|
        char = font.to_utf8(code_point)
        width = font.width(code_point) * scaled_font_size
        matrix = graphics_state.ctm.dup.premultiply(*graphics_state.tm)
        fragment = GlyphBox.new(code_point, char,
                                *matrix.evaluate(0, below_baseline),
                                *matrix.evaluate(width, below_baseline),
                                *matrix.evaluate(0, above_baseline))
        text << fragment

        # Word spacing is tied to the character code 32 and not to the Unicode value the
        # font assigns to a code (see PDF1.7 s9.3.3).
        word_space = (code_point == 32 ? scaled_word_space : 0)
        graphics_state.tm.translate(width + scaled_char_space + word_space, 0)
      end
    end
  end

  text.freeze
end
441
+
442
# Decodes the given array containing text and positioning information while assuming that the
# writing direction is vertical.
#
# This is not supported yet, so a NotImplementedError is always raised. Note that
# NotImplementedError is not a StandardError and therefore not caught by a plain +rescue+.
def decode_vertical_text(_data)
  raise NotImplementedError, "Decoding text with a vertical writing mode is not supported yet"
end
447
+
448
+ end
449
+
450
+ end
451
+ end