hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,204 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'stringio'
35
+ require 'hexapdf/tokenizer'
36
+
37
+ module HexaPDF
38
+ module Content
39
+
40
+ # More efficient tokenizer for content streams. This tokenizer class works directly on a
41
+ # string and not on an IO.
42
+ #
43
+ # Note: Indirect object references are *not* supported by this tokenizer!
44
+ #
45
+ # See: PDF1.7 s7.2
46
+ class Tokenizer < HexaPDF::Tokenizer #:nodoc:
47
+
48
+ # Creates a new tokenizer.
49
+ def initialize(string)
50
+ @ss = StringScanner.new(string)
51
+ end
52
+
53
+ # See: HexaPDF::Tokenizer#pos
54
+ def pos
55
+ @ss.pos
56
+ end
57
+
58
+ # See: HexaPDF::Tokenizer#pos=
59
+ def pos=(pos)
60
+ @ss.pos = pos
61
+ end
62
+
63
+ # See: HexaPDF::Tokenizer#scan_until
64
+ def scan_until(re)
65
+ @ss.scan_until(re)
66
+ end
67
+
68
+ # See: HexaPDF::Tokenizer#next_token
69
def next_token
  # Skip leading whitespace and any number of consecutive comments. This is done iteratively
  # (instead of re-entering #next_token once per comment) so that long runs of comment lines
  # cannot exhaust the call stack.
  @ss.skip(WHITESPACE_MULTI_RE)
  while !@ss.eos? && @ss.string.getbyte(@ss.pos) == 37 # %
    # A comment extends to the next line break; without one it runs to the end of the string.
    return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
    @ss.skip(WHITESPACE_MULTI_RE)
  end

  # Dispatch on the first byte of the token (-1 signals the end of the string).
  case (@ss.eos? ? -1 : @ss.string.getbyte(@ss.pos))
  when 43, 45, 46, 48..57 # + - . 0..9
    parse_number
  when 65..90, 97..122 # A..Z a..z
    parse_keyword
  when 47 # /
    parse_name
  when 40 # (
    parse_literal_string
  when 60 # <
    # A single '<' starts a hex string, '<<' starts a dictionary.
    if @ss.string.getbyte(@ss.pos + 1) != 60
      parse_hex_string
    else
      @ss.pos += 2
      TOKEN_DICT_START
    end
  when 62 # >
    # A '>' is only valid here as part of the dictionary end marker '>>'.
    unless @ss.string.getbyte(@ss.pos + 1) == 62
      raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
    end
    @ss.pos += 2
    TOKEN_DICT_END
  when 91 # [
    @ss.pos += 1
    TOKEN_ARRAY_START
  when 93 # ]
    @ss.pos += 1
    TOKEN_ARRAY_END
  when 123, 125 # { }
    Token.new(@ss.get_byte)
  when -1
    NO_MORE_TOKENS
  else
    # Every other byte is treated as the start of a keyword.
    parse_keyword
  end
end
110
+
111
+ private
112
+
113
+ # See: HexaPDF::Tokenizer#parse_number
114
def parse_number
  if (val = @ss.scan(/[+-]?\d++(?!\.)/))
    # Integer: digits that are not followed by a decimal point.
    val.to_i
  elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
    # Real number: Float() rejects a trailing dot, so "1." is turned into "1.0" first.
    val << '0'.freeze if val.getbyte(-1) == 46 # dot '.'
    Float(val)
  else
    # Neither pattern matched (e.g. a lone '+', '-' or '.'), so this is not a number. Treat it
    # like any other non-numeric token instead of failing on a nil scan result.
    parse_keyword
  end
end
123
+
124
+ # Stub implementation to prevent errors for not-overridden methods.
125
+ def prepare_string_scanner(*)
126
+ end
127
+
128
+ end
129
+
130
+
131
+ # This class knows how to correctly parse a content stream.
132
+ #
133
+ # == Overview
134
+ #
135
+ # A content stream is mostly just a stream of PDF objects. However, there is one exception:
136
+ # inline images.
137
+ #
138
+ # Since inline images don't follow the normal PDF object parsing rules, they need to be
139
+ # handled specially and this is the reason for this class. Therefore only the BI operator is
140
+ # ever called for inline images because the ID and EI operators are handled by the parser.
141
+ #
142
+ # To parse some contents the #parse method needs to be called with the contents to be parsed
143
+ # and a Processor object which is used for processing the parsed operators.
144
class Parser

  # Creates a new Parser object and calls #parse.
  def self.parse(contents, processor)
    new.parse(contents, processor)
  end

  # Parses the contents and calls the processor object for each parsed operator.
  #
  # Objects returned by the tokenizer are collected as operands until an operator token is
  # read; then +processor.process+ is invoked with the operator name (as symbol) and the
  # collected operands. The operand array is cleared after each call, so a processor that
  # wants to keep the operands needs to copy them.
  def parse(contents, processor)
    tokenizer = Tokenizer.new(contents)
    params = []
    while (obj = tokenizer.next_object(allow_keyword: true)) != Tokenizer::NO_MORE_TOKENS
      if obj.kind_of?(Tokenizer::Token)
        if obj == 'BI'.freeze
          # Inline images are read completely here; the operands of BI are the image
          # dictionary and the raw image data.
          params = parse_inline_image(tokenizer)
        end
        processor.process(obj.to_sym, params)
        params.clear
      else
        params << obj
      end
    end
  end

  private

  # Parses the inline image at the current position and returns the array [dict, data].
  #
  # Raises HexaPDF::Error if the image dictionary is malformed or if the end marker EI cannot
  # be found.
  def parse_inline_image(tokenizer)
    # BI has already been read, so read the image dictionary
    dict = {}
    while (key = tokenizer.next_object(allow_keyword: true))
      if key == 'ID'.freeze
        break
      elsif key == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary key for inline image"
      elsif !key.kind_of?(Symbol)
        raise HexaPDF::Error, "Inline image dictionary keys must be PDF name objects"
      end
      value = tokenizer.next_object
      if value == Tokenizer::NO_MORE_TOKENS
        raise HexaPDF::Error, "EOS while trying to read dictionary value for inline image"
      end
      dict[key] = value
    end

    # one whitespace character after ID
    tokenizer.next_byte

    # find the EI operator; it is either followed by whitespace or it is the very last token
    # of the content stream
    data = tokenizer.scan_until(/(?=EI(?:[#{Tokenizer::WHITESPACE}]|\z))/o)
    if data.nil?
      raise HexaPDF::Error, "End inline image marker EI not found"
    end

    # Only step over 'EI' itself: there may be no character after it, and following
    # whitespace is skipped when the next token is read.
    tokenizer.pos += 2
    [dict, data]
  end

end
202
+
203
+ end
204
+ end
@@ -0,0 +1,451 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/content/operator'
35
+ require 'hexapdf/content/graphics_state'
36
+
37
+ module HexaPDF
38
+ module Content
39
+
40
+ # This class is used for processing content operators extracted from a content stream.
41
+ #
42
+ # == General Information
43
+ #
44
+ # When a content stream is read, operators and their operands are extracted. After extraction,
45
+ # these operators are normally processed with a Processor instance that ensures that the needed
46
+ # setup (like modifying the graphics state) is done before further processing.
47
+ #
48
+ # == How Processing Works
49
+ #
50
+ # The operator implementations (see the Operator module) are called first and they ensure that
51
+ # the processing state is consistent. For example, operators that modify the graphics state do
52
+ # actually modify the #graphics_state object. However, operator implementations are *only* used
53
+ # for this task and not more, so they are very specific and normally don't need to be changed.
54
+ #
55
+ # After that, methods corresponding to the operator names are invoked on the processor object (if
56
+ # they exist). Each PDF operator name is mapped to a nicer message name via the
57
+ # OPERATOR_MESSAGE_NAME_MAP constant. For example, the operator 'q' is mapped to
58
+ # 'save_graphics_state'.
59
+ #
60
+ # The task of these methods is to do something useful with the content itself; they don't need
61
+ # to concern themselves with ensuring the consistency of the processing state. For example, the
62
+ # processor could use the processing state to extract the text. Or paint the content on a
63
+ # canvas.
64
+ #
65
+ # For inline images only the 'BI' operator mapped to 'inline_image' is used. Although the
66
+ # operators 'ID' and 'EI' also exist for inline images, they are not used because they are consumed
67
+ # while parsing inline images and do not reflect separate operators.
68
+ #
69
+ # == Text Processing
70
+ #
71
+ # Two utility methods #decode_text and #decode_text_with_positioning for extracting text are
72
+ # provided. Both can directly be invoked from the 'show_text' and 'show_text_with_positioning'
73
+ # methods.
74
+ #
75
+ class Processor
76
+
77
+ # Represents an (immutable) glyph box with positioning information.
78
+ #
79
+ # Since the glyph may have been transformed by an affine matrix, the bounding box may not be a
80
+ # rectangle in all cases but it is always a parallelogram.
81
class GlyphBox

  # The code point representing the glyph.
  attr_reader :code_point

  # The Unicode value of the code point (always a frozen string).
  attr_reader :string

  # Creates a new glyph box for the given code point/Unicode value pair with the lower left
  # coordinate [llx, lly], the lower right coordinate [lrx, lry], and the upper left
  # coordinate [ulx, uly].
  #
  # The glyph box itself is frozen after creation. The given +string+ is not modified: if it
  # is not already frozen, a frozen copy is stored instead of freezing the caller's object.
  def initialize(code_point, string, llx, lly, lrx, lry, ulx, uly)
    @code_point = code_point
    @string = (string.frozen? ? string : string.dup.freeze)
    @llx = llx
    @lly = lly
    @lrx = lrx
    @lry = lry
    @ulx = ulx
    @uly = uly
    freeze
  end

  # :call-seq:
  #   glyph_box.lower_left    -> [llx, lly]
  #
  # Returns the lower left coordinate
  def lower_left
    [@llx, @lly]
  end

  # :call-seq:
  #   glyph_box.lower_right    -> [lrx, lry]
  #
  # Returns the lower right coordinate
  def lower_right
    [@lrx, @lry]
  end

  # :call-seq:
  #   glyph_box.upper_left    -> [ulx, uly]
  #
  # Returns the upper left coordinate
  def upper_left
    [@ulx, @uly]
  end

  # :call-seq:
  #   glyph_box.upper_right    -> [urx, ury]
  #
  # Returns the upper right coordinate which is computed by using the other three points of
  # the parallelogram: the upper left corner shifted by the vector from the lower left to the
  # lower right corner.
  def upper_right
    [@ulx + (@lrx - @llx), @uly + (@lry - @lly)]
  end

  # :call-seq:
  #   glyph_box.points    -> [llx, lly, lrx, lry, urx, ury, ulx, uly]
  #
  # Returns the four corners of the box as an array of coordinates, starting with the lower
  # left corner and going counterclockwise.
  def points
    # The upper right corner is derived in exactly one place, #upper_right.
    [@llx, @lly, @lrx, @lry, *upper_right, @ulx, @uly]
  end

end
147
+
148
+
149
+ # Represents a box composed of GlyphBox objects.
150
+ #
151
+ # The bounding box methods #lower_left, #lower_right, #upper_left, #upper_right are computed
152
+ # by just using the first and last boxes, assuming the boxes are arranged from left to right
153
+ # in a straight line.
154
class CompositeBox

  # The glyph boxes contained in this composite box, in the order they were appended.
  attr_reader :boxes

  # Creates an empty object.
  def initialize
    @boxes = []
  end

  # Appends the given glyph box and returns the composite box itself so that calls can be
  # chained.
  def <<(glyph_box)
    @boxes << glyph_box
    self
  end

  # Returns the glyph box at the given index, or +nil+ if the index is out of range.
  def [](index)
    @boxes[index]
  end

  # :call-seq:
  #   composite.each {|glyph_box| block}   -> composite
  #   composite.each                       -> Enumerator
  #
  # Iterates over all contained glyph boxes.
  def each(&block)
    return to_enum(:each) unless block_given?
    @boxes.each(&block)
    self
  end

  # Returns the concatenated text of the boxes (an empty string if there are no boxes).
  def string
    @boxes.map(&:string).join('')
  end

  # :call-seq:
  #   composite.lower_left    -> [llx, lly] or nil
  #
  # Returns the lower left coordinate of the first box, or +nil+ if there are no boxes.
  def lower_left
    @boxes.first.lower_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.lower_right    -> [lrx, lry] or nil
  #
  # Returns the lower right coordinate of the last box, or +nil+ if there are no boxes.
  def lower_right
    @boxes.last.lower_right unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_left    -> [ulx, uly] or nil
  #
  # Returns the upper left coordinate of the first box, or +nil+ if there are no boxes.
  def upper_left
    @boxes.first.upper_left unless @boxes.empty?
  end

  # :call-seq:
  #   composite.upper_right    -> [urx, ury] or nil
  #
  # Returns the upper right coordinate of the last box, or +nil+ if there are no boxes.
  def upper_right
    @boxes.last.upper_right unless @boxes.empty?
  end

end
224
+
225
+ # Mapping of PDF operator names to message names that are sent to renderer implementations.
226
OPERATOR_MESSAGE_NAME_MAP = {
  # Graphics state operators
  q: :save_graphics_state,
  Q: :restore_graphics_state,
  cm: :concatenate_matrix,
  w: :set_line_width,
  J: :set_line_cap_style,
  j: :set_line_join_style,
  M: :set_miter_limit,
  d: :set_line_dash_pattern,
  ri: :set_rendering_intent,
  i: :set_flatness_tolerance,
  gs: :set_graphics_state_parameters,
  # Color operators (SC/SCN and sc/scn deliberately share one message each)
  CS: :set_stroking_color_space,
  cs: :set_non_stroking_color_space,
  SC: :set_stroking_color,
  SCN: :set_stroking_color,
  sc: :set_non_stroking_color,
  scn: :set_non_stroking_color,
  G: :set_device_gray_stroking_color,
  g: :set_device_gray_non_stroking_color,
  RG: :set_device_rgb_stroking_color,
  rg: :set_device_rgb_non_stroking_color,
  K: :set_device_cmyk_stroking_color,
  k: :set_device_cmyk_non_stroking_color,
  # Path construction, painting and clipping operators (f and F share one message)
  m: :move_to,
  l: :line_to,
  c: :curve_to,
  v: :curve_to_no_first_control_point,
  y: :curve_to_no_second_control_point,
  h: :close_subpath,
  re: :append_rectangle,
  S: :stroke_path,
  s: :close_and_stroke_path,
  f: :fill_path_non_zero,
  F: :fill_path_non_zero,
  :'f*' => :fill_path_even_odd,
  B: :fill_and_stroke_path_non_zero,
  :'B*' => :fill_and_stroke_path_even_odd,
  b: :close_fill_and_stroke_path_non_zero,
  :'b*' => :close_fill_and_stroke_path_even_odd,
  n: :end_path,
  W: :clip_path_non_zero,
  :'W*' => :clip_path_even_odd,
  # Text operators
  BT: :begin_text,
  ET: :end_text,
  Tc: :set_character_spacing,
  Tw: :set_word_spacing,
  Tz: :set_horizontal_scaling,
  TL: :set_leading,
  Tf: :set_font_and_size,
  Tr: :set_text_rendering_mode,
  Ts: :set_text_rise,
  Td: :move_text,
  TD: :move_text_and_set_leading,
  Tm: :set_text_matrix,
  :'T*' => :move_text_next_line,
  Tj: :show_text,
  :"'" => :move_text_next_line_and_show_text,
  :'"' => :set_spacing_move_text_next_line_and_show_text,
  TJ: :show_text_with_positioning,
  d0: :set_glyph_width, # only for Type 3 fonts
  d1: :set_glyph_width_and_bounding_box, # only for Type 3 fonts
  # Shading, image, XObject, marked content and compatibility operators
  sh: :paint_shading,
  BI: :inline_image, # ID and EI are not sent because the complete image has been read
  Do: :paint_xobject,
  MP: :designate_marked_content_point,
  DP: :designate_marked_content_point_with_property_list,
  BMC: :begin_marked_content,
  BDC: :begin_marked_content_with_property_list,
  EMC: :end_marked_content,
  BX: :begin_compatibility_section,
  EX: :end_compatibility_section,
}.freeze # the mapping is shared by all processors and must not be modified at runtime
299
+
300
+ # Mapping from operator name (Symbol) to a callable object.
301
+ #
302
+ # This hash is prepopulated with the default operator implementations (see
303
+ # Operator::DEFAULT_OPERATORS). If a default operator implementation is not satisfactory, it
304
+ # can easily be changed by modifying this hash.
305
attr_reader :operators

# The resources dictionary that is consulted while operators are processed.
attr_accessor :resources

# The GraphicsState object holding the current graphics state.
#
# Changing this attribute manually is not advised because it is adjusted automatically
# according to the processed operators!
attr_reader :graphics_state

# The kind of graphics object that is currently being processed.
#
# Changing this attribute manually is not advised because it is adjusted automatically
# according to the processed operators!
#
# The attribute can have one of the following values:
#
# :none:: No current graphics object, i.e. the processor is at the page description level.
# :path:: The current graphics object is a path.
# :clipping_path:: The current graphics object is a clipping path.
# :text:: The current graphics object is text.
#
# See: PDF1.7 s8.2
attr_accessor :graphics_object
330
+
331
+ # Initializes a new processor that uses the resources PDF dictionary for resolving resources
332
+ # while processing operators.
333
+ #
334
+ # It is not mandatory to set the resources dictionary on initialization but it needs to be set
335
+ # prior to processing operators!
336
def initialize(resources = nil)
  # May still be nil here; it has to be assigned before any operator is processed.
  @resources = resources
  # Processing starts without a current graphics object.
  @graphics_object = :none
  # Use a copy of the defaults so that changing this processor's operator table does not
  # modify the DEFAULT_OPERATORS hash itself.
  @operators = Operator::DEFAULT_OPERATORS.dup
  @graphics_state = GraphicsState.new
end
342
+
343
+ # Processes the operator with the given operands.
344
+ #
345
+ # The operator is first processed with an operator implementation (if any) to ensure correct
346
+ # operations and then the corresponding method on this object is invoked.
347
def process(operator, operands = [])
  # Step 1: let the registered operator implementation (if there is one) do its work.
  if @operators.key?(operator)
    @operators[operator].invoke(self, *operands)
  end

  # Step 2: forward the operands to the message method of the same operator, but only when
  # the operator is mapped and this object actually provides the method (it may be
  # protected or private).
  message = OPERATOR_MESSAGE_NAME_MAP[operator]
  return unless message && respond_to?(message, true)
  send(message, *operands)
end
352
+
353
+ protected
354
+
355
+ # Provides a default implementation for the 'Do' operator.
356
+ #
357
+ # It checks if the XObject is a Form XObject and if so, processes the contents of the Form
358
+ # XObject.
359
def paint_xobject(name)
  xobject = resources.xobject(name)
  # Only Form XObjects have contents that can be processed here.
  return unless xobject[:Subtype] == :Form

  # Remember the current resources so that they can be put back afterwards.
  # NOTE(review): presumably process_contents switches the processor to the form's own
  # resources - confirm against the Form implementation.
  saved_resources = resources
  graphics_state.save

  begin
    graphics_state.ctm.premultiply(*xobject[:Matrix]) if xobject.key?(:Matrix)
    xobject.process_contents(self)
  ensure
    # Restore in every case, so that an error raised while processing the form does not
    # leave an unbalanced graphics state or the wrong resources behind.
    graphics_state.restore
    self.resources = saved_resources
  end

  # The restored resources dictionary is the return value.
  saved_resources
end
372
+
373
+ # Decodes the given text object and returns it as UTF-8 string.
374
+ #
375
+ # The argument may either be a simple text string (+Tj+ operator) or an array that contains
376
+ # text strings together with positioning information (+TJ+ operator).
377
def decode_text(data)
  if data.kind_of?(Array)
    # Only the string parts carry text; all other array items are skipped. The parts are
    # collected in a binary string before decoding.
    joined = ''.b
    data.each { |part| joined << part if part.kind_of?(String) }
    data = joined
  end

  font = graphics_state.font
  code_points = font.decode(data)
  code_points.map { |code_point| font.to_utf8(code_point) }.join('')
end
384
+
385
+ # Decodes the given text object and returns it as a CompositeBox object.
386
+ #
387
+ # The argument may either be a simple text string (+Tj+ operator) or an array that contains
388
+ # text strings together with positioning information (+TJ+ operator).
389
+ #
390
+ # For each glyph a GlyphBox object is computed. For horizontal fonts the width is
391
+ # predetermined but not the height. The latter is chosen to be the height and offset of the
392
+ # font's bounding box.
393
def decode_text_with_positioning(data)
  # A single text string is wrapped into an array so that both operand forms are handled
  # in the same way.
  items = Array(data)
  horizontal = (graphics_state.font.writing_mode == :horizontal)
  horizontal ? decode_horizontal_text(items) : decode_vertical_text(items)
end
401
+
402
+ private
403
+
404
+ # Decodes the given array containing text and positioning information while assuming that the
405
+ # writing direction is horizontal.
406
+ #
407
+ # See: PDF1.7 s9.4.4
408
def decode_horizontal_text(array)
  font = graphics_state.font
  scaled_char_space = graphics_state.scaled_character_spacing
  scaled_word_space = graphics_state.scaled_word_spacing
  scaled_font_size = graphics_state.scaled_font_size

  # The vertical extent of every glyph box is derived from the font's bounding box (lower
  # and upper y value) and shifted by the text rise.
  below_baseline = font.bounding_box[1] * scaled_font_size / \
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise
  above_baseline = font.bounding_box[3] * scaled_font_size / \
    graphics_state.scaled_horizontal_scaling + graphics_state.text_rise

  text = CompositeBox.new
  array.each do |item|
    if item.kind_of?(Numeric)
      # A number is a positioning adjustment: the text matrix is moved against the writing
      # direction by the scaled amount.
      graphics_state.tm.translate(-item * scaled_font_size, 0)
    else
      font.decode(item).each do |code_point|
        char = font.to_utf8(code_point)
        width = font.width(code_point) * scaled_font_size
        # Combine a copy of the CTM with the text matrix to map glyph space coordinates.
        matrix = graphics_state.ctm.dup.premultiply(*graphics_state.tm)
        fragment = GlyphBox.new(code_point, char,
                                *matrix.evaluate(0, below_baseline),
                                *matrix.evaluate(width, below_baseline),
                                *matrix.evaluate(0, above_baseline))
        text << fragment

        # Word spacing is tied to the character code 32 and not to the Unicode value the
        # code happens to be mapped to (see PDF1.7 s9.3.3). Other codes that decode to a
        # space therefore get no word spacing, whereas code 32 always gets it.
        # NOTE(review): assumes font.decode yields Integer character codes - confirm. The
        # specification also excludes the value 32 of multi-byte codes, which cannot be
        # distinguished here.
        word_space = (code_point == 32 ? scaled_word_space : 0)
        graphics_state.tm.translate(width + scaled_char_space + word_space, 0)
      end
    end
  end

  text.freeze
end
441
+
442
+ # Decodes the given array containing text and positioning information while assuming that the
443
+ # writing direction is vertical.
444
def decode_vertical_text(_data)
  # Vertical writing is not handled yet; fail with a message that says so instead of the
  # bare exception class name.
  raise NotImplementedError, "Decoding text in vertical writing mode is not supported"
end
447
+
448
+ end
449
+
450
+ end
451
+ end