hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,193 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/type1'
35
+ require 'hexapdf/font/encoding'
36
+ require 'hexapdf/error'
37
+
38
module HexaPDF
  module Font

    # This class wraps a generic Type1 font object and provides the methods needed for working with
    # the font in a PDF context: mapping UTF-8 text to glyphs, encoding glyphs to single-byte codes
    # and building the PDF font dictionary.
    class Type1Wrapper

      # Represents a single glyph of the wrapped font.
      class Glyph

        # The name of the glyph.
        attr_reader :name
        alias_method :id, :name

        # Creates a new Glyph object for the glyph called +name+ of the given +font+.
        def initialize(font, name)
          @font = font
          @name = name
        end

        # Returns the width of the glyph as reported by the font (memoized).
        def width
          @width ||= @font.width(name)
        end

        # Returns +true+ if the glyph represents the space character.
        def space?
          @name == :space
        end

      end

      private_constant :Glyph


      # Returns the wrapped Type1 font object.
      attr_reader :wrapped_font

      # The PDF font dictionary representing the wrapped font.
      attr_reader :dict

      # Creates a new object wrapping the Type1 font for the PDF document.
      #
      # The optional argument +custom_encoding+ can be set to +true+ so that a custom encoding
      # instead of the WinAnsiEncoding is used. A custom encoding is also used when the character
      # set of the font metrics is 'Special'.
      def initialize(document, font, custom_encoding: false)
        @document = document
        @wrapped_font = font

        @dict = build_font_dict
        # The encoding dependent entries are filled in later by #complete_font_dict.
        @document.register_listener(:complete_objects, &method(:complete_font_dict))
        if @wrapped_font.metrics.character_set == 'Special' || custom_encoding
          # Start with an encoding that only contains the space character; further codes are
          # assigned on demand in #encode.
          @encoding = Encoding::Base.new
          @encoding.code_to_name[32] = :space
          @max_code = 32 # 32 = space
        else
          @encoding = Encoding.for_name(:WinAnsiEncoding)
          @max_code = 255 # Encoding is not modified
        end

        @zapf_dingbats_opt = {zapf_dingbats: (@wrapped_font.font_name == 'ZapfDingbats')}
        @name_to_glyph = {}
        @codepoint_to_glyph = {}
        @encoded_glyphs = {}
      end

      # Returns a Glyph object for the given glyph name.
      #
      # If the font metrics contain no entry for +name+, the callable stored in the configuration
      # option 'font.on_missing_glyph' is invoked with the name and the wrapped font, and its
      # result is used as glyph name instead. The result is cached under the requested name.
      def glyph(name)
        @name_to_glyph[name] ||=
          begin
            unless @wrapped_font.metrics.character_metrics.key?(name)
              name = @document.config['font.on_missing_glyph'].call(name, @wrapped_font)
            end
            Glyph.new(@wrapped_font, name)
          end
      end

      # Returns an array of glyph objects representing the characters in the UTF-8 encoded string.
      def decode_utf8(str)
        str.each_codepoint.map do |c|
          @codepoint_to_glyph[c] ||=
            begin
              # Build the one-character UTF-8 string without mutating a string literal so that
              # this also works when string literals are frozen.
              char = [c].pack('U')
              # The options are splatted so that they arrive as keyword arguments on Ruby 3+
              # (and still as a trailing hash if the callee takes a plain options hash).
              name = Encoding::GlyphList.unicode_to_name(char, **@zapf_dingbats_opt)
              # Fall back to the character itself so that the missing glyph handler invoked by
              # #glyph sees the character.
              name = char if name == :'.notdef'
              glyph(name)
            end
        end
      end

      # Encodes the glyph and returns the code string.
      #
      # If the glyph is not yet part of the encoding and the encoding still has room (i.e. the
      # highest used code is below 255), the next free code is assigned to the glyph.
      #
      # Raises HexaPDF::Error if the glyph cannot be encoded.
      def encode(glyph)
        @encoded_glyphs[glyph.name] ||=
          begin
            code = @encoding.code_to_name.key(glyph.name)
            if code
              code.chr.freeze
            elsif @max_code < 255
              @max_code += 1
              @encoding.code_to_name[@max_code] = glyph.name
              @max_code.chr.freeze
            else
              raise HexaPDF::Error, "Type1 encoding has no codepoint for #{glyph.name}"
            end
          end
      end

      private

      # Builds a generic Type1 font dictionary for the wrapped font.
      #
      # Generic in the sense that no information regarding the encoding or widths is included.
      def build_font_dict
        # The font descriptor is only created once, even if this method is called again.
        unless defined?(@fd)
          @fd = @document.wrap(Type: :FontDescriptor,
                               FontName: @wrapped_font.font_name.intern,
                               FontBBox: @wrapped_font.bounding_box,
                               ItalicAngle: @wrapped_font.italic_angle || 0,
                               Ascent: @wrapped_font.ascender || 0,
                               Descent: @wrapped_font.descender || 0,
                               CapHeight: @wrapped_font.cap_height,
                               XHeight: @wrapped_font.x_height,
                               StemH: @wrapped_font.dominant_horizontal_stem_width,
                               StemV: @wrapped_font.dominant_vertical_stem_width || 0)
          @fd.flag(:fixed_pitch) if @wrapped_font.metrics.is_fixed_pitch
          @fd.flag(@wrapped_font.metrics.character_set == 'Special' ? :symbolic : :nonsymbolic)
          @fd.must_be_indirect = true
        end

        @document.wrap(Type: :Font, Subtype: :Type1,
                       BaseFont: @wrapped_font.font_name.intern, Encoding: :WinAnsiEncoding,
                       FontDescriptor: @fd)
      end

      # Array of valid encoding names in PDF
      VALID_ENCODING_NAMES = [:WinAnsiEncoding, :MacRomanEncoding, :MacExpertEncoding].freeze

      # Completes the font dictionary by filling in the values that depend on the used encoding.
      #
      # This method is registered as listener for the :complete_objects message of the document.
      def complete_font_dict
        min, max = @encoding.code_to_name.keys.minmax
        @dict[:FirstChar] = min
        @dict[:LastChar] = max
        @dict[:Widths] = (min..max).map {|code| glyph(@encoding.name(code)).width}

        if VALID_ENCODING_NAMES.include?(@encoding.encoding_name)
          @dict[:Encoding] = @encoding.encoding_name
        else
          # No predefined encoding name: describe the whole used code range via a
          # differences array starting at the lowest code.
          differences = [min]
          (min..max).each {|code| differences << @encoding.name(code)}
          @dict[:Encoding] = {Differences: differences}
        end
      end

    end

  end
end
@@ -0,0 +1,70 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/font/true_type_wrapper'
35
+
36
module HexaPDF
  module FontLoader

    # This module uses the configuration option 'font.map' for loading a font.
    module FromConfiguration

      # Loads the given font by looking up the needed file in the 'font.map' configuration option.
      #
      # Returns a TrueType font wrapper for the configured file, or +nil+ if the 'font.map'
      # option has no entry for the given name and variant. Raises HexaPDF::Error if the
      # configured path does not point to an existing file.
      #
      # The file object representing the font file is *not* closed and if needed must be closed by
      # the caller once the font is not needed anymore.
      #
      # +document+::
      #     The PDF document to associate the font object with.
      #
      # +name+::
      #     The name of the font.
      #
      # +variant+::
      #     The font variant. Normally one of :none, :bold, :italic, :bold_italic.
      def self.call(document, name, variant: :none, **)
        file = document.config['font.map'].dig(name, variant)
        return nil if file.nil?

        unless File.file?(file)
          raise HexaPDF::Error, "The configured font file #{file} does not exist"
        end

        # Font files contain binary data, so the file is opened in binary mode to avoid newline
        # translation and text encoding handling.
        font = HexaPDF::Font::TrueType::Font.new(io: File.open(file, 'rb'))
        HexaPDF::Font::TrueTypeWrapper.new(document, font)
      end

    end

  end
end
@@ -0,0 +1,98 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/data_dir'
35
+ require 'hexapdf/font/type1_wrapper'
36
+
37
module HexaPDF
  module FontLoader

    # Font loader for the standard 14 PDF fonts, which are backed by the AFM files in the
    # 'afm' sub-directory of the HexaPDF data directory.
    module Standard14

      # Maps a font family name to a hash that maps each supported variant to the font name.
      MAPPING = {
        'Times' => {none: 'Times-Roman', bold: 'Times-Bold',
                    italic: 'Times-Italic', bold_italic: 'Times-BoldItalic'},
        'Helvetica' => {none: 'Helvetica', bold: 'Helvetica-Bold',
                        italic: 'Helvetica-Oblique', bold_italic: 'Helvetica-BoldOblique'},
        'Courier' => {none: 'Courier', bold: 'Courier-Bold',
                      italic: 'Courier-Oblique', bold_italic: 'Courier-BoldOblique'},
        'Symbol' => {none: 'Symbol'},
        'ZapfDingbats' => {none: 'ZapfDingbats'},
      }

      # Returns a Type1 font wrapper for the requested built-in font, created from the AFM font
      # metrics file of the font, or +nil+ if MAPPING has no entry for the name and variant.
      #
      # +document+::
      #     The PDF document to associate the font object with.
      #
      # +name+::
      #     The family name of the built-in font, i.e. one of Times, Helvetica, Courier, Symbol
      #     or ZapfDingbats.
      #
      # +variant+::
      #     The font variant. Times, Helvetica and Courier support :none, :bold, :italic and
      #     :bold_italic; Symbol and ZapfDingbats only support :none.
      #
      # +custom_encoding+::
      #     Passed on to the font wrapper. Set this to +true+ if access to glyphs outside of the
      #     standard WinAnsiEncoding is needed for Times, Helvetica or Courier.
      def self.call(document, name, variant: :none, custom_encoding: false, **)
        variants = MAPPING[name]
        font_name = variants && variants[variant]
        return nil if font_name.nil?

        afm_file = File.join(HexaPDF.data_dir, 'afm', "#{font_name}.afm")
        HexaPDF::Font::Type1Wrapper.new(document, HexaPDF::Font::Type1::Font.from_afm(afm_file),
                                        custom_encoding: custom_encoding)
      end

    end

  end
end
@@ -0,0 +1,85 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
module HexaPDF

  # == Overview
  #
  # A *font loader* is a callable object that is asked to load a font for a given name and a set
  # of options. A font loader that doesn't know the requested font has to return +nil+.
  #
  # The object returned by a font loader has to be a PDF font wrapper, not the generic font
  # object, because it needs to be usable by the PDF canvas. The required interface is described
  # in the section "Font Wrappers".
  #
  #
  # == Implementation of a Font Loader
  #
  # A font loader is a (stateless) object, normally a module, that is callable, i.e. that
  # provides the following method:
  #
  # call(document, name, **options)::
  #     Should return the font wrapper customized for the given document if the font is known
  #     and +nil+ otherwise.
  #
  # Which +options+ are understood depends on the font loader. However, every font loader
  # should handle the following common options:
  #
  # variant:: The font variant that should be used (e.g. +:none+, +:bold+, +:italic+,
  #           +:bold_italic+).
  #
  #
  # == Font Wrappers
  #
  # So that HexaPDF can use a font wrapper correctly, it needs to provide the following generic
  # interface:
  #
  # dict::
  #     Needs to return the PDF font dictionary that represents the wrapped font.
  #
  # decode_utf8(str)::
  #     Needs to convert the given string into an array of glyph objects. The glyph objects
  #     themselves are treated as opaque objects by HexaPDF::Content::Canvas.
  #
  # encode(glyph)::
  #     Takes a single glyph object, which needs to be compatible with the font wrapper, and
  #     returns an encoded string that can be decoded with the font dictionary returned by
  #     \#dict.
  #
  module FontLoader

    # The font loader implementations are only loaded when they are first referenced.
    autoload :Standard14, 'hexapdf/font_loader/standard14'
    autoload :FromConfiguration, 'hexapdf/font_loader/from_configuration'

  end

end
@@ -0,0 +1,89 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/configuration'
35
+ require 'hexapdf/font_loader'
36
+
37
module HexaPDF

  # This class provides utility functions for working with fonts. It is available through the
  # HexaPDF::Document#fonts method.
  class FontUtils

    # Creates a new FontUtils object for the given PDF document.
    def initialize(document)
      @document = document
      # Maps [name, options] to the font object returned by the first successful font loader.
      @loaded_fonts_cache = {}
    end

    # :call-seq:
    #   fonts.load(name, **options)    -> font
    #
    # Loads and returns the font (using the loaders specified with the configuration option
    # 'font_loader').
    #
    # The font loaders are tried in the configured order and the result of the first one that
    # returns a font is used. The +options+ are passed on to the font loaders unchanged.
    #
    # If a font with the same name and options has been loaded before, the cached font object
    # is returned and no font loader is invoked.
    #
    # Raises a HexaPDF::Error if none of the font loaders provides the requested font.
    def load(name, **options)
      cache_key = [name, options]
      cached = @loaded_fonts_cache[cache_key]
      return cached if cached

      each_font_loader do |loader|
        font = loader.call(@document, name, **options)
        # The first loader that knows the font wins; remember its result for later calls.
        return @loaded_fonts_cache[cache_key] = font if font
      end

      raise HexaPDF::Error, "The requested font '#{name}' couldn't be found"
    end

    private

    # :call-seq:
    #   fonts.each_font_loader {|loader| block}
    #
    # Iterates over all font loaders of the configuration option 'font_loader', in the
    # configured order.
    #
    # Raises a HexaPDF::Error if an entry cannot be resolved to a font loader object.
    def each_font_loader
      @document.config['font_loader'].each_index do |index|
        loader = @document.config.constantize('font_loader', index) do
          raise HexaPDF::Error, "Couldn't retrieve font loader ##{index} from configuration"
        end
        yield(loader)
      end
    end

  end

end
@@ -0,0 +1,166 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/error'
35
+
36
module HexaPDF
  module ImageLoader

    # This module is used for loading images in the JPEG format from files or IO streams.
    #
    # The JPEG data itself is not decoded. Only the marker segments are scanned for the image
    # properties that are needed for the PDF image dictionary.
    #
    # See: PDF1.7 s7.4.8, ITU T.81 Annex B
    module JPEG

      # The magic marker that tells us if the file/IO contains an image in JPEG format.
      MAGIC_FILE_MARKER = "\xFF\xD8\xFF".b.freeze

      # The various start-of-frame markers that tell us which kind of JPEG it is. The marker
      # segment itself contains all the information needed for creating the PDF image object.
      #
      # See: ITU T.81 B1.1.3
      SOF_MARKERS = [0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE,
                     0xCF].freeze

      # Adobe uses the marker 0xEE (APP14) for its purposes. We need to use it for determining
      # whether to invert the colors for CMYK/YCCK images or not (Adobe does this...).
      #
      # The marker also lets us distinguish between YCCK and CMYK images. However, we don't
      # actually need this information (and we don't need to set the /ColorTransform value)
      # because if the image has this information it is automatically used.
      ADOBE_MARKER = 0xEE

      # End-of-image marker
      EOI_MARKER = 0xD9

      # Start-of-scan marker
      SOS_MARKER = 0xDA

      # :call-seq:
      #   JPEG.handles?(filename)     -> true or false
      #   JPEG.handles?(io)           -> true or false
      #
      # Returns +true+ if the given file or IO stream can be handled, i.e. if it contains an
      # image in JPEG format.
      #
      # An IO stream is rewound before its first three bytes are read.
      def self.handles?(file_or_io)
        if file_or_io.kind_of?(String)
          File.read(file_or_io, 3, mode: 'rb') == MAGIC_FILE_MARKER
        else
          file_or_io.rewind
          file_or_io.read(3) == MAGIC_FILE_MARKER
        end
      end

      # :call-seq:
      #   JPEG.load(document, filename)    -> image_obj
      #   JPEG.load(document, io)          -> image_obj
      #
      # Creates a PDF image object from the JPEG file or IO stream.
      #
      # The image dictionary is added to the document together with a stream data object that
      # refers to the given file name or IO stream.
      #
      # Raises a HexaPDF::Error if the JPEG data is invalid, truncated or not supported.
      def self.load(document, file_or_io)
        dict = if file_or_io.kind_of?(String)
                 File.open(file_or_io, 'rb') {|io| image_data_from_io(io)}
               else
                 image_data_from_io(file_or_io)
               end
        document.add(dict, stream: HexaPDF::StreamData.new(file_or_io))
      end

      # Returns a hash containing the extracted JPEG image data, i.e. the entries for the PDF
      # image dictionary.
      #
      # The marker segments are scanned until all needed information is available or until the
      # start-of-scan or end-of-image marker is found.
      #
      # Raises a HexaPDF::Error if the data doesn't consist of valid marker segments, if it ends
      # prematurely, if no start-of-frame segment is found or if the number of bits per
      # component is not 8.
      def self.image_data_from_io(io)
        io.seek(2, IO::SEEK_SET) # skip the start-of-image marker

        invert_colors = false
        bits = height = width = components = nil

        loop do
          code0 = io.getbyte
          code1 = io.getbyte

          # B1.1.2 - all markers start with 0xFF
          if code0 != 0xFF
            raise HexaPDF::Error, "Invalid bytes found, expected marker code"
          end

          # B1.1.2 - markers may be preceded by any number of 0xFF fill bytes
          code1 = io.getbyte while code1 == 0xFF

          break if code1 == SOS_MARKER || code1 == EOI_MARKER

          # B1.1.4 - next two bytes are the length of the segment (except for RSTm or TEM markers
          # but those shouldn't appear here); the length includes the two length bytes
          length = read_bytes(io, 2).unpack('n').first

          if code1 == ADOBE_MARKER # Adobe apps invert the colors when using CMYK color space
            invert_colors = true
            io.seek(length - 2, IO::SEEK_CUR)
            next
          elsif !SOF_MARKERS.include?(code1)
            io.seek(length - 2, IO::SEEK_CUR)
            next
          end

          bits, height, width, components = read_bytes(io, 6).unpack('CnnC')
          io.seek(length - 2 - 6, IO::SEEK_CUR)

          # short-cut loop if we have all needed information: only images with four components
          # need the information whether the Adobe marker is present
          break if components != 4 || invert_colors
        end

        if bits.nil?
          raise HexaPDF::Error, "No start-of-frame marker found in JPEG data"
        end

        # PDF1.7 s8.9.5.1
        if bits != 8
          raise HexaPDF::Error, "Unsupported number of bits per component: #{bits}"
        end

        color_space = case components
                      when 1 then :DeviceGray
                      when 3 then :DeviceRGB
                      when 4 then :DeviceCMYK
                      end

        dict = {
          Type: :XObject,
          Subtype: :Image,
          Width: width,
          Height: height,
          ColorSpace: color_space,
          BitsPerComponent: bits,
          Filter: :DCTDecode,
        }
        if invert_colors && color_space == :DeviceCMYK
          dict[:Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
        end

        dict
      end
      private_class_method :image_data_from_io

      # Reads exactly +count+ bytes from the IO and returns them.
      #
      # Raises a HexaPDF::Error if fewer bytes are available, i.e. if the JPEG data is truncated.
      def self.read_bytes(io, count)
        data = io.read(count)
        if data.nil? || data.bytesize < count
          raise HexaPDF::Error, "Unexpected end of JPEG data"
        end
        data
      end
      private_class_method :read_bytes

    end

  end
end