hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,72 @@
1
+ # ## Standard PDF Fonts
2
+ #
3
+ # This example shows all characters that are available in the standard 14 PDF
4
+ # fonts.
5
+ #
6
+ # The standard 14 PDF fonts are those fonts that all PDF reading/viewing
7
+ # applications need to support. They only provide a limited set of glyphs but
8
+ # have the advantage that they don't need to be embedded.
9
+ #
10
+ # Usage:
11
+ # : `ruby standard_pdf_fonts.rb`
12
+ #
13
+
14
+ require 'hexapdf'
15
+
16
+ def base_encoding_for_font(font)
17
+ case font.font_name
18
+ when 'Symbol', 'ZapfDingbats'
19
+ font.encoding
20
+ else
21
+ HexaPDF::Font::Encoding.for_name(:WinAnsiEncoding)
22
+ end
23
+ end
24
+
25
+ doc = HexaPDF::Document.new
26
+
27
+ HexaPDF::FontLoader::Standard14::MAPPING.each do |font_name, mapping|
28
+ mapping.each_key do |variant|
29
+ canvas = doc.pages.add_page.canvas
30
+ canvas.font("Helvetica", size: 14)
31
+ canvas.text("#{font_name} #{variant != :none ? variant : ''}", at: [100, 800])
32
+
33
+ canvas.font(font_name, size: 14, variant: variant)
34
+ canvas.leading = 20
35
+ font = canvas.font
36
+ encoding = base_encoding_for_font(font.wrapped_font)
37
+ used_glyphs = []
38
+
39
+ # Showing the glyphs of the WinAnsi or built-in encoding
40
+ canvas.move_text_cursor(offset: [100, 750])
41
+ (2..15).each do |y|
42
+ data = []
43
+ (0..15).each do |x|
44
+ code = y * 16 + x
45
+ glyph = font.glyph(encoding.name(code)) rescue font.glyph(:space)
46
+ used_glyphs << glyph.name
47
+ data << glyph << -(2000 - glyph.width)
48
+ end
49
+ canvas.show_glyphs(data)
50
+ canvas.move_text_cursor
51
+ end
52
+
53
+ # Showing the remaining glyphs
54
+ canvas.move_text_cursor(offset: [0, -40], absolute: false)
55
+ glyphs = font.wrapped_font.metrics.character_metrics.keys.select do |k|
56
+ Symbol === k
57
+ end.sort - used_glyphs
58
+ canvas.font(font_name, size: 14, variant: variant, custom_encoding: true)
59
+ font = canvas.font
60
+ glyphs.each_slice(16).with_index do |slice, index|
61
+ data = []
62
+ slice.each do |name|
63
+ glyph = font.glyph(name)
64
+ data << glyph << -(2000 - glyph.width)
65
+ end
66
+ canvas.show_glyphs(data)
67
+ canvas.move_text_cursor
68
+ end
69
+ end
70
+ end
71
+
72
+ doc.write("standard_pdf_fonts.pdf", optimize: true)
@@ -0,0 +1,45 @@
1
+ # ## TrueType Fonts
2
+ #
3
+ # This example displays all glyphs of a TrueType font and shows that using a
4
+ # TrueType font with HexaPDF is very similar to using one of the standard PDF
5
+ # fonts.
6
+ #
7
+ # Before a TrueType font can be used, HexaPDF needs to be made aware of it. This
8
+ # is done by setting the configuration option 'font.map'.
9
+ #
10
+ # Once that is done the [HexaPDF::Content::Canvas#font] method can be used as
11
+ # usual.
12
+ #
13
+ # Usage:
14
+ # : `ruby truetype.rb [FONT_FILE]`
15
+ #
16
+
17
+ require 'hexapdf'
18
+
19
+ doc = HexaPDF::Document.new
20
+ doc.config['font.on_missing_glyph'] = ->(n,f) { f.missing_glyph_id }
21
+ doc.config['font.map'] = {
22
+ 'myfont' => {none: ARGV.shift || File.join(__dir__, '../test/data/fonts/Ubuntu-Title.ttf')}
23
+ }
24
+
25
+ wrapper = doc.fonts.load('myfont')
26
+ max_gid = wrapper.wrapped_font[:maxp].num_glyphs
27
+
28
+ 255.times do |page|
29
+ break unless page * 256 < wrapper.wrapped_font[:maxp].num_glyphs
30
+ canvas = doc.pages.add_page.canvas
31
+ canvas.font("Helvetica", size: 10)
32
+ canvas.text("Font: #{wrapper.wrapped_font.full_name}", at: [50, 825])
33
+
34
+ canvas.font("myfont", size: 15)
35
+ 16.times do |y|
36
+ canvas.move_text_cursor(offset: [50, 800 - y * 50], absolute: true)
37
+ canvas.show_glyphs((0..15).map do |i|
38
+ gid = page * 256 + y * 16 + i
39
+ glyph = wrapper.glyph(gid)
40
+ gid > max_gid ? [] : [glyph, -(2000 - glyph.width)]
41
+ end.flatten!)
42
+ end
43
+ end
44
+
45
+ doc.write("truetype.pdf", optimize: true)
@@ -0,0 +1,128 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Extracts files from a PDF file.
40
+ #
41
+ # See: HexaPDF::Type::EmbeddedFile
42
+ class Extract < CmdParse::Command
43
+
44
+ def initialize #:nodoc:
45
+ super('extract', takes_commands: false)
46
+ short_desc("Extract files from a PDF file")
47
+ long_desc(<<-EOF.gsub!(/^ */, ''))
48
+ This command extracts files embedded in a PDF file. If the option --indices is not given,
49
+ the available files are listed with their names and indices. The --indices option can then
50
+ be used to extract one or more files.
51
+ EOF
52
+ options.on("--indices a,b,c", "-i a,b,c,...", Array,
53
+ "The indices of the files that should be extracted. Use 0 to extract " \
54
+ "all files.") do |indices|
55
+ @indices = indices.map(&:to_i)
56
+ end
57
+ options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
58
+ "standard locations (default: false)") do |search|
59
+ @search = search
60
+ end
61
+ options.on("--password PASSWORD", "-p", String,
62
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
63
+ @password = (pwd == '-' ? command_parser.read_password : pwd)
64
+ end
65
+ @indices = []
66
+ @password = ''
67
+ @search = false
68
+ end
69
+
70
+ def execute(file) #:nodoc:
71
+ HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
72
+ if @indices.empty?
73
+ list_files(doc)
74
+ else
75
+ extract_files(doc)
76
+ end
77
+ end
78
+ rescue HexaPDF::Error => e
79
+ $stderr.puts "Error while processing the PDF file: #{e.message}"
80
+ exit(1)
81
+ end
82
+
83
+ private
84
+
85
+ # Outputs the list of files embedded in the given PDF document.
86
+ def list_files(doc)
87
+ each_file(doc) do |obj, index|
88
+ $stdout.write(sprintf("%4i: %s", index + 1, obj.path))
89
+ ef_stream = obj.embedded_file_stream
90
+ if (params = ef_stream[:Params]) && !params.empty?
91
+ data = []
92
+ data << "size: #{params[:Size]}" if params.key?(:Size)
93
+ data << "md5: #{params[:CheckSum].unpack('H*').first}" if params.key?(:CheckSum)
94
+ data << "ctime: #{params[:CreationDate]}" if params.key?(:CreationDate)
95
+ data << "mtime: #{params[:ModDate]}" if params.key?(:ModDate)
96
+ $stdout.write(" (#{data.join(', ')})")
97
+ end
98
+ $stdout.puts
99
+ $stdout.puts(" #{obj[:Desc]}") if obj[:Desc] && !obj[:Desc].empty?
100
+ end
101
+ end
102
+
103
+ # Extracts the files with the given indices.
104
+ def extract_files(doc)
105
+ each_file(doc) do |obj, index|
106
+ next unless @indices.include?(index + 1) || @indices.include?(0)
107
+ if File.exist?(obj.path)
108
+ raise HexaPDF::Error, "Output file #{obj.path} already exists, not overwriting"
109
+ end
110
+ puts "Extracting #{obj.path}..."
111
+ File.open(obj.path, 'wb') do |file|
112
+ fiber = obj.embedded_file_stream.stream_decoder
113
+ while fiber.alive? && (data = fiber.resume)
114
+ file << data
115
+ end
116
+ end
117
+ end
118
+ end
119
+
120
+ # Iterates over all embedded files.
121
+ def each_file(doc, &block) # :yields: obj, index
122
+ doc.utils.each_file(search: @search).select(&:embedded_file?).each_with_index(&block)
123
+ end
124
+
125
+ end
126
+
127
+ end
128
+ end
@@ -0,0 +1,121 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Outputs various bits of information about PDF files:
40
+ #
41
+ # * The entries in the trailer's /Info dictionary
42
+ # * Encryption information from the trailer's /Encrypt dictionary
43
+ # * The number of pages
44
+ # * The used PDF version
45
+ #
46
+ # See: HexaPDF::Type::Info, HexaPDF::Encryption::SecurityHandler
47
+ class Info < CmdParse::Command
48
+
49
+ def initialize #:nodoc:
50
+ super('info', takes_commands: false)
51
+ short_desc("Show document information")
52
+ long_desc(<<-EOF.gsub!(/^ */, ''))
53
+ This command extracts information from the Info dictionary of a PDF file as well
54
+ as some other useful information like the used PDF version and encryption information.
55
+ EOF
56
+ options.on("--password PASSWORD", "-p", String,
57
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
58
+ @password = (pwd == '-' ? command_parser.read_password : pwd)
59
+ end
60
+ @password = ''
61
+ @auto_decrypt = true
62
+ end
63
+
64
+ def execute(file) #:nodoc:
65
+ output_info(file)
66
+ end
67
+
68
+ private
69
+
70
+ INFO_KEYS = [:Title, :Author, :Subject, :Keywords, :Creator, :Producer, #:nodoc:
71
+ :CreationDate, :ModDate]
72
+
73
+ COLUMN_WIDTH = 20 #:nodoc:
74
+
75
+ def output_info(file) # :nodoc:
76
+ options = {decryption_opts: {password: @password},
77
+ config: {'document.auto_decrypt' => @auto_decrypt}}
78
+ HexaPDF::Document.open(file, options) do |doc|
79
+ INFO_KEYS.each do |name|
80
+ next unless doc.trailer.info.key?(name)
81
+ output_line(name.to_s, doc.trailer.info[name].to_s)
82
+ end if @auto_decrypt
83
+
84
+ if doc.encrypted? && @auto_decrypt
85
+ details = doc.security_handler.encryption_details
86
+ data = "yes (version: #{details[:version]}, key length: #{details[:key_length]}bits)"
87
+ output_line("Encrypted", data)
88
+ output_line(" String algorithm", details[:string_algorithm].to_s)
89
+ output_line(" Stream algorithm", details[:stream_algorithm].to_s)
90
+ output_line(" EFF algorithm", details[:embedded_file_algorithm].to_s)
91
+ if doc.security_handler.respond_to?(:permissions)
92
+ output_line(" Permissions", doc.security_handler.permissions.join(", "))
93
+ end
94
+ elsif doc.encrypted?
95
+ output_line("Encrypted", "yes (no or wrong password given)")
96
+ end
97
+
98
+ output_line("Pages", doc.pages.page_count.to_s)
99
+ output_line("Version", doc.version)
100
+ end
101
+ rescue HexaPDF::EncryptionError => e
102
+ if @auto_decrypt
103
+ @auto_decrypt = false
104
+ retry
105
+ else
106
+ $stderr.puts "Error while decrypting the PDF file: #{e.message}"
107
+ exit(1)
108
+ end
109
+ rescue HexaPDF::Error => e
110
+ $stderr.puts "Error while processing the PDF file: #{e.message}"
111
+ exit(1)
112
+ end
113
+
114
+ def output_line(header, text) #:nodoc:
115
+ puts((header + ":").ljust(COLUMN_WIDTH) << text)
116
+ end
117
+
118
+ end
119
+
120
+ end
121
+ end
@@ -0,0 +1,157 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Shows the internal structure of a PDF file.
40
# Shows the internal structure of a PDF file.
#
# Exactly one display mode is active per invocation (the catalog by default); each display
# option overwrites the mode chosen by an earlier one. The selected mode is stored in @exec
# and dispatched to the matching private +do_*+ method by #execute.
class Inspect < CmdParse::Command

  def initialize #:nodoc:
    super('inspect', takes_commands: false)
    short_desc("Dig into the internal structure of a PDF file")
    # Non-destructive gsub: the bang variant returns nil when nothing was replaced and must
    # not be chained.
    long_desc(<<-EOF.gsub(/^ */, ''))
      Inspects a PDF file for debugging or testing purposes. This command is useful when one
      needs to inspect the internal object structure or a stream of a PDF file. A PDF object is
      always shown in the PDF syntax.

      If no option is given, the main PDF object, the catalog, is shown. Otherwise the various,
      mutually exclusive display options define the shown content. If multiple such options are
      specified only the last is respected.
    EOF

    options.on("-t", "--trailer", "Show the trailer dictionary.") do
      @exec = :trailer
    end
    options.on("-c", "--page-count", "Print the number of pages.") do
      @exec = :page_count
    end
    options.on("--pages [PAGES]", "Show the pages with their object and generation numbers " \
               "and their associated content streams. If the optional argument PAGES is " \
               "specified, only the specified pages are listed.") do |range|
      @exec = :pages
      @param = range || '1-e'
    end
    options.on("-o", "--object OID[,GEN]", "Show the object with the given object and " \
               "generation numbers. The generation number defaults to 0 if not given.") do |str|
      @exec = :object
      @param = str
    end
    options.on("-s", "--stream OID[,GEN]", "Show the filtered stream data (add --raw to get " \
               "the raw stream data) of the object with the given object and generation " \
               "numbers. The generation number defaults to 0 if not given.") do |str|
      @exec = :stream
      @param = str
      # Keep a --raw that was given before --stream; otherwise default to filtered output.
      @raw ||= false
    end
    options.on("--raw", "Modifies --stream to show the raw stream data instead of the " \
               "filtered one.") do
      @raw = true
    end

    options.separator("")
    options.on("--password PASSWORD", "-p", String,
               "The password for decryption. Use - for reading from standard input.") do |pwd|
      @password = (pwd == '-' ? command_parser.read_password : pwd)
    end

    @password = nil
    @exec = :catalog
    @param = nil
    @raw = nil
  end

  # Opens +file+ (using @password for decryption) and runs the display mode stored in @exec.
  # A HexaPDF::Error is reported on standard error and terminates the process with status 1.
  def execute(file) #:nodoc:
    HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
      send("do_#{@exec}", doc)
    end
  rescue HexaPDF::Error => e
    $stderr.puts "Error while processing the PDF file: #{e.message}"
    exit(1)
  end

  private

  # Prints the serialized document catalog.
  def do_catalog(doc) #:nodoc:
    puts HexaPDF::Serializer.new.serialize(doc.catalog)
  end

  # Prints the serialized trailer dictionary.
  def do_trailer(doc) #:nodoc:
    puts HexaPDF::Serializer.new.serialize(doc.trailer)
  end

  # Prints the number of pages.
  def do_page_count(doc) #:nodoc:
    puts doc.pages.page_count
  end

  # Prints one line per selected page in the form "page N (oid,gen): oid,gen oid,gen ...",
  # where the trailing list names the page's content streams.
  def do_pages(doc) #:nodoc:
    pages = command_parser.parse_pages_specification(@param, doc.pages.page_count)
    pages.each do |index, _|
      page = doc.pages.page(index)
      # Separate the references with a space; without a separator the entries of a page with
      # several content streams run together (e.g. "4,05,0") and cannot be told apart.
      contents = Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}" }.join(" ")
      puts "page #{index + 1} (#{page.oid},#{page.gen}): #{contents}"
    end
  end

  # Prints the serialized value of the object referenced by @param. Nothing is printed when
  # the object lookup returns nil; a note is written to standard error when the object also
  # carries stream data.
  def do_object(doc) #:nodoc:
    object = doc.object(pdf_reference_from_string(@param))
    return unless object
    $stderr.puts("Note: Object also has stream data") if object.data.stream
    puts HexaPDF::Serializer.new.serialize(object.value)
  end

  # Writes the stream data of the object referenced by @param to standard output, either raw
  # (@raw set) or decoded. A note is written to standard error when the object is not a
  # stream.
  def do_stream(doc) #:nodoc:
    object = doc.object(pdf_reference_from_string(@param))
    unless object.kind_of?(HexaPDF::Stream)
      $stderr.puts("Note: Object has no stream data")
      return
    end

    source = (@raw ? object.stream_source : object.stream_decoder)
    # The source is resumed chunk by chunk until it is finished or yields nothing.
    while source.alive? && (data = source.resume)
      $stdout.write(data)
    end
  end

  # Parses the given string of the format "oid[,gen]" and returns a PDF reference object.
  #
  # The generation number defaults to 0. A string that does not match the format is reported
  # on standard error and terminates the process with status 1, instead of being silently
  # converted to object number 0 by String#to_i.
  def pdf_reference_from_string(str)
    match = /\A\s*(\d+)(?:\s*,\s*(\d+))?\s*\z/.match(str.to_s)
    unless match
      $stderr.puts "Error: Invalid object reference '#{str}', expected format OID[,GEN]"
      exit(1)
    end
    # A missing generation group is nil, and nil.to_i is 0.
    HexaPDF::Reference.new(match[1].to_i, match[2].to_i)
  end

end
155
+
156
+ end
157
+ end