hexapdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,494 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'digest/md5'
35
+ require 'hexapdf/error'
36
+ require 'hexapdf/dictionary'
37
+ require 'hexapdf/stream'
38
+
39
+ module HexaPDF
40
+ module Encryption
41
+
42
+ # Base class for all encryption dictionaries.
43
+ #
44
+ # Contains entries common to all encryption dictionaries. If a specific security handler
45
+ # needs further fields it should derive a new subclass and add the new fields there.
46
+ #
47
+ # See: PDF1.7 s7.6.1
48
+ class EncryptionDictionary < Dictionary
49
+
50
+ define_field :Filter, type: Symbol, required: true
51
+ define_field :SubFilter, type: Symbol, version: '1.3'
52
+ define_field :V, type: Integer, required: true
53
+ define_field :Lenth, type: Integer, default: 40, version: '1.4'
54
+ define_field :CF, type: Dictionary, version: '1.5'
55
+ define_field :StmF, type: Symbol, default: :Identity, version: '1.5'
56
+ define_field :StrF, type: Symbol, default: :Identity, version: '1.5'
57
+ define_field :EFF, type: Symbol, version: '1.6'
58
+
59
+ private
60
+
61
+ # Ensures that the encryption dictionary's content is valid.
62
+ def perform_validation
63
+ super
64
+ unless [1, 2, 4, 5].include?(value[:V])
65
+ yield("Value of /V is not one of 1, 2, 4 or 5", false)
66
+ end
67
+ if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
68
+ value[:Length] > 128 || value[:Length] % 8 != 0)
69
+ yield("Invalid value for /Length field when /V is 2", false)
70
+ end
71
+ end
72
+
73
+ end
74
+
75
+ # Base class for all security handlers.
76
+ #
77
+ # == Creating SecurityHandler Instances
78
+ #
79
+ # The base class provides two class methods for this:
80
+ #
81
+ # * The method ::set_up_encryption is used when a security handler instance should be created
82
+ # that populates the document's encryption dictionary.
83
+ #
84
+ # * The method ::set_up_decryption is used when a security handler should be created from the
85
+ # document's encryption dictionary.
86
+ #
87
+ # Security handlers could also be created with the ::new method but this is discouraged because
88
+ # the above methods provide the correct handling in both cases.
89
+ #
90
+ #
91
+ # == Using SecurityHandler Instances
92
+ #
93
+ # The SecurityHandler base class provides the methods for decrypting an indirect object and for
94
+ # encrypting strings and streams:
95
+ #
96
+ # * #decrypt
97
+ # * #encrypt_string
98
+ # * #encrypt_stream
99
+ #
100
+ # How the decryption/encryption key is actually computed is deferred to a sub class.
101
+ #
102
+ # Additionally, the #encryption_key_valid? method can be used to check whether the
103
+ # SecurityHandler instance is built from/built for the current version of the encryption
104
+ # dictionary.
105
+ #
106
+ #
107
+ # == Implementing a SecurityHandler Class
108
+ #
109
+ # Each security handler has to implement the following methods:
110
+ #
111
+ # prepare_encryption(**options)::
112
+ # Prepares the security handler for use in encrypting the document.
113
+ #
114
+ # See the #set_up_encryption documentation for information on which options are passed on to
115
+ # this method.
116
+ #
117
+ # Returns the encryption key as well as the names of the string, stream and embedded file
118
+ # algorithms.
119
+ #
120
+ # prepare_decryption(**options)::
121
+ # Prepares the security handler for decryption by using the information from the document's
122
+ # encryption dictionary as well as the provided arguments.
123
+ #
124
+ # See the #set_up_decryption documentation for additional information.
125
+ #
126
+ # Returns the encryption key that should be used for decryption.
127
+ #
128
+ # Additionally, the following methods can be overridden to provide a more specific meaning:
129
+ #
130
+ # encryption_dictionary_class::
131
+ # Returns the class that is used for the encryption dictionary. Should be derived from the
132
+ # EncryptionDictionary class.
133
+ class SecurityHandler
134
+
135
+ # :call-seq:
136
+ # SecurityHandler.set_up_encryption(document, handler_name, **options) -> handler
137
+ #
138
+ # Sets up and returns the security handler with the specified name for the document and
139
+ # modifies the document's encryption dictionary accordingly.
140
+ #
141
+ # The +options+ can contain any encryption options for the specific security handler
142
+ # and the common encryption options.
143
+ #
144
+ # See: #set_up_encryption (for the common encryption options).
145
+ def self.set_up_encryption(document, handler_name, **options)
146
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.filter_map', handler_name) # look the name up in the filter map first
147
+ if handler.nil?
148
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.sub_filter_map', handler_name) # fall back to the sub filter map
149
+ end
150
+ if handler.nil?
151
+ raise HexaPDF::EncryptionError, "Could not find the specified security handler"
152
+ end
153
+
154
+ handler = handler.new(document) # from here on +handler+ is an instance created for this document
155
+ document.trailer[:Encrypt] = handler.set_up_encryption(**options) # store the returned encryption dictionary in the trailer
156
+ handler.freeze # the frozen handler is the return value
157
+ end
158
+
159
+ # :call-seq:
160
+ # SecurityHandler.set_up_decryption(document, **options) -> handler
161
+ #
162
+ # Sets up and returns the security handler that is used for decrypting the given document and
163
+ # modifies the document's object loader so that the decryption is handled automatically behind
164
+ # the scenes.
165
+ #
166
+ # The +options+ have to contain decryption options specific to the security handler
167
+ # that is used by the PDF file.
168
+ #
169
+ # See: #set_up_decryption
170
+ def self.set_up_decryption(document, **options)
171
+ dict = document.trailer[:Encrypt]
172
+ if dict.nil?
173
+ raise HexaPDF::EncryptionError, "No /Encrypt dictionary found"
174
+ end
175
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.filter_map', dict[:Filter]) # try the /Filter name first
176
+ if handler.nil?
177
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.sub_filter_map', dict[:SubFilter]) # fall back to the /SubFilter name
178
+ end
179
+ if handler.nil?
180
+ raise HexaPDF::EncryptionError, "Could not find a suitable security handler"
181
+ end
182
+
183
+ handler = handler.new(document) # from here on +handler+ is an instance created for this document
184
+ document.trailer[:Encrypt] = handler.set_up_decryption(dict, **options) # replace the trailer entry with the dictionary returned by the handler
185
+ document.revisions.each do |r|
186
+ loader = r.loader # keep the existing loader so the new lambda can delegate to it
187
+ r.loader = lambda do |xref_entry|
188
+ obj = loader.call(xref_entry)
189
+ xref_entry.compressed? ? obj : handler.decrypt(obj) # objects of compressed xref entries are returned as loaded, all others are decrypted
190
+ end
191
+ end
192
+
193
+ handler.freeze # the frozen handler is the return value
194
+ end
195
+
196
+
197
+ # A hash containing information about the used encryption. This information is only
198
+ # available once the security handler has been set up for decryption or encryption.
199
+ #
200
+ # Available keys:
201
+ #
202
+ # :version::
203
+ # The version of the security handler in use.
204
+ # :string_algorithm::
205
+ # The algorithm used for encrypting/decrypting strings.
206
+ # :stream_algorithm::
207
+ # The algorithm used for encrypting/decrypting streams.
208
+ # :embedded_file_algorithm::
209
+ # The algorithm used for encrypting/decrypting embedded files.
210
+ # :key_length::
211
+ # The key length in bits.
212
+ attr_reader :encryption_details
213
+
214
+ # Creates a new SecurityHandler for the given document.
215
+ def initialize(document)
216
+ @document = document
217
+ @encrypt_dict_hash = nil # assigned later by #set_up_encryption or #set_up_decryption
218
+ @encryption_details = {} # exposed through the encryption_details reader
219
+ end
220
+
221
+ # Checks if the encryption key computed by this security handler is derived from the
222
+ # document's encryption dictionary.
223
+ def encryption_key_valid?
224
+ document.unwrap(document.trailer[:Encrypt]).hash == @encrypt_dict_hash # compare the current dictionary's hash with the one recorded during set-up
225
+ end
226
+
227
+ # Decrypts the strings and the possibly attached stream of the given indirect object in
228
+ # place.
229
+ #
230
+ # See: PDF1.7 s7.6.2
231
+ def decrypt(obj)
232
+ return obj if obj == document.trailer[:Encrypt] || obj.type == :XRef # the encryption dictionary and XRef objects are returned untouched
233
+
234
+ key = object_key(obj.oid, obj.gen, string_algorithm) # key for this object's strings
235
+ each_string_in_object(obj.value) do |str|
236
+ next if str.empty? # empty strings are skipped
237
+ str.replace(string_algorithm.decrypt(key, str)) # decrypt in place
238
+ end
239
+
240
+ if obj.kind_of?(HexaPDF::Stream)
241
+ unless string_algorithm == stream_algorithm # the string key is reused when both algorithms are the same
242
+ key = object_key(obj.oid, obj.gen, stream_algorithm)
243
+ end
244
+ obj.raw_stream.filter.unshift(:Encryption) # prepend an :Encryption filter ...
245
+ obj.raw_stream.decode_parms.unshift(key: key, algorithm: stream_algorithm) # ... together with its matching decode parameters
246
+ end
247
+
248
+ obj
249
+ end
250
+
251
+ # Returns the encrypted version of the string that resides in the given indirect object.
252
+ #
253
+ # See: PDF1.7 s7.6.2
254
+ def encrypt_string(str, obj)
255
+ return str if str.empty? || obj == document.trailer[:Encrypt] || obj.type == :XRef # returned unchanged: empty strings and strings of the encryption dictionary or of XRef objects
256
+
257
+ key = object_key(obj.oid, obj.gen, string_algorithm)
258
+ string_algorithm.encrypt(key, str)
259
+ end
260
+
261
+ # Returns a Fiber that encrypts the contents of the given stream object.
262
+ def encrypt_stream(obj)
263
+ return obj.stream_encoder if obj.type == :XRef # XRef objects get the stream encoder without any encryption arguments
264
+
265
+ key = object_key(obj.oid, obj.gen, stream_algorithm)
266
+ obj.stream_encoder(:Encryption, key: key, algorithm: stream_algorithm)
267
+ end
268
+
269
+ # Computes the encryption key and sets up the algorithms for encrypting the document based on
270
+ # the given options, and returns the corresponding encryption dictionary.
271
+ #
272
+ # The security handler specific +options+ as well as the +algorithm+ argument are passed on to
273
+ # the #prepare_encryption method.
274
+ #
275
+ # Options for all security handlers:
276
+ #
277
+ # key_length::
278
+ # The key length in bits. Possible values are in the range of 40 to 128 and 256 and it
279
+ # needs to be divisible by 8.
280
+ #
281
+ # algorithm::
282
+ # The encryption algorithm. Possible values are :arc4 for ARC4 encryption with key lengths
283
+ # of 40 to 128 bit or :aes for AES encryption with key lengths of 128 or 256 bit.
284
+ #
285
+ # force_V4::
286
+ # Forces the use of protocol version 4 when key_length=128 and algorithm=:arc4.
287
+ #
288
+ # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
289
+ def set_up_encryption(key_length: 128, algorithm: :aes, force_V4: false, **options)
290
+ @dict = document.wrap({}, type: encryption_dictionary_class)
291
+
292
+ dict[:V] = # the /V value is chosen from the key length
293
+ case key_length
294
+ when 40
295
+ 1
296
+ when 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
297
+ 2
298
+ when 128
299
+ (algorithm == :aes || force_V4 ? 4 : 2) # 128 bit: version 4 for AES or when forced, otherwise version 2
300
+ when 256
301
+ 5
302
+ else
303
+ raise(HexaPDF::UnsupportedEncryptionError,
304
+ "Invalid key length #{key_length} specified")
305
+ end
306
+ dict[:Length] = key_length if dict[:V] == 2 # /Length is only written for version 2
307
+
308
+ if ![:aes, :arc4].include?(algorithm)
309
+ raise(HexaPDF::UnsupportedEncryptionError,
310
+ "Unsupported encryption algorithm: #{algorithm}")
311
+ elsif key_length < 128 && algorithm == :aes
312
+ raise(HexaPDF::UnsupportedEncryptionError,
313
+ "AES algorithm needs a key length of 128 or 256 bit")
314
+ elsif key_length == 256 && algorithm == :arc4
315
+ raise(HexaPDF::UnsupportedEncryptionError,
316
+ "ARC4 algorithm can only be used with key lengths between 40 and 128 bit")
317
+ end
318
+
319
+ result = prepare_encryption(algorithm: algorithm, **options) # handler-specific preparation; its result is splatted into set_up_security_handler
320
+ @encrypt_dict_hash = document.unwrap(dict).hash # recorded so #encryption_key_valid? can compare against it later
321
+ set_up_security_handler(*result)
322
+ @dict # return value: the wrapped encryption dictionary
323
+ end
324
+
325
# Sets up the security handler for decrypting the document, using the given encryption
# dictionary, and returns the wrapped dictionary.
#
# The security handler specific +options+ are passed on to the #prepare_decryption method.
#
# For versions 1 and 2 ARC4 is used for strings, streams and embedded files. For versions 4
# and 5 the algorithms are read from the crypt filters referenced by /StrF, /StmF and /EFF; a
# missing crypt filter means the identity algorithm, and without an /EFF entry embedded files
# use the stream algorithm.
#
# Raises HexaPDF::UnsupportedEncryptionError for an unknown version or crypt filter method.
#
# See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
def set_up_decryption(dictionary, **options)
  @dict = document.wrap(dictionary, type: encryption_dictionary_class)

  version = dict[:V]
  if version == 1 || version == 2
    strf = stmf = eff = :arc4
  elsif version == 4 || version == 5
    crypt_filters = dict[:CF]
    strf, stmf, eff = [:StrF, :StmF, :EFF].map do |entry|
      filter = crypt_filters && crypt_filters[dict[entry]]
      next :identity unless filter

      cfm = filter[:CFM]
      {V2: :arc4, AESV2: :aes, AESV3: :aes, None: :identity}.fetch(cfm) do
        raise(HexaPDF::UnsupportedEncryptionError,
              "Unsupported encryption method: #{cfm}")
      end
    end
    eff = stmf unless dict[:EFF]
  else
    raise HexaPDF::UnsupportedEncryptionError, "Unsupported encryption version #{version}"
  end

  key = prepare_decryption(**options)
  set_up_security_handler(key, strf, stmf, eff)
  @encrypt_dict_hash = document.unwrap(@dict).hash

  @dict
end
362
+
363
+ private
364
+
365
# The PDF document this security handler is associated with.
#
# Subclasses should go through this reader instead of touching the instance variable.
def document
  @document
end
371
+
372
# The encryption dictionary currently used by this security handler.
#
# Subclasses should read and set encryption dictionary values through this reader.
def dict
  @dict
end
378
+
379
# The key that is used for encrypting and decrypting.
#
# It is assigned by #set_up_security_handler and therefore only available once encryption or
# decryption has been set up.
def encryption_key
  @encryption_key
end
385
+
386
# The algorithm class that encrypts and decrypts strings.
#
# It is assigned by #set_up_security_handler and therefore only available once encryption or
# decryption has been set up.
def string_algorithm
  @string_algorithm
end
392
+
393
# The algorithm class that encrypts and decrypts streams.
#
# It is assigned by #set_up_security_handler and therefore only available once encryption or
# decryption has been set up.
def stream_algorithm
  @stream_algorithm
end
399
+
400
# The algorithm class that encrypts and decrypts embedded files.
#
# It is assigned by #set_up_security_handler and therefore only available once encryption or
# decryption has been set up.
def embedded_file_algorithm
  @embedded_file_algorithm
end
406
+
407
# Stores everything that is needed so that encryption/decryption works correctly.
#
# +key+ is the encryption key; +strf+, +stmf+ and +eff+ name the algorithms for strings,
# streams and embedded files. Each name is resolved by calling the method
# "<name>_algorithm" (e.g. :aes resolves through #aes_algorithm).
#
# The assigned values can be retrieved via the #encryption_key, #string_algorithm,
# #stream_algorithm and #embedded_file_algorithm methods. Additionally, a summary hash with
# the version, the algorithm names and the key length in bits is stored.
def set_up_security_handler(key, strf, stmf, eff)
  resolve = ->(name) { send("#{name}_algorithm") }

  @encryption_key = key
  @string_algorithm = resolve.call(strf)
  @stream_algorithm = resolve.call(stmf)
  @embedded_file_algorithm = resolve.call(eff)

  details = {}
  details[:version] = dict[:V]
  details[:string_algorithm] = strf
  details[:stream_algorithm] = stmf
  details[:embedded_file_algorithm] = eff
  details[:key_length] = key_length * 8 # #key_length is in bytes
  @encryption_details = details
end
424
+
425
# Returns the class that is used for ARC4 encryption.
#
# The class is resolved from the 'encryption.arc4' global configuration entry on first use
# and cached afterwards.
def arc4_algorithm
  return @arc4_algorithm if @arc4_algorithm

  @arc4_algorithm = HexaPDF::GlobalConfiguration.constantize('encryption.arc4')
end
429
+
430
# Returns the class that is used for AES encryption.
#
# The class is resolved from the 'encryption.aes' global configuration entry on first use
# and cached afterwards.
def aes_algorithm
  return @aes_algorithm if @aes_algorithm

  @aes_algorithm = HexaPDF::GlobalConfiguration.constantize('encryption.aes')
end
434
+
435
# Returns the class implementing the identity algorithm, i.e. the algorithm that passes
# data back as is, without encrypting or decrypting it.
def identity_algorithm
  Identity
end
440
+
441
# Computes the key for encrypting/decrypting the indirect object with the given object
# number +oid+ and generation number +gen+, using the given algorithm.
#
# For version 5 the encryption key itself is returned. Otherwise the low-order three bytes of
# +oid+ and the low-order two bytes of +gen+ are appended to the encryption key (plus the
# bytes "sAlT" for AES based algorithms), and the first min(key length + 5, 16) bytes of the
# MD5 digest of the result are returned.
#
# See: PDF1.7 s7.6.2 (algorithm 1), PDF2.0 s7.6.2.2 (algorithm 1.A)
def object_key(oid, gen, algorithm)
  return encryption_key if dict[:V] == 5

  # 'VXv': 32-bit little-endian, step back one byte, 16-bit little-endian => 3 + 2 bytes
  material = encryption_key + [oid, gen].pack('VXv')
  material << "sAlT" if algorithm.ancestors.include?(AES)
  Digest::MD5.digest(material)[0, [key_length + 5, 16].min]
end
453
+
454
# Returns the length of the encryption key in bytes, derived from the version (/V) of the
# encryption dictionary. Returns +nil+ for any other version.
#
# See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
def key_length
  version = dict[:V]
  if version == 1
    5
  elsif version == 2
    dict[:Length] / 8 # /Length is given in bits
  elsif version == 4
    16 # PDF2.0 s7.6.1 specifies that a /V of 4 is equal to length of 128bit
  elsif version == 5
    32 # PDF2.0 s7.6.1 specifies that a /V of 5 is equal to length of 256bit
  end
end
465
+
466
# Returns the class that is used for wrapping the encryption dictionary, i.e. the +type+
# that is passed to the document's wrap method when setting up encryption or decryption.
def encryption_dictionary_class
  EncryptionDictionary
end
470
+
471
# Returns +n+ random bytes, as generated by the configured AES algorithm class.
def random_bytes(n)
  generator = aes_algorithm
  generator.random_bytes(n)
end
475
+
476
# Yields every string found in the given object, descending into hash values and array
# elements.
#
# Note: Decryption happens directly after parsing and loading an object, before it can be
# touched by anything else. Therefore only the basic data structures (Hash, Array, String)
# need to be handled; everything else is ignored.
def each_string_in_object(obj, &block) # :yields: str
  if obj.kind_of?(Hash)
    obj.each_value {|value| each_string_in_object(value, &block)}
  elsif obj.kind_of?(Array)
    obj.each {|element| each_string_in_object(element, &block)}
  elsif obj.kind_of?(String)
    yield(obj)
  end
end
490
+
491
+ end
492
+
493
+ end
494
+ end