hexapdf 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (346) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTERS +3 -0
  3. data/LICENSE +26 -0
  4. data/README.md +88 -0
  5. data/Rakefile +121 -0
  6. data/VERSION +1 -0
  7. data/agpl-3.0.txt +661 -0
  8. data/bin/hexapdf +6 -0
  9. data/data/hexapdf/afm/Courier-Bold.afm +342 -0
  10. data/data/hexapdf/afm/Courier-BoldOblique.afm +342 -0
  11. data/data/hexapdf/afm/Courier-Oblique.afm +342 -0
  12. data/data/hexapdf/afm/Courier.afm +342 -0
  13. data/data/hexapdf/afm/Helvetica-Bold.afm +2827 -0
  14. data/data/hexapdf/afm/Helvetica-BoldOblique.afm +2827 -0
  15. data/data/hexapdf/afm/Helvetica-Oblique.afm +3051 -0
  16. data/data/hexapdf/afm/Helvetica.afm +3051 -0
  17. data/data/hexapdf/afm/MustRead.html +1 -0
  18. data/data/hexapdf/afm/Symbol.afm +213 -0
  19. data/data/hexapdf/afm/Times-Bold.afm +2588 -0
  20. data/data/hexapdf/afm/Times-BoldItalic.afm +2384 -0
  21. data/data/hexapdf/afm/Times-Italic.afm +2667 -0
  22. data/data/hexapdf/afm/Times-Roman.afm +2419 -0
  23. data/data/hexapdf/afm/ZapfDingbats.afm +225 -0
  24. data/data/hexapdf/encoding/glyphlist.txt +4305 -0
  25. data/data/hexapdf/encoding/zapfdingbats.txt +225 -0
  26. data/examples/arc.rb +50 -0
  27. data/examples/graphics.rb +274 -0
  28. data/examples/hello_world.rb +16 -0
  29. data/examples/machupicchu.jpg +0 -0
  30. data/examples/merging.rb +24 -0
  31. data/examples/optimizing.rb +20 -0
  32. data/examples/show_char_bboxes.rb +55 -0
  33. data/examples/standard_pdf_fonts.rb +72 -0
  34. data/examples/truetype.rb +45 -0
  35. data/lib/hexapdf/cli/extract.rb +128 -0
  36. data/lib/hexapdf/cli/info.rb +121 -0
  37. data/lib/hexapdf/cli/inspect.rb +157 -0
  38. data/lib/hexapdf/cli/modify.rb +218 -0
  39. data/lib/hexapdf/cli.rb +121 -0
  40. data/lib/hexapdf/configuration.rb +392 -0
  41. data/lib/hexapdf/content/canvas.rb +1974 -0
  42. data/lib/hexapdf/content/color_space.rb +364 -0
  43. data/lib/hexapdf/content/graphic_object/arc.rb +267 -0
  44. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +208 -0
  45. data/lib/hexapdf/content/graphic_object/solid_arc.rb +173 -0
  46. data/lib/hexapdf/content/graphic_object.rb +81 -0
  47. data/lib/hexapdf/content/graphics_state.rb +579 -0
  48. data/lib/hexapdf/content/operator.rb +1072 -0
  49. data/lib/hexapdf/content/parser.rb +204 -0
  50. data/lib/hexapdf/content/processor.rb +451 -0
  51. data/lib/hexapdf/content/transformation_matrix.rb +172 -0
  52. data/lib/hexapdf/content.rb +47 -0
  53. data/lib/hexapdf/data_dir.rb +51 -0
  54. data/lib/hexapdf/dictionary.rb +303 -0
  55. data/lib/hexapdf/dictionary_fields.rb +382 -0
  56. data/lib/hexapdf/document.rb +589 -0
  57. data/lib/hexapdf/document_utils.rb +209 -0
  58. data/lib/hexapdf/encryption/aes.rb +206 -0
  59. data/lib/hexapdf/encryption/arc4.rb +93 -0
  60. data/lib/hexapdf/encryption/fast_aes.rb +79 -0
  61. data/lib/hexapdf/encryption/fast_arc4.rb +67 -0
  62. data/lib/hexapdf/encryption/identity.rb +63 -0
  63. data/lib/hexapdf/encryption/ruby_aes.rb +447 -0
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +96 -0
  65. data/lib/hexapdf/encryption/security_handler.rb +494 -0
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +616 -0
  67. data/lib/hexapdf/encryption.rb +94 -0
  68. data/lib/hexapdf/error.rb +73 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +160 -0
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +87 -0
  71. data/lib/hexapdf/filter/dct_decode.rb +57 -0
  72. data/lib/hexapdf/filter/encryption.rb +59 -0
  73. data/lib/hexapdf/filter/flate_decode.rb +93 -0
  74. data/lib/hexapdf/filter/jpx_decode.rb +56 -0
  75. data/lib/hexapdf/filter/lzw_decode.rb +191 -0
  76. data/lib/hexapdf/filter/predictor.rb +266 -0
  77. data/lib/hexapdf/filter/run_length_decode.rb +108 -0
  78. data/lib/hexapdf/filter.rb +176 -0
  79. data/lib/hexapdf/font/cmap/parser.rb +146 -0
  80. data/lib/hexapdf/font/cmap/writer.rb +176 -0
  81. data/lib/hexapdf/font/cmap.rb +90 -0
  82. data/lib/hexapdf/font/encoding/base.rb +77 -0
  83. data/lib/hexapdf/font/encoding/difference_encoding.rb +64 -0
  84. data/lib/hexapdf/font/encoding/glyph_list.rb +150 -0
  85. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +221 -0
  86. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +265 -0
  87. data/lib/hexapdf/font/encoding/standard_encoding.rb +205 -0
  88. data/lib/hexapdf/font/encoding/symbol_encoding.rb +244 -0
  89. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +280 -0
  90. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +250 -0
  91. data/lib/hexapdf/font/encoding.rb +68 -0
  92. data/lib/hexapdf/font/true_type/font.rb +179 -0
  93. data/lib/hexapdf/font/true_type/table/cmap.rb +103 -0
  94. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +384 -0
  95. data/lib/hexapdf/font/true_type/table/directory.rb +92 -0
  96. data/lib/hexapdf/font/true_type/table/glyf.rb +166 -0
  97. data/lib/hexapdf/font/true_type/table/head.rb +143 -0
  98. data/lib/hexapdf/font/true_type/table/hhea.rb +109 -0
  99. data/lib/hexapdf/font/true_type/table/hmtx.rb +79 -0
  100. data/lib/hexapdf/font/true_type/table/loca.rb +79 -0
  101. data/lib/hexapdf/font/true_type/table/maxp.rb +112 -0
  102. data/lib/hexapdf/font/true_type/table/name.rb +218 -0
  103. data/lib/hexapdf/font/true_type/table/os2.rb +200 -0
  104. data/lib/hexapdf/font/true_type/table/post.rb +230 -0
  105. data/lib/hexapdf/font/true_type/table.rb +155 -0
  106. data/lib/hexapdf/font/true_type.rb +48 -0
  107. data/lib/hexapdf/font/true_type_wrapper.rb +240 -0
  108. data/lib/hexapdf/font/type1/afm_parser.rb +230 -0
  109. data/lib/hexapdf/font/type1/character_metrics.rb +67 -0
  110. data/lib/hexapdf/font/type1/font.rb +123 -0
  111. data/lib/hexapdf/font/type1/font_metrics.rb +117 -0
  112. data/lib/hexapdf/font/type1/pfb_parser.rb +71 -0
  113. data/lib/hexapdf/font/type1.rb +52 -0
  114. data/lib/hexapdf/font/type1_wrapper.rb +193 -0
  115. data/lib/hexapdf/font_loader/from_configuration.rb +70 -0
  116. data/lib/hexapdf/font_loader/standard14.rb +98 -0
  117. data/lib/hexapdf/font_loader.rb +85 -0
  118. data/lib/hexapdf/font_utils.rb +89 -0
  119. data/lib/hexapdf/image_loader/jpeg.rb +166 -0
  120. data/lib/hexapdf/image_loader/pdf.rb +89 -0
  121. data/lib/hexapdf/image_loader/png.rb +410 -0
  122. data/lib/hexapdf/image_loader.rb +68 -0
  123. data/lib/hexapdf/importer.rb +139 -0
  124. data/lib/hexapdf/name_tree_node.rb +78 -0
  125. data/lib/hexapdf/number_tree_node.rb +67 -0
  126. data/lib/hexapdf/object.rb +363 -0
  127. data/lib/hexapdf/parser.rb +349 -0
  128. data/lib/hexapdf/rectangle.rb +99 -0
  129. data/lib/hexapdf/reference.rb +98 -0
  130. data/lib/hexapdf/revision.rb +206 -0
  131. data/lib/hexapdf/revisions.rb +194 -0
  132. data/lib/hexapdf/serializer.rb +326 -0
  133. data/lib/hexapdf/stream.rb +279 -0
  134. data/lib/hexapdf/task/dereference.rb +109 -0
  135. data/lib/hexapdf/task/optimize.rb +230 -0
  136. data/lib/hexapdf/task.rb +68 -0
  137. data/lib/hexapdf/tokenizer.rb +406 -0
  138. data/lib/hexapdf/type/catalog.rb +107 -0
  139. data/lib/hexapdf/type/embedded_file.rb +87 -0
  140. data/lib/hexapdf/type/file_specification.rb +232 -0
  141. data/lib/hexapdf/type/font.rb +81 -0
  142. data/lib/hexapdf/type/font_descriptor.rb +109 -0
  143. data/lib/hexapdf/type/font_simple.rb +190 -0
  144. data/lib/hexapdf/type/font_true_type.rb +47 -0
  145. data/lib/hexapdf/type/font_type1.rb +162 -0
  146. data/lib/hexapdf/type/form.rb +103 -0
  147. data/lib/hexapdf/type/graphics_state_parameter.rb +79 -0
  148. data/lib/hexapdf/type/image.rb +73 -0
  149. data/lib/hexapdf/type/info.rb +70 -0
  150. data/lib/hexapdf/type/names.rb +69 -0
  151. data/lib/hexapdf/type/object_stream.rb +224 -0
  152. data/lib/hexapdf/type/page.rb +355 -0
  153. data/lib/hexapdf/type/page_tree_node.rb +269 -0
  154. data/lib/hexapdf/type/resources.rb +212 -0
  155. data/lib/hexapdf/type/trailer.rb +128 -0
  156. data/lib/hexapdf/type/viewer_preferences.rb +73 -0
  157. data/lib/hexapdf/type/xref_stream.rb +204 -0
  158. data/lib/hexapdf/type.rb +67 -0
  159. data/lib/hexapdf/utils/bit_field.rb +87 -0
  160. data/lib/hexapdf/utils/bit_stream.rb +148 -0
  161. data/lib/hexapdf/utils/lru_cache.rb +65 -0
  162. data/lib/hexapdf/utils/math_helpers.rb +55 -0
  163. data/lib/hexapdf/utils/object_hash.rb +130 -0
  164. data/lib/hexapdf/utils/pdf_doc_encoding.rb +93 -0
  165. data/lib/hexapdf/utils/sorted_tree_node.rb +339 -0
  166. data/lib/hexapdf/version.rb +39 -0
  167. data/lib/hexapdf/writer.rb +199 -0
  168. data/lib/hexapdf/xref_section.rb +152 -0
  169. data/lib/hexapdf.rb +34 -0
  170. data/man/man1/hexapdf.1 +249 -0
  171. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  172. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  173. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  174. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  175. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  176. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  177. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  178. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  179. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  180. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  181. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  182. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  183. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  184. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  185. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  186. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  187. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  188. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  189. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  190. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  191. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  192. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  193. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  194. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  195. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  196. data/test/data/images/cmyk.jpg +0 -0
  197. data/test/data/images/fillbytes.jpg +0 -0
  198. data/test/data/images/gray.jpg +0 -0
  199. data/test/data/images/greyscale-1bit.png +0 -0
  200. data/test/data/images/greyscale-2bit.png +0 -0
  201. data/test/data/images/greyscale-4bit.png +0 -0
  202. data/test/data/images/greyscale-8bit.png +0 -0
  203. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  204. data/test/data/images/greyscale-trns-8bit.png +0 -0
  205. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  206. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  207. data/test/data/images/indexed-1bit.png +0 -0
  208. data/test/data/images/indexed-2bit.png +0 -0
  209. data/test/data/images/indexed-4bit.png +0 -0
  210. data/test/data/images/indexed-8bit.png +0 -0
  211. data/test/data/images/indexed-alpha-4bit.png +0 -0
  212. data/test/data/images/indexed-alpha-8bit.png +0 -0
  213. data/test/data/images/rgb.jpg +0 -0
  214. data/test/data/images/truecolour-8bit.png +0 -0
  215. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  216. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  217. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  218. data/test/data/minimal.pdf +44 -0
  219. data/test/data/standard-security-handler/README +9 -0
  220. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  221. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  222. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  223. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  224. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  225. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  226. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  227. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  228. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  229. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  230. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  231. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  232. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  233. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  234. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  235. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  236. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  237. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  238. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  239. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  240. data/test/hexapdf/common_tokenizer_tests.rb +204 -0
  241. data/test/hexapdf/content/common.rb +31 -0
  242. data/test/hexapdf/content/graphic_object/test_arc.rb +93 -0
  243. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +91 -0
  244. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  245. data/test/hexapdf/content/test_canvas.rb +1113 -0
  246. data/test/hexapdf/content/test_color_space.rb +97 -0
  247. data/test/hexapdf/content/test_graphics_state.rb +138 -0
  248. data/test/hexapdf/content/test_operator.rb +619 -0
  249. data/test/hexapdf/content/test_parser.rb +66 -0
  250. data/test/hexapdf/content/test_processor.rb +156 -0
  251. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  252. data/test/hexapdf/encryption/common.rb +87 -0
  253. data/test/hexapdf/encryption/test_aes.rb +121 -0
  254. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  255. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  256. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  257. data/test/hexapdf/encryption/test_identity.rb +21 -0
  258. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  259. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  260. data/test/hexapdf/encryption/test_security_handler.rb +356 -0
  261. data/test/hexapdf/encryption/test_standard_security_handler.rb +274 -0
  262. data/test/hexapdf/filter/common.rb +53 -0
  263. data/test/hexapdf/filter/test_ascii85_decode.rb +60 -0
  264. data/test/hexapdf/filter/test_ascii_hex_decode.rb +33 -0
  265. data/test/hexapdf/filter/test_encryption.rb +24 -0
  266. data/test/hexapdf/filter/test_flate_decode.rb +35 -0
  267. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  268. data/test/hexapdf/filter/test_predictor.rb +183 -0
  269. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  270. data/test/hexapdf/font/cmap/test_parser.rb +67 -0
  271. data/test/hexapdf/font/cmap/test_writer.rb +58 -0
  272. data/test/hexapdf/font/encoding/test_base.rb +35 -0
  273. data/test/hexapdf/font/encoding/test_difference_encoding.rb +21 -0
  274. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  275. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  276. data/test/hexapdf/font/test_encoding.rb +27 -0
  277. data/test/hexapdf/font/test_true_type_wrapper.rb +110 -0
  278. data/test/hexapdf/font/test_type1_wrapper.rb +66 -0
  279. data/test/hexapdf/font/true_type/common.rb +19 -0
  280. data/test/hexapdf/font/true_type/table/test_cmap.rb +59 -0
  281. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +133 -0
  282. data/test/hexapdf/font/true_type/table/test_directory.rb +35 -0
  283. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  284. data/test/hexapdf/font/true_type/table/test_head.rb +76 -0
  285. data/test/hexapdf/font/true_type/table/test_hhea.rb +40 -0
  286. data/test/hexapdf/font/true_type/table/test_hmtx.rb +38 -0
  287. data/test/hexapdf/font/true_type/table/test_loca.rb +43 -0
  288. data/test/hexapdf/font/true_type/table/test_maxp.rb +62 -0
  289. data/test/hexapdf/font/true_type/table/test_name.rb +95 -0
  290. data/test/hexapdf/font/true_type/table/test_os2.rb +65 -0
  291. data/test/hexapdf/font/true_type/table/test_post.rb +89 -0
  292. data/test/hexapdf/font/true_type/test_font.rb +120 -0
  293. data/test/hexapdf/font/true_type/test_table.rb +41 -0
  294. data/test/hexapdf/font/type1/test_afm_parser.rb +51 -0
  295. data/test/hexapdf/font/type1/test_font.rb +68 -0
  296. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  297. data/test/hexapdf/font_loader/test_from_configuration.rb +28 -0
  298. data/test/hexapdf/font_loader/test_standard14.rb +22 -0
  299. data/test/hexapdf/image_loader/test_jpeg.rb +83 -0
  300. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  301. data/test/hexapdf/image_loader/test_png.rb +258 -0
  302. data/test/hexapdf/task/test_dereference.rb +46 -0
  303. data/test/hexapdf/task/test_optimize.rb +137 -0
  304. data/test/hexapdf/test_configuration.rb +82 -0
  305. data/test/hexapdf/test_data_dir.rb +32 -0
  306. data/test/hexapdf/test_dictionary.rb +284 -0
  307. data/test/hexapdf/test_dictionary_fields.rb +185 -0
  308. data/test/hexapdf/test_document.rb +574 -0
  309. data/test/hexapdf/test_document_utils.rb +144 -0
  310. data/test/hexapdf/test_filter.rb +96 -0
  311. data/test/hexapdf/test_font_utils.rb +47 -0
  312. data/test/hexapdf/test_importer.rb +78 -0
  313. data/test/hexapdf/test_object.rb +177 -0
  314. data/test/hexapdf/test_parser.rb +394 -0
  315. data/test/hexapdf/test_rectangle.rb +36 -0
  316. data/test/hexapdf/test_reference.rb +41 -0
  317. data/test/hexapdf/test_revision.rb +139 -0
  318. data/test/hexapdf/test_revisions.rb +93 -0
  319. data/test/hexapdf/test_serializer.rb +169 -0
  320. data/test/hexapdf/test_stream.rb +262 -0
  321. data/test/hexapdf/test_tokenizer.rb +30 -0
  322. data/test/hexapdf/test_writer.rb +120 -0
  323. data/test/hexapdf/test_xref_section.rb +35 -0
  324. data/test/hexapdf/type/test_catalog.rb +30 -0
  325. data/test/hexapdf/type/test_embedded_file.rb +16 -0
  326. data/test/hexapdf/type/test_file_specification.rb +148 -0
  327. data/test/hexapdf/type/test_font.rb +35 -0
  328. data/test/hexapdf/type/test_font_descriptor.rb +51 -0
  329. data/test/hexapdf/type/test_font_simple.rb +190 -0
  330. data/test/hexapdf/type/test_font_type1.rb +128 -0
  331. data/test/hexapdf/type/test_form.rb +60 -0
  332. data/test/hexapdf/type/test_info.rb +14 -0
  333. data/test/hexapdf/type/test_names.rb +9 -0
  334. data/test/hexapdf/type/test_object_stream.rb +84 -0
  335. data/test/hexapdf/type/test_page.rb +260 -0
  336. data/test/hexapdf/type/test_page_tree_node.rb +255 -0
  337. data/test/hexapdf/type/test_resources.rb +167 -0
  338. data/test/hexapdf/type/test_trailer.rb +109 -0
  339. data/test/hexapdf/type/test_xref_stream.rb +131 -0
  340. data/test/hexapdf/utils/test_bit_field.rb +47 -0
  341. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  342. data/test/hexapdf/utils/test_object_hash.rb +115 -0
  343. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  344. data/test/hexapdf/utils/test_sorted_tree_node.rb +232 -0
  345. data/test/test_helper.rb +56 -0
  346. metadata +427 -0
@@ -0,0 +1,494 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2016 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'digest/md5'
35
+ require 'hexapdf/error'
36
+ require 'hexapdf/dictionary'
37
+ require 'hexapdf/stream'
38
+
39
+ module HexaPDF
40
+ module Encryption
41
+
42
+ # Base class for all encryption dictionaries.
43
+ #
44
+ # Contains entries common to all encryption dictionaries. If a specific security handler
45
+ # needs further fields it should derive a new subclass and add the new fields there.
46
+ #
47
+ # See: PDF1.7 s7.6.1
48
+ class EncryptionDictionary < Dictionary
49
+
50
+ define_field :Filter, type: Symbol, required: true
51
+ define_field :SubFilter, type: Symbol, version: '1.3'
52
+ define_field :V, type: Integer, required: true
53
+ define_field :Length, type: Integer, default: 40, version: '1.4'
54
+ define_field :CF, type: Dictionary, version: '1.5'
55
+ define_field :StmF, type: Symbol, default: :Identity, version: '1.5'
56
+ define_field :StrF, type: Symbol, default: :Identity, version: '1.5'
57
+ define_field :EFF, type: Symbol, version: '1.6'
58
+
59
+ private
60
+
61
+ # Ensures that the encryption dictionary's content is valid.
62
+ def perform_validation
63
+ super
64
+ unless [1, 2, 4, 5].include?(value[:V])
65
+ yield("Value of /V is not one of 1, 2, 4 or 5", false)
66
+ end
67
+ if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
68
+ value[:Length] > 128 || value[:Length] % 8 != 0)
69
+ yield("Invalid value for /Length field when /V is 2", false)
70
+ end
71
+ end
72
+
73
+ end
74
+
75
+ # Base class for all security handlers.
76
+ #
77
+ # == Creating SecurityHandler Instances
78
+ #
79
+ # The base class provides two class methods for this:
80
+ #
81
+ # * The method ::set_up_encryption is used when a security handler instance should be created
82
+ # that populates the document's encryption dictionary.
83
+ #
84
+ # * The method ::set_up_decryption is used when a security handler should be created from the
85
+ # document's encryption dictionary.
86
+ #
87
+ # Security handlers could also be created with the ::new method but this is discouraged because
88
+ # the above methods provide the correct handling in both cases.
89
+ #
90
+ #
91
+ # == Using SecurityHandler Instances
92
+ #
93
+ # The SecurityHandler base class provides the methods for decrypting an indirect object and for
94
+ # encrypting strings and streams:
95
+ #
96
+ # * #decrypt
97
+ # * #encrypt_string
98
+ # * #encrypt_stream
99
+ #
100
+ # How the decryption/encryption key is actually computed is deferred to a sub class.
101
+ #
102
+ # Additionally, the #encryption_key_valid? method can be used to check whether the
103
+ # SecurityHandler instance is built from/built for the current version of the encryption
104
+ # dictionary.
105
+ #
106
+ #
107
+ # == Implementing a SecurityHandler Class
108
+ #
109
+ # Each security handler has to implement the following methods:
110
+ #
111
+ # prepare_encryption(**options)::
112
+ # Prepares the security handler for use in encrypting the document.
113
+ #
114
+ # See the #set_up_encryption documentation for information on which options are passed on to
115
+ # this method.
116
+ #
117
+ # Returns the encryption key as well as the names of the string, stream and embedded file
118
+ # algorithms.
119
+ #
120
+ # prepare_decryption(**options)::
121
+ # Prepares the security handler for decryption by using the information from the document's
122
+ # encryption dictionary as well as the provided arguments.
123
+ #
124
+ # See the #set_up_decryption documentation for additional information.
125
+ #
126
+ # Returns the encryption key that should be used for decryption.
127
+ #
128
+ # Additionally, the following methods can be overridden to provide a more specific meaning:
129
+ #
130
+ # encryption_dictionary_class::
131
+ # Returns the class that is used for the encryption dictionary. Should be derived from the
132
+ # EncryptionDictionary class.
133
+ class SecurityHandler
134
+
135
+ # :call-seq:
136
+ # SecurityHandler.set_up_encryption(document, handler_name, **options) -> handler
137
+ #
138
+ # Sets up and returns the security handler with the specified name for the document and
139
+ # modifies the document's encryption dictionary accordingly.
140
+ #
141
+ # The +options+ can contain any encryption options for the specific security handler
142
+ # and the common encryption options.
143
+ #
144
+ # See: #set_up_encryption (for the common encryption options).
145
+ def self.set_up_encryption(document, handler_name, **options)
146
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.filter_map', handler_name)
147
+ if handler.nil?
148
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.sub_filter_map', handler_name)
149
+ end
150
+ if handler.nil?
151
+ raise HexaPDF::EncryptionError, "Could not find the specified security handler"
152
+ end
153
+
154
+ handler = handler.new(document)
155
+ document.trailer[:Encrypt] = handler.set_up_encryption(**options)
156
+ handler.freeze
157
+ end
158
+
159
+ # :call-seq:
160
+ # SecurityHandler.set_up_decryption(document, **options) -> handler
161
+ #
162
+ # Sets up and returns the security handler that is used for decrypting the given document and
163
+ # modifies the document's object loader so that the decryption is handled automatically behind
164
+ # the scenes.
165
+ #
166
+ # The +options+ have to contain decryption options specific to the security handler
167
+ # that is used by the PDF file.
168
+ #
169
+ # See: #set_up_decryption
170
+ def self.set_up_decryption(document, **options)
171
+ dict = document.trailer[:Encrypt]
172
+ if dict.nil?
173
+ raise HexaPDF::EncryptionError, "No /Encrypt dictionary found"
174
+ end
175
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.filter_map', dict[:Filter])
176
+ if handler.nil?
177
+ handler = HexaPDF::GlobalConfiguration.constantize('encryption.sub_filter_map', dict[:SubFilter])
178
+ end
179
+ if handler.nil?
180
+ raise HexaPDF::EncryptionError, "Could not find a suitable security handler"
181
+ end
182
+
183
+ handler = handler.new(document)
184
+ document.trailer[:Encrypt] = handler.set_up_decryption(dict, **options)
185
+ document.revisions.each do |r|
186
+ loader = r.loader
187
+ r.loader = lambda do |xref_entry|
188
+ obj = loader.call(xref_entry)
189
+ xref_entry.compressed? ? obj : handler.decrypt(obj)
190
+ end
191
+ end
192
+
193
+ handler.freeze
194
+ end
195
+
196
+
197
+ # A hash containing information about the used encryption. This information is only
198
+ # available once the security handler has been set up for decryption or encryption.
199
+ #
200
+ # Available keys:
201
+ #
202
+ # :version::
203
+ # The version of the security handler in use.
204
+ # :string_algorithm::
205
+ # The algorithm used for encrypting/decrypting strings.
206
+ # :stream_algorithm::
207
+ # The algorithm used for encrypting/decrypting streams.
208
+ # :embedded_file_algorithm::
209
+ # The algorithm used for encrypting/decrypting embedded files.
210
+ # :key_length::
211
+ # The key length in bits.
212
+ attr_reader :encryption_details
213
+
214
+ # Creates a new SecurityHandler for the given document.
215
+ def initialize(document)
216
+ @document = document
217
+ @encrypt_dict_hash = nil
218
+ @encryption_details = {}
219
+ end
220
+
221
+ # Checks if the encryption key computed by this security handler is derived from the
222
+ # document's encryption dictionary.
223
+ def encryption_key_valid?
224
+ document.unwrap(document.trailer[:Encrypt]).hash == @encrypt_dict_hash
225
+ end
226
+
227
+ # Decrypts the strings and the possibly attached stream of the given indirect object in
228
+ # place.
229
+ #
230
+ # See: PDF1.7 s7.6.2
231
+ def decrypt(obj)
232
+ return obj if obj == document.trailer[:Encrypt] || obj.type == :XRef
233
+
234
+ key = object_key(obj.oid, obj.gen, string_algorithm)
235
+ each_string_in_object(obj.value) do |str|
236
+ next if str.empty?
237
+ str.replace(string_algorithm.decrypt(key, str))
238
+ end
239
+
240
+ if obj.kind_of?(HexaPDF::Stream)
241
+ unless string_algorithm == stream_algorithm
242
+ key = object_key(obj.oid, obj.gen, stream_algorithm)
243
+ end
244
+ obj.raw_stream.filter.unshift(:Encryption)
245
+ obj.raw_stream.decode_parms.unshift(key: key, algorithm: stream_algorithm)
246
+ end
247
+
248
+ obj
249
+ end
250
+
251
+ # Returns the encrypted version of the string that resides in the given indirect object.
252
+ #
253
+ # See: PDF1.7 s7.6.2
254
+ def encrypt_string(str, obj)
255
+ return str if str.empty? || obj == document.trailer[:Encrypt] || obj.type == :XRef
256
+
257
+ key = object_key(obj.oid, obj.gen, string_algorithm)
258
+ string_algorithm.encrypt(key, str)
259
+ end
260
+
261
+ # Returns a Fiber that encrypts the contents of the given stream object.
262
+ def encrypt_stream(obj)
263
+ return obj.stream_encoder if obj.type == :XRef
264
+
265
+ key = object_key(obj.oid, obj.gen, stream_algorithm)
266
+ obj.stream_encoder(:Encryption, key: key, algorithm: stream_algorithm)
267
+ end
268
+
269
+ # Computes the encryption key and sets up the algorithms for encrypting the document based on
270
+ # the given options, and returns the corresponding encryption dictionary.
271
+ #
272
+ # The security handler specific +options+ as well as the +algorithm+ argument are passed on to
273
+ # the #prepare_encryption method.
274
+ #
275
+ # Options for all security handlers:
276
+ #
277
+ # key_length::
278
+ # The key length in bits. Possible values are in the range of 40 to 128 and 256 and it
279
+ # needs to be divisible by 8.
280
+ #
281
+ # algorithm::
282
+ # The encryption algorithm. Possible values are :arc4 for ARC4 encryption with key lengths
283
+ # of 40 to 128 bit or :aes for AES encryption with key lengths of 128 or 256 bit.
284
+ #
285
+ # force_V4::
286
+ # Forces the use of protocol version 4 when key_length=128 and algorithm=:arc4.
287
+ #
288
+ # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
289
def set_up_encryption(key_length: 128, algorithm: :aes, force_V4: false, **options)
  @dict = document.wrap({}, type: encryption_dictionary_class)

  # Derive the /V entry from the requested key length (in bits); a 128 bit key maps to
  # version 4 for AES or when explicitly forced, otherwise to version 2.
  version =
    case key_length
    when 40 then 1
    when 48, 56, 64, 72, 80, 88, 96, 104, 112, 120 then 2
    when 128 then (algorithm == :aes || force_V4 ? 4 : 2)
    when 256 then 5
    else
      raise(HexaPDF::UnsupportedEncryptionError,
            "Invalid key length #{key_length} specified")
    end
  dict[:V] = version
  # Only version 2 dictionaries store the key length explicitly.
  dict[:Length] = key_length if dict[:V] == 2

  # Reject unknown algorithms and algorithm/key length combinations that don't go together.
  unless [:aes, :arc4].include?(algorithm)
    raise(HexaPDF::UnsupportedEncryptionError,
          "Unsupported encryption algorithm: #{algorithm}")
  end
  if algorithm == :aes && key_length < 128
    raise(HexaPDF::UnsupportedEncryptionError,
          "AES algorithm needs a key length of 128 or 256 bit")
  end
  if algorithm == :arc4 && key_length == 256
    raise(HexaPDF::UnsupportedEncryptionError,
          "ARC4 algorithm can only be used with key lengths between 40 and 128 bit")
  end

  # The handler specific preparation returns the arguments for #set_up_security_handler.
  handler_args = prepare_encryption(algorithm: algorithm, **options)
  @encrypt_dict_hash = document.unwrap(dict).hash
  set_up_security_handler(*handler_args)

  @dict
end
324
+
325
+ # Uses the given encryption dictionary to set up the security handler for decrypting the
326
+ # document.
327
+ #
328
+ # The security handler specific +options+ are passed on to the #prepare_decryption method.
329
+ #
330
+ # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
331
def set_up_decryption(dictionary, **options)
  @dict = document.wrap(dictionary, type: encryption_dictionary_class)

  case dict[:V]
  when 1, 2
    # Versions 1 and 2 use ARC4 for strings, streams and embedded files alike.
    strf = stmf = eff = :arc4
  when 4, 5
    # Versions 4 and 5 name a crypt filter per data kind; its /CFM entry selects the algorithm.
    algorithm_for_method = {V2: :arc4, AESV2: :aes, AESV3: :aes, None: :identity}
    strf, stmf, eff = [:StrF, :StmF, :EFF].map do |entry|
      crypt_filter = dict[:CF] && dict[:CF][dict[entry]]
      if crypt_filter
        algorithm_for_method.fetch(crypt_filter[:CFM]) do
          raise(HexaPDF::UnsupportedEncryptionError,
                "Unsupported encryption method: #{crypt_filter[:CFM]}")
        end
      else
        # No crypt filter found for this entry: data is passed through as is.
        :identity
      end
    end
    # Without an /EFF entry, embedded files use the same algorithm as streams.
    eff = stmf unless dict[:EFF]
  else
    raise HexaPDF::UnsupportedEncryptionError, "Unsupported encryption version #{dict[:V]}"
  end

  set_up_security_handler(prepare_decryption(**options), strf, stmf, eff)
  @encrypt_dict_hash = document.unwrap(@dict).hash

  @dict
end
362
+
363
+ private
364
+
365
+ # Returns the associated PDF document.
366
+ #
367
+ # Subclasses should use this method to access the document.
368
def document
  # Plain reader for the @document instance variable.
  @document
end
371
+
372
+ # Returns the encryption dictionary used by this security handler.
373
+ #
374
+ # Subclasses should use this dictionary to read and set values.
375
def dict
  # Plain reader for the @dict instance variable (assigned by the set-up methods).
  @dict
end
378
+
379
+ # Returns the encryption key that is used for encryption/decryption.
380
+ #
381
+ # Only available after decryption or encryption has been set up.
382
def encryption_key
  # Plain reader for the @encryption_key instance variable.
  @encryption_key
end
385
+
386
+ # Returns the algorithm class that is used for encrypting/decrypting strings.
387
+ #
388
+ # Only available after decryption or encryption has been set up.
389
def string_algorithm
  # Plain reader for the @string_algorithm instance variable.
  @string_algorithm
end
392
+
393
+ # Returns the algorithm class that is used for encrypting/decrypting streams.
394
+ #
395
+ # Only available after decryption or encryption has been set up.
396
def stream_algorithm
  # Plain reader for the @stream_algorithm instance variable.
  @stream_algorithm
end
399
+
400
+ # Returns the algorithm class that is used for encrypting/decrypting embedded files.
401
+ #
402
+ # Only available after decryption or encryption has been set up.
403
def embedded_file_algorithm
  # Plain reader for the @embedded_file_algorithm instance variable.
  @embedded_file_algorithm
end
406
+
407
+ # Assigns all necessary attributes so that encryption/decryption works correctly.
408
+ #
409
+ # The assigned values can be retrieved via the #encryption_key, #string_algorithm,
410
+ # #stream_algorithm and #embedded_file_algorithm methods.
411
def set_up_security_handler(key, strf, stmf, eff)
  # Turns an algorithm name (e.g. :aes) into the result of the matching *_algorithm method.
  resolve = lambda {|name| send("#{name}_algorithm") }

  @encryption_key = key
  @string_algorithm = resolve.call(strf)
  @stream_algorithm = resolve.call(stmf)
  @embedded_file_algorithm = resolve.call(eff)

  # Summary of the chosen settings; the key length is stored in bits.
  @encryption_details = {
    version: dict[:V],
    string_algorithm: strf,
    stream_algorithm: stmf,
    embedded_file_algorithm: eff,
    key_length: key_length * 8,
  }
end
424
+
425
+ # Returns the class that is used for ARC4 encryption.
426
def arc4_algorithm
  # Look the class up via the global configuration once and reuse the cached result.
  return @arc4_algorithm if @arc4_algorithm

  @arc4_algorithm = HexaPDF::GlobalConfiguration.constantize('encryption.arc4')
end
429
+
430
+ # Returns the class that is used for AES encryption.
431
def aes_algorithm
  # Look the class up via the global configuration once and reuse the cached result.
  return @aes_algorithm if @aes_algorithm

  @aes_algorithm = HexaPDF::GlobalConfiguration.constantize('encryption.aes')
end
434
+
435
+ # Returns the class that is used for the identity algorithm which passes back the data as is
436
+ # without encrypting or decrypting it.
437
def identity_algorithm
  # Always the Identity constant; unlike ARC4/AES nothing is looked up in the configuration.
  Identity
end
440
+
441
+ # Computes the key for encrypting/decrypting the indirect object with the given algorithm.
442
+ #
443
+ # See: PDF1.7 s7.6.2 (algorithm 1), PDF2.0 s7.6.2.2 (algorithm 1.A)
444
def object_key(oid, gen, algorithm)
  # Version 5 handlers use the encryption key unchanged for every object.
  return encryption_key if dict[:V] == 5

  # 'VXv' packs the low-order 3 bytes of oid followed by the low-order 2 bytes of gen
  # (both little-endian); String#+ creates a new string, so the stored key isn't modified.
  key_material = encryption_key + [oid, gen].pack('VXv'.freeze)
  key_material << "sAlT".freeze if algorithm.ancestors.include?(AES)

  # The object key is the MD5 digest truncated to key length + 5 bytes, but at most 16 bytes.
  Digest::MD5.digest(key_material)[0, [key_length + 5, 16].min]
end
453
+
454
+ # Returns the length of the encryption key in bytes based on the security handler's version.
455
+ #
456
+ # See: PDF1.7 s7.6.1, PDF2.0 s7.6.1
457
def key_length
  version = dict[:V]
  if version == 1
    5
  elsif version == 2
    # Version 2 stores the length in bits in the /Length entry.
    dict[:Length] / 8
  elsif version == 4
    16 # PDF2.0 s7.6.1 specifies that a /V of 4 is equal to length of 128bit
  elsif version == 5
    32 # PDF2.0 s7.6.1 specifies that a /V of 5 is equal to length of 256bit
  end
end
465
+
466
+ # Returns the class used as wrapper for the encryption dictionary.
467
def encryption_dictionary_class
  # Passed as the +type+ argument when the encryption dictionary is wrapped by the document.
  EncryptionDictionary
end
470
+
471
+ # Returns +n+ random bytes.
472
def random_bytes(n)
  # Generating the random data is delegated to the AES algorithm class.
  generator = aes_algorithm
  generator.random_bytes(n)
end
475
+
476
+ # Finds all strings in the given object and yields them.
477
+ #
478
+ # Note: Decryption happens directly after parsing and loading an object, before it can be
479
+ # touched by anything else. Therefore we only have to contend with the basic data structures.
480
def each_string_in_object(obj, &block) # :yields: str
  # Strings are yielded directly, arrays and hash values are searched recursively and
  # everything else (including hash keys) is ignored.
  case obj
  when String then yield(obj)
  when Array then obj.each {|element| each_string_in_object(element, &block) }
  when Hash then obj.each_value {|value| each_string_in_object(value, &block) }
  end
end
490
+
491
+ end
492
+
493
+ end
494
+ end