hexapdf 0.46.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +342 -16
  3. data/LICENSE +1 -1
  4. data/README.md +9 -8
  5. data/examples/009-text_layouter_alignment.rb +4 -0
  6. data/examples/010-text_layouter_inline_boxes.rb +4 -0
  7. data/examples/011-text_layouter_line_wrapping.rb +4 -0
  8. data/examples/012-text_layouter_styling.rb +9 -4
  9. data/examples/013-text_layouter_shapes.rb +5 -0
  10. data/examples/015-boxes.rb +3 -0
  11. data/examples/016-frame_automatic_box_placement.rb +3 -0
  12. data/examples/017-frame_text_flow.rb +3 -0
  13. data/examples/022-outline.rb +5 -1
  14. data/examples/{028-frame_mask_mode.rb → 028-composer_mask_mode.rb} +3 -3
  15. data/lib/hexapdf/cli/batch.rb +1 -1
  16. data/lib/hexapdf/cli/command.rb +65 -65
  17. data/lib/hexapdf/cli/debug_info.rb +98 -0
  18. data/lib/hexapdf/cli/files.rb +1 -1
  19. data/lib/hexapdf/cli/fonts.rb +1 -1
  20. data/lib/hexapdf/cli/form.rb +11 -6
  21. data/lib/hexapdf/cli/image2pdf.rb +1 -1
  22. data/lib/hexapdf/cli/images.rb +19 -4
  23. data/lib/hexapdf/cli/info.rb +1 -1
  24. data/lib/hexapdf/cli/inspect.rb +24 -8
  25. data/lib/hexapdf/cli/merge.rb +1 -1
  26. data/lib/hexapdf/cli/modify.rb +1 -2
  27. data/lib/hexapdf/cli/optimize.rb +6 -6
  28. data/lib/hexapdf/cli/split.rb +1 -1
  29. data/lib/hexapdf/cli/usage.rb +1 -1
  30. data/lib/hexapdf/cli/watermark.rb +1 -1
  31. data/lib/hexapdf/cli.rb +20 -2
  32. data/lib/hexapdf/composer.rb +22 -1
  33. data/lib/hexapdf/configuration.rb +56 -1
  34. data/lib/hexapdf/content/canvas.rb +1 -1
  35. data/lib/hexapdf/content/canvas_composer.rb +1 -1
  36. data/lib/hexapdf/content/color_space.rb +1 -1
  37. data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
  38. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
  39. data/lib/hexapdf/content/graphic_object/geom2d.rb +1 -1
  40. data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
  41. data/lib/hexapdf/content/graphic_object.rb +1 -1
  42. data/lib/hexapdf/content/graphics_state.rb +2 -2
  43. data/lib/hexapdf/content/operator.rb +1 -1
  44. data/lib/hexapdf/content/parser.rb +22 -23
  45. data/lib/hexapdf/content/processor.rb +1 -1
  46. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  47. data/lib/hexapdf/content.rb +1 -1
  48. data/lib/hexapdf/data_dir.rb +1 -1
  49. data/lib/hexapdf/dictionary.rb +8 -2
  50. data/lib/hexapdf/dictionary_fields.rb +2 -2
  51. data/lib/hexapdf/digital_signature/cms_handler.rb +19 -2
  52. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  53. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +1 -1
  54. data/lib/hexapdf/digital_signature/signature.rb +2 -2
  55. data/lib/hexapdf/digital_signature/signatures.rb +1 -1
  56. data/lib/hexapdf/digital_signature/signing/default_handler.rb +3 -3
  57. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -2
  58. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +25 -5
  59. data/lib/hexapdf/digital_signature/signing.rb +1 -1
  60. data/lib/hexapdf/digital_signature/verification_result.rb +1 -1
  61. data/lib/hexapdf/digital_signature.rb +1 -1
  62. data/lib/hexapdf/document/annotations.rb +220 -0
  63. data/lib/hexapdf/document/destinations.rb +1 -1
  64. data/lib/hexapdf/document/files.rb +1 -1
  65. data/lib/hexapdf/document/fonts.rb +1 -1
  66. data/lib/hexapdf/document/images.rb +1 -1
  67. data/lib/hexapdf/document/layout.rb +95 -16
  68. data/lib/hexapdf/document/metadata.rb +11 -4
  69. data/lib/hexapdf/document/pages.rb +1 -1
  70. data/lib/hexapdf/document.rb +52 -9
  71. data/lib/hexapdf/encryption/aes.rb +1 -1
  72. data/lib/hexapdf/encryption/arc4.rb +3 -3
  73. data/lib/hexapdf/encryption/fast_aes.rb +1 -1
  74. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  75. data/lib/hexapdf/encryption/identity.rb +1 -1
  76. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  77. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  78. data/lib/hexapdf/encryption/security_handler.rb +4 -2
  79. data/lib/hexapdf/encryption/standard_security_handler.rb +40 -29
  80. data/lib/hexapdf/encryption.rb +1 -1
  81. data/lib/hexapdf/error.rb +12 -4
  82. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  83. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  84. data/lib/hexapdf/filter/crypt.rb +1 -1
  85. data/lib/hexapdf/filter/encryption.rb +1 -1
  86. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  87. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  88. data/lib/hexapdf/filter/pass_through.rb +1 -1
  89. data/lib/hexapdf/filter/predictor.rb +1 -1
  90. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  91. data/lib/hexapdf/filter.rb +1 -1
  92. data/lib/hexapdf/font/cmap/parser.rb +1 -1
  93. data/lib/hexapdf/font/cmap/writer.rb +59 -5
  94. data/lib/hexapdf/font/cmap.rb +18 -7
  95. data/lib/hexapdf/font/encoding/base.rb +28 -1
  96. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  97. data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
  98. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  99. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
  100. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  101. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  102. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
  103. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  104. data/lib/hexapdf/font/encoding.rb +1 -1
  105. data/lib/hexapdf/font/invalid_glyph.rb +1 -1
  106. data/lib/hexapdf/font/true_type/builder.rb +1 -1
  107. data/lib/hexapdf/font/true_type/font.rb +1 -1
  108. data/lib/hexapdf/font/true_type/optimizer.rb +1 -1
  109. data/lib/hexapdf/font/true_type/subsetter.rb +1 -1
  110. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  111. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
  112. data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
  113. data/lib/hexapdf/font/true_type/table/glyf.rb +1 -1
  114. data/lib/hexapdf/font/true_type/table/head.rb +1 -1
  115. data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
  116. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
  117. data/lib/hexapdf/font/true_type/table/kern.rb +1 -1
  118. data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
  119. data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
  120. data/lib/hexapdf/font/true_type/table/name.rb +1 -1
  121. data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
  122. data/lib/hexapdf/font/true_type/table/post.rb +1 -1
  123. data/lib/hexapdf/font/true_type/table.rb +7 -2
  124. data/lib/hexapdf/font/true_type.rb +1 -1
  125. data/lib/hexapdf/font/true_type_wrapper.rb +51 -16
  126. data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
  127. data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
  128. data/lib/hexapdf/font/type1/font.rb +1 -1
  129. data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
  130. data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
  131. data/lib/hexapdf/font/type1.rb +1 -1
  132. data/lib/hexapdf/font/type1_wrapper.rb +3 -4
  133. data/lib/hexapdf/font_loader/from_configuration.rb +1 -1
  134. data/lib/hexapdf/font_loader/from_file.rb +1 -1
  135. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  136. data/lib/hexapdf/font_loader/variant_from_name.rb +1 -1
  137. data/lib/hexapdf/font_loader.rb +1 -1
  138. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  139. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  140. data/lib/hexapdf/image_loader/png.rb +1 -1
  141. data/lib/hexapdf/image_loader.rb +1 -1
  142. data/lib/hexapdf/importer.rb +2 -2
  143. data/lib/hexapdf/layout/box.rb +6 -1
  144. data/lib/hexapdf/layout/box_fitter.rb +1 -1
  145. data/lib/hexapdf/layout/column_box.rb +1 -1
  146. data/lib/hexapdf/layout/container_box.rb +64 -29
  147. data/lib/hexapdf/layout/frame.rb +1 -1
  148. data/lib/hexapdf/layout/image_box.rb +1 -1
  149. data/lib/hexapdf/layout/inline_box.rb +1 -1
  150. data/lib/hexapdf/layout/line.rb +1 -1
  151. data/lib/hexapdf/layout/list_box.rb +1 -1
  152. data/lib/hexapdf/layout/numeric_refinements.rb +1 -1
  153. data/lib/hexapdf/layout/page_style.rb +1 -1
  154. data/lib/hexapdf/layout/style.rb +133 -22
  155. data/lib/hexapdf/layout/table_box.rb +86 -14
  156. data/lib/hexapdf/layout/text_box.rb +1 -1
  157. data/lib/hexapdf/layout/text_fragment.rb +13 -2
  158. data/lib/hexapdf/layout/text_layouter.rb +1 -1
  159. data/lib/hexapdf/layout/text_shaper.rb +1 -1
  160. data/lib/hexapdf/layout/width_from_polygon.rb +1 -1
  161. data/lib/hexapdf/layout.rb +1 -1
  162. data/lib/hexapdf/name_tree_node.rb +1 -1
  163. data/lib/hexapdf/number_tree_node.rb +1 -1
  164. data/lib/hexapdf/object.rb +4 -4
  165. data/lib/hexapdf/parser.rb +36 -7
  166. data/lib/hexapdf/pdf_array.rb +26 -4
  167. data/lib/hexapdf/rectangle.rb +1 -1
  168. data/lib/hexapdf/reference.rb +2 -2
  169. data/lib/hexapdf/revision.rb +7 -3
  170. data/lib/hexapdf/revisions.rb +1 -1
  171. data/lib/hexapdf/serializer.rb +8 -8
  172. data/lib/hexapdf/stream.rb +1 -1
  173. data/lib/hexapdf/task/dereference.rb +1 -1
  174. data/lib/hexapdf/task/merge_acro_form.rb +164 -0
  175. data/lib/hexapdf/task/optimize.rb +5 -5
  176. data/lib/hexapdf/task/pdfa.rb +1 -1
  177. data/lib/hexapdf/task.rb +2 -1
  178. data/lib/hexapdf/test_utils.rb +3 -2
  179. data/lib/hexapdf/tokenizer.rb +52 -44
  180. data/lib/hexapdf/type/acro_form/appearance_generator.rb +66 -13
  181. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  182. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  183. data/lib/hexapdf/type/acro_form/field.rb +6 -2
  184. data/lib/hexapdf/type/acro_form/form.rb +23 -32
  185. data/lib/hexapdf/type/acro_form/java_script_actions.rb +10 -3
  186. data/lib/hexapdf/type/acro_form/signature_field.rb +19 -8
  187. data/lib/hexapdf/type/acro_form/text_field.rb +10 -3
  188. data/lib/hexapdf/type/acro_form/variable_text_field.rb +13 -5
  189. data/lib/hexapdf/type/acro_form.rb +1 -1
  190. data/lib/hexapdf/type/action.rb +1 -1
  191. data/lib/hexapdf/type/actions/go_to.rb +2 -1
  192. data/lib/hexapdf/type/actions/go_to_r.rb +2 -1
  193. data/lib/hexapdf/type/actions/launch.rb +6 -2
  194. data/lib/hexapdf/type/actions/set_ocg_state.rb +1 -1
  195. data/lib/hexapdf/type/actions/uri.rb +1 -1
  196. data/lib/hexapdf/type/actions.rb +1 -1
  197. data/lib/hexapdf/type/annotation.rb +78 -3
  198. data/lib/hexapdf/type/annotations/appearance_generator.rb +426 -0
  199. data/lib/hexapdf/type/annotations/border_effect.rb +99 -0
  200. data/lib/hexapdf/type/annotations/border_styling.rb +160 -0
  201. data/lib/hexapdf/type/annotations/circle.rb +65 -0
  202. data/lib/hexapdf/type/annotations/interior_color.rb +84 -0
  203. data/lib/hexapdf/type/annotations/line.rb +334 -0
  204. data/lib/hexapdf/type/annotations/line_ending_styling.rb +208 -0
  205. data/lib/hexapdf/type/annotations/link.rb +1 -1
  206. data/lib/hexapdf/type/annotations/markup_annotation.rb +15 -3
  207. data/lib/hexapdf/type/annotations/polygon.rb +64 -0
  208. data/lib/hexapdf/type/annotations/polygon_polyline.rb +109 -0
  209. data/lib/hexapdf/type/annotations/polyline.rb +64 -0
  210. data/lib/hexapdf/type/annotations/square.rb +65 -0
  211. data/lib/hexapdf/type/annotations/square_circle.rb +77 -0
  212. data/lib/hexapdf/type/annotations/text.rb +1 -1
  213. data/lib/hexapdf/type/annotations/widget.rb +56 -118
  214. data/lib/hexapdf/type/annotations.rb +13 -1
  215. data/lib/hexapdf/type/catalog.rb +5 -2
  216. data/lib/hexapdf/type/cid_font.rb +6 -3
  217. data/lib/hexapdf/type/cmap.rb +58 -0
  218. data/lib/hexapdf/type/embedded_file.rb +1 -1
  219. data/lib/hexapdf/type/file_specification.rb +18 -15
  220. data/lib/hexapdf/type/font.rb +1 -1
  221. data/lib/hexapdf/type/font_descriptor.rb +5 -4
  222. data/lib/hexapdf/type/font_simple.rb +4 -2
  223. data/lib/hexapdf/type/font_true_type.rb +3 -1
  224. data/lib/hexapdf/type/font_type0.rb +2 -2
  225. data/lib/hexapdf/type/font_type1.rb +19 -1
  226. data/lib/hexapdf/type/font_type3.rb +1 -2
  227. data/lib/hexapdf/type/form.rb +8 -5
  228. data/lib/hexapdf/type/graphics_state_parameter.rb +8 -5
  229. data/lib/hexapdf/type/icon_fit.rb +1 -1
  230. data/lib/hexapdf/type/image.rb +9 -5
  231. data/lib/hexapdf/type/info.rb +3 -3
  232. data/lib/hexapdf/type/mark_information.rb +3 -3
  233. data/lib/hexapdf/type/marked_content_reference.rb +59 -0
  234. data/lib/hexapdf/type/measure.rb +57 -0
  235. data/lib/hexapdf/type/metadata.rb +1 -1
  236. data/lib/hexapdf/type/names.rb +1 -1
  237. data/lib/hexapdf/type/namespace.rb +57 -0
  238. data/lib/hexapdf/type/object_reference.rb +57 -0
  239. data/lib/hexapdf/type/object_stream.rb +1 -1
  240. data/lib/hexapdf/type/optional_content_configuration.rb +2 -2
  241. data/lib/hexapdf/type/optional_content_group.rb +1 -1
  242. data/lib/hexapdf/type/optional_content_membership.rb +2 -2
  243. data/lib/hexapdf/type/optional_content_properties.rb +1 -1
  244. data/lib/hexapdf/type/outline.rb +1 -1
  245. data/lib/hexapdf/type/outline_item.rb +1 -1
  246. data/lib/hexapdf/type/output_intent.rb +1 -1
  247. data/lib/hexapdf/type/page.rb +6 -4
  248. data/lib/hexapdf/type/page_label.rb +1 -1
  249. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  250. data/lib/hexapdf/type/resources.rb +13 -9
  251. data/lib/hexapdf/type/struct_elem.rb +72 -0
  252. data/lib/hexapdf/type/struct_tree_root.rb +64 -0
  253. data/lib/hexapdf/type/trailer.rb +1 -1
  254. data/lib/hexapdf/type/viewer_preferences.rb +5 -4
  255. data/lib/hexapdf/type/xref_stream.rb +1 -1
  256. data/lib/hexapdf/type.rb +8 -1
  257. data/lib/hexapdf/utils/bit_field.rb +1 -1
  258. data/lib/hexapdf/utils/bit_stream.rb +1 -1
  259. data/lib/hexapdf/utils/graphics_helpers.rb +1 -1
  260. data/lib/hexapdf/utils/lru_cache.rb +1 -1
  261. data/lib/hexapdf/utils/math_helpers.rb +1 -1
  262. data/lib/hexapdf/utils/object_hash.rb +1 -1
  263. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  264. data/lib/hexapdf/utils/sorted_tree_node.rb +17 -4
  265. data/lib/hexapdf/utils.rb +1 -1
  266. data/lib/hexapdf/version.rb +2 -2
  267. data/lib/hexapdf/writer.rb +3 -2
  268. data/lib/hexapdf/xref_section.rb +25 -6
  269. data/lib/hexapdf.rb +1 -1
  270. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5-R5.pdf +43 -0
  271. data/test/data/standard-security-handler/nopwd-aes-256bit-V5-R5.pdf +44 -0
  272. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5-R5.pdf +43 -0
  273. data/test/data/standard-security-handler/userpwd-aes-256bit-V5-R5.pdf +0 -0
  274. data/test/hexapdf/common_tokenizer_tests.rb +7 -7
  275. data/test/hexapdf/content/test_graphics_state.rb +2 -3
  276. data/test/hexapdf/content/test_operator.rb +4 -5
  277. data/test/hexapdf/digital_signature/common.rb +6 -1
  278. data/test/hexapdf/digital_signature/signing/test_default_handler.rb +6 -1
  279. data/test/hexapdf/digital_signature/signing/test_timestamp_handler.rb +12 -0
  280. data/test/hexapdf/digital_signature/test_cms_handler.rb +25 -15
  281. data/test/hexapdf/digital_signature/test_handler.rb +2 -3
  282. data/test/hexapdf/digital_signature/test_pkcs1_handler.rb +1 -2
  283. data/test/hexapdf/digital_signature/test_signature.rb +7 -0
  284. data/test/hexapdf/digital_signature/test_signatures.rb +12 -7
  285. data/test/hexapdf/document/test_annotations.rb +75 -0
  286. data/test/hexapdf/document/test_layout.rb +38 -10
  287. data/test/hexapdf/document/test_metadata.rb +13 -1
  288. data/test/hexapdf/encryption/common.rb +1 -1
  289. data/test/hexapdf/encryption/test_aes.rb +1 -1
  290. data/test/hexapdf/encryption/test_arc4.rb +2 -2
  291. data/test/hexapdf/encryption/test_security_handler.rb +8 -6
  292. data/test/hexapdf/encryption/test_standard_security_handler.rb +7 -3
  293. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  294. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  295. data/test/hexapdf/filter/test_flate_decode.rb +2 -3
  296. data/test/hexapdf/font/cmap/test_writer.rb +73 -16
  297. data/test/hexapdf/font/encoding/test_base.rb +20 -0
  298. data/test/hexapdf/font/encoding/test_glyph_list.rb +1 -1
  299. data/test/hexapdf/font/test_true_type_wrapper.rb +31 -5
  300. data/test/hexapdf/font/test_type1_wrapper.rb +8 -1
  301. data/test/hexapdf/font/true_type/test_table.rb +12 -0
  302. data/test/hexapdf/layout/test_box.rb +8 -2
  303. data/test/hexapdf/layout/test_container_box.rb +34 -6
  304. data/test/hexapdf/layout/test_list_box.rb +7 -7
  305. data/test/hexapdf/layout/test_page_style.rb +1 -1
  306. data/test/hexapdf/layout/test_style.rb +46 -12
  307. data/test/hexapdf/layout/test_table_box.rb +66 -16
  308. data/test/hexapdf/layout/test_text_box.rb +0 -6
  309. data/test/hexapdf/layout/test_text_fragment.rb +3 -3
  310. data/test/hexapdf/layout/test_text_layouter.rb +4 -2
  311. data/test/hexapdf/task/test_merge_acro_form.rb +104 -0
  312. data/test/hexapdf/task/test_optimize.rb +3 -1
  313. data/test/hexapdf/test_composer.rb +15 -0
  314. data/test/hexapdf/test_dictionary.rb +15 -0
  315. data/test/hexapdf/test_dictionary_fields.rb +1 -0
  316. data/test/hexapdf/test_document.rb +26 -8
  317. data/test/hexapdf/test_filter.rb +1 -1
  318. data/test/hexapdf/test_importer.rb +7 -0
  319. data/test/hexapdf/test_object.rb +1 -1
  320. data/test/hexapdf/test_parser.rb +87 -18
  321. data/test/hexapdf/test_pdf_array.rb +36 -3
  322. data/test/hexapdf/test_revision.rb +27 -6
  323. data/test/hexapdf/test_revisions.rb +1 -1
  324. data/test/hexapdf/test_serializer.rb +4 -4
  325. data/test/hexapdf/test_stream.rb +1 -2
  326. data/test/hexapdf/test_tokenizer.rb +1 -1
  327. data/test/hexapdf/test_writer.rb +22 -8
  328. data/test/hexapdf/test_xref_section.rb +15 -0
  329. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +118 -26
  330. data/test/hexapdf/type/acro_form/test_button_field.rb +7 -6
  331. data/test/hexapdf/type/acro_form/test_field.rb +10 -0
  332. data/test/hexapdf/type/acro_form/test_form.rb +32 -9
  333. data/test/hexapdf/type/acro_form/test_java_script_actions.rb +21 -0
  334. data/test/hexapdf/type/acro_form/test_signature_field.rb +3 -1
  335. data/test/hexapdf/type/acro_form/test_text_field.rb +7 -1
  336. data/test/hexapdf/type/acro_form/test_variable_text_field.rb +14 -1
  337. data/test/hexapdf/type/actions/test_launch.rb +6 -2
  338. data/test/hexapdf/type/annotations/test_appearance_generator.rb +608 -0
  339. data/test/hexapdf/type/annotations/test_border_effect.rb +59 -0
  340. data/test/hexapdf/type/annotations/test_border_styling.rb +114 -0
  341. data/test/hexapdf/type/annotations/test_interior_color.rb +37 -0
  342. data/test/hexapdf/type/annotations/test_line.rb +144 -0
  343. data/test/hexapdf/type/annotations/test_line_ending_styling.rb +42 -0
  344. data/test/hexapdf/type/annotations/test_polygon_polyline.rb +29 -0
  345. data/test/hexapdf/type/annotations/test_widget.rb +47 -81
  346. data/test/hexapdf/type/test_annotation.rb +58 -0
  347. data/test/hexapdf/type/test_font_type1.rb +20 -1
  348. data/test/hexapdf/type/test_form.rb +7 -1
  349. data/test/hexapdf/type/test_image.rb +1 -1
  350. data/test/hexapdf/type/test_page.rb +7 -1
  351. data/test/hexapdf/type/test_page_tree_node.rb +2 -2
  352. data/test/hexapdf/type/test_resources.rb +3 -1
  353. data/test/hexapdf/utils/test_sorted_tree_node.rb +18 -7
  354. data/test/test_helper.rb +7 -0
  355. metadata +69 -9
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -112,11 +112,25 @@ module HexaPDF
112
112
  end
113
113
 
114
114
  if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
115
- raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
116
- "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
115
+ msg = "The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
116
+ "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref"
117
+ # Some invalid PDFs contain entries where the generation number in the xref is different
118
+ # from the one found in the indirect object. If the file were reconstructed the generation
119
+ # number from the indirect object itself would be used.
120
+ # To gracefully handle such invalid PDFs they need to have a single revision.
121
+ # The other code part that handles this is in Revision#object.
122
+ if oid == xref_entry.oid && @document.revisions.count == 1
123
+ maybe_raise(msg, pos: xref_entry.pos)
124
+ else
125
+ raise_malformed(msg)
126
+ end
117
127
  end
118
128
 
119
- @document.wrap(obj, oid: oid, gen: gen, stream: stream)
129
+ if obj.kind_of?(Reference)
130
+ @document.deref(obj)
131
+ else
132
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
133
+ end
120
134
  rescue HexaPDF::MalformedPDFError
121
135
  reconstructed_revision.object(xref_entry) ||
122
136
  @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
@@ -184,7 +198,7 @@ module HexaPDF
184
198
  length = if object[:Length].kind_of?(Integer)
185
199
  object[:Length]
186
200
  elsif object[:Length].kind_of?(Reference)
187
- @document.deref(object[:Length]).value
201
+ @document.deref(object[:Length])&.value || 0
188
202
  else
189
203
  0
190
204
  end
@@ -205,9 +219,24 @@ module HexaPDF
205
219
  tok = @tokenizer.next_token
206
220
 
207
221
  object[:Length] = length
222
+ if object.key?(:Filter)
223
+ begin
224
+ object[:Filter] = @document.unwrap(object[:Filter])
225
+ rescue HexaPDF::Error
226
+ maybe_raise("Invalid /Filter entry for stream", pos: @tokenizer.pos)
227
+ object.delete(:Filter)
228
+ end
229
+ end
230
+ if object.key?(:DecodeParms)
231
+ begin
232
+ object[:DecodeParms] = @document.unwrap(object[:DecodeParms])
233
+ rescue HexaPDF::Error
234
+ maybe_raise("Invalid /DecodeParms entry for stream", pos: @tokenizer.pos)
235
+ object.delete(:DecodeParms)
236
+ end
237
+ end
208
238
  stream = StreamData.new(@tokenizer.io, offset: pos, length: length,
209
- filter: @document.unwrap(object[:Filter]),
210
- decode_parms: @document.unwrap(object[:DecodeParms]))
239
+ filter: object[:Filter], decode_parms: object[:DecodeParms])
211
240
  end
212
241
 
213
242
  unless tok.kind_of?(Tokenizer::Token) && tok == 'endobj'
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -143,10 +143,32 @@ module HexaPDF
143
143
  # array.reject! {|item| block } -> array or nil
144
144
  # array.reject! -> Enumerator
145
145
  #
146
- # Deletes all elements from the array for which the block returns +true+. If no changes were
147
- # done, returns +nil+.
146
+ # Deletes all elements from the array for which the block returns +true+ and returns +self+. If
147
+ # no changes were done, returns +nil+.
148
148
  def reject!
149
- value.reject! {|item| yield(process_entry(item)) }
149
+ return to_enum(__method__) unless block_given?
150
+ value.reject! {|item| yield(process_entry(item)) } && self
151
+ end
152
+
153
+ # :call-seq:
154
+ # array.map! {|item| block } -> array
155
+ # array.map! -> Enumerator
156
+ #
157
+ # Maps all elements from the array in-place to the respective return value of the block and
158
+ # returns +self+.
159
+ def map!
160
+ return to_enum(__method__) unless block_given?
161
+ value.map! {|item| yield(process_entry(item)) }
162
+ self
163
+ end
164
+
165
+ # :call-seq:
166
+ # array.compact! -> array or nil
167
+ #
168
+ # Removes all +nil+ elements from the array. Returns +self+ if any elements were removed, +nil+
169
+ # otherwise.
170
+ def compact!
171
+ value.compact! && self
150
172
  end
151
173
 
152
174
  # :call-seq:
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -87,7 +87,7 @@ module HexaPDF
87
87
 
88
88
  # Computes the hash value based on the object and generation numbers.
89
89
  def hash
90
- oid.hash ^ gen.hash
90
+ [oid, gen].hash
91
91
  end
92
92
 
93
93
  # Returns the object identifier as "oid,gen".
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -128,6 +128,11 @@ module HexaPDF
128
128
  @objects[oid, gen]
129
129
  elsif (xref_entry = @xref_section[oid, gen])
130
130
  load_object(xref_entry)
131
+ elsif (xref_entry = @xref_section[oid]) && (obj = load_object(xref_entry))&.gen == gen
132
+ # This branch handles invalid PDFs with a single revision containing xref entries where the
133
+ # gen doesn't match the gen of the indirect object. Also see the special handling in
134
+ # Parser#load_object.
135
+ obj
131
136
  else
132
137
  nil
133
138
  end
@@ -219,8 +224,7 @@ module HexaPDF
219
224
  seen = {}
220
225
  @objects.each {|oid, _gen, data| seen[oid] = true; yield(data) }
221
226
  @xref_section.each do |oid, _gen, data|
222
- next if seen.key?(oid)
223
- yield(@objects[oid] || load_object(data))
227
+ yield(@objects[oid] || load_object(data)) unless seen.key?(oid)
224
228
  end
225
229
  @all_objects_loaded = true
226
230
  end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -276,16 +276,16 @@ module HexaPDF
276
276
  #
277
277
  # See: PDF2.0 s7.3.4
278
278
  def serialize_string(obj)
279
+ if obj.encoding != Encoding::BINARY && obj.match?(/[^ -~\t\r\n]/)
280
+ utf16_encoded = true
281
+ obj = "\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
282
+ end
279
283
  obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
280
284
  encrypter.encrypt_string(obj, @object)
281
- elsif obj.encoding != Encoding::BINARY
282
- if obj.match?(/[^ -~\t\r\n]/)
283
- "\xFE\xFF".b << obj.encode(Encoding::UTF_16BE).force_encoding(Encoding::BINARY)
284
- else
285
- obj.b
286
- end
285
+ elsif utf16_encoded
286
+ obj
287
287
  else
288
- obj.dup
288
+ obj.b
289
289
  end
290
290
  obj.gsub!(/[()\\\r]/n, STRING_ESCAPE_MAP)
291
291
  "(#{obj})"
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -0,0 +1,164 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/serializer'
38
+
39
+ module HexaPDF
40
+ module Task
41
+
42
+ # Task for merging an AcroForm from one PDF into another.
43
+ #
44
+ # It takes care of
45
+ #
46
+ # * adding the fields to the main Type::AcroForm::Form dictionary,
47
+ # * adjusting the field names so that they are unique,
48
+ # * and merging the properties of the main AcroForm dictionary itself and adjusting field
49
+ # information appropriately.
50
+ #
51
+ # Note that the pages with the fields need to be imported already.
52
+ #
53
+ # The steps for using this task are:
54
+ #
55
+ # 1. Import the pages into the target document and add all imported pages to an array
56
+ # 2. Call this task using the created array of pages.
57
+ #
58
+ # Example:
59
+ #
60
+ # pages = doc.pages.map {|page| target.pages.add(target.import(page)) }
61
+ # target.task(:merge_acro_form, source: doc, pages: pages)
62
+ module MergeAcroForm
63
+
64
+ # Performs the necessary steps to merge the AcroForm fields from the +source+ into the target
65
+ # document +doc+.
66
+ #
67
+ # +source+::
68
+ # Specifies the source PDF document the information from which should be merged into the
69
+ # target document.
70
+ #
71
+ # +pages+::
72
+ # An array of pages that were imported from +source+ and contain the widgets of the fields
73
+ # that should be merged.
74
+ def self.call(doc, source:, pages:)
75
+ return unless source.acro_form
76
+
77
+ acro_form = doc.acro_form(create: true)
78
+
79
+ # Determine a unique name for root field and create root field
80
+ import_name = 'merged_' +
81
+ (acro_form.root_fields.select {|field| field[:T] =~ /\Amerged_\d+\z/ }.
82
+ map {|field| field[:T][/\d+/].to_i }.sort.last || 0).succ.to_s
83
+ root_field = doc.add({T: import_name, Kids: []})
84
+ acro_form.root_fields << root_field
85
+
86
+ # Merge the main AcroForm dictionary
87
+ font_name_mapping = merge_form_dictionary(acro_form, source.acro_form, root_field)
88
+ font_name_re = font_name_mapping.keys.map {|name| Regexp.escape(name) }.join('|')
89
+ root_field[:DA] && root_field[:DA].sub!(font_name_re, font_name_mapping)
90
+
91
+ # Process all field widgets of the given pages
92
+ process_calculate_actions = false
93
+ signature_field_seen = false
94
+ pages.each do |page|
95
+ page.each_annotation do |widget|
96
+ next unless widget[:Subtype] == :Widget
97
+ field = widget.form_field
98
+
99
+ # Correct the font name in the default appearance string
100
+ widget[:DA] && widget[:DA].sub!(font_name_re, font_name_mapping)
101
+ field[:DA] && field[:DA].sub!(font_name_re, font_name_mapping)
102
+
103
+ process_calculate_actions = true if field[:AA]&.[](:C)
104
+ signature_field_seen = true if field.field_type == :Sig
105
+
106
+ # Add to the root field
107
+ field = field[:Parent] while field[:Parent]
108
+ if field != root_field
109
+ field[:Parent] = root_field
110
+ root_field[:Kids] << field
111
+ end
112
+ end
113
+ end
114
+
115
+ # Update calculation JavaScript actions with changed field names
116
+ fix_calculate_actions(acro_form, source.acro_form, import_name) if process_calculate_actions
117
+
118
+ # Update signature flags if necessary
119
+ if signature_field_seen && source.acro_form.signature_flag?(:signatures_exist)
120
+ acro_form.signature_flag(:signatures_exist)
121
+ end
122
+ end
123
+
124
+ # Merges the AcroForm +source_form+ into the +target_form+ and returns a mapping of old font
125
+ # names to new ones.
126
+ def self.merge_form_dictionary(target_form, source_form, root_field)
127
+ target_resources = target_form.default_resources
128
+ font_name_mapping = {}
129
+ serializer = HexaPDF::Serializer.new
130
+
131
+ source_form.default_resources[:Font].each do |font_name, value|
132
+ new_name = target_resources.add_font(target_form.document.import(value))
133
+ font_name_mapping[serializer.serialize(font_name)] = serializer.serialize(new_name)
134
+ end
135
+
136
+ root_field[:DA] = target_form.document.import(source_form[:DA])
137
+ root_field[:Q] = target_form.document.import(source_form[:Q])
138
+
139
+ font_name_mapping
140
+ end
141
+
142
+ # Fixes the calculate actions listed in the /CO entry of the main AcroForm dictionary to use
143
+ # the new names of the fields.
144
+ def self.fix_calculate_actions(acro_form, source_form, import_name)
145
+ if source_form[:CO]
146
+ acro_form[:CO] ||= []
147
+ acro_form[:CO].value.concat(acro_form.document.import(source_form[:CO]).value)
148
+ acro_form[:CO].each do |field|
149
+ next unless (action = field[:AA]&.[](:C))
150
+ action[:JS].gsub!(/"(.*?)"/) do |match|
151
+ if source_form.field_by_name($1)
152
+ "\"#{import_name}.#{$1}\""
153
+ else
154
+ match
155
+ end
156
+ end
157
+ end
158
+ end
159
+ end
160
+
161
+ end
162
+
163
+ end
164
+ end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -214,13 +214,13 @@ module HexaPDF
214
214
  end
215
215
  end
216
216
 
217
- # Deletes field entries of the object that are optional and currently set to their default
218
- # value.
217
+ # Deletes field entries (except for /Type) of the object that are optional and currently set
218
+ # to their default value.
219
219
  def self.delete_fields_with_defaults(obj)
220
220
  return unless obj.kind_of?(HexaPDF::Dictionary) && !obj.null?
221
221
  obj.each do |name, value|
222
- if (field = obj.class.field(name)) && !field.required? && field.default? &&
223
- value == field.default
222
+ if name != :Type && (field = obj.class.field(name)) && !field.required? &&
223
+ field.default? && value == field.default
224
224
  obj.delete(name)
225
225
  end
226
226
  end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
data/lib/hexapdf/task.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -65,6 +65,7 @@ module HexaPDF
65
65
  autoload(:Optimize, 'hexapdf/task/optimize')
66
66
  autoload(:Dereference, 'hexapdf/task/dereference')
67
67
  autoload(:PDFA, 'hexapdf/task/pdfa')
68
+ autoload(:MergeAcroForm, 'hexapdf/task/merge_acro_form')
68
69
 
69
70
  end
70
71
 
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -92,8 +92,9 @@ module HexaPDF
92
92
  # Creates a fiber that yields the given string in +len+ length parts.
93
93
  def feeder(string, len = string.length)
94
94
  Fiber.new do
95
+ string = string.b
95
96
  until string.empty?
96
- Fiber.yield(string.slice!(0, len).force_encoding('BINARY'))
97
+ Fiber.yield(string.slice!(0, len))
97
98
  end
98
99
  end
99
100
  end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2014-2024 Thomas Leitner
7
+ # Copyright (C) 2014-2025 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -118,42 +118,43 @@ module HexaPDF
118
118
  def next_token
119
119
  prepare_string_scanner(20)
120
120
  prepare_string_scanner(20) while @ss.skip(WHITESPACE_MULTI_RE)
121
- byte = @ss.string.getbyte(@ss.pos) || -1
122
- if (48 <= byte && byte <= 57) || byte == 45 || byte == 43 || byte == 46 # 0..9 - + .
121
+ case (byte = @ss.scan_byte || -1)
122
+ when 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 # + - . 0..9
123
+ @ss.pos -= 1
123
124
  parse_number
124
- elsif byte == 47 # /
125
+ when 47 # /
125
126
  parse_name
126
- elsif byte == 40 # (
127
+ when 40 # (
127
128
  parse_literal_string
128
- elsif byte == 60 # <
129
- if @ss.string.getbyte(@ss.pos + 1) == 60
130
- @ss.pos += 2
129
+ when 60 # <
130
+ if @ss.peek_byte == 60
131
+ @ss.pos += 1
131
132
  TOKEN_DICT_START
132
133
  else
133
134
  parse_hex_string
134
135
  end
135
- elsif byte == 62 # >
136
- unless @ss.string.getbyte(@ss.pos + 1) == 62
137
- raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
136
+ when 62 # >
137
+ unless @ss.scan_byte == 62
138
+ raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos - 1)
138
139
  end
139
- @ss.pos += 2
140
140
  TOKEN_DICT_END
141
- elsif byte == 91 # [
142
- @ss.pos += 1
141
+ when 91 # [
143
142
  TOKEN_ARRAY_START
144
- elsif byte == 93 # ]
145
- @ss.pos += 1
143
+ when 93 # ]
146
144
  TOKEN_ARRAY_END
147
- elsif byte == 123 || byte == 125 # { }
148
- Token.new(@ss.get_byte)
149
- elsif byte == 37 # %
145
+ when 41 # )
146
+ raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos - 1)
147
+ when 123, 125 # { }
148
+ Token.new(byte.chr.b)
149
+ when 37 # %
150
150
  until @ss.skip_until(/(?=[\r\n])/)
151
151
  return NO_MORE_TOKENS unless prepare_string_scanner
152
152
  end
153
153
  next_token
154
- elsif byte == -1 # we reached the end of the file
154
+ when -1 # we reached the end of the file
155
155
  NO_MORE_TOKENS
156
156
  else # everything else consisting of regular characters
157
+ @ss.pos -= 1
157
158
  parse_keyword
158
159
  end
159
160
  end
@@ -207,12 +208,13 @@ module HexaPDF
207
208
  # Note: This is a special method meant for use with reconstructing the cross-reference table!
208
209
  def next_integer_or_keyword
209
210
  skip_whitespace
210
- byte = @ss.string.getbyte(@ss.pos) || -1
211
- if 48 <= byte && byte <= 57
211
+ byte = @ss.peek_byte || -1
212
+ case byte
213
+ when 48, 49, 50, 51, 52, 53, 54, 55, 56, 57
212
214
  parse_number
213
- elsif (97 <= byte && byte <= 122) || (65 <= byte && byte <= 90)
215
+ when 97..122, 65..90
214
216
  parse_keyword
215
- elsif byte == -1 # we reached the end of the file
217
+ when -1 # we reached the end of the file
216
218
  NO_MORE_TOKENS
217
219
  else
218
220
  nil
@@ -222,8 +224,7 @@ module HexaPDF
222
224
  # Reads the byte (an integer) at the current position and advances the scan pointer.
223
225
  def next_byte
224
226
  prepare_string_scanner(1)
225
- @ss.pos += 1
226
- @ss.string.getbyte(@ss.pos - 1)
227
+ @ss.scan_byte
227
228
  end
228
229
 
229
230
  # Reads the cross-reference subsection entry at the current position and advances the scan
@@ -277,25 +278,35 @@ module HexaPDF
277
278
 
278
279
  REFERENCE_RE = /[#{WHITESPACE}]+([+]?\d+)[#{WHITESPACE}]+R#{WHITESPACE_OR_DELIMITER_RE}/ # :nodoc:
279
280
 
281
+ WHITESPACE_OR_DELIMITER_LUT = [] # :nodoc:
282
+ (WHITESPACE + DELIMITER).each_byte {|x| WHITESPACE_OR_DELIMITER_LUT[x] = true }
283
+
280
284
  # Parses the number (integer or real) at the current position.
281
285
  #
282
286
  # See: PDF2.0 s7.3.3
283
287
  def parse_number
284
- val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
285
- if val.match?(/\A[+-]?\d++(?!\.)\z/)
286
- tmp = val.to_i
287
- # Handle object references, see PDF2.0 s7.3.10
288
- prepare_string_scanner(10)
289
- if @ss.scan(REFERENCE_RE)
290
- tmp = if tmp > 0
291
- Reference.new(tmp, @ss[1].to_i)
292
- else
293
- maybe_raise("Invalid indirect object reference (#{tmp},#{@ss[1].to_i})")
294
- nil
295
- end
288
+ prepare_string_scanner(40)
289
+ pos = self.pos
290
+ if (tmp = @ss.scan_integer)
291
+ if @ss.eos? || WHITESPACE_OR_DELIMITER_LUT[@ss.peek_byte]
292
+ # Handle object references, see PDF2.0 s7.3.10
293
+ prepare_string_scanner(10)
294
+ if @ss.scan(REFERENCE_RE)
295
+ tmp = if tmp > 0
296
+ Reference.new(tmp, @ss[1].to_i)
297
+ else
298
+ maybe_raise("Invalid indirect object reference (#{tmp},#{@ss[1].to_i})")
299
+ nil
300
+ end
301
+ end
302
+ return tmp
303
+ else
304
+ self.pos = pos
296
305
  end
297
- tmp
298
- elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
306
+ end
307
+
308
+ val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
309
+ if val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
299
310
  val << '0' if val.getbyte(-1) == 46 # dot '.'
300
311
  Float(val)
301
312
  else
@@ -318,7 +329,6 @@ module HexaPDF
318
329
  #
319
330
  # See: PDF2.0 s7.3.4.2
320
331
  def parse_literal_string
321
- @ss.pos += 1
322
332
  str = "".b
323
333
  parentheses = 1
324
334
 
@@ -335,7 +345,7 @@ module HexaPDF
335
345
  when 40 then parentheses += 1 # (
336
346
  when 13 # \r
337
347
  str[-1] = "\n"
338
- @ss.pos += 1 if @ss.peek(1) == "\n"
348
+ @ss.pos += 1 if @ss.peek_byte == 10 # \n
339
349
  when 92 # \\
340
350
  str.chop!
341
351
  prepare_string_scanner(3)
@@ -361,7 +371,6 @@ module HexaPDF
361
371
  #
362
372
  # See: PDF2.0 s7.3.4.3
363
373
  def parse_hex_string
364
- @ss.pos += 1
365
374
  data = scan_until(/(?=>)/)
366
375
  unless data
367
376
  raise HexaPDF::MalformedPDFError.new("Unclosed hex string found", pos: pos)
@@ -376,7 +385,6 @@ module HexaPDF
376
385
  #
377
386
  # See: PDF2.0 s7.3.5
378
387
  def parse_name
379
- @ss.pos += 1
380
388
  str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
381
389
  str.gsub!(/#[A-Fa-f0-9]{2}/) {|m| m[1, 2].hex.chr }
382
390
  if str.force_encoding(Encoding::UTF_8).valid_encoding?