hexapdf 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -0
  3. data/LICENSE +1 -1
  4. data/examples/005-merging.rb +2 -1
  5. data/examples/032-acro_form_list_and_fill.rb +47 -0
  6. data/examples/033-text_extraction.rb +34 -0
  7. data/lib/hexapdf/cli/batch.rb +1 -1
  8. data/lib/hexapdf/cli/command.rb +1 -1
  9. data/lib/hexapdf/cli/debug_info.rb +1 -1
  10. data/lib/hexapdf/cli/files.rb +1 -1
  11. data/lib/hexapdf/cli/fonts.rb +6 -4
  12. data/lib/hexapdf/cli/form.rb +1 -1
  13. data/lib/hexapdf/cli/image2pdf.rb +1 -1
  14. data/lib/hexapdf/cli/images.rb +17 -17
  15. data/lib/hexapdf/cli/info.rb +3 -1
  16. data/lib/hexapdf/cli/inspect.rb +1 -1
  17. data/lib/hexapdf/cli/merge.rb +14 -2
  18. data/lib/hexapdf/cli/modify.rb +1 -1
  19. data/lib/hexapdf/cli/optimize.rb +1 -1
  20. data/lib/hexapdf/cli/split.rb +1 -1
  21. data/lib/hexapdf/cli/usage.rb +1 -1
  22. data/lib/hexapdf/cli/watermark.rb +1 -1
  23. data/lib/hexapdf/cli.rb +1 -1
  24. data/lib/hexapdf/composer.rb +1 -1
  25. data/lib/hexapdf/configuration.rb +10 -1
  26. data/lib/hexapdf/content/canvas.rb +2 -2
  27. data/lib/hexapdf/content/canvas_composer.rb +1 -1
  28. data/lib/hexapdf/content/color_space.rb +1 -1
  29. data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
  30. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
  31. data/lib/hexapdf/content/graphic_object/geom2d.rb +1 -1
  32. data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
  33. data/lib/hexapdf/content/graphic_object.rb +1 -1
  34. data/lib/hexapdf/content/graphics_state.rb +1 -1
  35. data/lib/hexapdf/content/operator.rb +1 -1
  36. data/lib/hexapdf/content/parser.rb +1 -1
  37. data/lib/hexapdf/content/processor.rb +1 -1
  38. data/lib/hexapdf/content/smart_text_extractor.rb +311 -0
  39. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  40. data/lib/hexapdf/content.rb +3 -1
  41. data/lib/hexapdf/data_dir.rb +1 -1
  42. data/lib/hexapdf/dictionary.rb +1 -1
  43. data/lib/hexapdf/dictionary_fields.rb +1 -1
  44. data/lib/hexapdf/digital_signature/cms_handler.rb +1 -1
  45. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  46. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +1 -1
  47. data/lib/hexapdf/digital_signature/signature.rb +1 -1
  48. data/lib/hexapdf/digital_signature/signatures.rb +1 -1
  49. data/lib/hexapdf/digital_signature/signing/default_handler.rb +2 -16
  50. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +22 -9
  51. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +1 -1
  52. data/lib/hexapdf/digital_signature/signing.rb +1 -1
  53. data/lib/hexapdf/digital_signature/verification_result.rb +1 -1
  54. data/lib/hexapdf/digital_signature.rb +1 -1
  55. data/lib/hexapdf/document/annotations.rb +1 -1
  56. data/lib/hexapdf/document/destinations.rb +1 -1
  57. data/lib/hexapdf/document/files.rb +1 -1
  58. data/lib/hexapdf/document/fonts.rb +1 -1
  59. data/lib/hexapdf/document/images.rb +1 -1
  60. data/lib/hexapdf/document/layout.rb +1 -1
  61. data/lib/hexapdf/document/metadata.rb +1 -1
  62. data/lib/hexapdf/document/pages.rb +1 -1
  63. data/lib/hexapdf/document.rb +8 -4
  64. data/lib/hexapdf/encryption/aes.rb +1 -1
  65. data/lib/hexapdf/encryption/arc4.rb +1 -1
  66. data/lib/hexapdf/encryption/fast_aes.rb +1 -1
  67. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  68. data/lib/hexapdf/encryption/identity.rb +1 -1
  69. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  70. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  71. data/lib/hexapdf/encryption/security_handler.rb +1 -1
  72. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
  73. data/lib/hexapdf/encryption.rb +1 -1
  74. data/lib/hexapdf/error.rb +1 -1
  75. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  76. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  77. data/lib/hexapdf/filter/brotli_decode.rb +88 -0
  78. data/lib/hexapdf/filter/crypt.rb +1 -1
  79. data/lib/hexapdf/filter/encryption.rb +1 -1
  80. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  81. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  82. data/lib/hexapdf/filter/pass_through.rb +1 -1
  83. data/lib/hexapdf/filter/predictor.rb +1 -1
  84. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  85. data/lib/hexapdf/filter.rb +2 -1
  86. data/lib/hexapdf/font/cmap/parser.rb +1 -1
  87. data/lib/hexapdf/font/cmap/writer.rb +1 -1
  88. data/lib/hexapdf/font/cmap.rb +1 -1
  89. data/lib/hexapdf/font/encoding/base.rb +1 -1
  90. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  91. data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
  92. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  93. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
  94. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  95. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  96. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
  97. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  98. data/lib/hexapdf/font/encoding.rb +1 -1
  99. data/lib/hexapdf/font/invalid_glyph.rb +1 -1
  100. data/lib/hexapdf/font/true_type/builder.rb +2 -2
  101. data/lib/hexapdf/font/true_type/font.rb +14 -1
  102. data/lib/hexapdf/font/true_type/optimizer.rb +1 -1
  103. data/lib/hexapdf/font/true_type/subsetter.rb +11 -6
  104. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  105. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
  106. data/lib/hexapdf/font/true_type/table/directory.rb +6 -1
  107. data/lib/hexapdf/font/true_type/table/glyf.rb +1 -1
  108. data/lib/hexapdf/font/true_type/table/head.rb +1 -1
  109. data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
  110. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
  111. data/lib/hexapdf/font/true_type/table/kern.rb +1 -1
  112. data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
  113. data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
  114. data/lib/hexapdf/font/true_type/table/name.rb +1 -1
  115. data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
  116. data/lib/hexapdf/font/true_type/table/post.rb +1 -1
  117. data/lib/hexapdf/font/true_type/table.rb +1 -1
  118. data/lib/hexapdf/font/true_type.rb +2 -1
  119. data/lib/hexapdf/font/true_type_wrapper.rb +3 -3
  120. data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
  121. data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
  122. data/lib/hexapdf/font/type1/font.rb +1 -1
  123. data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
  124. data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
  125. data/lib/hexapdf/font/type1.rb +1 -1
  126. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  127. data/lib/hexapdf/font_loader/from_configuration.rb +1 -1
  128. data/lib/hexapdf/font_loader/from_file.rb +5 -1
  129. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  130. data/lib/hexapdf/font_loader/variant_from_name.rb +1 -1
  131. data/lib/hexapdf/font_loader.rb +1 -1
  132. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  133. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  134. data/lib/hexapdf/image_loader/png.rb +1 -1
  135. data/lib/hexapdf/image_loader.rb +1 -1
  136. data/lib/hexapdf/importer.rb +1 -1
  137. data/lib/hexapdf/layout/box.rb +1 -1
  138. data/lib/hexapdf/layout/box_fitter.rb +1 -1
  139. data/lib/hexapdf/layout/column_box.rb +1 -1
  140. data/lib/hexapdf/layout/container_box.rb +1 -1
  141. data/lib/hexapdf/layout/frame.rb +1 -1
  142. data/lib/hexapdf/layout/image_box.rb +1 -1
  143. data/lib/hexapdf/layout/inline_box.rb +1 -1
  144. data/lib/hexapdf/layout/line.rb +1 -1
  145. data/lib/hexapdf/layout/list_box.rb +1 -1
  146. data/lib/hexapdf/layout/numeric_refinements.rb +1 -1
  147. data/lib/hexapdf/layout/page_style.rb +1 -1
  148. data/lib/hexapdf/layout/style.rb +7 -3
  149. data/lib/hexapdf/layout/table_box.rb +1 -1
  150. data/lib/hexapdf/layout/text_box.rb +1 -1
  151. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  152. data/lib/hexapdf/layout/text_layouter.rb +1 -1
  153. data/lib/hexapdf/layout/text_shaper.rb +1 -1
  154. data/lib/hexapdf/layout/width_from_polygon.rb +1 -1
  155. data/lib/hexapdf/layout.rb +1 -1
  156. data/lib/hexapdf/name_tree_node.rb +1 -1
  157. data/lib/hexapdf/number_tree_node.rb +1 -1
  158. data/lib/hexapdf/object.rb +1 -1
  159. data/lib/hexapdf/parser.rb +1 -1
  160. data/lib/hexapdf/pdf_array.rb +1 -1
  161. data/lib/hexapdf/rectangle.rb +1 -1
  162. data/lib/hexapdf/reference.rb +1 -1
  163. data/lib/hexapdf/revision.rb +1 -1
  164. data/lib/hexapdf/revisions.rb +1 -1
  165. data/lib/hexapdf/serializer.rb +3 -3
  166. data/lib/hexapdf/stream.rb +1 -1
  167. data/lib/hexapdf/task/dereference.rb +1 -1
  168. data/lib/hexapdf/task/import_pages.rb +185 -0
  169. data/lib/hexapdf/task/merge_acro_form.rb +1 -1
  170. data/lib/hexapdf/task/optimize.rb +1 -1
  171. data/lib/hexapdf/task/pdfa.rb +109 -2
  172. data/lib/hexapdf/task.rb +2 -1
  173. data/lib/hexapdf/test_utils.rb +1 -1
  174. data/lib/hexapdf/tokenizer.rb +1 -1
  175. data/lib/hexapdf/type/acro_form/appearance_generator.rb +1 -1
  176. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  177. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  178. data/lib/hexapdf/type/acro_form/field.rb +1 -1
  179. data/lib/hexapdf/type/acro_form/form.rb +5 -1
  180. data/lib/hexapdf/type/acro_form/java_script_actions.rb +1 -1
  181. data/lib/hexapdf/type/acro_form/signature_field.rb +1 -1
  182. data/lib/hexapdf/type/acro_form/text_field.rb +5 -3
  183. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  184. data/lib/hexapdf/type/acro_form.rb +1 -1
  185. data/lib/hexapdf/type/action.rb +1 -1
  186. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  187. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  188. data/lib/hexapdf/type/actions/launch.rb +1 -1
  189. data/lib/hexapdf/type/actions/set_ocg_state.rb +1 -1
  190. data/lib/hexapdf/type/actions/uri.rb +1 -1
  191. data/lib/hexapdf/type/actions.rb +1 -1
  192. data/lib/hexapdf/type/annotation.rb +1 -1
  193. data/lib/hexapdf/type/annotations/appearance_generator.rb +1 -1
  194. data/lib/hexapdf/type/annotations/border_effect.rb +1 -1
  195. data/lib/hexapdf/type/annotations/border_styling.rb +1 -1
  196. data/lib/hexapdf/type/annotations/circle.rb +1 -1
  197. data/lib/hexapdf/type/annotations/interior_color.rb +1 -1
  198. data/lib/hexapdf/type/annotations/line.rb +1 -1
  199. data/lib/hexapdf/type/annotations/line_ending_styling.rb +1 -1
  200. data/lib/hexapdf/type/annotations/link.rb +1 -1
  201. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  202. data/lib/hexapdf/type/annotations/polygon.rb +1 -1
  203. data/lib/hexapdf/type/annotations/polygon_polyline.rb +1 -1
  204. data/lib/hexapdf/type/annotations/polyline.rb +1 -1
  205. data/lib/hexapdf/type/annotations/square.rb +1 -1
  206. data/lib/hexapdf/type/annotations/square_circle.rb +1 -1
  207. data/lib/hexapdf/type/annotations/text.rb +1 -1
  208. data/lib/hexapdf/type/annotations/widget.rb +10 -1
  209. data/lib/hexapdf/type/annotations.rb +1 -1
  210. data/lib/hexapdf/type/catalog.rb +1 -1
  211. data/lib/hexapdf/type/cid_font.rb +1 -1
  212. data/lib/hexapdf/type/cmap.rb +1 -1
  213. data/lib/hexapdf/type/document_security_store.rb +80 -0
  214. data/lib/hexapdf/type/embedded_file.rb +1 -1
  215. data/lib/hexapdf/type/file_specification.rb +1 -1
  216. data/lib/hexapdf/type/font.rb +4 -4
  217. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  218. data/lib/hexapdf/type/font_simple.rb +1 -1
  219. data/lib/hexapdf/type/font_true_type.rb +1 -1
  220. data/lib/hexapdf/type/font_type0.rb +1 -1
  221. data/lib/hexapdf/type/font_type1.rb +1 -1
  222. data/lib/hexapdf/type/font_type3.rb +6 -1
  223. data/lib/hexapdf/type/form.rb +1 -1
  224. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  225. data/lib/hexapdf/type/icon_fit.rb +1 -1
  226. data/lib/hexapdf/type/image.rb +1 -1
  227. data/lib/hexapdf/type/info.rb +1 -1
  228. data/lib/hexapdf/type/mark_information.rb +1 -1
  229. data/lib/hexapdf/type/marked_content_reference.rb +1 -1
  230. data/lib/hexapdf/type/measure.rb +1 -1
  231. data/lib/hexapdf/type/metadata.rb +1 -1
  232. data/lib/hexapdf/type/names.rb +1 -1
  233. data/lib/hexapdf/type/namespace.rb +1 -1
  234. data/lib/hexapdf/type/object_reference.rb +1 -1
  235. data/lib/hexapdf/type/object_stream.rb +1 -1
  236. data/lib/hexapdf/type/optional_content_configuration.rb +1 -1
  237. data/lib/hexapdf/type/optional_content_group.rb +1 -1
  238. data/lib/hexapdf/type/optional_content_membership.rb +1 -1
  239. data/lib/hexapdf/type/optional_content_properties.rb +1 -1
  240. data/lib/hexapdf/type/outline.rb +1 -1
  241. data/lib/hexapdf/type/outline_item.rb +1 -1
  242. data/lib/hexapdf/type/output_intent.rb +1 -1
  243. data/lib/hexapdf/type/page.rb +12 -1
  244. data/lib/hexapdf/type/page_label.rb +1 -1
  245. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  246. data/lib/hexapdf/type/resources.rb +1 -1
  247. data/lib/hexapdf/type/struct_elem.rb +1 -1
  248. data/lib/hexapdf/type/struct_tree_root.rb +1 -1
  249. data/lib/hexapdf/type/trailer.rb +1 -1
  250. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  251. data/lib/hexapdf/type/xref_stream.rb +1 -1
  252. data/lib/hexapdf/type.rb +2 -1
  253. data/lib/hexapdf/utils/bit_field.rb +1 -1
  254. data/lib/hexapdf/utils/bit_stream.rb +1 -1
  255. data/lib/hexapdf/utils/graphics_helpers.rb +1 -1
  256. data/lib/hexapdf/utils/lru_cache.rb +1 -1
  257. data/lib/hexapdf/utils/math_helpers.rb +1 -1
  258. data/lib/hexapdf/utils/object_hash.rb +1 -1
  259. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  260. data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
  261. data/lib/hexapdf/utils.rb +1 -1
  262. data/lib/hexapdf/version.rb +2 -2
  263. data/lib/hexapdf/writer.rb +1 -1
  264. data/lib/hexapdf/xref_section.rb +1 -1
  265. data/lib/hexapdf.rb +1 -1
  266. data/test/data/pdfa/mismatching_glyph_widths_cidfont_type2.pdf +0 -0
  267. data/test/hexapdf/content/test_smart_text_extractor.rb +129 -0
  268. data/test/hexapdf/digital_signature/common.rb +19 -5
  269. data/test/hexapdf/digital_signature/signing/test_signed_data_creator.rb +29 -4
  270. data/test/hexapdf/digital_signature/test_signatures.rb +3 -3
  271. data/test/hexapdf/filter/test_brotli_decode.rb +34 -0
  272. data/test/hexapdf/font/test_true_type_wrapper.rb +2 -2
  273. data/test/hexapdf/font/true_type/table/test_directory.rb +5 -3
  274. data/test/hexapdf/font/true_type/test_builder.rb +9 -0
  275. data/test/hexapdf/font/true_type/test_font.rb +17 -3
  276. data/test/hexapdf/font/true_type/test_subsetter.rb +11 -9
  277. data/test/hexapdf/font_loader/test_from_file.rb +7 -0
  278. data/test/hexapdf/task/test_import_pages.rb +126 -0
  279. data/test/hexapdf/task/test_pdfa.rb +72 -0
  280. data/test/hexapdf/test_document.rb +13 -0
  281. data/test/hexapdf/test_serializer.rb +1 -1
  282. data/test/hexapdf/type/acro_form/test_form.rb +6 -0
  283. data/test/hexapdf/type/acro_form/test_text_field.rb +7 -1
  284. data/test/hexapdf/type/annotations/test_widget.rb +11 -0
  285. data/test/hexapdf/type/test_font_type3.rb +4 -0
  286. data/test/hexapdf/type/test_page.rb +8 -0
  287. metadata +25 -1
@@ -29,12 +29,13 @@ describe HexaPDF::Font::TrueType::Subsetter do
29
29
 
30
30
  it "doesn't use certain subset glyph IDs for performance reasons" do
31
31
  1.upto(93) {|i| @subsetter.use_glyph(i) }
32
- # glyph 0, 93 used glyph, 4 special glyphs
33
- assert_equal(1 + 93 + 4, @subsetter.instance_variable_get(:@glyph_map).size)
34
- 1.upto(12) {|i| assert_equal(i, @subsetter.subset_glyph_id(i), "id=#{i}") }
35
- 13.upto(38) {|i| assert_equal(i + 1, @subsetter.subset_glyph_id(i), "id=#{i}") }
36
- 39.upto(88) {|i| assert_equal(i + 3, @subsetter.subset_glyph_id(i), "id=#{i}") }
37
- 89.upto(93) {|i| assert_equal(i + 4, @subsetter.subset_glyph_id(i), "id=#{i}") }
32
+ # glyph 0 and 93 are used glyph, 5 special glyphs
33
+ assert_equal(1 + 93 + 5, @subsetter.instance_variable_get(:@glyph_map).size)
34
+ 1.upto(9) {|i| assert_equal(i, @subsetter.subset_glyph_id(i), "id=#{i}") }
35
+ 10.upto(11) {|i| assert_equal(i + 1, @subsetter.subset_glyph_id(i), "id=#{i}") }
36
+ 12.upto(37) {|i| assert_equal(i + 2, @subsetter.subset_glyph_id(i), "id=#{i}") }
37
+ 38.upto(87) {|i| assert_equal(i + 4, @subsetter.subset_glyph_id(i), "id=#{i}") }
38
+ 88.upto(93) {|i| assert_equal(i + 5, @subsetter.subset_glyph_id(i), "id=#{i}") }
38
39
  end
39
40
 
40
41
  it "creates the subset font file" do
@@ -54,18 +55,19 @@ describe HexaPDF::Font::TrueType::Subsetter do
54
55
  end
55
56
 
56
57
  it "correctly subsets compound glyphs" do
57
- font_file = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
58
+ font_file = "/usr/share/fonts/truetype/noto/NotoSansMono-Regular.ttf"
58
59
  skip unless File.exist?(font_file)
59
60
 
60
61
  begin
61
62
  @font = HexaPDF::Font::TrueType::Font.new(File.open(font_file))
62
63
  @subsetter = HexaPDF::Font::TrueType::Subsetter.new(@font)
63
64
 
64
- @subsetter.use_glyph(@font[:cmap].preferred_table['À'.ord])
65
+ @subsetter.use_glyph(@font[:cmap].preferred_table['ë'.ord])
65
66
  subset = HexaPDF::Font::TrueType::Font.new(StringIO.new(@subsetter.build_font))
66
67
 
67
- assert_equal(4, subset[:maxp].num_glyphs)
68
+ assert_equal(5, subset[:maxp].num_glyphs)
68
69
  assert_equal([2, 3], subset[:glyf][1].components)
70
+ assert_equal([4], subset[:glyf][3].components)
69
71
  ensure
70
72
  @font.io.close
71
73
  end
@@ -30,6 +30,13 @@ describe HexaPDF::FontLoader::FromFile do
30
30
  refute(wrapper.subset?)
31
31
  end
32
32
 
33
+ it "raises an error if the provided font does not contain TrueType outlines" do
34
+ font = HexaPDF::Font::TrueType::Font.new(File.open(@font_file, 'rb'))
35
+ font.directory.instance_variable_get(:@tables).delete('glyf')
36
+ exception = assert_raises(HexaPDF::Error) { @klass.call(@doc, font) }
37
+ assert_match(/does not contain TrueType but CFF/, exception.message)
38
+ end
39
+
33
40
  it "returns nil if the given name doesn't represent a file" do
34
41
  assert_nil(@klass.call(@doc, "Unknown"))
35
42
  end
@@ -0,0 +1,126 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'hexapdf/document'
5
+ require 'hexapdf/task/import_pages'
6
+
7
+ describe HexaPDF::Task::ImportPages do
8
+ before do
9
+ @doc = HexaPDF::Document.new
10
+ @pages = [@doc.pages.add, @doc.pages.add]
11
+ @pages[0][:Page1] = true
12
+ @pages[1][:Page2] = true
13
+
14
+ @target = HexaPDF::Document.new
15
+ end
16
+
17
+ describe "pages argument" do
18
+ it "imports all pages by default" do
19
+ @target.task(:import_pages, source: @doc)
20
+ assert_equal(2, @target.pages.count)
21
+ assert(@target.pages[0][:Page1])
22
+ assert(@target.pages[1][:Page2])
23
+ end
24
+
25
+ it "imports the provided page objects" do
26
+ @target.task(:import_pages, source: @doc, pages: @pages.reverse)
27
+ assert_equal(2, @target.pages.count)
28
+ assert(@target.pages[0][:Page2])
29
+ assert(@target.pages[1][:Page1])
30
+ end
31
+
32
+ it "imports a single page" do
33
+ @target.task(:import_pages, source: @doc, pages: 1)
34
+ assert_equal(1, @target.pages.count)
35
+ assert(@target.pages[0][:Page2])
36
+ end
37
+
38
+ it "imports a page range" do
39
+ @target.task(:import_pages, source: @doc, pages: 0..-1)
40
+ assert_equal(2, @target.pages.count)
41
+ assert(@target.pages[0][:Page1])
42
+ assert(@target.pages[1][:Page2])
43
+ end
44
+
45
+ it "imports multiple pages" do
46
+ @target.task(:import_pages, source: @doc, pages: [1, 0..-1],
47
+ ocgs: :ignore, acro_form: :ignore)
48
+ assert_equal(2, @target.pages.count)
49
+ assert(@target.pages[0][:Page2])
50
+ assert(@target.pages[1][:Page1])
51
+ end
52
+ end
53
+
54
+ it "doesn't append the pages if specified so" do
55
+ result = @target.task(:import_pages, source: @doc, append: false)
56
+ assert_equal(0, @target.pages.count)
57
+ assert_equal(2, result.size)
58
+ assert(result[0][:Page1])
59
+ assert(result[1][:Page2])
60
+ end
61
+
62
+ it "merges the AcroForm fields" do
63
+ form = @doc.acro_form(create: true)
64
+ field = form.create_text_field("Text")
65
+ field.create_widget(@doc.pages[0], Rect: [0, 0, 0, 0])
66
+ @doc.dispatch_message(:complete_objects)
67
+ @doc.validate
68
+
69
+ @target.task(:import_pages, source: @doc)
70
+ assert_equal(1, @target.acro_form.root_fields.size)
71
+ end
72
+
73
+ describe "ocgs argument" do
74
+ before do
75
+ @ocg1 = @doc.optional_content.ocg('OCG')
76
+ @ocg1.add_to_ui(path: @ocg1)
77
+ @ocg2 = @doc.optional_content.ocg('OCMD')
78
+ @ocg2.add_to_ui(path: @ocg2)
79
+ @ocg2.off!
80
+ @ocmd = @doc.optional_content.create_ocmd(@ocg2)
81
+ end
82
+
83
+ it "doesn't preserve unused ocgs" do
84
+ @target.task(:import_pages, source: @doc)
85
+ assert(@target.optional_content.ocgs.empty?)
86
+ end
87
+
88
+ it "preserves OCGs and OCMDs in content streams" do
89
+ canvas = @doc.pages[0].canvas
90
+ canvas.optional_content(@ocg1)
91
+ canvas.optional_content(@ocmd)
92
+ @target.task(:import_pages, source: @doc)
93
+ assert_equal(['OCG', 'OCMD'], @target.optional_content.ocgs.map(&:name))
94
+ assert(@target.optional_content.ocg('OCG').on?)
95
+ refute(@target.optional_content.ocg('OCMD').on?)
96
+ end
97
+
98
+ it "preserves OCGs/OCMDs associated with XObjects" do
99
+ canvas = @doc.pages[0].canvas
100
+ form = canvas.form
101
+ form[:OC] = @ocg1
102
+ canvas.xobject(form, at: [0, 0])
103
+ @target.task(:import_pages, source: @doc)
104
+ assert_equal(['OCG'], @target.optional_content.ocgs.map(&:name))
105
+ end
106
+
107
+ it "preserves OCGs/OCMDs associated with annotations" do
108
+ annot = @doc.annotations.create_line(@doc.pages[0], start_point: [0, 0], end_point: [50, 50])
109
+ annot[:OC] = @ocmd
110
+ annot.regenerate_appearance
111
+ @target.task(:import_pages, source: @doc)
112
+ assert_equal(['OCMD'], @target.optional_content.ocgs.map(&:name))
113
+ refute(@target.optional_content.ocg('OCMD').on?)
114
+ end
115
+
116
+ it "preserves the radio button group state of imported OCGs" do
117
+ @doc.pages[0].canvas.optional_content(@ocg1)
118
+ @doc.optional_content.default_configuration[:RBGroups] = [[@ocg1, @ocg2]]
119
+ @target.task(:import_pages, source: @doc)
120
+ assert_equal(['OCG'], @target.optional_content.ocgs.map {|ocg| ocg.name })
121
+ rb_groups = @target.optional_content.default_configuration[:RBGroups]
122
+ assert_equal(1, rb_groups.size)
123
+ assert_equal(['OCG'], rb_groups[0].map(&:name))
124
+ end
125
+ end
126
+ end
@@ -38,4 +38,76 @@ describe HexaPDF::Task::PDFA do
38
38
  assert_equal('sRGB2014.icc', oi[:Info])
39
39
  assert_kind_of(HexaPDF::Stream, oi[:DestOutputProfile])
40
40
  end
41
+
42
+ it "applies fixes based on the optional fixes argument" do
43
+ file = File.join(TEST_DATA_DIR, 'pdfa', 'mismatching_glyph_widths_cidfont_type2.pdf')
44
+
45
+ # Document loaded -> all fixes applied by default
46
+ doc = HexaPDF::Document.open(file)
47
+ doc.task(:pdfa, level: '3b')
48
+ doc.dispatch_message(:complete_objects)
49
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
50
+ assert_equal(348, font[:hmtx][1].advance_width)
51
+
52
+ # Not loaded -> fixes for loaded documents excluded
53
+ doc = HexaPDF::Document.open(file)
54
+ created = HexaPDF::Document.new
55
+ created.pages << created.import(doc.pages[0])
56
+ created.task(:pdfa, level: '3b')
57
+ created.dispatch_message(:complete_objects)
58
+ font_file = created.pages[0].resources.font(:F1).descendant_font[:FontDescriptor][:FontFile2]
59
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font_file.stream))
60
+ assert_equal(346, font[:hmtx][1].advance_width)
61
+
62
+ # Explicitly specify to apply all fixes
63
+ created.task(:pdfa, level: '3b', fixes: :all)
64
+ created.dispatch_message(:complete_objects)
65
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font_file.stream))
66
+ assert_equal(348, font[:hmtx][1].advance_width)
67
+ end
68
+
69
+ describe "fix_glyph_widths" do
70
+ before do
71
+ @file = File.join(TEST_DATA_DIR, 'pdfa', 'mismatching_glyph_widths_cidfont_type2.pdf')
72
+ end
73
+
74
+ it "fixes glyph width inconsistencies between the font and the font dictionary" do
75
+ doc = HexaPDF::Document.open(@file)
76
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
77
+
78
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
79
+ assert_equal(346, font[:hmtx][1].advance_width)
80
+ doc.dispatch_message(:complete_objects)
81
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
82
+ assert_equal(348, font[:hmtx][1].advance_width)
83
+ end
84
+
85
+ it "works if there is an explicit CIDToGIDMap stream" do
86
+ doc = HexaPDF::Document.open(@file)
87
+ doc.object(5)[:CIDToGIDMap] = doc.wrap({}, stream: [0, 1, 2, 3, 4].pack('n*'))
88
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
89
+ doc.dispatch_message(:complete_objects)
90
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
91
+ assert_equal(348, font[:hmtx][1].advance_width)
92
+ end
93
+
94
+ it "processes annotation appearances" do
95
+ doc = HexaPDF::Document.new
96
+ doc.pages.add
97
+ doc.annotations.create_rectangle(doc.pages[0], 20, 20, 20, 60).
98
+ regenerate_appearance
99
+ form = doc.pages[0][:Annots][0].create_appearance
100
+ form.canvas.
101
+ font(File.join(TEST_DATA_DIR, 'fonts', 'Ubuntu-Title.ttf'), size: 10).
102
+ text('Hola', at: [0, 0])
103
+
104
+ doc = HexaPDF::Document.new(io: StringIO.new(doc.write_to_string))
105
+ font = doc.pages[0][:Annots][0].appearance.resources.font(:F1).descendant_font
106
+ font[:W][1][0] = 10
107
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
108
+ doc.dispatch_message(:complete_objects)
109
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font[:FontDescriptor][:FontFile2].stream))
110
+ assert_equal(10, font[:hmtx][1].advance_width)
111
+ end
112
+ end
41
113
  end
@@ -286,6 +286,13 @@ describe HexaPDF::Document do
286
286
  assert_equal({a: {b: 10}}, @doc.unwrap(value))
287
287
  end
288
288
 
289
+ it "doesn't unwrap PDF stream objects" do
290
+ stream = @io_doc.wrap({a: HexaPDF::Reference.new(1, 0)}, stream: 'data')
291
+ result = @io_doc.unwrap(stream)
292
+ assert_same(stream, result)
293
+ assert_equal(HexaPDF::Reference.new(1, 0), result.value[:a])
294
+ end
295
+
289
296
  it "fails to unwrap recursive structures" do
290
297
  obj1 = @doc.add({})
291
298
  obj2 = @doc.add({})
@@ -413,6 +420,12 @@ describe HexaPDF::Document do
413
420
  assert(@doc.trailer.info.key?(:Author))
414
421
  end
415
422
 
423
+ it "works even in case of invalid PDFs with a non-dictionary value for trailer.info" do
424
+ @doc.trailer[:Info] = :something_else
425
+ @doc.write(StringIO.new)
426
+ assert(@doc.trailer.info.key?(:ModDate))
427
+ end
428
+
416
429
  it "it doesn't optimize the file by default" do
417
430
  io = StringIO.new(''.b)
418
431
  @io_doc.write(io)
@@ -104,7 +104,7 @@ describe HexaPDF::Serializer do
104
104
 
105
105
  it "serializes strings" do
106
106
  assert_serialized("(Hallo)", "Hallo")
107
- assert_serialized("(Hallo\\r\n\t\\(\\)\\\\)", "Hallo\r\n\t()\\")
107
+ assert_serialized("(Hallo\\r\\n\t\\(\\)\\\\)", "Hallo\r\n\t()\\")
108
108
  assert_serialized("(\xFE\xFF\x00H\x00a\x00l\x00\f\x00\b\x00\\()".b, "Hal\f\b(")
109
109
  end
110
110
 
@@ -322,6 +322,12 @@ describe HexaPDF::Type::AcroForm::Form do
322
322
  assert_equal("value", field.field_value)
323
323
  end
324
324
 
325
+ it "ignores values for password fields" do
326
+ field = @acro_form.create_password_field('test')
327
+ @acro_form.fill("test" => "value")
328
+ assert_nil(field.field_value)
329
+ end
330
+
325
331
  it "works for radio buttons" do
326
332
  field = @acro_form.create_radio_button("test")
327
333
  field.create_widget(@doc.pages.add, value: :name)
@@ -272,10 +272,16 @@ describe HexaPDF::Type::AcroForm::TextField do
272
272
 
273
273
  it "checks that the field value has a valid type" do
274
274
  assert(@field.validate) # no field value
275
- @field[:V] = :sym
275
+ @field[:V] = [5]
276
276
  refute(@field.validate)
277
277
  end
278
278
 
279
+ it "converts an invalid Symbol value to string" do
280
+ @field[:V] = :sym
281
+ assert(@field.validate)
282
+ assert_equal('sym', @field[:V])
283
+ end
284
+
279
285
  it "checks the field value against /MaxLen" do
280
286
  @field[:V] = 'Test'
281
287
  assert(@field.validate)
@@ -188,4 +188,15 @@ describe HexaPDF::Type::Annotations::Widget do
188
188
  end
189
189
  end
190
190
  end
191
+
192
+ describe "perform_validation" do
193
+ it "validates the widget as form field if they are the same" do
194
+ @widget[:Rect] = [0, 0, 0, 0]
195
+ @widget[:FT] = :Tx
196
+ @widget[:T] = 'field'
197
+ @widget[:V] = :Sym
198
+ assert(@widget.validate)
199
+ assert_equal('Sym', @widget[:V]) # this auto-correct is part of TextField
200
+ end
201
+ end
191
202
  end
@@ -28,6 +28,10 @@ describe HexaPDF::Type::FontType3 do
28
28
  assert_equal(0.002, @font.glyph_scaling_factor)
29
29
  end
30
30
 
31
+ it "always returns true for embedded?" do
32
+ assert(@font.embedded?)
33
+ end
34
+
31
35
  describe "validation" do
32
36
  it "works for valid objects" do
33
37
  assert(@font.validate)
@@ -416,6 +416,14 @@ describe HexaPDF::Type::Page do
416
416
  end
417
417
  end
418
418
 
419
+ describe "extract_text" do
420
+ it "extracts the layouted text from the page" do
421
+ page = @doc.pages.add
422
+ page.canvas.font('Helvetica', size: 10).text('Hello', at: [10, 10])
423
+ assert_equal('Hello', page.extract_text(line_tolerance_factor: 5))
424
+ end
425
+ end
426
+
419
427
  describe "index" do
420
428
  it "returns the index of the page in the page tree" do
421
429
  kid1 = @doc.add({Type: :Pages, Parent: @doc.pages.root, Count: 4})
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
@@ -77,6 +77,20 @@ dependencies:
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
79
  version: 3.1.2
80
+ - !ruby/object:Gem::Dependency
81
+ name: brotli
82
+ requirement: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - "~>"
85
+ - !ruby/object:Gem::Version
86
+ version: '0.7'
87
+ type: :development
88
+ prerelease: false
89
+ version_requirements: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - "~>"
92
+ - !ruby/object:Gem::Version
93
+ version: '0.7'
80
94
  - !ruby/object:Gem::Dependency
81
95
  name: kramdown
82
96
  requirement: !ruby/object:Gem::Requirement
@@ -334,6 +348,8 @@ files:
334
348
  - examples/029-composer_fallback_fonts.rb
335
349
  - examples/030-pdfa.rb
336
350
  - examples/031-acro_form_java_script.rb
351
+ - examples/032-acro_form_list_and_fill.rb
352
+ - examples/033-text_extraction.rb
337
353
  - examples/emoji-smile.png
338
354
  - examples/emoji-wink.png
339
355
  - examples/machupicchu.jpg
@@ -370,6 +386,7 @@ files:
370
386
  - lib/hexapdf/content/operator.rb
371
387
  - lib/hexapdf/content/parser.rb
372
388
  - lib/hexapdf/content/processor.rb
389
+ - lib/hexapdf/content/smart_text_extractor.rb
373
390
  - lib/hexapdf/content/transformation_matrix.rb
374
391
  - lib/hexapdf/data_dir.rb
375
392
  - lib/hexapdf/dictionary.rb
@@ -408,6 +425,7 @@ files:
408
425
  - lib/hexapdf/filter.rb
409
426
  - lib/hexapdf/filter/ascii85_decode.rb
410
427
  - lib/hexapdf/filter/ascii_hex_decode.rb
428
+ - lib/hexapdf/filter/brotli_decode.rb
411
429
  - lib/hexapdf/filter/crypt.rb
412
430
  - lib/hexapdf/filter/encryption.rb
413
431
  - lib/hexapdf/filter/flate_decode.rb
@@ -498,6 +516,7 @@ files:
498
516
  - lib/hexapdf/stream.rb
499
517
  - lib/hexapdf/task.rb
500
518
  - lib/hexapdf/task/dereference.rb
519
+ - lib/hexapdf/task/import_pages.rb
501
520
  - lib/hexapdf/task/merge_acro_form.rb
502
521
  - lib/hexapdf/task/optimize.rb
503
522
  - lib/hexapdf/task/pdfa.rb
@@ -542,6 +561,7 @@ files:
542
561
  - lib/hexapdf/type/catalog.rb
543
562
  - lib/hexapdf/type/cid_font.rb
544
563
  - lib/hexapdf/type/cmap.rb
564
+ - lib/hexapdf/type/document_security_store.rb
545
565
  - lib/hexapdf/type/embedded_file.rb
546
566
  - lib/hexapdf/type/file_specification.rb
547
567
  - lib/hexapdf/type/font.rb
@@ -641,6 +661,7 @@ files:
641
661
  - test/data/images/truecolour-srgb-8bit.png
642
662
  - test/data/images/ycck.jpg
643
663
  - test/data/minimal.pdf
664
+ - test/data/pdfa/mismatching_glyph_widths_cidfont_type2.pdf
644
665
  - test/data/standard-security-handler/README
645
666
  - test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf
646
667
  - test/data/standard-security-handler/bothpwd-aes-256bit-V5-R5.pdf
@@ -678,6 +699,7 @@ files:
678
699
  - test/hexapdf/content/test_operator.rb
679
700
  - test/hexapdf/content/test_parser.rb
680
701
  - test/hexapdf/content/test_processor.rb
702
+ - test/hexapdf/content/test_smart_text_extractor.rb
681
703
  - test/hexapdf/content/test_transformation_matrix.rb
682
704
  - test/hexapdf/digital_signature/common.rb
683
705
  - test/hexapdf/digital_signature/signing/test_default_handler.rb
@@ -711,6 +733,7 @@ files:
711
733
  - test/hexapdf/filter/common.rb
712
734
  - test/hexapdf/filter/test_ascii85_decode.rb
713
735
  - test/hexapdf/filter/test_ascii_hex_decode.rb
736
+ - test/hexapdf/filter/test_brotli_decode.rb
714
737
  - test/hexapdf/filter/test_crypt.rb
715
738
  - test/hexapdf/filter/test_encryption.rb
716
739
  - test/hexapdf/filter/test_flate_decode.rb
@@ -778,6 +801,7 @@ files:
778
801
  - test/hexapdf/layout/test_text_shaper.rb
779
802
  - test/hexapdf/layout/test_width_from_polygon.rb
780
803
  - test/hexapdf/task/test_dereference.rb
804
+ - test/hexapdf/task/test_import_pages.rb
781
805
  - test/hexapdf/task/test_merge_acro_form.rb
782
806
  - test/hexapdf/task/test_optimize.rb
783
807
  - test/hexapdf/task/test_pdfa.rb