hexapdf 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +79 -1
  3. data/Rakefile +1 -1
  4. data/lib/hexapdf/cli/form.rb +30 -3
  5. data/lib/hexapdf/cli/inspect.rb +18 -5
  6. data/lib/hexapdf/cli/modify.rb +23 -3
  7. data/lib/hexapdf/composer.rb +24 -2
  8. data/lib/hexapdf/dictionary_fields.rb +1 -1
  9. data/lib/hexapdf/document/destinations.rb +396 -0
  10. data/lib/hexapdf/document.rb +38 -89
  11. data/lib/hexapdf/encryption/aes.rb +9 -5
  12. data/lib/hexapdf/layout/frame.rb +8 -9
  13. data/lib/hexapdf/layout/style.rb +280 -7
  14. data/lib/hexapdf/layout/text_box.rb +10 -2
  15. data/lib/hexapdf/layout/text_layouter.rb +6 -1
  16. data/lib/hexapdf/revision.rb +8 -1
  17. data/lib/hexapdf/revisions.rb +151 -50
  18. data/lib/hexapdf/task/optimize.rb +21 -11
  19. data/lib/hexapdf/type/acro_form/form.rb +11 -5
  20. data/lib/hexapdf/type/acro_form/text_field.rb +8 -0
  21. data/lib/hexapdf/type/catalog.rb +9 -1
  22. data/lib/hexapdf/type/image.rb +47 -3
  23. data/lib/hexapdf/type/names.rb +13 -0
  24. data/lib/hexapdf/type/xref_stream.rb +2 -1
  25. data/lib/hexapdf/utils/sorted_tree_node.rb +3 -1
  26. data/lib/hexapdf/version.rb +1 -1
  27. data/lib/hexapdf/writer.rb +15 -2
  28. data/test/hexapdf/document/test_destinations.rb +338 -0
  29. data/test/hexapdf/encryption/test_aes.rb +8 -0
  30. data/test/hexapdf/encryption/test_security_handler.rb +2 -2
  31. data/test/hexapdf/layout/test_frame.rb +15 -1
  32. data/test/hexapdf/layout/test_text_box.rb +16 -0
  33. data/test/hexapdf/layout/test_text_layouter.rb +7 -0
  34. data/test/hexapdf/task/test_optimize.rb +17 -4
  35. data/test/hexapdf/test_composer.rb +24 -1
  36. data/test/hexapdf/test_dictionary_fields.rb +1 -1
  37. data/test/hexapdf/test_document.rb +30 -133
  38. data/test/hexapdf/test_parser.rb +1 -1
  39. data/test/hexapdf/test_revision.rb +14 -0
  40. data/test/hexapdf/test_revisions.rb +137 -29
  41. data/test/hexapdf/test_writer.rb +43 -14
  42. data/test/hexapdf/type/acro_form/test_form.rb +2 -1
  43. data/test/hexapdf/type/acro_form/test_text_field.rb +17 -0
  44. data/test/hexapdf/type/test_catalog.rb +8 -0
  45. data/test/hexapdf/type/test_image.rb +45 -9
  46. data/test/hexapdf/type/test_names.rb +20 -0
  47. data/test/hexapdf/type/test_xref_stream.rb +2 -1
  48. data/test/hexapdf/utils/test_sorted_tree_node.rb +11 -1
  49. metadata +6 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b666a69330b87a3ad7a5c937a1113a66a3b5ed3d0cb4f4478ee24f97bed0e411
4
- data.tar.gz: 460c3cd90b8f76d3e4d32fdd6f1bc8fd71ddfef5d18799c6a207f562773371f8
3
+ metadata.gz: d6f11a38da2472389966c1d103b86e46ba8cd858d2927b1a65f441f56e2e7dd5
4
+ data.tar.gz: 74ff92dcb6ede9f137303afc7c8b9d08e0baa42b0ab0b7d4436777e6db617ea4
5
5
  SHA512:
6
- metadata.gz: a9135019912d8c3d1e282797054dfbc99c028f26aa28f8412a138ab3d8d67124c4e9c52ad104eeb4289487e47dd850e3ff4211b5e928cdb0a8aab2d8ea773930
7
- data.tar.gz: 070d0facdb6576fb6a0377a7139609ee648a580be879dabfd10e943bd2f62d66d9d5077058edcecf9ca3eb4e7969a2427c7a4af26bbb4d8f734e238f7022c7cd
6
+ metadata.gz: 468e0a5a24e06574b4a802408daafbdb59b3a1dc460ad2b6bf9f543088d1210d8295b843847819a872436ea4c380df85240822e28b548dd88b55f74b30ff8ab3
7
+ data.tar.gz: 29e95def1e7ccf0d55bca3cf5ad4d95a070d5910a0e1b8645d2884e0e5022367c63e14e2df3ecc0bac7cdfc329d88698c01060dc8005e1c1025a0c8337367afc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,81 @@
1
+ ## 0.23.0 - 2022-05-26
2
+
3
+ ### Added
4
+
5
+ - [HexaPDF::Composer#create_stamp] for creating a form Xobject
6
+ - [HexaPDF::Revision#reset_objects] for deleting all live loaded and added
7
+ objects
8
+ - Support for removing or flattening annotations to the `hexapdf modify` command
9
+ - Option to CLI command `hexapdf form` to allow generation of a template file
10
+ - Support for centering a floating box in [HexaPDF::Layout::Frame]
11
+ - [HexaPDF::Type::Catalog#names] for easier access to the name dictionary
12
+ - [HexaPDF::Type::Names#destinations] for easier access to the destinations name
13
+ tree
14
+ - [HexaPDF::Document::Destinations], accessible via
15
+ [HexaPDF::Document#destinations], as convenience interface for working with
16
+ destination arrays
17
+
18
+ ### Changed
19
+
20
+ - **Breaking change**: Refactored the [HexaPDF::Document] interface for working
21
+ with objects and moved parts into [HexaPDF::Revisions]
22
+ - **Breaking change**: [HexaPDF::Layout::TextBox] to use whole available width
23
+ when aligning to the center or right
24
+ - **Breaking change**: [HexaPDF::Layout::TextBox] to use whole available height
25
+ when vertically aligning to the center or bottom
26
+ - CLI command `hexapdf inspect` to show the type of revisions, as well as the
27
+ number of objects per revision
28
+ - [HexaPDF::Task::Optimize] to allow skipping invalid content stream operations
29
+ - [HexaPDF::Composer#image] to allow using a form xobject in place of the image
30
+
31
+ ### Fixed
32
+
33
+ - [HexaPDF::Writer#write] to write modified objects into the correct revision
34
+ - [HexaPDF::Revisions::from_io] to correctly handle hybrid-reference files
35
+ - [HexaPDF::Writer] to assign a valid object number to a created cross-reference
36
+ stream in all cases
37
+ * [HexaPDF::Type::AcroForm::TextField] to validate the existence of a /MaxLen
38
+ value for comb text fields
39
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to check for the existence
40
+ of /MaxLen when setting a value for a comb text field
41
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to check the value against
42
+ /MaxLen
43
+ * [HexaPDF::Layout::TextLayouter#fit] to not use style valign when doing
44
+ variable width layouting
45
+ * [HexaPDF::Utils::SortedTreeNode#find_entry] to work in case of a node without
46
+ a container name or kids key
47
+ * CLI command `hexapdf form` to allow setting array values when using a template
48
+ * CLI command `hexapdf form` to allow setting file select fields
49
+
50
+
51
+ ## 0.22.0 - 2022-03-26
52
+
53
+ ### Added
54
+
55
+ - Support for writing images with an ICCBased color space
56
+ - Support for writing images with soft masks
57
+
58
+ ### Changed
59
+
60
+ - CLI command `hexapdf form` to show a warning when working with a file
61
+ containing an XFA form
62
+
63
+ ### Fixed
64
+
65
+ - [HexaPDF::Type::AcroForm::Form#field_by_name] to work correctly when field
66
+ name parts are UTF-16BE encoded
67
+ - `hexapdf inspect` command 'revision' to correctly detect the end of revisions
68
+ - [HexaPDF::DictionaryFields::StringConverter] to use correct method name
69
+ `HexaPDF::Document#config`
70
+
71
+
72
+ ## 0.21.1 - 2022-03-12
73
+
74
+ ### Fixed
75
+
76
+ - Handling of invalid AES encrypted files where the padding is missing
77
+
78
+
1
79
  ## 0.21.0 - 2022-03-04
2
80
 
3
81
  ### Added
@@ -246,7 +324,7 @@
246
324
 
247
325
  ## 0.16.0 - 2021-09-28
248
326
 
249
- ## Added
327
+ ### Added
250
328
 
251
329
  * Support for RGB color values of the form "RGB" in addition to "RRGGBB" and for
252
330
  CSS color module level 3 color names
data/Rakefile CHANGED
@@ -49,7 +49,7 @@ namespace :dev do
49
49
  task :test_all do
50
50
  versions = `rbenv versions --bare | grep -i 2.[567]\\\\\\|3.`.split("\n")
51
51
  versions.each do |version|
52
- sh "rbenv shell #{version} &>/dev/null && rake test"
52
+ sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
54
54
  puts "Looks okay? (enter to continue, Ctrl-c to abort)"
55
55
  $stdin.gets
@@ -70,6 +70,9 @@ module HexaPDF
70
70
  @template = template
71
71
  @fill = true
72
72
  end
73
+ options.on('--generate-template', 'Print a template for use with --template') do
74
+ @generate_template = true
75
+ end
73
76
  options.on('--flatten', 'Flatten the form fields') do
74
77
  @flatten = true
75
78
  end
@@ -85,6 +88,7 @@ module HexaPDF
85
88
  @password = nil
86
89
  @fill = false
87
90
  @flatten = false
91
+ @generate_template = false
88
92
  @template = nil
89
93
  @need_appearances = nil
90
94
  @incremental = true
@@ -97,6 +101,10 @@ module HexaPDF
97
101
  end
98
102
  with_document(in_file, password: @password, out_file: out_file,
99
103
  incremental: @incremental) do |doc|
104
+ if doc.acro_form[:XFA]
105
+ $stderr.puts "Warning: Unsupported XFA form detected, some things may not work correctly"
106
+ end
107
+
100
108
  if !doc.acro_form
101
109
  raise "This PDF doesn't contain an interactive form"
102
110
  elsif out_file
@@ -113,6 +121,15 @@ module HexaPDF
113
121
  doc.catalog.delete(:AcroForm)
114
122
  doc.delete(doc.acro_form)
115
123
  end
124
+ elsif @generate_template
125
+ unsupported_fields = [:signature_field, :password_field]
126
+ each_field(doc) do |_, _, field, _|
127
+ next if unsupported_fields.include?(field.concrete_field_type)
128
+ name = field.full_field_name.gsub(':', "\\:")
129
+ Array(field.field_value).each do |val|
130
+ puts "#{name}: #{val.to_s.gsub(/(\r|\r\n|\n)/, '\1 ')}"
131
+ end
132
+ end
116
133
  else
117
134
  list_form_fields(doc)
118
135
  end
@@ -216,8 +233,16 @@ module HexaPDF
216
233
  field_name = scanner.scan(/(\\:|[^:])*?:/)
217
234
  break unless field_name
218
235
  field_name.gsub!(/\\:/, ':')
236
+ field_name.chop!
219
237
  field_value = scanner.scan(/.*?(?=^\S|\z)/m)
220
- data[field_name.chop] = field_value.strip.gsub(/^\s*/, '') if field_value
238
+ next unless field_value
239
+ field_value = field_value.strip.gsub(/^\s*/, '')
240
+ if data.key?(field_name)
241
+ data[field_name] = [data[field_name]] unless data[field_name].kind_of?(Array)
242
+ data[field_name] << field_value
243
+ else
244
+ data[field_name] = field_value
245
+ end
221
246
  end
222
247
  if !scanner.eos? && command_parser.verbosity_warning?
223
248
  $stderr.puts "Warning: Some template could not be parsed"
@@ -228,8 +253,8 @@ module HexaPDF
228
253
  # Applies the given value to the field.
229
254
  def apply_field_value(field, value)
230
255
  case field.concrete_field_type
231
- when :single_line_text_field, :multiline_text_field, :comb_text_field, :combo_box,
232
- :list_box, :editable_combo_box
256
+ when :single_line_text_field, :multiline_text_field, :comb_text_field, :file_select_field,
257
+ :combo_box, :list_box, :editable_combo_box
233
258
  field.field_value = value
234
259
  when :check_box
235
260
  field.field_value = case value
@@ -245,6 +270,8 @@ module HexaPDF
245
270
  else
246
271
  raise "Field type #{field.concrete_field_type} not yet supported"
247
272
  end
273
+ rescue
274
+ raise "Error while setting '#{field.full_field_name}': #{$!.message}"
248
275
  end
249
276
 
250
277
  # Iterates over all non-push button fields in page order. If a field appears on multiple
@@ -229,10 +229,19 @@ module HexaPDF
229
229
  end
230
230
  IO.copy_stream(@doc.revisions.parser.io, $stdout, length, 0)
231
231
  else
232
- puts "Document has #{@doc.revisions.size} revision#{@doc.revisions.size == 1 ? '' : 's'}"
233
- revision_information do |_, index, count, signature, end_offset|
232
+ puts "Document has #{@doc.revisions.count} revision#{@doc.revisions.count == 1 ? '' : 's'}"
233
+ revision_information do |rev, index, count, signature, end_offset|
234
+ type = if rev.trailer[:XRefStm]
235
+ "xref table + stream"
236
+ elsif rev.trailer[:Type] == :XRef
237
+ "xref stream"
238
+ else
239
+ "xref table"
240
+ end
234
241
  puts "Revision #{index + 1}"
242
+ puts " Type : #{type}"
235
243
  puts " Objects : #{count}"
244
+ puts " Size : #{rev.trailer[:Size]}"
236
245
  puts " Signed : yes" if signature
237
246
  puts " Byte range: 0-#{end_offset}"
238
247
  end
@@ -342,13 +351,17 @@ module HexaPDF
342
351
  end_index = sig[:ByteRange][-2] + sig[:ByteRange][-1]
343
352
  else
344
353
  io.seek(startxrefs[index], IO::SEEK_SET)
354
+ buffer = ''.b
345
355
  while io.pos < startxrefs[index + 1]
346
- if io.gets =~ /^\s*%%EOF\s*$/
347
- end_index = io.pos
356
+ buffer << io.read(1_000)
357
+ if (buffer_index = buffer.index(/(?:\n|\r\n?)\s*%%EOF\s*(?:\n|\r\n?)/))
358
+ end_index = io.pos - buffer.size + buffer_index + $~[0].size
359
+ break
348
360
  end
361
+ buffer = buffer[-20..-1]
349
362
  end
350
363
  end
351
- yield(rev, index, rev.next_free_oid - 1, sig, end_index)
364
+ yield(rev, index, rev.each.count, sig, end_index)
352
365
  end
353
366
  end
354
367
 
@@ -53,14 +53,15 @@ module HexaPDF
53
53
  super('modify', takes_commands: false)
54
54
  short_desc("Modify a PDF file")
55
55
  long_desc(<<~EOF)
56
- This command modifies a PDF file. It can be used to select pages that should appear in
57
- the output file and/or rotate them. The output file can also be encrypted/decrypted and
58
- optimized in various ways.
56
+ This command modifies a PDF file. It can be used, for example, to select pages that should
57
+ appear in the output file and/or rotate them. The output file can also be
58
+ encrypted/decrypted and optimized in various ways.
59
59
  EOF
60
60
 
61
61
  @password = nil
62
62
  @pages = '1-e'
63
63
  @embed_files = []
64
+ @annotation_mode = nil
64
65
 
65
66
  options.on("--password PASSWORD", "-p", String,
66
67
  "The password for decryption. Use - for reading from standard input.") do |pwd|
@@ -74,6 +75,10 @@ module HexaPDF
74
75
  "used multiple times)") do |file|
75
76
  @embed_files << file
76
77
  end
78
+ options.on("--annotations MODE", [:remove, :flatten], "Handling of annotations (either " \
79
+ "remove or flatten)") do |mode|
80
+ @annotation_mode = mode
81
+ end
77
82
  define_optimization_options
78
83
  define_encryption_options
79
84
  end
@@ -82,6 +87,7 @@ module HexaPDF
82
87
  maybe_raise_on_existing_file(out_file)
83
88
  with_document(in_file, password: @password, out_file: out_file) do |doc|
84
89
  arrange_pages(doc) unless @pages == '1-e'
90
+ handle_annotations(doc)
85
91
  @embed_files.each {|file| doc.files.add(file, embed: true) }
86
92
  apply_encryption_options(doc)
87
93
  apply_optimization_options(doc)
@@ -109,6 +115,20 @@ module HexaPDF
109
115
  doc.pages.add unless doc.pages.count > 0
110
116
  end
111
117
 
118
+ # Handles the annotations of all selected pages by doing nothing, removing them or flattening
119
+ # them.
120
+ def handle_annotations(doc)
121
+ return unless @annotation_mode
122
+
123
+ doc.pages.each do |page|
124
+ if @annotation_mode == :remove
125
+ page.delete(:Annots)
126
+ else
127
+ page.flatten_annotations
128
+ end
129
+ end
130
+ end
131
+
112
132
  end
113
133
 
114
134
  end
@@ -313,7 +313,8 @@ module HexaPDF
313
313
 
314
314
  # Draws the given image at the current position.
315
315
  #
316
- # The +file+ argument can be anything that is accepted by HexaPDF::Document::Images#add.
316
+ # The +file+ argument can be anything that is accepted by HexaPDF::Document::Images#add or a
317
+ # HexaPDF::Type::Form object.
317
318
  #
318
319
  # See #text for details on +width+, +height+, +style+ and +style_properties+.
319
320
  #
@@ -324,7 +325,7 @@ module HexaPDF
324
325
  # composer.image(machu_picchu, height: 30)
325
326
  def image(file, width: 0, height: 0, style: nil, **style_properties)
326
327
  style = retrieve_style(style, style_properties)
327
- image = document.images.add(file)
328
+ image = file.kind_of?(HexaPDF::Stream) ? file : document.images.add(file)
328
329
  draw_box(Layout::ImageBox.new(image, width: width, height: height, style: style))
329
330
  end
330
331
 
@@ -361,6 +362,27 @@ module HexaPDF
361
362
  end
362
363
  end
363
364
 
365
+ # Creates a stamp (Form XObject) which can be used like an image multiple times on a single page
366
+ # or on multiple pages.
367
+ #
368
+ # The width and the height of the stamp need to be set (frame.width/height or
369
+ # page.box.width/height might be good choices).
370
+ #
371
+ # Examples:
372
+ #
373
+ # #>pdf-composer
374
+ # stamp = composer.create_stamp(50, 50) do |canvas|
375
+ # canvas.fill_color("red").line_width(5).
376
+ # rectangle(10, 10, 30, 30).fill_stroke
377
+ # end
378
+ # composer.image(stamp, width: 20, height: 20)
379
+ # composer.image(stamp, width: 50)
380
+ def create_stamp(width, height) # :yield: canvas
381
+ stamp = @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, width, height]})
382
+ yield(stamp.canvas) if block_given?
383
+ stamp
384
+ end
385
+
364
386
  private
365
387
 
366
388
  # Creates the frame into which boxes are laid out when a new page is created.
@@ -241,7 +241,7 @@ module HexaPDF
241
241
  if str.valid_encoding?
242
242
  str.encode!(Encoding::UTF_8)
243
243
  else
244
- document.configuration['document.on_invalid_string'].call(str)
244
+ document.config['document.on_invalid_string'].call(str)
245
245
  end
246
246
  else
247
247
  Utils::PDFDocEncoding.convert_to_utf8(str)