hexapdf 0.21.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +79 -1
  3. data/Rakefile +1 -1
  4. data/lib/hexapdf/cli/form.rb +30 -3
  5. data/lib/hexapdf/cli/inspect.rb +18 -5
  6. data/lib/hexapdf/cli/modify.rb +23 -3
  7. data/lib/hexapdf/composer.rb +24 -2
  8. data/lib/hexapdf/dictionary_fields.rb +1 -1
  9. data/lib/hexapdf/document/destinations.rb +396 -0
  10. data/lib/hexapdf/document.rb +38 -89
  11. data/lib/hexapdf/encryption/aes.rb +9 -5
  12. data/lib/hexapdf/layout/frame.rb +8 -9
  13. data/lib/hexapdf/layout/style.rb +280 -7
  14. data/lib/hexapdf/layout/text_box.rb +10 -2
  15. data/lib/hexapdf/layout/text_layouter.rb +6 -1
  16. data/lib/hexapdf/revision.rb +8 -1
  17. data/lib/hexapdf/revisions.rb +151 -50
  18. data/lib/hexapdf/task/optimize.rb +21 -11
  19. data/lib/hexapdf/type/acro_form/form.rb +11 -5
  20. data/lib/hexapdf/type/acro_form/text_field.rb +8 -0
  21. data/lib/hexapdf/type/catalog.rb +9 -1
  22. data/lib/hexapdf/type/image.rb +47 -3
  23. data/lib/hexapdf/type/names.rb +13 -0
  24. data/lib/hexapdf/type/xref_stream.rb +2 -1
  25. data/lib/hexapdf/utils/sorted_tree_node.rb +3 -1
  26. data/lib/hexapdf/version.rb +1 -1
  27. data/lib/hexapdf/writer.rb +15 -2
  28. data/test/hexapdf/document/test_destinations.rb +338 -0
  29. data/test/hexapdf/encryption/test_aes.rb +8 -0
  30. data/test/hexapdf/encryption/test_security_handler.rb +2 -2
  31. data/test/hexapdf/layout/test_frame.rb +15 -1
  32. data/test/hexapdf/layout/test_text_box.rb +16 -0
  33. data/test/hexapdf/layout/test_text_layouter.rb +7 -0
  34. data/test/hexapdf/task/test_optimize.rb +17 -4
  35. data/test/hexapdf/test_composer.rb +24 -1
  36. data/test/hexapdf/test_dictionary_fields.rb +1 -1
  37. data/test/hexapdf/test_document.rb +30 -133
  38. data/test/hexapdf/test_parser.rb +1 -1
  39. data/test/hexapdf/test_revision.rb +14 -0
  40. data/test/hexapdf/test_revisions.rb +137 -29
  41. data/test/hexapdf/test_writer.rb +43 -14
  42. data/test/hexapdf/type/acro_form/test_form.rb +2 -1
  43. data/test/hexapdf/type/acro_form/test_text_field.rb +17 -0
  44. data/test/hexapdf/type/test_catalog.rb +8 -0
  45. data/test/hexapdf/type/test_image.rb +45 -9
  46. data/test/hexapdf/type/test_names.rb +20 -0
  47. data/test/hexapdf/type/test_xref_stream.rb +2 -1
  48. data/test/hexapdf/utils/test_sorted_tree_node.rb +11 -1
  49. metadata +6 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b666a69330b87a3ad7a5c937a1113a66a3b5ed3d0cb4f4478ee24f97bed0e411
4
- data.tar.gz: 460c3cd90b8f76d3e4d32fdd6f1bc8fd71ddfef5d18799c6a207f562773371f8
3
+ metadata.gz: d6f11a38da2472389966c1d103b86e46ba8cd858d2927b1a65f441f56e2e7dd5
4
+ data.tar.gz: 74ff92dcb6ede9f137303afc7c8b9d08e0baa42b0ab0b7d4436777e6db617ea4
5
5
  SHA512:
6
- metadata.gz: a9135019912d8c3d1e282797054dfbc99c028f26aa28f8412a138ab3d8d67124c4e9c52ad104eeb4289487e47dd850e3ff4211b5e928cdb0a8aab2d8ea773930
7
- data.tar.gz: 070d0facdb6576fb6a0377a7139609ee648a580be879dabfd10e943bd2f62d66d9d5077058edcecf9ca3eb4e7969a2427c7a4af26bbb4d8f734e238f7022c7cd
6
+ metadata.gz: 468e0a5a24e06574b4a802408daafbdb59b3a1dc460ad2b6bf9f543088d1210d8295b843847819a872436ea4c380df85240822e28b548dd88b55f74b30ff8ab3
7
+ data.tar.gz: 29e95def1e7ccf0d55bca3cf5ad4d95a070d5910a0e1b8645d2884e0e5022367c63e14e2df3ecc0bac7cdfc329d88698c01060dc8005e1c1025a0c8337367afc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,81 @@
1
+ ## 0.23.0 - 2022-05-26
2
+
3
+ ### Added
4
+
5
+ - [HexaPDF::Composer#create_stamp] for creating a form XObject
6
+ - [HexaPDF::Revision#reset_objects] for deleting all live loaded and added
7
+ objects
8
+ - Support for removing or flattening annotations to the `hexapdf modify` command
9
+ - Option to CLI command `hexapdf form` to allow generation of a template file
10
+ - Support for centering a floating box in [HexaPDF::Layout::Frame]
11
+ - [HexaPDF::Type::Catalog#names] for easier access to the name dictionary
12
+ - [HexaPDF::Type::Names#destinations] for easier access to the destinations name
13
+ tree
14
+ - [HexaPDF::Document::Destinations], accessible via
15
+ [HexaPDF::Document#destinations], as convenience interface for working with
16
+ destination arrays
17
+
18
+ ### Changed
19
+
20
+ - **Breaking change**: Refactored the [HexaPDF::Document] interface for working
21
+ with objects and moved parts into [HexaPDF::Revisions]
22
+ - **Breaking change**: [HexaPDF::Layout::TextBox] to use whole available width
23
+ when aligning to the center or right
24
+ - **Breaking change**: [HexaPDF::Layout::TextBox] to use whole available height
25
+ when vertically aligning to the center or bottom
26
+ - CLI command `hexapdf inspect` to show the type of revisions, as well as the
27
+ number of objects per revision
28
+ - [HexaPDF::Task::Optimize] to allow skipping invalid content stream operations
29
+ - [HexaPDF::Composer#image] to allow using a form xobject in place of the image
30
+
31
+ ### Fixed
32
+
33
+ - [HexaPDF::Writer#write] to write modified objects into the correct revision
34
+ - [HexaPDF::Revisions::from_io] to correctly handle hybrid-reference files
35
+ - [HexaPDF::Writer] to assign a valid object number to a created cross-reference
36
+ stream in all cases
37
+ * [HexaPDF::Type::AcroForm::TextField] to validate the existence of a /MaxLen
38
+ value for comb text fields
39
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to check for the existence
40
+ of /MaxLen when setting a value for a comb text field
41
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to check the value against
42
+ /MaxLen
43
+ * [HexaPDF::Layout::TextLayouter#fit] to not use style valign when doing
44
+ variable width layouting
45
+ * [HexaPDF::Utils::SortedTreeNode#find_entry] to work in case of a node without
46
+ a container name or kids key
47
+ * CLI command `hexapdf form` to allow setting array values when using a template
48
+ * CLI command `hexapdf form` to allow setting file select fields
49
+
50
+
51
+ ## 0.22.0 - 2022-03-26
52
+
53
+ ### Added
54
+
55
+ - Support for writing images with an ICCBased color space
56
+ - Support for writing images with soft masks
57
+
58
+ ### Changed
59
+
60
+ - CLI command `hexapdf form` to show a warning when working with a file
61
+ containing an XFA form
62
+
63
+ ### Fixed
64
+
65
+ - [HexaPDF::Type::AcroForm::Form#field_by_name] to work correctly when field
66
+ name parts are UTF-16BE encoded
67
+ - `hexapdf inspect` command 'revision' to correctly detect the end of revisions
68
+ - [HexaPDF::DictionaryFields::StringConverter] to use correct method name
69
+ `HexaPDF::Document#config`
70
+
71
+
72
+ ## 0.21.1 - 2022-03-12
73
+
74
+ ### Fixed
75
+
76
+ - Handling of invalid AES encrypted files where the padding is missing
77
+
78
+
1
79
  ## 0.21.0 - 2022-03-04
2
80
 
3
81
  ### Added
@@ -246,7 +324,7 @@
246
324
 
247
325
  ## 0.16.0 - 2021-09-28
248
326
 
249
- ## Added
327
+ ### Added
250
328
 
251
329
  * Support for RGB color values of the form "RGB" in addition to "RRGGBB" and for
252
330
  CSS color module level 3 color names
data/Rakefile CHANGED
@@ -49,7 +49,7 @@ namespace :dev do
49
49
  task :test_all do
50
50
  versions = `rbenv versions --bare | grep -i 2.[567]\\\\\\|3.`.split("\n")
51
51
  versions.each do |version|
52
- sh "rbenv shell #{version} &>/dev/null && rake test"
52
+ sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
54
54
  puts "Looks okay? (enter to continue, Ctrl-c to abort)"
55
55
  $stdin.gets
@@ -70,6 +70,9 @@ module HexaPDF
70
70
  @template = template
71
71
  @fill = true
72
72
  end
73
+ options.on('--generate-template', 'Print a template for use with --template') do
74
+ @generate_template = true
75
+ end
73
76
  options.on('--flatten', 'Flatten the form fields') do
74
77
  @flatten = true
75
78
  end
@@ -85,6 +88,7 @@ module HexaPDF
85
88
  @password = nil
86
89
  @fill = false
87
90
  @flatten = false
91
+ @generate_template = false
88
92
  @template = nil
89
93
  @need_appearances = nil
90
94
  @incremental = true
@@ -97,6 +101,10 @@ module HexaPDF
97
101
  end
98
102
  with_document(in_file, password: @password, out_file: out_file,
99
103
  incremental: @incremental) do |doc|
104
+ if doc.acro_form[:XFA]
105
+ $stderr.puts "Warning: Unsupported XFA form detected, some things may not work correctly"
106
+ end
107
+
100
108
  if !doc.acro_form
101
109
  raise "This PDF doesn't contain an interactive form"
102
110
  elsif out_file
@@ -113,6 +121,15 @@ module HexaPDF
113
121
  doc.catalog.delete(:AcroForm)
114
122
  doc.delete(doc.acro_form)
115
123
  end
124
+ elsif @generate_template
125
+ unsupported_fields = [:signature_field, :password_field]
126
+ each_field(doc) do |_, _, field, _|
127
+ next if unsupported_fields.include?(field.concrete_field_type)
128
+ name = field.full_field_name.gsub(':', "\\:")
129
+ Array(field.field_value).each do |val|
130
+ puts "#{name}: #{val.to_s.gsub(/(\r|\r\n|\n)/, '\1 ')}"
131
+ end
132
+ end
116
133
  else
117
134
  list_form_fields(doc)
118
135
  end
@@ -216,8 +233,16 @@ module HexaPDF
216
233
  field_name = scanner.scan(/(\\:|[^:])*?:/)
217
234
  break unless field_name
218
235
  field_name.gsub!(/\\:/, ':')
236
+ field_name.chop!
219
237
  field_value = scanner.scan(/.*?(?=^\S|\z)/m)
220
- data[field_name.chop] = field_value.strip.gsub(/^\s*/, '') if field_value
238
+ next unless field_value
239
+ field_value = field_value.strip.gsub(/^\s*/, '')
240
+ if data.key?(field_name)
241
+ data[field_name] = [data[field_name]] unless data[field_name].kind_of?(Array)
242
+ data[field_name] << field_value
243
+ else
244
+ data[field_name] = field_value
245
+ end
221
246
  end
222
247
  if !scanner.eos? && command_parser.verbosity_warning?
223
248
  $stderr.puts "Warning: Some template could not be parsed"
@@ -228,8 +253,8 @@ module HexaPDF
228
253
  # Applies the given value to the field.
229
254
  def apply_field_value(field, value)
230
255
  case field.concrete_field_type
231
- when :single_line_text_field, :multiline_text_field, :comb_text_field, :combo_box,
232
- :list_box, :editable_combo_box
256
+ when :single_line_text_field, :multiline_text_field, :comb_text_field, :file_select_field,
257
+ :combo_box, :list_box, :editable_combo_box
233
258
  field.field_value = value
234
259
  when :check_box
235
260
  field.field_value = case value
@@ -245,6 +270,8 @@ module HexaPDF
245
270
  else
246
271
  raise "Field type #{field.concrete_field_type} not yet supported"
247
272
  end
273
+ rescue
274
+ raise "Error while setting '#{field.full_field_name}': #{$!.message}"
248
275
  end
249
276
 
250
277
  # Iterates over all non-push button fields in page order. If a field appears on multiple
@@ -229,10 +229,19 @@ module HexaPDF
229
229
  end
230
230
  IO.copy_stream(@doc.revisions.parser.io, $stdout, length, 0)
231
231
  else
232
- puts "Document has #{@doc.revisions.size} revision#{@doc.revisions.size == 1 ? '' : 's'}"
233
- revision_information do |_, index, count, signature, end_offset|
232
+ puts "Document has #{@doc.revisions.count} revision#{@doc.revisions.count == 1 ? '' : 's'}"
233
+ revision_information do |rev, index, count, signature, end_offset|
234
+ type = if rev.trailer[:XRefStm]
235
+ "xref table + stream"
236
+ elsif rev.trailer[:Type] == :XRef
237
+ "xref stream"
238
+ else
239
+ "xref table"
240
+ end
234
241
  puts "Revision #{index + 1}"
242
+ puts " Type : #{type}"
235
243
  puts " Objects : #{count}"
244
+ puts " Size : #{rev.trailer[:Size]}"
236
245
  puts " Signed : yes" if signature
237
246
  puts " Byte range: 0-#{end_offset}"
238
247
  end
@@ -342,13 +351,17 @@ module HexaPDF
342
351
  end_index = sig[:ByteRange][-2] + sig[:ByteRange][-1]
343
352
  else
344
353
  io.seek(startxrefs[index], IO::SEEK_SET)
354
+ buffer = ''.b
345
355
  while io.pos < startxrefs[index + 1]
346
- if io.gets =~ /^\s*%%EOF\s*$/
347
- end_index = io.pos
356
+ buffer << io.read(1_000)
357
+ if (buffer_index = buffer.index(/(?:\n|\r\n?)\s*%%EOF\s*(?:\n|\r\n?)/))
358
+ end_index = io.pos - buffer.size + buffer_index + $~[0].size
359
+ break
348
360
  end
361
+ buffer = buffer[-20..-1]
349
362
  end
350
363
  end
351
- yield(rev, index, rev.next_free_oid - 1, sig, end_index)
364
+ yield(rev, index, rev.each.count, sig, end_index)
352
365
  end
353
366
  end
354
367
 
@@ -53,14 +53,15 @@ module HexaPDF
53
53
  super('modify', takes_commands: false)
54
54
  short_desc("Modify a PDF file")
55
55
  long_desc(<<~EOF)
56
- This command modifies a PDF file. It can be used to select pages that should appear in
57
- the output file and/or rotate them. The output file can also be encrypted/decrypted and
58
- optimized in various ways.
56
+ This command modifies a PDF file. It can be used, for example, to select pages that should
57
+ appear in the output file and/or rotate them. The output file can also be
58
+ encrypted/decrypted and optimized in various ways.
59
59
  EOF
60
60
 
61
61
  @password = nil
62
62
  @pages = '1-e'
63
63
  @embed_files = []
64
+ @annotation_mode = nil
64
65
 
65
66
  options.on("--password PASSWORD", "-p", String,
66
67
  "The password for decryption. Use - for reading from standard input.") do |pwd|
@@ -74,6 +75,10 @@ module HexaPDF
74
75
  "used multiple times)") do |file|
75
76
  @embed_files << file
76
77
  end
78
+ options.on("--annotations MODE", [:remove, :flatten], "Handling of annotations (either " \
79
+ "remove or flatten)") do |mode|
80
+ @annotation_mode = mode
81
+ end
77
82
  define_optimization_options
78
83
  define_encryption_options
79
84
  end
@@ -82,6 +87,7 @@ module HexaPDF
82
87
  maybe_raise_on_existing_file(out_file)
83
88
  with_document(in_file, password: @password, out_file: out_file) do |doc|
84
89
  arrange_pages(doc) unless @pages == '1-e'
90
+ handle_annotations(doc)
85
91
  @embed_files.each {|file| doc.files.add(file, embed: true) }
86
92
  apply_encryption_options(doc)
87
93
  apply_optimization_options(doc)
@@ -109,6 +115,20 @@ module HexaPDF
109
115
  doc.pages.add unless doc.pages.count > 0
110
116
  end
111
117
 
118
+ # Handles the annotations of all selected pages by doing nothing, removing them or flattening
119
+ # them.
120
+ def handle_annotations(doc)
121
+ return unless @annotation_mode
122
+
123
+ doc.pages.each do |page|
124
+ if @annotation_mode == :remove
125
+ page.delete(:Annots)
126
+ else
127
+ page.flatten_annotations
128
+ end
129
+ end
130
+ end
131
+
112
132
  end
113
133
 
114
134
  end
@@ -313,7 +313,8 @@ module HexaPDF
313
313
 
314
314
  # Draws the given image at the current position.
315
315
  #
316
- # The +file+ argument can be anything that is accepted by HexaPDF::Document::Images#add.
316
+ # The +file+ argument can be anything that is accepted by HexaPDF::Document::Images#add or a
317
+ # HexaPDF::Type::Form object.
317
318
  #
318
319
  # See #text for details on +width+, +height+, +style+ and +style_properties+.
319
320
  #
@@ -324,7 +325,7 @@ module HexaPDF
324
325
  # composer.image(machu_picchu, height: 30)
325
326
  def image(file, width: 0, height: 0, style: nil, **style_properties)
326
327
  style = retrieve_style(style, style_properties)
327
- image = document.images.add(file)
328
+ image = file.kind_of?(HexaPDF::Stream) ? file : document.images.add(file)
328
329
  draw_box(Layout::ImageBox.new(image, width: width, height: height, style: style))
329
330
  end
330
331
 
@@ -361,6 +362,27 @@ module HexaPDF
361
362
  end
362
363
  end
363
364
 
365
+ # Creates a stamp (Form XObject) which can be used like an image multiple times on a single page
366
+ # or on multiple pages.
367
+ #
368
+ # The width and the height of the stamp need to be set (frame.width/height or
369
+ # page.box.width/height might be good choices).
370
+ #
371
+ # Examples:
372
+ #
373
+ # #>pdf-composer
374
+ # stamp = composer.create_stamp(50, 50) do |canvas|
375
+ # canvas.fill_color("red").line_width(5).
376
+ # rectangle(10, 10, 30, 30).fill_stroke
377
+ # end
378
+ # composer.image(stamp, width: 20, height: 20)
379
+ # composer.image(stamp, width: 50)
380
+ def create_stamp(width, height) # :yield: canvas
381
+ stamp = @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, width, height]})
382
+ yield(stamp.canvas) if block_given?
383
+ stamp
384
+ end
385
+
364
386
  private
365
387
 
366
388
  # Creates the frame into which boxes are laid out when a new page is created.
@@ -241,7 +241,7 @@ module HexaPDF
241
241
  if str.valid_encoding?
242
242
  str.encode!(Encoding::UTF_8)
243
243
  else
244
- document.configuration['document.on_invalid_string'].call(str)
244
+ document.config['document.on_invalid_string'].call(str)
245
245
  end
246
246
  else
247
247
  Utils::PDFDocEncoding.convert_to_utf8(str)