hexapdf 0.14.4 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 958692ab2c53f74fe599c0ba8c9c046aa41b38d2bf840a47dec8f0e258fd86e0
4
- data.tar.gz: b41d46ccb39d36d351cc143ba0afc145e785307b2a7ae90bc0f32d1ab76949af
3
+ metadata.gz: bcd3bc77b70872416b1377b4fdf97804de083cf4d7213dfd200738fd8b2adae7
4
+ data.tar.gz: 53a8a850610a744570999cf56c656d6bd65c8ab691a5658b81172111bdd44804
5
5
  SHA512:
6
- metadata.gz: c16e231aeeb12b55daf75a28a8e3918f6807127257655f42d1f3433c76cb25f0ea42daafc8dd20bd37099f9ef5f6f2a6a3e3b2468acf000c041452a32908e1ee
7
- data.tar.gz: a9a1fff7c7ff699c2b48333d89fae1aa67cd7ce61003845f76e4855cfc2f2ad5969ae668359091cd3fa225ea9a8b99fb9883e28b07fe6fcd985c7f83b842969d
6
+ metadata.gz: 54c99dbd44c4ae146496912f295982d47bb5ca297d4d2b76475c1f3151670068cd3be0c2dedee413b0a52e493383229bbd36d819ff2db2a9c04b377731cc107e
7
+ data.tar.gz: 8426e942709633b921f7a644e01b645d555cfcdecbdbd25bae03d0154cf07df3a3cd1f108adb16a8b449801c203014698a6b382133bbb63033a1d590351b61f7
data/CHANGELOG.md CHANGED
@@ -1,3 +1,41 @@
1
+ ## 0.15.0 - 2021-04-12
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
6
+ page
7
+ * [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
8
+ * [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
9
+ object
10
+ * [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
11
+ fields
12
+ * Support for form field flattening to the `hexapdf form` CLI command
13
+
14
+ ### Changed
15
+
16
+ * **Breaking change**: Overhauled the interface for accessing appearances of
17
+ annotations to make it more convenient
18
+ * Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
19
+ value
20
+ * [HexaPDF::MalformedPDFError#pos] to be an accessor instead of a reader and update
21
+ the exception message
22
+ * Configuration option 'acro_form.fallback_font' to allow a callable object for
23
+ more advanced fallback font handling
24
+
25
+ ### Fixed
26
+
27
+ * [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
28
+ empty background color arrays
29
+ * [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
30
+ stored in the document in case the widget is embedded
31
+ * Processing of invalid PDF files containing a space, CR, LF combination after
32
+ the 'stream' keyword
33
+ * Cross-reference stream reconstruction with respect to detection of linearized
34
+ files
35
+ * Detection of existing appearances for AcroForm push button fields when
36
+ creating appearances
37
+
38
+
1
39
  ## 0.14.4 - 2021-02-27
2
40
 
3
41
  ### Added
@@ -52,18 +52,26 @@ module HexaPDF
52
52
  If the the output file name is not given, all form fields are listed in page order. Use
53
53
  the global --verbose option to show additional information like field type and location.
54
54
 
55
- If the output file name is given, the fields can be interactively filled out. By
56
- additionally using the --template option, the data for the fields is read from the given
57
- template file instead of the standard input.
55
+ If the output file name is given, the fields can be filled out interactively, via a
56
+ template or just flattened by using the respective options. Form field flattening can also
57
+ be activated in addition to filling out the form. If neither --fill, --template nor
58
+ --flatten is specified, --fill is implied.
58
59
  EOF
59
60
 
60
61
  options.on("--password PASSWORD", "-p", String,
61
62
  "The password for decryption. Use - for reading from standard input.") do |pwd|
62
63
  @password = (pwd == '-' ? read_password : pwd)
63
64
  end
65
+ options.on("--fill", "Fill out the form") do
66
+ @fill = true
67
+ end
64
68
  options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
65
- "Use the template file for the field values") do |template|
69
+ "Use the template file for the field values (implies --fill)") do |template|
66
70
  @template = template
71
+ @fill = true
72
+ end
73
+ options.on('--flatten', 'Flatten the form fields') do
74
+ @flatten = true
67
75
  end
68
76
  options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
69
77
  "appearance. Default: use setting from input PDF") do |need_appearances|
@@ -75,6 +83,8 @@ module HexaPDF
75
83
  end
76
84
 
77
85
  @password = nil
86
+ @fill = false
87
+ @flatten = false
78
88
  @template = nil
79
89
  @need_appearances = nil
80
90
  @incremental = true
@@ -82,16 +92,28 @@ module HexaPDF
82
92
 
83
93
  def execute(in_file, out_file = nil) #:nodoc:
84
94
  maybe_raise_on_existing_file(out_file) if out_file
95
+ if (@fill || @flatten) && !out_file
96
+ raise "Output file missing"
97
+ end
85
98
  with_document(in_file, password: @password, out_file: out_file,
86
99
  incremental: @incremental) do |doc|
87
100
  if !doc.acro_form
88
101
  raise "This PDF doesn't contain an interactive form"
89
102
  elsif out_file
90
103
  doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
91
- if @template
92
- fill_form_with_template(doc)
93
- else
94
- fill_form(doc)
104
+ if @fill || !@flatten
105
+ if @template
106
+ fill_form_with_template(doc)
107
+ else
108
+ fill_form(doc)
109
+ end
110
+ end
111
+ if @flatten
112
+ unless doc.acro_form.flatten.empty?
113
+ $stderr.puts "Warning: Not all form fields could be flattened"
114
+ doc.catalog.delete(:AcroForm)
115
+ doc.delete(doc.acro_form)
116
+ end
95
117
  end
96
118
  else
97
119
  list_form_fields(doc)
@@ -164,9 +164,20 @@ module HexaPDF
164
164
  # acro_form.fallback_font::
165
165
  # The font that should be used when a variable text field references a font that cannot be used.
166
166
  #
167
- # Can either be the name of a font, like 'Helvetica', or an array consisting of the font name
168
- # and a hash of font options, like ['Helvetica', variant: :italic]. If set to +nil+, the use of
169
- # the fallback font is disabled.
167
+ # Can be one of the following:
168
+ #
169
+ # * The name of a font, like 'Helvetica'.
170
+ #
171
+ # * An array consisting of the font name and a hash of font options, like ['Helvetica',
172
+ # variant: :italic].
173
+ #
174
+ # * A callable object receiving the field and the font object (or +nil+ if no valid font object
175
+ # was found) and which has to return either a font name or an array consisting of the font
176
+ # name and a hash of font options. This way the response can be different depending on the
177
+ # original font and it would also allow e.g. modifying the configured fonts to add custom
178
+ # ones.
179
+ #
180
+ # If set to +nil+, the use of the fallback font is disabled.
170
181
  #
171
182
  # Default is 'Helvetica'.
172
183
  #
@@ -516,6 +527,9 @@ module HexaPDF
516
527
  XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
517
528
  XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
518
529
  Border: 'HexaPDF::Type::Annotation::Border',
530
+ SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
531
+ SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
532
+ SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
519
533
  },
520
534
  'object.subtype_map' => {
521
535
  nil => {
@@ -561,6 +575,7 @@ module HexaPDF
561
575
  Tx: 'HexaPDF::Type::AcroForm::TextField',
562
576
  Btn: 'HexaPDF::Type::AcroForm::ButtonField',
563
577
  Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
578
+ Sig: 'HexaPDF::Type::AcroForm::SignatureField',
564
579
  },
565
580
  })
566
581
 
data/lib/hexapdf/error.rb CHANGED
@@ -43,18 +43,19 @@ module HexaPDF
43
43
  class MalformedPDFError < Error
44
44
 
45
45
  # The byte position in the PDF file where the error occurred.
46
- attr_reader :pos
46
+ attr_accessor :pos
47
47
 
48
48
  # Creates a new malformed PDF error object for the given exception message.
49
49
  #
50
- # The byte position where the error occured can be given via the +pos+ argument.
50
+ # The byte position where the error occurred can either be given via the +pos+ argument or later
51
+ # via the #pos accessor but must be set before the exception message is retrieved.
51
52
  def initialize(message, pos: nil)
52
53
  super(message)
53
54
  @pos = pos
54
55
  end
55
56
 
56
57
  def message # :nodoc:
57
- "PDF malformed#{pos ? "around position #{pos}" : ''}: #{super}"
58
+ "PDF malformed around position #{pos}: #{super}"
58
59
  end
59
60
 
60
61
  end
@@ -140,11 +140,13 @@ module HexaPDF
140
140
  raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
141
141
  end
142
142
  tok1 = @tokenizer.next_byte
143
- tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
143
+ if tok1 == 32 # space
144
+ maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
145
+ tok1 = @tokenizer.next_byte
146
+ end
147
+ tok2 = @tokenizer.next_byte if tok1 == 13 # CR
144
148
  if tok1 != 10 && tok1 != 13
145
- tok2 = @tokenizer.next_byte
146
- maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
147
- force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
149
+ raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
148
150
  elsif tok1 == 13 && tok2 != 10
149
151
  maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
150
152
  pos: @tokenizer.pos)
@@ -214,7 +216,12 @@ module HexaPDF
214
216
  unless obj.respond_to?(:xref_section)
215
217
  raise_malformed("Object is not a cross-reference stream", pos: pos)
216
218
  end
217
- xref_section = obj.xref_section
219
+ begin
220
+ xref_section = obj.xref_section
221
+ rescue MalformedPDFError => e
222
+ e.pos = pos
223
+ raise
224
+ end
218
225
  trailer = obj.trailer
219
226
  unless xref_section.entry?(obj.oid, obj.gen)
220
227
  maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
@@ -401,6 +408,7 @@ module HexaPDF
401
408
 
402
409
  xref = XRefSection.new
403
410
  @tokenizer.pos = 0
411
+ linearized = nil
404
412
  while true
405
413
  @tokenizer.skip_whitespace
406
414
  pos = @tokenizer.pos
@@ -416,13 +424,17 @@ module HexaPDF
416
424
  @tokenizer.pos = next_new_line_pos
417
425
  elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
418
426
  xref.add_in_use_entry(token, gen, pos)
427
+ if linearized.nil?
428
+ obj = @tokenizer.next_object rescue nil
429
+ linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
430
+ end
419
431
  @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
420
432
  end
421
433
  elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
422
434
  obj = @tokenizer.next_object rescue nil
423
435
  # Use last trailer found in case of multiple revisions but use first trailer in case of
424
436
  # linearized file.
425
- trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
437
+ trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
426
438
  elsif token == Tokenizer::NO_MORE_TOKENS
427
439
  break
428
440
  else
@@ -158,6 +158,22 @@ module HexaPDF
158
158
  add_without_check(obj)
159
159
  end
160
160
 
161
+ # :call-seq:
162
+ # revision.update(obj) -> obj or nil
163
+ #
164
+ # Updates the stored object to point to the given HexaPDF::Object wrapper, returning the object
165
+ # if successful or +nil+ otherwise.
166
+ #
167
+ # If +obj+ isn't stored in this revision or the stored object doesn't contain the same
168
+ # HexaPDF::PDFData object as the given object, nothing is done.
169
+ #
170
+ # This method should only be used if the wrong wrapper class is stored (e.g. because
171
+ # auto-detection didn't or couldn't work correctly) and thus needs correction.
172
+ def update(obj)
173
+ return nil if object(obj)&.data != obj.data
174
+ add_without_check(obj)
175
+ end
176
+
161
177
  # :call-seq:
162
178
  # revision.delete(ref, mark_as_free: true)
163
179
  # revision.delete(oid, mark_as_free: true)
@@ -48,6 +48,7 @@ module HexaPDF
48
48
  autoload(:TextField, 'hexapdf/type/acro_form/text_field')
49
49
  autoload(:ButtonField, 'hexapdf/type/acro_form/button_field')
50
50
  autoload(:ChoiceField, 'hexapdf/type/acro_form/choice_field')
51
+ autoload(:SignatureField, 'hexapdf/type/acro_form/signature_field')
51
52
 
52
53
  autoload(:AppearanceGenerator, 'hexapdf/type/acro_form/appearance_generator')
53
54
 
@@ -120,7 +120,7 @@ module HexaPDF
120
120
  # widget.marker_style(style: :cross)
121
121
  # # => no visible rectangle, gray background, cross mark when checked
122
122
  def create_check_box_appearances
123
- unless @widget.appearance&.normal_appearance&.value&.size == 2
123
+ unless @widget.appearance_dict&.normal_appearance&.value&.size == 2
124
124
  raise HexaPDF::Error, "Widget of check box doesn't define name for on state"
125
125
  end
126
126
  border_style = @widget.border_style
@@ -128,11 +128,11 @@ module HexaPDF
128
128
 
129
129
  rect = update_widget(@field[:V], border_width)
130
130
 
131
- off_form = @widget.appearance.normal_appearance[:Off] =
131
+ off_form = @widget.appearance_dict.normal_appearance[:Off] =
132
132
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
133
133
  apply_background_and_border(border_style, off_form.canvas)
134
134
 
135
- on_form = @widget.appearance.normal_appearance[@field.check_box_on_name] =
135
+ on_form = @widget.appearance_dict.normal_appearance[@field.check_box_on_name] =
136
136
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
137
137
  canvas = on_form.canvas
138
138
  apply_background_and_border(border_style, canvas)
@@ -169,22 +169,22 @@ module HexaPDF
169
169
  # widget.marker_style(style: :circle, size: 0, color: 0)
170
170
  # # => default appearance
171
171
  def create_radio_button_appearances
172
- unless @widget.appearance&.normal_appearance&.value&.size == 2
172
+ unless @widget.appearance_dict&.normal_appearance&.value&.size == 2
173
173
  raise HexaPDF::Error, "Widget of radio button doesn't define unique name for on state"
174
174
  end
175
175
 
176
- on_name = (@widget.appearance.normal_appearance.value.keys - [:Off]).first
176
+ on_name = (@widget.appearance_dict.normal_appearance.value.keys - [:Off]).first
177
177
  border_style = @widget.border_style
178
178
  marker_style = @widget.marker_style
179
179
 
180
180
  rect = update_widget(@field[:V] == on_name ? on_name : :Off, border_style.width)
181
181
 
182
- off_form = @widget.appearance.normal_appearance[:Off] =
182
+ off_form = @widget.appearance_dict.normal_appearance[:Off] =
183
183
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
184
184
  apply_background_and_border(border_style, off_form.canvas,
185
185
  circular: marker_style.style == :circle)
186
186
 
187
- on_form = @widget.appearance.normal_appearance[on_name] =
187
+ on_form = @widget.appearance_dict.normal_appearance[on_name] =
188
188
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
189
189
  canvas = on_form.canvas
190
190
  apply_background_and_border(border_style, canvas,
@@ -219,17 +219,8 @@ module HexaPDF
219
219
  #
220
220
  # Note: Multiline, comb and rich text fields are currently not supported!
221
221
  def create_text_appearances
222
- font_name, font_size = @field.parse_default_appearance_string
223
222
  default_resources = @document.acro_form.default_resources
224
- font = default_resources.font(font_name).font_wrapper rescue nil
225
- unless font
226
- fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
227
- if fallback_font_name
228
- font = @document.fonts.add(fallback_font_name, **(fallback_font_options || {}))
229
- else
230
- raise(HexaPDF::Error, "Font #{font_name} of the AcroForm's default resources not usable")
231
- end
232
- end
223
+ font, font_size = retrieve_font_information(default_resources)
233
224
  style = HexaPDF::Layout::Style.new(font: font)
234
225
  border_style = @widget.border_style
235
226
  padding = [1, border_style.width].max
@@ -482,6 +473,27 @@ module HexaPDF
482
473
  end
483
474
  end
484
475
 
476
+ # Returns the font wrapper and font size to be used for a variable text field.
477
+ def retrieve_font_information(resources)
478
+ font_name, font_size = @field.parse_default_appearance_string
479
+ font_object = resources.font(font_name) rescue nil
480
+ font = font_object&.font_wrapper
481
+ unless font
482
+ fallback_font = @document.config['acro_form.fallback_font']
483
+ fallback_font_name, fallback_font_options = if fallback_font.respond_to?(:call)
484
+ fallback_font.call(@field, font_object)
485
+ else
486
+ fallback_font
487
+ end
488
+ if fallback_font_name
489
+ font = @document.fonts.add(fallback_font_name, **(fallback_font_options || {}))
490
+ else
491
+ raise(HexaPDF::Error, "Font #{font_name} of the AcroForm's default resources not usable")
492
+ end
493
+ end
494
+ [font, font_size]
495
+ end
496
+
485
497
  # Calculates the font size for text fields based on the font and font size of the default
486
498
  # appearance string, the annotation rectangle and the border style.
487
499
  def calculate_font_size(font, font_size, rect, border_style)
@@ -184,7 +184,7 @@ module HexaPDF
184
184
  #
185
185
  # Defaults to :Yes if no other name could be determined.
186
186
  def check_box_on_name
187
- each_widget.to_a.first&.appearance&.normal_appearance&.value&.each_key&.
187
+ each_widget.to_a.first&.appearance_dict&.normal_appearance&.value&.each_key&.
188
188
  find {|key| key != :Off } || :Yes
189
189
  end
190
190
 
@@ -192,7 +192,7 @@ module HexaPDF
192
192
  # button.
193
193
  def radio_button_values
194
194
  each_widget.map do |widget|
195
- widget.appearance&.normal_appearance&.value&.each_key&.find {|key| key != :Off }
195
+ widget.appearance_dict&.normal_appearance&.value&.each_key&.find {|key| key != :Off }
196
196
  end.compact
197
197
  end
198
198
 
@@ -233,7 +233,11 @@ module HexaPDF
233
233
  def create_appearances(force: false)
234
234
  appearance_generator_class = document.config.constantize('acro_form.appearance_generator')
235
235
  each_widget do |widget|
236
- next if !force && widget.appearance?
236
+ normal_appearance = widget.appearance_dict&.normal_appearance
237
+ next if !force && normal_appearance &&
238
+ ((!push_button? && normal_appearance.value.length == 2 &&
239
+ normal_appearance.value.each_value.all?(HexaPDF::Stream)) ||
240
+ (push_button? && normal_appearance.kind_of?(HexaPDF::Stream)))
237
241
  if check_box?
238
242
  appearance_generator_class.new(widget).create_check_box_appearances
239
243
  elsif radio_button?
@@ -250,7 +254,7 @@ module HexaPDF
250
254
  create_appearances
251
255
  value = self[:V]
252
256
  each_widget do |widget|
253
- widget[:AS] = (widget.appearance&.normal_appearance&.value&.key?(value) ? value : :Off)
257
+ widget[:AS] = (widget.appearance_dict&.normal_appearance&.key?(value) ? value : :Off)
254
258
  end
255
259
  end
256
260
 
@@ -315,6 +315,7 @@ module HexaPDF
315
315
 
316
316
  if embedded_widget?
317
317
  WIDGET_FIELDS.each {|key| delete(key) }
318
+ document.revisions.each {|revision| break if revision.update(self)}
318
319
  else
319
320
  self[:Kids].delete_at(widget_index)
320
321
  document.delete(widget)
@@ -331,6 +331,43 @@ module HexaPDF
331
331
  end
332
332
  end
333
333
 
334
+ # Flattens the whole interactive form or only the given fields, and returns the fields that
335
+ # couldn't be flattened.
336
+ #
337
+ # Flattening means making the appearance streams of the field widgets part of the respective
338
+ # page's content stream and removing the fields themselves.
339
+ #
340
+ # If the whole interactive form is flattened, the form object itself is also removed if all
341
+ # fields were flattened.
342
+ #
343
+ # The +create_appearances+ argument controls whether missing appearances should
344
+ # automatically be created.
345
+ #
346
+ # See: HexaPDF::Type::Page#flatten_annotations
347
+ def flatten(fields: nil, create_appearances: true)
348
+ remove_form = fields.nil?
349
+ fields ||= each_field.to_a
350
+ if create_appearances
351
+ fields.each {|field| field.create_appearances if field.respond_to?(:create_appearances) }
352
+ end
353
+
354
+ not_flattened = fields.map {|field| field.each_widget.to_a }.flatten
355
+ document.pages.each {|page| not_flattened = page.flatten_annotations(not_flattened) }
356
+ fields -= not_flattened.map(&:form_field)
357
+
358
+ fields.each do |field|
359
+ (field[:Parent]&.[](:Kids) || self[:Fields]).delete(field)
360
+ document.delete(field)
361
+ end
362
+
363
+ if remove_form && not_flattened.empty?
364
+ document.catalog.delete(:AcroForm)
365
+ document.delete(self)
366
+ end
367
+
368
+ not_flattened
369
+ end
370
+
334
371
  private
335
372
 
336
373
  # Helper method for bit field getter access.