hexapdf 0.14.4 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 958692ab2c53f74fe599c0ba8c9c046aa41b38d2bf840a47dec8f0e258fd86e0
4
- data.tar.gz: b41d46ccb39d36d351cc143ba0afc145e785307b2a7ae90bc0f32d1ab76949af
3
+ metadata.gz: bcd3bc77b70872416b1377b4fdf97804de083cf4d7213dfd200738fd8b2adae7
4
+ data.tar.gz: 53a8a850610a744570999cf56c656d6bd65c8ab691a5658b81172111bdd44804
5
5
  SHA512:
6
- metadata.gz: c16e231aeeb12b55daf75a28a8e3918f6807127257655f42d1f3433c76cb25f0ea42daafc8dd20bd37099f9ef5f6f2a6a3e3b2468acf000c041452a32908e1ee
7
- data.tar.gz: a9a1fff7c7ff699c2b48333d89fae1aa67cd7ce61003845f76e4855cfc2f2ad5969ae668359091cd3fa225ea9a8b99fb9883e28b07fe6fcd985c7f83b842969d
6
+ metadata.gz: 54c99dbd44c4ae146496912f295982d47bb5ca297d4d2b76475c1f3151670068cd3be0c2dedee413b0a52e493383229bbd36d819ff2db2a9c04b377731cc107e
7
+ data.tar.gz: 8426e942709633b921f7a644e01b645d555cfcdecbdbd25bae03d0154cf07df3a3cd1f108adb16a8b449801c203014698a6b382133bbb63033a1d590351b61f7
data/CHANGELOG.md CHANGED
@@ -1,3 +1,41 @@
1
+ ## 0.15.0 - 2021-04-12
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
6
+ page
7
+ * [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
8
+ * [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
9
+ object
10
+ * [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
11
+ fields
12
+ * Support for form field flattening to the `hexapdf form` CLI command
13
+
14
+ ### Changed
15
+
16
+ * **Breaking change**: Overhauled the interface for accessing appearances of
17
+ annotations to make it more convenient
18
+ * Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
19
+ value
20
+ * [HexaPDF::MalformedPDFError#pos] to be an accessor instead of a reader and
21
+ updated the exception message
22
+ * Configuration option 'acro_form.fallback_font' to allow a callable object for
23
+ more advanced fallback font handling
24
+
25
+ ### Fixed
26
+
27
+ * [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
28
+ empty background color arrays
29
+ * [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
30
+ stored in the document in case the widget is embedded
31
+ * Processing of invalid PDF files containing a space, CR, LF combination after
32
+ the 'stream' keyword
33
+ * Cross-reference stream reconstruction with respect to detection of linearized
34
+ files
35
+ * Detection of existing appearances for AcroForm push button fields when
36
+ creating appearances
37
+
38
+
1
39
  ## 0.14.4 - 2021-02-27
2
40
 
3
41
  ### Added
@@ -52,18 +52,26 @@ module HexaPDF
52
52
  If the output file name is not given, all form fields are listed in page order. Use
53
53
  the global --verbose option to show additional information like field type and location.
54
54
 
55
- If the output file name is given, the fields can be interactively filled out. By
56
- additionally using the --template option, the data for the fields is read from the given
57
- template file instead of the standard input.
55
+ If the output file name is given, the fields can be filled out interactively, via a
56
+ template or just flattened by using the respective options. Form field flattening can also
57
+ be activated in addition to filling out the form. If neither --fill, --template nor
58
+ --flatten is specified, --fill is implied.
58
59
  EOF
59
60
 
60
61
  options.on("--password PASSWORD", "-p", String,
61
62
  "The password for decryption. Use - for reading from standard input.") do |pwd|
62
63
  @password = (pwd == '-' ? read_password : pwd)
63
64
  end
65
+ options.on("--fill", "Fill out the form") do
66
+ @fill = true
67
+ end
64
68
  options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
65
- "Use the template file for the field values") do |template|
69
+ "Use the template file for the field values (implies --fill)") do |template|
66
70
  @template = template
71
+ @fill = true
72
+ end
73
+ options.on('--flatten', 'Flatten the form fields') do
74
+ @flatten = true
67
75
  end
68
76
  options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
69
77
  "appearance. Default: use setting from input PDF") do |need_appearances|
@@ -75,6 +83,8 @@ module HexaPDF
75
83
  end
76
84
 
77
85
  @password = nil
86
+ @fill = false
87
+ @flatten = false
78
88
  @template = nil
79
89
  @need_appearances = nil
80
90
  @incremental = true
@@ -82,16 +92,28 @@ module HexaPDF
82
92
 
83
93
  def execute(in_file, out_file = nil) #:nodoc:
84
94
  maybe_raise_on_existing_file(out_file) if out_file
95
+ if (@fill || @flatten) && !out_file
96
+ raise "Output file missing"
97
+ end
85
98
  with_document(in_file, password: @password, out_file: out_file,
86
99
  incremental: @incremental) do |doc|
87
100
  if !doc.acro_form
88
101
  raise "This PDF doesn't contain an interactive form"
89
102
  elsif out_file
90
103
  doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
91
- if @template
92
- fill_form_with_template(doc)
93
- else
94
- fill_form(doc)
104
+ if @fill || !@flatten
105
+ if @template
106
+ fill_form_with_template(doc)
107
+ else
108
+ fill_form(doc)
109
+ end
110
+ end
111
+ if @flatten
112
+ unless doc.acro_form.flatten.empty?
113
+ $stderr.puts "Warning: Not all form fields could be flattened"
114
+ doc.catalog.delete(:AcroForm)
115
+ doc.delete(doc.acro_form)
116
+ end
95
117
  end
96
118
  else
97
119
  list_form_fields(doc)
@@ -164,9 +164,20 @@ module HexaPDF
164
164
  # acro_form.fallback_font::
165
165
  # The font that should be used when a variable text field references a font that cannot be used.
166
166
  #
167
- # Can either be the name of a font, like 'Helvetica', or an array consisting of the font name
168
- # and a hash of font options, like ['Helvetica', variant: :italic]. If set to +nil+, the use of
169
- # the fallback font is disabled.
167
+ # Can be one of the following:
168
+ #
169
+ # * The name of a font, like 'Helvetica'.
170
+ #
171
+ # * An array consisting of the font name and a hash of font options, like ['Helvetica',
172
+ # variant: :italic].
173
+ #
174
+ # * A callable object receiving the field and the font object (or +nil+ if no valid font object
175
+ # was found) and which has to return either a font name or an array consisting of the font
176
+ # name and a hash of font options. This way the response can be different depending on the
177
+ # original font and it would also allow e.g. modifying the configured fonts to add custom
178
+ # ones.
179
+ #
180
+ # If set to +nil+, the use of the fallback font is disabled.
170
181
  #
171
182
  # Default is 'Helvetica'.
172
183
  #
@@ -516,6 +527,9 @@ module HexaPDF
516
527
  XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
517
528
  XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
518
529
  Border: 'HexaPDF::Type::Annotation::Border',
530
+ SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
531
+ SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
532
+ SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
519
533
  },
520
534
  'object.subtype_map' => {
521
535
  nil => {
@@ -561,6 +575,7 @@ module HexaPDF
561
575
  Tx: 'HexaPDF::Type::AcroForm::TextField',
562
576
  Btn: 'HexaPDF::Type::AcroForm::ButtonField',
563
577
  Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
578
+ Sig: 'HexaPDF::Type::AcroForm::SignatureField',
564
579
  },
565
580
  })
566
581
 
data/lib/hexapdf/error.rb CHANGED
@@ -43,18 +43,19 @@ module HexaPDF
43
43
  class MalformedPDFError < Error
44
44
 
45
45
  # The byte position in the PDF file where the error occurred.
46
- attr_reader :pos
46
+ attr_accessor :pos
47
47
 
48
48
  # Creates a new malformed PDF error object for the given exception message.
49
49
  #
50
- # The byte position where the error occured can be given via the +pos+ argument.
50
+ # The byte position where the error occurred can either be given via the +pos+ argument or later
51
+ # via the #pos accessor but must be set before the exception message is retrieved.
51
52
  def initialize(message, pos: nil)
52
53
  super(message)
53
54
  @pos = pos
54
55
  end
55
56
 
56
57
  def message # :nodoc:
57
- "PDF malformed#{pos ? "around position #{pos}" : ''}: #{super}"
58
+ "PDF malformed around position #{pos}: #{super}"
58
59
  end
59
60
 
60
61
  end
@@ -140,11 +140,13 @@ module HexaPDF
140
140
  raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
141
141
  end
142
142
  tok1 = @tokenizer.next_byte
143
- tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
143
+ if tok1 == 32 # space
144
+ maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
145
+ tok1 = @tokenizer.next_byte
146
+ end
147
+ tok2 = @tokenizer.next_byte if tok1 == 13 # CR
144
148
  if tok1 != 10 && tok1 != 13
145
- tok2 = @tokenizer.next_byte
146
- maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
147
- force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
149
+ raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
148
150
  elsif tok1 == 13 && tok2 != 10
149
151
  maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
150
152
  pos: @tokenizer.pos)
@@ -214,7 +216,12 @@ module HexaPDF
214
216
  unless obj.respond_to?(:xref_section)
215
217
  raise_malformed("Object is not a cross-reference stream", pos: pos)
216
218
  end
217
- xref_section = obj.xref_section
219
+ begin
220
+ xref_section = obj.xref_section
221
+ rescue MalformedPDFError => e
222
+ e.pos = pos
223
+ raise
224
+ end
218
225
  trailer = obj.trailer
219
226
  unless xref_section.entry?(obj.oid, obj.gen)
220
227
  maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
@@ -401,6 +408,7 @@ module HexaPDF
401
408
 
402
409
  xref = XRefSection.new
403
410
  @tokenizer.pos = 0
411
+ linearized = nil
404
412
  while true
405
413
  @tokenizer.skip_whitespace
406
414
  pos = @tokenizer.pos
@@ -416,13 +424,17 @@ module HexaPDF
416
424
  @tokenizer.pos = next_new_line_pos
417
425
  elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
418
426
  xref.add_in_use_entry(token, gen, pos)
427
+ if linearized.nil?
428
+ obj = @tokenizer.next_object rescue nil
429
+ linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
430
+ end
419
431
  @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
420
432
  end
421
433
  elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
422
434
  obj = @tokenizer.next_object rescue nil
423
435
  # Use last trailer found in case of multiple revisions but use first trailer in case of
424
436
  # linearized file.
425
- trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
437
+ trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
426
438
  elsif token == Tokenizer::NO_MORE_TOKENS
427
439
  break
428
440
  else
@@ -158,6 +158,22 @@ module HexaPDF
158
158
  add_without_check(obj)
159
159
  end
160
160
 
161
+ # :call-seq:
162
+ # revision.update(obj) -> obj or nil
163
+ #
164
+ # Updates the stored object to point to the given HexaPDF::Object wrapper, returning the object
165
+ # if successful or +nil+ otherwise.
166
+ #
167
+ # If +obj+ isn't stored in this revision or the stored object doesn't contain the same
168
+ # HexaPDF::PDFData object as the given object, nothing is done.
169
+ #
170
+ # This method should only be used if the wrong wrapper class is stored (e.g. because
171
+ # auto-detection didn't or couldn't work correctly) and thus needs correction.
172
+ def update(obj)
173
+ return nil if object(obj)&.data != obj.data
174
+ add_without_check(obj)
175
+ end
176
+
161
177
  # :call-seq:
162
178
  # revision.delete(ref, mark_as_free: true)
163
179
  # revision.delete(oid, mark_as_free: true)
@@ -48,6 +48,7 @@ module HexaPDF
48
48
  autoload(:TextField, 'hexapdf/type/acro_form/text_field')
49
49
  autoload(:ButtonField, 'hexapdf/type/acro_form/button_field')
50
50
  autoload(:ChoiceField, 'hexapdf/type/acro_form/choice_field')
51
+ autoload(:SignatureField, 'hexapdf/type/acro_form/signature_field')
51
52
 
52
53
  autoload(:AppearanceGenerator, 'hexapdf/type/acro_form/appearance_generator')
53
54
 
@@ -120,7 +120,7 @@ module HexaPDF
120
120
  # widget.marker_style(style: :cross)
121
121
  # # => no visible rectangle, gray background, cross mark when checked
122
122
  def create_check_box_appearances
123
- unless @widget.appearance&.normal_appearance&.value&.size == 2
123
+ unless @widget.appearance_dict&.normal_appearance&.value&.size == 2
124
124
  raise HexaPDF::Error, "Widget of check box doesn't define name for on state"
125
125
  end
126
126
  border_style = @widget.border_style
@@ -128,11 +128,11 @@ module HexaPDF
128
128
 
129
129
  rect = update_widget(@field[:V], border_width)
130
130
 
131
- off_form = @widget.appearance.normal_appearance[:Off] =
131
+ off_form = @widget.appearance_dict.normal_appearance[:Off] =
132
132
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
133
133
  apply_background_and_border(border_style, off_form.canvas)
134
134
 
135
- on_form = @widget.appearance.normal_appearance[@field.check_box_on_name] =
135
+ on_form = @widget.appearance_dict.normal_appearance[@field.check_box_on_name] =
136
136
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
137
137
  canvas = on_form.canvas
138
138
  apply_background_and_border(border_style, canvas)
@@ -169,22 +169,22 @@ module HexaPDF
169
169
  # widget.marker_style(style: :circle, size: 0, color: 0)
170
170
  # # => default appearance
171
171
  def create_radio_button_appearances
172
- unless @widget.appearance&.normal_appearance&.value&.size == 2
172
+ unless @widget.appearance_dict&.normal_appearance&.value&.size == 2
173
173
  raise HexaPDF::Error, "Widget of radio button doesn't define unique name for on state"
174
174
  end
175
175
 
176
- on_name = (@widget.appearance.normal_appearance.value.keys - [:Off]).first
176
+ on_name = (@widget.appearance_dict.normal_appearance.value.keys - [:Off]).first
177
177
  border_style = @widget.border_style
178
178
  marker_style = @widget.marker_style
179
179
 
180
180
  rect = update_widget(@field[:V] == on_name ? on_name : :Off, border_style.width)
181
181
 
182
- off_form = @widget.appearance.normal_appearance[:Off] =
182
+ off_form = @widget.appearance_dict.normal_appearance[:Off] =
183
183
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
184
184
  apply_background_and_border(border_style, off_form.canvas,
185
185
  circular: marker_style.style == :circle)
186
186
 
187
- on_form = @widget.appearance.normal_appearance[on_name] =
187
+ on_form = @widget.appearance_dict.normal_appearance[on_name] =
188
188
  @document.add({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
189
189
  canvas = on_form.canvas
190
190
  apply_background_and_border(border_style, canvas,
@@ -219,17 +219,8 @@ module HexaPDF
219
219
  #
220
220
  # Note: Multiline, comb and rich text fields are currently not supported!
221
221
  def create_text_appearances
222
- font_name, font_size = @field.parse_default_appearance_string
223
222
  default_resources = @document.acro_form.default_resources
224
- font = default_resources.font(font_name).font_wrapper rescue nil
225
- unless font
226
- fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
227
- if fallback_font_name
228
- font = @document.fonts.add(fallback_font_name, **(fallback_font_options || {}))
229
- else
230
- raise(HexaPDF::Error, "Font #{font_name} of the AcroForm's default resources not usable")
231
- end
232
- end
223
+ font, font_size = retrieve_font_information(default_resources)
233
224
  style = HexaPDF::Layout::Style.new(font: font)
234
225
  border_style = @widget.border_style
235
226
  padding = [1, border_style.width].max
@@ -482,6 +473,27 @@ module HexaPDF
482
473
  end
483
474
  end
484
475
 
476
+ # Returns the font wrapper and font size to be used for a variable text field.
477
+ def retrieve_font_information(resources)
478
+ font_name, font_size = @field.parse_default_appearance_string
479
+ font_object = resources.font(font_name) rescue nil
480
+ font = font_object&.font_wrapper
481
+ unless font
482
+ fallback_font = @document.config['acro_form.fallback_font']
483
+ fallback_font_name, fallback_font_options = if fallback_font.respond_to?(:call)
484
+ fallback_font.call(@field, font_object)
485
+ else
486
+ fallback_font
487
+ end
488
+ if fallback_font_name
489
+ font = @document.fonts.add(fallback_font_name, **(fallback_font_options || {}))
490
+ else
491
+ raise(HexaPDF::Error, "Font #{font_name} of the AcroForm's default resources not usable")
492
+ end
493
+ end
494
+ [font, font_size]
495
+ end
496
+
485
497
  # Calculates the font size for text fields based on the font and font size of the default
486
498
  # appearance string, the annotation rectangle and the border style.
487
499
  def calculate_font_size(font, font_size, rect, border_style)
@@ -184,7 +184,7 @@ module HexaPDF
184
184
  #
185
185
  # Defaults to :Yes if no other name could be determined.
186
186
  def check_box_on_name
187
- each_widget.to_a.first&.appearance&.normal_appearance&.value&.each_key&.
187
+ each_widget.to_a.first&.appearance_dict&.normal_appearance&.value&.each_key&.
188
188
  find {|key| key != :Off } || :Yes
189
189
  end
190
190
 
@@ -192,7 +192,7 @@ module HexaPDF
192
192
  # button.
193
193
  def radio_button_values
194
194
  each_widget.map do |widget|
195
- widget.appearance&.normal_appearance&.value&.each_key&.find {|key| key != :Off }
195
+ widget.appearance_dict&.normal_appearance&.value&.each_key&.find {|key| key != :Off }
196
196
  end.compact
197
197
  end
198
198
 
@@ -233,7 +233,11 @@ module HexaPDF
233
233
  def create_appearances(force: false)
234
234
  appearance_generator_class = document.config.constantize('acro_form.appearance_generator')
235
235
  each_widget do |widget|
236
- next if !force && widget.appearance?
236
+ normal_appearance = widget.appearance_dict&.normal_appearance
237
+ next if !force && normal_appearance &&
238
+ ((!push_button? && normal_appearance.value.length == 2 &&
239
+ normal_appearance.value.each_value.all?(HexaPDF::Stream)) ||
240
+ (push_button? && normal_appearance.kind_of?(HexaPDF::Stream)))
237
241
  if check_box?
238
242
  appearance_generator_class.new(widget).create_check_box_appearances
239
243
  elsif radio_button?
@@ -250,7 +254,7 @@ module HexaPDF
250
254
  create_appearances
251
255
  value = self[:V]
252
256
  each_widget do |widget|
253
- widget[:AS] = (widget.appearance&.normal_appearance&.value&.key?(value) ? value : :Off)
257
+ widget[:AS] = (widget.appearance_dict&.normal_appearance&.key?(value) ? value : :Off)
254
258
  end
255
259
  end
256
260
 
@@ -315,6 +315,7 @@ module HexaPDF
315
315
 
316
316
  if embedded_widget?
317
317
  WIDGET_FIELDS.each {|key| delete(key) }
318
+ document.revisions.each {|revision| break if revision.update(self)}
318
319
  else
319
320
  self[:Kids].delete_at(widget_index)
320
321
  document.delete(widget)
@@ -331,6 +331,43 @@ module HexaPDF
331
331
  end
332
332
  end
333
333
 
334
+ # Flattens the whole interactive form or only the given fields, and returns the fields that
335
+ # couldn't be flattened.
336
+ #
337
+ # Flattening means making the appearance streams of the field widgets part of the respective
338
+ # page's content stream and removing the fields themselves.
339
+ #
340
+ # If the whole interactive form is flattened, the form object itself is also removed if all
341
+ # fields were flattened.
342
+ #
343
+ # The +create_appearances+ argument controls whether missing appearances should
344
+ # automatically be created.
345
+ #
346
+ # See: HexaPDF::Type::Page#flatten_annotations
347
+ def flatten(fields: nil, create_appearances: true)
348
+ remove_form = fields.nil?
349
+ fields ||= each_field.to_a
350
+ if create_appearances
351
+ fields.each {|field| field.create_appearances if field.respond_to?(:create_appearances) }
352
+ end
353
+
354
+ not_flattened = fields.map {|field| field.each_widget.to_a }.flatten
355
+ document.pages.each {|page| not_flattened = page.flatten_annotations(not_flattened) }
356
+ fields -= not_flattened.map(&:form_field)
357
+
358
+ fields.each do |field|
359
+ (field[:Parent]&.[](:Kids) || self[:Fields]).delete(field)
360
+ document.delete(field)
361
+ end
362
+
363
+ if remove_form && not_flattened.empty?
364
+ document.catalog.delete(:AcroForm)
365
+ document.delete(self)
366
+ end
367
+
368
+ not_flattened
369
+ end
370
+
334
371
  private
335
372
 
336
373
  # Helper method for bit field getter access.