hexapdf 0.26.2 → 0.27.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +1 -1
  4. data/examples/013-text_layouter_shapes.rb +8 -8
  5. data/examples/016-frame_automatic_box_placement.rb +3 -3
  6. data/examples/017-frame_text_flow.rb +3 -3
  7. data/examples/020-column_box.rb +3 -3
  8. data/lib/hexapdf/cli/split.rb +7 -7
  9. data/lib/hexapdf/cli/watermark.rb +2 -2
  10. data/lib/hexapdf/configuration.rb +2 -0
  11. data/lib/hexapdf/dictionary.rb +3 -12
  12. data/lib/hexapdf/document/destinations.rb +42 -5
  13. data/lib/hexapdf/document/signatures.rb +265 -48
  14. data/lib/hexapdf/importer.rb +3 -0
  15. data/lib/hexapdf/parser.rb +1 -0
  16. data/lib/hexapdf/revisions.rb +3 -1
  17. data/lib/hexapdf/tokenizer.rb +2 -2
  18. data/lib/hexapdf/type/acro_form/form.rb +28 -1
  19. data/lib/hexapdf/type/catalog.rb +1 -1
  20. data/lib/hexapdf/type/outline.rb +18 -0
  21. data/lib/hexapdf/type/outline_item.rb +72 -14
  22. data/lib/hexapdf/type/page.rb +56 -35
  23. data/lib/hexapdf/type/resources.rb +13 -17
  24. data/lib/hexapdf/type/signature/adbe_pkcs7_detached.rb +16 -2
  25. data/lib/hexapdf/type/signature.rb +10 -0
  26. data/lib/hexapdf/version.rb +1 -1
  27. data/lib/hexapdf/writer.rb +3 -0
  28. data/test/hexapdf/document/test_destinations.rb +41 -0
  29. data/test/hexapdf/document/test_signatures.rb +139 -19
  30. data/test/hexapdf/test_importer.rb +14 -0
  31. data/test/hexapdf/test_parser.rb +2 -2
  32. data/test/hexapdf/test_revisions.rb +20 -12
  33. data/test/hexapdf/test_tokenizer.rb +11 -1
  34. data/test/hexapdf/test_writer.rb +11 -3
  35. data/test/hexapdf/type/acro_form/test_form.rb +47 -0
  36. data/test/hexapdf/type/signature/common.rb +52 -0
  37. data/test/hexapdf/type/signature/test_adbe_pkcs7_detached.rb +21 -0
  38. data/test/hexapdf/type/test_catalog.rb +5 -2
  39. data/test/hexapdf/type/test_outline.rb +1 -1
  40. data/test/hexapdf/type/test_outline_item.rb +62 -1
  41. data/test/hexapdf/type/test_page.rb +41 -20
  42. data/test/hexapdf/type/test_resources.rb +0 -5
  43. data/test/hexapdf/type/test_signature.rb +8 -0
  44. data/test/test_helper.rb +1 -1
  45. metadata +17 -3
@@ -35,7 +35,9 @@
35
35
  #++
36
36
 
37
37
  require 'openssl'
38
+ require 'net/http'
38
39
  require 'hexapdf/error'
40
+ require 'stringio'
39
41
 
40
42
  module HexaPDF
41
43
  class Document
@@ -43,19 +45,59 @@ module HexaPDF
43
45
  # This class provides methods for interacting with digital signatures of a PDF file.
44
46
  class Signatures
45
47
 
46
- # This is the default signing handler which provides the ability to sign a document with a
47
- # provided certificate using the adb.pkcs7.detached algorithm.
48
+ # This is the default signing handler which provides the ability to sign a document with the
49
+ # adbe.pkcs7.detached or ETSI.CAdES.detached algorithms. It is registered under the :default
50
+ # name.
51
+ #
52
+ # == Usage
53
+ #
54
+ # The signing handler is used by default by all methods that need a signing handler. Therefore
55
+ # it is usually only necessary to provide the actual attribute values.
56
+ #
57
+ # This handler provides two ways to create the PKCS#7 signed-data structure required by
58
+ # Signatures#add:
59
+ #
60
+ # * By providing the signing certificate together with the signing key and the certificate
61
+ # chain. This way HexaPDF itself does the signing. It is the preferred way if all the needed
62
+ # information is available.
63
+ #
64
+ # Assign the respective data to the #certificate, #key and #certificate_chain attributes.
65
+ #
66
+ # * By using an external signing mechanism. Here the actual signing happens "outside" of
67
+ # HexaPDF, for example, in custom code or even asynchronously. This is needed in case the
68
+ # signing certificate plus key are not directly available but only an interface to them
69
+ # (e.g. when dealing with a HSM).
70
+ #
71
+ # Assign a callable object to #external_signing. If the signing process needs to be
72
+ # asynchronous, make sure to set the #signature_size appropriately, return an empty string
73
+ # during signing and later use Signatures.embed_signature to embed the actual signature.
48
74
  #
49
75
  # Additional functionality:
50
76
  #
51
77
  # * Optionally setting the reason, location and contact information.
52
78
  # * Making the signature a certification signature by applying the DocMDP transform method.
53
79
  #
80
+ # Example:
81
+ #
82
+ # # Signing using certificate + key
83
+ # document.sign("output.pdf", certificate: my_cert, key: my_key,
84
+ # certificate_chain: my_chain)
85
+ #
86
+ # # Signing using an external mechanism:
87
+ # signing_proc = lambda do |io, byte_range|
88
+ # io.pos = byte_range[0]
89
+ # data = io.read(byte_range[1])
90
+ # io.pos = byte_range[2]
91
+ # data << io.read(byte_range[3])
92
+ # signing_service.pkcs7_sign(data)
93
+ # end
94
+ # document.sign("output.pdf", signature_size: 10_000, external_signing: signing_proc)
95
+ #
54
96
  # == Implementing a Signing Handler
55
97
  #
56
98
  # This class also serves as an example on how to create a custom handler: The public methods
57
- # #filter_name, #sub_filter_name, #signature_size, #finalize_objects and #sign are used by the
58
- # digital signature algorithm.
99
+ # #signature_size, #finalize_objects and #sign are used by the digital signature algorithm.
100
+ # See their descriptions for details.
59
101
  #
60
102
  # Once a custom signing handler has been created, it can be registered under the
61
103
  # 'signature.signing_handler' configuration option for easy use. It has to take keyword
@@ -72,6 +114,13 @@ module HexaPDF
72
114
  # certificates up to the root certificate.
73
115
  attr_accessor :certificate_chain
74
116
 
117
+ # A callable object fulfilling the same role as the #sign method that is used instead of the
118
+ # default mechanism for signing.
119
+ #
120
+ # If this attribute is set, the attributes #certificate, #key and #certificate_chain are not
121
+ # used.
122
+ attr_accessor :external_signing
123
+
75
124
  # The reason for signing. If used, will be set on the signature object.
76
125
  attr_accessor :reason
77
126
 
@@ -81,6 +130,21 @@ module HexaPDF
81
130
  # The contact information. If used, will be set on the signature object.
82
131
  attr_accessor :contact_info
83
132
 
133
+ # The size of the serialized signature that should be reserved.
134
+ #
135
+ # If this attribute has not been set, an empty string will be signed using #sign to
136
+ # determine the signature size.
137
+ #
138
+ # The size needs to be at least as big as the final signature, otherwise signing results in
139
+ # an error.
140
+ attr_writer :signature_size
141
+
142
+ # The type of signature to be written (i.e. the value of the /SubFilter key).
143
+ #
144
+ # The value can either be :adobe (the default; uses a detached PKCS7 signature) or :etsi
145
+ # (uses an ETSI CAdES compatible signature).
146
+ attr_accessor :signature_type
147
+
84
148
  # The DocMDP permissions that should be set on the document.
85
149
  #
86
150
  # See #doc_mdp_permissions=
@@ -88,19 +152,10 @@ module HexaPDF
88
152
 
89
153
  # Creates a new DefaultHandler with the given attributes.
90
154
  def initialize(**arguments)
155
+ @signature_size = nil
91
156
  arguments.each {|name, value| send("#{name}=", value) }
92
157
  end
93
158
 
94
- # Returns the name to be set on the /Filter key when using this signing handler.
95
- def filter_name
96
- :'Adobe.PPKLite'
97
- end
98
-
99
- # Returns the name to be set on the /SubFilter key when using this signing handler.
100
- def sub_filter_name
101
- :'adbe.pkcs7.detached'
102
- end
103
-
104
159
  # Sets the DocMDP permissions that should be applied to the document.
105
160
  #
106
161
  # Valid values for +permissions+ are:
@@ -128,13 +183,17 @@ module HexaPDF
128
183
  end
129
184
  end
130
185
 
131
- # Returns the size of the signature that would be created.
186
+ # Returns the size of the serialized signature that should be reserved.
187
+ #
188
+ # If a custom size is set using #signature_size=, it is used. Otherwise the size is determined
189
+ # by using #sign to sign an empty string.
132
190
  def signature_size
133
- sign("").size
191
+ @signature_size || sign(StringIO.new, [0, 0, 0, 0]).size
134
192
  end
135
193
 
136
194
  # Finalizes the signature field as well as the signature dictionary before writing.
137
195
  def finalize_objects(_signature_field, signature)
196
+ signature[:SubFilter] = :'ETSI.CAdES.detached' if signature_type == :etsi
138
197
  signature[:Reason] = reason if reason
139
198
  signature[:Location] = location if location
140
199
  signature[:ContactInfo] = contact_info if contact_info
@@ -153,14 +212,173 @@ module HexaPDF
153
212
  end
154
213
 
155
214
  # Returns the DER serialized OpenSSL::PKCS7 structure containing the signature for the given
156
- # data.
157
- def sign(data)
158
- OpenSSL::PKCS7.sign(@certificate, @key, data, @certificate_chain,
159
- OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
215
+ # IO byte ranges.
216
+ #
217
+ # The +byte_range+ argument is an array containing four numbers [offset1, length1, offset2,
218
+ # length2]. The offset numbers are byte positions in the +io+ argument and the to-be-signed
219
+ # data can be determined by reading length bytes at the offsets.
220
+ def sign(io, byte_range)
221
+ if external_signing
222
+ external_signing.call(io, byte_range)
223
+ else
224
+ io.pos = byte_range[0]
225
+ data = io.read(byte_range[1])
226
+ io.pos = byte_range[2]
227
+ data << io.read(byte_range[3])
228
+ OpenSSL::PKCS7.sign(@certificate, @key, data, @certificate_chain,
229
+ OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
230
+ end
231
+ end
232
+
233
+ end
234
+
235
+ # This is a signing handler for adding a timestamp signature (a PDF2.0 feature) to a PDF
236
+ # document. It is registered under the :timestamp name.
237
+ #
238
+ # The timestamp is provided by a timestamp authority and establishes the document contents at
239
+ # the time indicated in the timestamp. Timestamping a PDF document is usually done in context
240
+ # of long term validation but can also be done standalone.
241
+ #
242
+ # == Usage
243
+ #
244
+ # It is necessary to provide at least the URL of the timestamp authority server (TSA) via
245
+ # #tsa_url, everything else is optional and uses default values. The TSA server must not use
246
+ # authentication to be usable.
247
+ #
248
+ # Example:
249
+ #
250
+ # document.sign("output.pdf", handler: :timestamp, tsa_url: 'https://freetsa.org/tsr')
251
+ class TimestampHandler
252
+
253
+ # The URL of the timestamp authority server.
254
+ #
255
+ # This value is required.
256
+ attr_accessor :tsa_url
257
+
258
+ # The hash algorithm to use for timestamping. Defaults to SHA512.
259
+ attr_accessor :tsa_hash_algorithm
260
+
261
+ # The policy OID to use for timestamping. Defaults to +nil+.
262
+ attr_accessor :tsa_policy_id
263
+
264
+ # The size of the serialized signature that should be reserved.
265
+ #
266
+ # If this attribute has not been set, an empty string will be signed using #sign to
267
+ # determine the signature size, which will contact the TSA server.
268
+ #
269
+ # The size needs to be at least as big as the final signature, otherwise signing results in
270
+ # an error.
271
+ attr_writer :signature_size
272
+
273
+ # The reason for timestamping. If used, will be set on the signature object.
274
+ attr_accessor :reason
275
+
276
+ # The timestamping location. If used, will be set on the signature object.
277
+ attr_accessor :location
278
+
279
+ # The contact information. If used, will be set on the signature object.
280
+ attr_accessor :contact_info
281
+
282
+ # Creates a new TimestampHandler with the given attributes.
283
+ def initialize(**arguments)
284
+ @signature_size = nil
285
+ arguments.each {|name, value| send("#{name}=", value) }
286
+ end
287
+
288
+ # Returns the size of the serialized signature that should be reserved.
289
+ def signature_size
290
+ @signature_size || (sign(StringIO.new, [0, 0, 0, 0]).size * 1.5).to_i
291
+ end
292
+
293
+ # Finalizes the signature field as well as the signature dictionary before writing.
294
+ def finalize_objects(_signature_field, signature)
295
+ signature.document.version = '2.0'
296
+ signature[:Type] = :DocTimeStamp
297
+ signature[:SubFilter] = :'ETSI.RFC3161'
298
+ signature[:Reason] = reason if reason
299
+ signature[:Location] = location if location
300
+ signature[:ContactInfo] = contact_info if contact_info
301
+ end
302
+
303
+ # Returns the DER serialized OpenSSL::PKCS7 structure containing the timestamp token for the
304
+ # given IO byte ranges.
305
+ def sign(io, byte_range)
306
+ hash_algorithm = tsa_hash_algorithm || 'SHA512'
307
+ digest = OpenSSL::Digest.new(hash_algorithm)
308
+ io.pos = byte_range[0]
309
+ digest << io.read(byte_range[1])
310
+ io.pos = byte_range[2]
311
+ digest << io.read(byte_range[3])
312
+
313
+ req = OpenSSL::Timestamp::Request.new
314
+ req.algorithm = hash_algorithm
315
+ req.message_imprint = digest.digest
316
+ req.policy_id = tsa_policy_id if tsa_policy_id
317
+
318
+ http_response = Net::HTTP.post(URI(tsa_url), req.to_der,
319
+ 'content-type' => 'application/timestamp-query')
320
+ if http_response.kind_of?(Net::HTTPOK)
321
+ response = OpenSSL::Timestamp::Response.new(http_response.body)
322
+ if response.status == 0
323
+ response.token.to_der
324
+ else
325
+ raise HexaPDF::Error, "Timestamp token could not be created: #{response.failure_info}"
326
+ end
327
+ else
328
+ raise HexaPDF::Error, "Invalid TSA server response: #{http_response.body}"
329
+ end
160
330
  end
161
331
 
162
332
  end
163
333
 
334
+ # Embeds the given +signature+ into the /Contents value of the newest signature dictionary of
335
+ # the PDF document given by the +io+ argument.
336
+ #
337
+ # This functionality can be used together with the support for external signing (see
338
+ # DefaultHandler and DefaultHandler#external_signing) to implement asynchronous signing.
339
+ #
340
+ # Note: This will, most probably, only work on documents prepared for external signing by
341
+ # HexaPDF and not by other libraries.
342
+ def self.embed_signature(io, signature)
343
+ doc = HexaPDF::Document.new(io: io)
344
+ signature_dict = doc.signatures.find {|sig| doc.revisions.current.object(sig) == sig }
345
+ signature_dict_offset, signature_dict_length = locate_signature_dict(
346
+ doc.revisions.current.xref_section,
347
+ doc.revisions.parser.startxref_offset,
348
+ signature_dict.oid
349
+ )
350
+ io.pos = signature_dict_offset
351
+ signature_data = io.read(signature_dict_length)
352
+ replace_signature_contents(signature_data, signature)
353
+ io.pos = signature_dict_offset
354
+ io.write(signature_data)
355
+ end
356
+
357
+ # Uses the information in the given cross-reference section as well as the byte offset of the
358
+ # cross-reference section to calculate the offset and length of the signature dictionary with
359
+ # the given object id.
360
+ def self.locate_signature_dict(xref_section, start_xref_position, signature_oid)
361
+ data = xref_section.map {|oid, _gen, entry| [entry.pos, oid] if entry.in_use? }.compact.sort <<
362
+ [start_xref_position, nil]
363
+ index = data.index {|_pos, oid| oid == signature_oid }
364
+ [data[index][0], data[index + 1][0] - data[index][0]]
365
+ end
366
+
367
+ # Replaces the value of the /Contents key in the serialized +signature_data+ with the value of
368
+ # +contents+.
369
+ def self.replace_signature_contents(signature_data, contents)
370
+ signature_data.sub!(/Contents(?:\(.*?\)|<.*?>)/) do |match|
371
+ length = match.size
372
+ result = "Contents<#{contents.unpack1('H*')}"
373
+ if length < result.size
374
+ raise HexaPDF::Error, "The reserved space for the signature was too small " \
375
+ "(#{(length - 10) / 2} vs #{(result.size - 10) / 2}) - use the handlers " \
376
+ "#signature_size method to increase the reserved space"
377
+ end
378
+ "#{result.ljust(length - 1, '0')}>"
379
+ end
380
+ end
381
+
164
382
  include Enumerable
165
383
 
166
384
  # Creates a new Signatures object for the given PDF document.
@@ -168,15 +386,15 @@ module HexaPDF
168
386
  @document = document
169
387
  end
170
388
 
171
- # Creates a signing handler with the given options and returns it.
389
+ # Creates a signing handler with the given attributes and returns it.
172
390
  #
173
391
  # A signing handler name is mapped to a class via the 'signature.signing_handler'
174
392
  # configuration option. The default signing handler is DefaultHandler.
175
- def handler(name: :default, **options)
393
+ def handler(name: :default, **attributes)
176
394
  handler = @document.config.constantize('signature.signing_handler', name) do
177
395
  raise HexaPDF::Error, "No signing handler named '#{name}' is available"
178
396
  end
179
- handler.new(**options)
397
+ handler.new(**attributes)
180
398
  end
181
399
 
182
400
  # Adds a signature to the document and returns the corresponding signature object.
@@ -209,8 +427,15 @@ module HexaPDF
209
427
  #
210
428
  # +write_options+::
211
429
  # The key-value pairs of this hash will be passed on to the HexaPDF::Document#write
212
- # command. Note that +incremental+ will be automatically set if signing an already
213
- # existing file.
430
+ # method. Note that +incremental+ will be automatically set to ensure proper behaviour.
431
+ #
432
+ # The used signature object will have the following default values set:
433
+ #
434
+ # /Filter:: /Adobe.PPKLite
435
+ # /SubFilter:: /adbe.pkcs7.detached
436
+ # /M:: The current time.
437
+ #
438
+ # These values can be overridden in the #finalize_objects method of the signature handler.
214
439
  def add(file_or_io, handler, signature: nil, write_options: {})
215
440
  if signature && signature.type != :Sig
216
441
  signature_field = signature
@@ -232,11 +457,12 @@ module HexaPDF
232
457
  end
233
458
 
234
459
  # Prepare signature object
235
- signature[:Filter] = handler.filter_name
236
- signature[:SubFilter] = handler.sub_filter_name
460
+ signature[:Filter] = :'Adobe.PPKLite'
461
+ signature[:SubFilter] = :'adbe.pkcs7.detached'
462
+ signature[:M] = Time.now
463
+ handler.finalize_objects(signature_field, signature)
237
464
  signature[:ByteRange] = [0, 1_000_000_000_000, 1_000_000_000_000, 1_000_000_000_000]
238
465
  signature[:Contents] = '00' * handler.signature_size # twice the size due to hex encoding
239
- signature[:M] = Time.now
240
466
 
241
467
  io = if file_or_io.kind_of?(String)
242
468
  File.open(file_or_io, 'wb+')
@@ -246,23 +472,19 @@ module HexaPDF
246
472
 
247
473
  # Save the current state so that we can determine the correct /ByteRange value and set the
248
474
  # values
249
- handler.finalize_objects(signature_field, signature)
250
- start_xref_position, section = @document.write(io, incremental: true, **write_options)
251
- data = section.map {|oid, _gen, entry| [entry.pos, oid] if entry.in_use? }.compact.sort <<
252
- [start_xref_position, nil]
253
- index = data.index {|_pos, oid| oid == signature.oid }
254
- signature_offset = data[index][0]
255
- signature_length = data[index + 1][0] - data[index][0]
475
+ start_xref, section = @document.write(io, incremental: true, **write_options)
476
+ signature_offset, signature_length = self.class.locate_signature_dict(section, start_xref,
477
+ signature.oid)
256
478
  io.pos = signature_offset
257
479
  signature_data = io.read(signature_length)
258
480
 
259
- io.rewind
260
- file_data = io.read
481
+ io.seek(0, IO::SEEK_END)
482
+ file_size = io.pos
261
483
 
262
484
  # Calculate the offsets for the /ByteRange
263
485
  contents_offset = signature_offset + signature_data.index('Contents(') + 8
264
486
  offset2 = contents_offset + signature[:Contents].size + 2 # +2 because of the needed < and >
265
- length2 = file_data.size - offset2
487
+ length2 = file_size - offset2
266
488
  signature[:ByteRange] = [0, contents_offset, offset2, length2]
267
489
 
268
490
  # Set the correct /ByteRange value
@@ -274,17 +496,12 @@ module HexaPDF
274
496
 
275
497
  # Now everything besides the /Contents value is correct, so we can read the contents for
276
498
  # signing
277
- file_data[signature_offset, signature_length] = signature_data
278
- signed_contents = file_data[0, contents_offset] << file_data[offset2, length2]
279
- signature[:Contents] = handler.sign(signed_contents)
280
-
281
- # Set the correct /Contents value as hexstring
282
- signature_data.sub!(/Contents\(0+\)/) do |match|
283
- length = match.size
284
- result = "Contents<#{signature[:Contents].unpack1('H*')}"
285
- "#{result.ljust(length - 1, '0')}>"
286
- end
499
+ io.pos = signature_offset
500
+ io.write(signature_data)
501
+ signature[:Contents] = handler.sign(io, signature[:ByteRange].value)
287
502
 
503
+ # And now replace the /Contents value
504
+ self.class.replace_signature_contents(signature_data, signature[:Contents])
288
505
  io.pos = signature_offset
289
506
  io.write(signature_data)
290
507
 
@@ -94,6 +94,9 @@ module HexaPDF
94
94
  if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
95
  raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
96
  elsif mapped_object && !mapped_object.null?
97
+ if object.class != mapped_object.class
98
+ mapped_object = @destination.wrap(mapped_object, type: object.class)
99
+ end
97
100
  mapped_object
98
101
  else
99
102
  duplicate(object)
@@ -137,6 +137,7 @@ module HexaPDF
137
137
  @tokenizer.pos -= 6
138
138
  else
139
139
  maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
140
+ return [nil, oid, gen, nil]
140
141
  end
141
142
  end
142
143
  end
@@ -95,13 +95,15 @@ module HexaPDF
95
95
 
96
96
  if merge_revision == offset
97
97
  xref_section.merge!(revisions.first.xref_section)
98
+ offset = trailer[:Prev] # Get possible next offset before overwriting trailer
98
99
  trailer = revisions.first.trailer
99
100
  revisions.shift
101
+ else
102
+ offset = trailer[:Prev]
100
103
  end
101
104
 
102
105
  revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
103
106
  xref_section: xref_section, loader: object_loader))
104
- offset = trailer[:Prev]
105
107
  end
106
108
  rescue HexaPDF::MalformedPDFError
107
109
  raise unless (reconstructed_revision = parser.reconstructed_revision)
@@ -274,7 +274,7 @@ module HexaPDF
274
274
  TOKEN_CACHE[str]
275
275
  end
276
276
 
277
- REFERENCE_RE = /[#{WHITESPACE}]+([+-]?\d+)[#{WHITESPACE}]+R#{WHITESPACE_OR_DELIMITER_RE}/ # :nodoc:
277
+ REFERENCE_RE = /[#{WHITESPACE}]+([+]?\d+)[#{WHITESPACE}]+R#{WHITESPACE_OR_DELIMITER_RE}/ # :nodoc:
278
278
 
279
279
  # Parses the number (integer or real) at the current position.
280
280
  #
@@ -285,7 +285,7 @@ module HexaPDF
285
285
  tmp = val.to_i
286
286
  # Handle object references, see PDF1.7 s7.3.10
287
287
  prepare_string_scanner(10)
288
- tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
288
+ tmp = Reference.new(tmp, @ss[1].to_i) if tmp > 0 && @ss.scan(REFERENCE_RE)
289
289
  tmp
290
290
  elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
291
291
  val << '0' if val.getbyte(-1) == 46 # dot '.'
@@ -395,7 +395,8 @@ module HexaPDF
395
395
 
396
396
  not_flattened = fields.map {|field| field.each_widget.to_a }.flatten
397
397
  document.pages.each {|page| not_flattened = page.flatten_annotations(not_flattened) }
398
- fields -= not_flattened.map(&:form_field)
398
+ not_flattened.map!(&:form_field)
399
+ fields -= not_flattened
399
400
 
400
401
  fields.each do |field|
401
402
  (field[:Parent]&.[](:Kids) || self[:Fields]).delete(field)
@@ -448,6 +449,32 @@ module HexaPDF
448
449
  def perform_validation # :nodoc:
449
450
  super
450
451
 
452
+ validate_array = lambda do |parent, container|
453
+ container.reject! do |field|
454
+ if !field.kind_of?(HexaPDF::Object) || !field.kind_of?(HexaPDF::Dictionary) || field.null?
455
+ yield("Invalid object in AcroForm field hierarchy", true)
456
+ next true
457
+ end
458
+ next false unless field.key?(:T) # Skip widgets
459
+
460
+ field = document.wrap(field, type: :XXAcroFormField,
461
+ subtype: Field.inherited_value(field, :FT))
462
+ reject = false
463
+ if field[:Parent] != parent
464
+ yield("Parent entry of field (#{field.oid},#{field.gen}) invalid", true)
465
+ if field[:Parent].nil?
466
+ root_fields << field
467
+ reject = true
468
+ else
469
+ field[:Parent] = parent
470
+ end
471
+ end
472
+ validate_array.call(field, field[:Kids]) if field.key?(:Kids)
473
+ reject
474
+ end
475
+ end
476
+ validate_array.call(nil, root_fields)
477
+
451
478
  if (da = self[:DA])
452
479
  unless self[:DR]
453
480
  yield("When the field /DA is present, the field /DR must also be present")
@@ -65,7 +65,7 @@ module HexaPDF
65
65
  :TwoPageLeft, :TwoPageRight]
66
66
  define_field :PageMode, type: Symbol, default: :UseNone,
67
67
  allowed_values: [:UseNone, :UseOutlines, :UseThumbs, :FullScreen, :UseOC, :UseAttachments]
68
- define_field :Outlines, type: Dictionary, indirect: true
68
+ define_field :Outlines, type: :Outlines, indirect: true
69
69
  define_field :Threads, type: PDFArray, version: '1.1'
70
70
  define_field :OpenAction, type: [Dictionary, PDFArray], version: '1.1'
71
71
  define_field :AA, type: Dictionary, version: '1.4'
@@ -50,6 +50,8 @@ module HexaPDF
50
50
  # The outline dictionary is linked via the /Outlines entry from the Type::Catalog and can
51
51
  # directly be accessed via HexaPDF::Document#outline.
52
52
  #
53
+ # == Examples
54
+ #
53
55
  # Here is an example for creating an outline:
54
56
  #
55
57
  # doc = HexaPDF::Document.new
@@ -62,6 +64,22 @@ module HexaPDF
62
64
  # end
63
65
  # end
64
66
  #
67
+ # Here is one for copying the complete outline from one PDF to another:
68
+ #
69
+ # doc = HexaPDF::Document.open(ARGV[0])
70
+ # target = HexaPDF::Document.new
71
+ # stack = [target.outline]
72
+ # doc.outline.each_item do |item, level|
73
+ # if stack.size < level
74
+ # stack << stack.last[:Last]
75
+ # elsif stack.size > level
76
+ # (stack.size - level).times { stack.pop }
77
+ # end
78
+ # stack.last.add_item(target.import(item))
79
+ # end
80
+ # # Copying all the pages so that the references work.
81
+ # doc.pages.each {|page| target.pages << target.import(page) }
82
+ #
65
83
  # See: PDF1.7 s12.3.3
66
84
  class Outline < Dictionary
67
85