hexapdf 0.26.2 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +1 -1
  4. data/examples/013-text_layouter_shapes.rb +8 -8
  5. data/examples/016-frame_automatic_box_placement.rb +3 -3
  6. data/examples/017-frame_text_flow.rb +3 -3
  7. data/examples/020-column_box.rb +3 -3
  8. data/lib/hexapdf/cli/split.rb +7 -7
  9. data/lib/hexapdf/cli/watermark.rb +2 -2
  10. data/lib/hexapdf/configuration.rb +2 -0
  11. data/lib/hexapdf/dictionary.rb +3 -12
  12. data/lib/hexapdf/document/destinations.rb +42 -5
  13. data/lib/hexapdf/document/signatures.rb +265 -48
  14. data/lib/hexapdf/importer.rb +3 -0
  15. data/lib/hexapdf/parser.rb +1 -0
  16. data/lib/hexapdf/revisions.rb +3 -1
  17. data/lib/hexapdf/tokenizer.rb +2 -2
  18. data/lib/hexapdf/type/acro_form/form.rb +28 -1
  19. data/lib/hexapdf/type/catalog.rb +1 -1
  20. data/lib/hexapdf/type/outline.rb +18 -0
  21. data/lib/hexapdf/type/outline_item.rb +72 -14
  22. data/lib/hexapdf/type/page.rb +56 -35
  23. data/lib/hexapdf/type/resources.rb +13 -17
  24. data/lib/hexapdf/type/signature/adbe_pkcs7_detached.rb +16 -2
  25. data/lib/hexapdf/type/signature.rb +10 -0
  26. data/lib/hexapdf/version.rb +1 -1
  27. data/lib/hexapdf/writer.rb +3 -0
  28. data/test/hexapdf/document/test_destinations.rb +41 -0
  29. data/test/hexapdf/document/test_signatures.rb +139 -19
  30. data/test/hexapdf/test_importer.rb +14 -0
  31. data/test/hexapdf/test_parser.rb +2 -2
  32. data/test/hexapdf/test_revisions.rb +20 -12
  33. data/test/hexapdf/test_tokenizer.rb +11 -1
  34. data/test/hexapdf/test_writer.rb +11 -3
  35. data/test/hexapdf/type/acro_form/test_form.rb +47 -0
  36. data/test/hexapdf/type/signature/common.rb +52 -0
  37. data/test/hexapdf/type/signature/test_adbe_pkcs7_detached.rb +21 -0
  38. data/test/hexapdf/type/test_catalog.rb +5 -2
  39. data/test/hexapdf/type/test_outline.rb +1 -1
  40. data/test/hexapdf/type/test_outline_item.rb +62 -1
  41. data/test/hexapdf/type/test_page.rb +41 -20
  42. data/test/hexapdf/type/test_resources.rb +0 -5
  43. data/test/hexapdf/type/test_signature.rb +8 -0
  44. data/test/test_helper.rb +1 -1
  45. metadata +17 -3
@@ -35,7 +35,9 @@
35
35
  #++
36
36
 
37
37
  require 'openssl'
38
+ require 'net/http'
38
39
  require 'hexapdf/error'
40
+ require 'stringio'
39
41
 
40
42
  module HexaPDF
41
43
  class Document
@@ -43,19 +45,59 @@ module HexaPDF
43
45
  # This class provides methods for interacting with digital signatures of a PDF file.
44
46
  class Signatures
45
47
 
46
- # This is the default signing handler which provides the ability to sign a document with a
47
- # provided certificate using the adb.pkcs7.detached algorithm.
48
+ # This is the default signing handler which provides the ability to sign a document with the
49
+ # adbe.pkcs7.detached or ETSI.CAdES.detached algorithms. It is registered under the :default
50
+ # name.
51
+ #
52
+ # == Usage
53
+ #
54
+ # The signing handler is used by default by all methods that need a signing handler. Therefore
55
+ # it is usually only necessary to provide the actual attribute values.
56
+ #
57
+ # This handler provides two ways to create the PKCS#7 signed-data structure required by
58
+ # Signatures#add:
59
+ #
60
+ # * By providing the signing certificate together with the signing key and the certificate
61
+ # chain. This way HexaPDF itself does the signing. It is the preferred way if all the needed
62
+ # information is available.
63
+ #
64
+ # Assign the respective data to the #certificate, #key and #certificate_chain attributes.
65
+ #
66
+ # * By using an external signing mechanism. Here the actual signing happens "outside" of
67
+ # HexaPDF, for example, in custom code or even asynchronously. This is needed in case the
68
+ # signing certificate plus key are not directly available but only an interface to them
69
+ # (e.g. when dealing with a HSM).
70
+ #
71
+ # Assign a callable object to #external_signing. If the signing process needs to be
72
+ # asynchronous, make sure to set the #signature_size appropriately, return an empty string
73
+ # during signing and later use Signatures.embed_signature to embed the actual signature.
48
74
  #
49
75
  # Additional functionality:
50
76
  #
51
77
  # * Optionally setting the reason, location and contact information.
52
78
  # * Making the signature a certification signature by applying the DocMDP transform method.
53
79
  #
80
+ # Example:
81
+ #
82
+ # # Signing using certificate + key
83
+ # document.sign("output.pdf", certificate: my_cert, key: my_key,
84
+ # certificate_chain: my_chain)
85
+ #
86
+ # # Signing using an external mechanism:
87
+ # signing_proc = lambda do |io, byte_range|
88
+ # io.pos = byte_range[0]
89
+ # data = io.read(byte_range[1])
90
+ # io.pos = byte_range[2]
91
+ # data << io.read(byte_range[3])
92
+ # signing_service.pkcs7_sign(data)
93
+ # end
94
+ # document.sign("output.pdf", signature_size: 10_000, external_signing: signing_proc)
95
+ #
54
96
  # == Implementing a Signing Handler
55
97
  #
56
98
  # This class also serves as an example on how to create a custom handler: The public methods
57
- # #filter_name, #sub_filter_name, #signature_size, #finalize_objects and #sign are used by the
58
- # digital signature algorithm.
99
+ # #signature_size, #finalize_objects and #sign are used by the digital signature algorithm.
100
+ # See their descriptions for details.
59
101
  #
60
102
  # Once a custom signing handler has been created, it can be registered under the
61
103
  # 'signature.signing_handler' configuration option for easy use. It has to take keyword
@@ -72,6 +114,13 @@ module HexaPDF
72
114
  # certificates up to the root certificate.
73
115
  attr_accessor :certificate_chain
74
116
 
117
+ # A callable object fulfilling the same role as the #sign method that is used instead of the
118
+ # default mechanism for signing.
119
+ #
120
+ # If this attribute is set, the attributes #certificate, #key and #certificate_chain are not
121
+ # used.
122
+ attr_accessor :external_signing
123
+
75
124
  # The reason for signing. If used, will be set on the signature object.
76
125
  attr_accessor :reason
77
126
 
@@ -81,6 +130,21 @@ module HexaPDF
81
130
  # The contact information. If used, will be set on the signature object.
82
131
  attr_accessor :contact_info
83
132
 
133
+ # The size of the serialized signature that should be reserved.
134
+ #
135
+ # If this attribute has not been set, an empty string will be signed using #sign to
136
+ # determine the signature size.
137
+ #
138
+ # The size needs to be at least as big as the final signature, otherwise signing results in
139
+ # an error.
140
+ attr_writer :signature_size
141
+
142
+ # The type of signature to be written (i.e. the value of the /SubFilter key).
143
+ #
144
+ # The value can either be :adobe (the default; uses a detached PKCS7 signature) or :etsi
145
+ # (uses an ETSI CAdES compatible signature).
146
+ attr_accessor :signature_type
147
+
84
148
  # The DocMDP permissions that should be set on the document.
85
149
  #
86
150
  # See #doc_mdp_permissions=
@@ -88,19 +152,10 @@ module HexaPDF
88
152
 
89
153
  # Creates a new DefaultHandler with the given attributes.
90
154
  def initialize(**arguments)
155
+ @signature_size = nil
91
156
  arguments.each {|name, value| send("#{name}=", value) }
92
157
  end
93
158
 
94
- # Returns the name to be set on the /Filter key when using this signing handler.
95
- def filter_name
96
- :'Adobe.PPKLite'
97
- end
98
-
99
- # Returns the name to be set on the /SubFilter key when using this signing handler.
100
- def sub_filter_name
101
- :'adbe.pkcs7.detached'
102
- end
103
-
104
159
  # Sets the DocMDP permissions that should be applied to the document.
105
160
  #
106
161
  # Valid values for +permissions+ are:
@@ -128,13 +183,17 @@ module HexaPDF
128
183
  end
129
184
  end
130
185
 
131
- # Returns the size of the signature that would be created.
186
+ # Returns the size of the serialized signature that should be reserved.
187
+ #
188
+ # If a custom size is set using #signature_size=, it is used. Otherwise the size is determined
189
+ # by using #sign to sign an empty string.
132
190
  def signature_size
133
- sign("").size
191
+ @signature_size || sign(StringIO.new, [0, 0, 0, 0]).size
134
192
  end
135
193
 
136
194
  # Finalizes the signature field as well as the signature dictionary before writing.
137
195
  def finalize_objects(_signature_field, signature)
196
+ signature[:SubFilter] = :'ETSI.CAdES.detached' if signature_type == :etsi
138
197
  signature[:Reason] = reason if reason
139
198
  signature[:Location] = location if location
140
199
  signature[:ContactInfo] = contact_info if contact_info
@@ -153,14 +212,173 @@ module HexaPDF
153
212
  end
154
213
 
155
214
  # Returns the DER serialized OpenSSL::PKCS7 structure containing the signature for the given
156
- # data.
157
- def sign(data)
158
- OpenSSL::PKCS7.sign(@certificate, @key, data, @certificate_chain,
159
- OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
215
+ # IO byte ranges.
216
+ #
217
+ # The +byte_range+ argument is an array containing four numbers [offset1, length1, offset2,
218
+ # length2]. The offset numbers are byte positions in the +io+ argument and the to-be-signed
219
+ # data can be determined by reading length bytes at the offsets.
220
+ def sign(io, byte_range)
221
+ if external_signing
222
+ external_signing.call(io, byte_range)
223
+ else
224
+ io.pos = byte_range[0]
225
+ data = io.read(byte_range[1])
226
+ io.pos = byte_range[2]
227
+ data << io.read(byte_range[3])
228
+ OpenSSL::PKCS7.sign(@certificate, @key, data, @certificate_chain,
229
+ OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
230
+ end
231
+ end
232
+
233
+ end
234
+
235
+ # This is a signing handler for adding a timestamp signature (a PDF2.0 feature) to a PDF
236
+ # document. It is registered under the :timestamp name.
237
+ #
238
+ # The timestamp is provided by a timestamp authority and establishes the document contents at
239
+ # the time indicated in the timestamp. Timestamping a PDF document is usually done in the context
240
+ # of long-term validation but can also be done standalone.
241
+ #
242
+ # == Usage
243
+ #
244
+ # It is necessary to provide at least the URL of the timestamp authority server (TSA) via
245
+ # #tsa_url, everything else is optional and uses default values. The TSA server must not use
246
+ # authentication to be usable.
247
+ #
248
+ # Example:
249
+ #
250
+ # document.sign("output.pdf", handler: :timestamp, tsa_url: 'https://freetsa.org/tsr')
251
+ class TimestampHandler
252
+
253
+ # The URL of the timestamp authority server.
254
+ #
255
+ # This value is required.
256
+ attr_accessor :tsa_url
257
+
258
+ # The hash algorithm to use for timestamping. Defaults to SHA512.
259
+ attr_accessor :tsa_hash_algorithm
260
+
261
+ # The policy OID to use for timestamping. Defaults to +nil+.
262
+ attr_accessor :tsa_policy_id
263
+
264
+ # The size of the serialized signature that should be reserved.
265
+ #
266
+ # If this attribute has not been set, an empty string will be signed using #sign to
267
+ # determine the signature size, which will contact the TSA server.
268
+ #
269
+ # The size needs to be at least as big as the final signature, otherwise signing results in
270
+ # an error.
271
+ attr_writer :signature_size
272
+
273
+ # The reason for timestamping. If used, will be set on the signature object.
274
+ attr_accessor :reason
275
+
276
+ # The timestamping location. If used, will be set on the signature object.
277
+ attr_accessor :location
278
+
279
+ # The contact information. If used, will be set on the signature object.
280
+ attr_accessor :contact_info
281
+
282
+ # Creates a new TimestampHandler with the given attributes.
283
+ def initialize(**arguments)
284
+ @signature_size = nil
285
+ arguments.each {|name, value| send("#{name}=", value) }
286
+ end
287
+
288
+ # Returns the size of the serialized signature that should be reserved.
289
+ def signature_size
290
+ @signature_size || (sign(StringIO.new, [0, 0, 0, 0]).size * 1.5).to_i
291
+ end
292
+
293
+ # Finalizes the signature field as well as the signature dictionary before writing.
294
+ def finalize_objects(_signature_field, signature)
295
+ signature.document.version = '2.0'
296
+ signature[:Type] = :DocTimeStamp
297
+ signature[:SubFilter] = :'ETSI.RFC3161'
298
+ signature[:Reason] = reason if reason
299
+ signature[:Location] = location if location
300
+ signature[:ContactInfo] = contact_info if contact_info
301
+ end
302
+
303
+ # Returns the DER serialized OpenSSL::PKCS7 structure containing the timestamp token for the
304
+ # given IO byte ranges.
305
+ def sign(io, byte_range)
306
+ hash_algorithm = tsa_hash_algorithm || 'SHA512'
307
+ digest = OpenSSL::Digest.new(hash_algorithm)
308
+ io.pos = byte_range[0]
309
+ digest << io.read(byte_range[1])
310
+ io.pos = byte_range[2]
311
+ digest << io.read(byte_range[3])
312
+
313
+ req = OpenSSL::Timestamp::Request.new
314
+ req.algorithm = hash_algorithm
315
+ req.message_imprint = digest.digest
316
+ req.policy_id = tsa_policy_id if tsa_policy_id
317
+
318
+ http_response = Net::HTTP.post(URI(tsa_url), req.to_der,
319
+ 'content-type' => 'application/timestamp-query')
320
+ if http_response.kind_of?(Net::HTTPOK)
321
+ response = OpenSSL::Timestamp::Response.new(http_response.body)
322
+ if response.status == 0
323
+ response.token.to_der
324
+ else
325
+ raise HexaPDF::Error, "Timestamp token could not be created: #{response.failure_info}"
326
+ end
327
+ else
328
+ raise HexaPDF::Error, "Invalid TSA server response: #{http_response.body}"
329
+ end
160
330
  end
161
331
 
162
332
  end
163
333
 
334
+ # Embeds the given +signature+ into the /Contents value of the newest signature dictionary of
335
+ # the PDF document given by the +io+ argument.
336
+ #
337
+ # This functionality can be used together with the support for external signing (see
338
+ # DefaultHandler and DefaultHandler#external_signing) to implement asynchronous signing.
339
+ #
340
+ # Note: This will, most probably, only work on documents prepared for external signing by
341
+ # HexaPDF and not by other libraries.
342
+ def self.embed_signature(io, signature)
343
+ doc = HexaPDF::Document.new(io: io)
344
+ signature_dict = doc.signatures.find {|sig| doc.revisions.current.object(sig) == sig }
345
+ signature_dict_offset, signature_dict_length = locate_signature_dict(
346
+ doc.revisions.current.xref_section,
347
+ doc.revisions.parser.startxref_offset,
348
+ signature_dict.oid
349
+ )
350
+ io.pos = signature_dict_offset
351
+ signature_data = io.read(signature_dict_length)
352
+ replace_signature_contents(signature_data, signature)
353
+ io.pos = signature_dict_offset
354
+ io.write(signature_data)
355
+ end
356
+
357
+ # Uses the information in the given cross-reference section as well as the byte offset of the
358
+ # cross-reference section to calculate the offset and length of the signature dictionary with
359
+ # the given object id.
360
+ def self.locate_signature_dict(xref_section, start_xref_position, signature_oid)
361
+ data = xref_section.map {|oid, _gen, entry| [entry.pos, oid] if entry.in_use? }.compact.sort <<
362
+ [start_xref_position, nil]
363
+ index = data.index {|_pos, oid| oid == signature_oid }
364
+ [data[index][0], data[index + 1][0] - data[index][0]]
365
+ end
366
+
367
+ # Replaces the value of the /Contents key in the serialized +signature_data+ with the value of
368
+ # +contents+.
369
+ def self.replace_signature_contents(signature_data, contents)
370
+ signature_data.sub!(/Contents(?:\(.*?\)|<.*?>)/) do |match|
371
+ length = match.size
372
+ result = "Contents<#{contents.unpack1('H*')}"
373
+ if length < result.size
374
+ raise HexaPDF::Error, "The reserved space for the signature was too small " \
375
+ "(#{(length - 10) / 2} vs #{(result.size - 10) / 2}) - use the handlers " \
376
+ "#signature_size method to increase the reserved space"
377
+ end
378
+ "#{result.ljust(length - 1, '0')}>"
379
+ end
380
+ end
381
+
164
382
  include Enumerable
165
383
 
166
384
  # Creates a new Signatures object for the given PDF document.
@@ -168,15 +386,15 @@ module HexaPDF
168
386
  @document = document
169
387
  end
170
388
 
171
- # Creates a signing handler with the given options and returns it.
389
+ # Creates a signing handler with the given attributes and returns it.
172
390
  #
173
391
  # A signing handler name is mapped to a class via the 'signature.signing_handler'
174
392
  # configuration option. The default signing handler is DefaultHandler.
175
- def handler(name: :default, **options)
393
+ def handler(name: :default, **attributes)
176
394
  handler = @document.config.constantize('signature.signing_handler', name) do
177
395
  raise HexaPDF::Error, "No signing handler named '#{name}' is available"
178
396
  end
179
- handler.new(**options)
397
+ handler.new(**attributes)
180
398
  end
181
399
 
182
400
  # Adds a signature to the document and returns the corresponding signature object.
@@ -209,8 +427,15 @@ module HexaPDF
209
427
  #
210
428
  # +write_options+::
211
429
  # The key-value pairs of this hash will be passed on to the HexaPDF::Document#write
212
- # command. Note that +incremental+ will be automatically set if signing an already
213
- # existing file.
430
+ # method. Note that +incremental+ will be automatically set to ensure proper behaviour.
431
+ #
432
+ # The used signature object will have the following default values set:
433
+ #
434
+ # /Filter:: /Adobe.PPKLite
435
+ # /SubFilter:: /adbe.pkcs7.detached
436
+ # /M:: The current time.
437
+ #
438
+ # These values can be overridden in the #finalize_objects method of the signature handler.
214
439
  def add(file_or_io, handler, signature: nil, write_options: {})
215
440
  if signature && signature.type != :Sig
216
441
  signature_field = signature
@@ -232,11 +457,12 @@ module HexaPDF
232
457
  end
233
458
 
234
459
  # Prepare signature object
235
- signature[:Filter] = handler.filter_name
236
- signature[:SubFilter] = handler.sub_filter_name
460
+ signature[:Filter] = :'Adobe.PPKLite'
461
+ signature[:SubFilter] = :'adbe.pkcs7.detached'
462
+ signature[:M] = Time.now
463
+ handler.finalize_objects(signature_field, signature)
237
464
  signature[:ByteRange] = [0, 1_000_000_000_000, 1_000_000_000_000, 1_000_000_000_000]
238
465
  signature[:Contents] = '00' * handler.signature_size # twice the size due to hex encoding
239
- signature[:M] = Time.now
240
466
 
241
467
  io = if file_or_io.kind_of?(String)
242
468
  File.open(file_or_io, 'wb+')
@@ -246,23 +472,19 @@ module HexaPDF
246
472
 
247
473
  # Save the current state so that we can determine the correct /ByteRange value and set the
248
474
  # values
249
- handler.finalize_objects(signature_field, signature)
250
- start_xref_position, section = @document.write(io, incremental: true, **write_options)
251
- data = section.map {|oid, _gen, entry| [entry.pos, oid] if entry.in_use? }.compact.sort <<
252
- [start_xref_position, nil]
253
- index = data.index {|_pos, oid| oid == signature.oid }
254
- signature_offset = data[index][0]
255
- signature_length = data[index + 1][0] - data[index][0]
475
+ start_xref, section = @document.write(io, incremental: true, **write_options)
476
+ signature_offset, signature_length = self.class.locate_signature_dict(section, start_xref,
477
+ signature.oid)
256
478
  io.pos = signature_offset
257
479
  signature_data = io.read(signature_length)
258
480
 
259
- io.rewind
260
- file_data = io.read
481
+ io.seek(0, IO::SEEK_END)
482
+ file_size = io.pos
261
483
 
262
484
  # Calculate the offsets for the /ByteRange
263
485
  contents_offset = signature_offset + signature_data.index('Contents(') + 8
264
486
  offset2 = contents_offset + signature[:Contents].size + 2 # +2 because of the needed < and >
265
- length2 = file_data.size - offset2
487
+ length2 = file_size - offset2
266
488
  signature[:ByteRange] = [0, contents_offset, offset2, length2]
267
489
 
268
490
  # Set the correct /ByteRange value
@@ -274,17 +496,12 @@ module HexaPDF
274
496
 
275
497
  # Now everything besides the /Contents value is correct, so we can read the contents for
276
498
  # signing
277
- file_data[signature_offset, signature_length] = signature_data
278
- signed_contents = file_data[0, contents_offset] << file_data[offset2, length2]
279
- signature[:Contents] = handler.sign(signed_contents)
280
-
281
- # Set the correct /Contents value as hexstring
282
- signature_data.sub!(/Contents\(0+\)/) do |match|
283
- length = match.size
284
- result = "Contents<#{signature[:Contents].unpack1('H*')}"
285
- "#{result.ljust(length - 1, '0')}>"
286
- end
499
+ io.pos = signature_offset
500
+ io.write(signature_data)
501
+ signature[:Contents] = handler.sign(io, signature[:ByteRange].value)
287
502
 
503
+ # And now replace the /Contents value
504
+ self.class.replace_signature_contents(signature_data, signature[:Contents])
288
505
  io.pos = signature_offset
289
506
  io.write(signature_data)
290
507
 
@@ -94,6 +94,9 @@ module HexaPDF
94
94
  if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
95
  raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
96
  elsif mapped_object && !mapped_object.null?
97
+ if object.class != mapped_object.class
98
+ mapped_object = @destination.wrap(mapped_object, type: object.class)
99
+ end
97
100
  mapped_object
98
101
  else
99
102
  duplicate(object)
@@ -137,6 +137,7 @@ module HexaPDF
137
137
  @tokenizer.pos -= 6
138
138
  else
139
139
  maybe_raise("Invalid value after '#{oid} #{gen} obj', treating as null", pos: @tokenizer.pos)
140
+ return [nil, oid, gen, nil]
140
141
  end
141
142
  end
142
143
  end
@@ -95,13 +95,15 @@ module HexaPDF
95
95
 
96
96
  if merge_revision == offset
97
97
  xref_section.merge!(revisions.first.xref_section)
98
+ offset = trailer[:Prev] # Get possible next offset before overwriting trailer
98
99
  trailer = revisions.first.trailer
99
100
  revisions.shift
101
+ else
102
+ offset = trailer[:Prev]
100
103
  end
101
104
 
102
105
  revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
103
106
  xref_section: xref_section, loader: object_loader))
104
- offset = trailer[:Prev]
105
107
  end
106
108
  rescue HexaPDF::MalformedPDFError
107
109
  raise unless (reconstructed_revision = parser.reconstructed_revision)
@@ -274,7 +274,7 @@ module HexaPDF
274
274
  TOKEN_CACHE[str]
275
275
  end
276
276
 
277
- REFERENCE_RE = /[#{WHITESPACE}]+([+-]?\d+)[#{WHITESPACE}]+R#{WHITESPACE_OR_DELIMITER_RE}/ # :nodoc:
277
+ REFERENCE_RE = /[#{WHITESPACE}]+([+]?\d+)[#{WHITESPACE}]+R#{WHITESPACE_OR_DELIMITER_RE}/ # :nodoc:
278
278
 
279
279
  # Parses the number (integer or real) at the current position.
280
280
  #
@@ -285,7 +285,7 @@ module HexaPDF
285
285
  tmp = val.to_i
286
286
  # Handle object references, see PDF1.7 s7.3.10
287
287
  prepare_string_scanner(10)
288
- tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
288
+ tmp = Reference.new(tmp, @ss[1].to_i) if tmp > 0 && @ss.scan(REFERENCE_RE)
289
289
  tmp
290
290
  elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
291
291
  val << '0' if val.getbyte(-1) == 46 # dot '.'
@@ -395,7 +395,8 @@ module HexaPDF
395
395
 
396
396
  not_flattened = fields.map {|field| field.each_widget.to_a }.flatten
397
397
  document.pages.each {|page| not_flattened = page.flatten_annotations(not_flattened) }
398
- fields -= not_flattened.map(&:form_field)
398
+ not_flattened.map!(&:form_field)
399
+ fields -= not_flattened
399
400
 
400
401
  fields.each do |field|
401
402
  (field[:Parent]&.[](:Kids) || self[:Fields]).delete(field)
@@ -448,6 +449,32 @@ module HexaPDF
448
449
  def perform_validation # :nodoc:
449
450
  super
450
451
 
452
+ validate_array = lambda do |parent, container|
453
+ container.reject! do |field|
454
+ if !field.kind_of?(HexaPDF::Object) || !field.kind_of?(HexaPDF::Dictionary) || field.null?
455
+ yield("Invalid object in AcroForm field hierarchy", true)
456
+ next true
457
+ end
458
+ next false unless field.key?(:T) # Skip widgets
459
+
460
+ field = document.wrap(field, type: :XXAcroFormField,
461
+ subtype: Field.inherited_value(field, :FT))
462
+ reject = false
463
+ if field[:Parent] != parent
464
+ yield("Parent entry of field (#{field.oid},#{field.gen}) invalid", true)
465
+ if field[:Parent].nil?
466
+ root_fields << field
467
+ reject = true
468
+ else
469
+ field[:Parent] = parent
470
+ end
471
+ end
472
+ validate_array.call(field, field[:Kids]) if field.key?(:Kids)
473
+ reject
474
+ end
475
+ end
476
+ validate_array.call(nil, root_fields)
477
+
451
478
  if (da = self[:DA])
452
479
  unless self[:DR]
453
480
  yield("When the field /DA is present, the field /DR must also be present")
@@ -65,7 +65,7 @@ module HexaPDF
65
65
  :TwoPageLeft, :TwoPageRight]
66
66
  define_field :PageMode, type: Symbol, default: :UseNone,
67
67
  allowed_values: [:UseNone, :UseOutlines, :UseThumbs, :FullScreen, :UseOC, :UseAttachments]
68
- define_field :Outlines, type: Dictionary, indirect: true
68
+ define_field :Outlines, type: :Outlines, indirect: true
69
69
  define_field :Threads, type: PDFArray, version: '1.1'
70
70
  define_field :OpenAction, type: [Dictionary, PDFArray], version: '1.1'
71
71
  define_field :AA, type: Dictionary, version: '1.4'
@@ -50,6 +50,8 @@ module HexaPDF
50
50
  # The outline dictionary is linked via the /Outlines entry from the Type::Catalog and can
51
51
  # directly be accessed via HexaPDF::Document#outline.
52
52
  #
53
+ # == Examples
54
+ #
53
55
  # Here is an example for creating an outline:
54
56
  #
55
57
  # doc = HexaPDF::Document.new
@@ -62,6 +64,22 @@ module HexaPDF
62
64
  # end
63
65
  # end
64
66
  #
67
+ # Here is one for copying the complete outline from one PDF to another:
68
+ #
69
+ # doc = HexaPDF::Document.open(ARGV[0])
70
+ # target = HexaPDF::Document.new
71
+ # stack = [target.outline]
72
+ # doc.outline.each_item do |item, level|
73
+ # if stack.size < level
74
+ # stack << stack.last[:Last]
75
+ # elsif stack.size > level
76
+ # (stack.size - level).times { stack.pop }
77
+ # end
78
+ # stack.last.add_item(target.import(item))
79
+ # end
80
+ # # Copying all the pages so that the references work.
81
+ # doc.pages.each {|page| target.pages << target.import(page) }
82
+ #
65
83
  # See: PDF1.7 s12.3.3
66
84
  class Outline < Dictionary
67
85