hexapdf 0.12.3 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 889b4bf1bc77da0a3fdfc62d2b5b09042aa1b5a567d5ed80ae382e6cdeb193f9
4
- data.tar.gz: 67f217de3dbd01653e9df4e8f8af7e8dba3745cd772e6d6ab930411ff3d1cfb3
3
+ metadata.gz: 77d9895ece62ee8b8df5afb5a44035868c6b33eb7b43a7cb5b85bd730bee56bc
4
+ data.tar.gz: 16b61502ef5c35df588c6a2fd53e1099b80f584276c07ec7a3c23343705ccb42
5
5
  SHA512:
6
- metadata.gz: 71affdceb736e0645c45b181a585b3a425135c0b22fba1daf28d89aaa6e73e5226f18a1e420fb75325653c87274f66664526d8ca55baaaa5251b4f822617b986
7
- data.tar.gz: 63aceaac41dd2ea797f92e7335a381bea5d1bdd2f7388c583431323e7ac9fae0855a404a84fbed70222130bd5eded126dae5385be2291d76c91021633d03a3bb
6
+ metadata.gz: 490fca7cfa535ebfab2af613dacf3ff9e9a6b0b2b76c865ceeb98a45d1cc7b668772dbd3a46ea00a5b82eb1374f36cb42b604754bbb1353e103cde726bc7e886
7
+ data.tar.gz: 1f85edaa9b2214218bb586d8c5409c9f741893fa0effdb60c64d294be231893ed96f1efa9db286d9a98f094caeb3fb272d318782dc7977adb8b252425d72cfb4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,41 @@
1
+ ## 0.13.0 - 2020-11-15
2
+
3
+ ### Added
4
+
5
+ * Cross-reference table reconstruction for damaged PDFs, controllable via the
6
+ new 'parser.try_xref_reconstruction' option
7
+ * Two new `hexapdf inspect` commands for showing page objects and page content
8
+ streams by page number
9
+ * Flag `--check` to the CLI command `hexapdf info` for checking a file for
10
+ parse and validation errors
11
+ * [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
12
+ embedded in the field object
13
+ * [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
14
+ * [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
15
+ * [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
16
+ nodes of a page
17
+ * [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
18
+
19
+ ### Changed
20
+
21
+ * **Breaking change**: Overhauled document/object validation interfaces and
22
+ internals to be more similar and to allow for reporting of multiple validation
23
+ problems
24
+ * Validation of TrueType fonts to ignore missing fields if the font name
25
+ suggests that the font is one of the standard 14 PDF fonts
26
+ * Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
27
+ size names
28
+
29
+ ### Fixed
30
+
31
+ * Reporting of cross-reference section entry parsing error
32
+ * PDF version used by default for dictionary fields
33
+ * Error in CLI command `hexapdf inspect` when parsing an invalid object number
34
+ * Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
35
+ * Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
36
+ handling
37
+
38
+
1
39
  ## 0.12.3 - 2020-08-22
2
40
 
3
41
  ### Changed
data/lib/hexapdf/cli/command.rb CHANGED
@@ -100,6 +100,7 @@ module HexaPDF
100
100
  def pdf_options(password)
101
101
  hash = {decryption_opts: {password: password}, config: {}}
102
102
  HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
103
+ hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
103
104
  hash[:config]['parser.on_correctable_error'] =
104
105
  if command_parser.strict
105
106
  proc { true }
@@ -277,14 +278,15 @@ module HexaPDF
277
278
  #
278
279
  # See: #define_encryption_options
279
280
  def apply_encryption_options(doc)
280
- if @out_options.encryption == :add
281
+ case @out_options.encryption
282
+ when :add
281
283
  doc.encrypt(algorithm: @out_options.enc_algorithm,
282
284
  key_length: @out_options.enc_key_length,
283
285
  force_v4: @out_options.enc_force_v4,
284
286
  permissions: @out_options.enc_permissions,
285
287
  owner_password: @out_options.enc_owner_pwd,
286
288
  user_password: @out_options.enc_user_pwd)
287
- elsif @out_options.encryption == :remove
289
+ when :remove
288
290
  doc.encrypt(name: nil)
289
291
  end
290
292
  end
data/lib/hexapdf/cli/image2pdf.rb CHANGED
@@ -64,7 +64,8 @@ module HexaPDF
64
64
  orientation = :landscape
65
65
  page_size.delete_suffix!('-landscape')
66
66
  end
67
- HexaPDF::Type::Page.media_box(page_size.to_sym, orientation: orientation)
67
+ page_size = page_size.capitalize.to_sym
68
+ HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
68
69
  end
69
70
  end
70
71
  options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
data/lib/hexapdf/cli/info.rb CHANGED
@@ -55,13 +55,21 @@ module HexaPDF
55
55
  long_desc(<<~EOF)
56
56
  This command extracts information from the Info dictionary of a PDF file as well
57
57
  as some other useful information like the used PDF version and encryption information.
58
+
59
+ If the --check option is specified, the PDF file will also be checked for parse and
60
+ validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
61
+ file by correcting the errors.
58
62
  EOF
63
+ options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
64
+ @check_file = check
65
+ end
59
66
  options.on("--password PASSWORD", "-p", String,
60
67
  "The password for decryption. Use - for reading from standard input.") do |pwd|
61
68
  @password = (pwd == '-' ? read_password : pwd)
62
69
  end
63
70
  @password = nil
64
71
  @auto_decrypt = true
72
+ @check_file = false
65
73
  end
66
74
 
67
75
  def execute(file) #:nodoc:
@@ -79,8 +87,30 @@ module HexaPDF
79
87
  options = pdf_options(@password)
80
88
  options[:config]['document.auto_decrypt'] = @auto_decrypt
81
89
  HexaPDF::Document.open(file, **options) do |doc|
90
+ if @check_file
91
+ indirect_object = nil
92
+ validation_block = lambda do |msg, correctable, object|
93
+ object = indirect_object unless object.indirect? || object.type == :XXTrailer
94
+ object_type = if object.type == :XXTrailer
95
+ 'trailer'
96
+ elsif !object.type.to_s.start_with?("XX")
97
+ "object type #{object.type} (#{object.oid},#{object.gen})"
98
+ else
99
+ "object (#{object.oid},#{object.gen})"
100
+ end
101
+ object_type = "sub-object of #{object_type}" if object == indirect_object
102
+ puts "WARNING: Validation error for #{object_type}: #{msg} " \
103
+ "#{correctable ? '(correctable)' : ''}"
104
+ end
105
+ doc.trailer.validate(auto_correct: true, &validation_block)
106
+ doc.each(only_current: false, only_loaded: false) do |obj|
107
+ indirect_object = obj
108
+ obj.validate(auto_correct: true, &validation_block)
109
+ end
110
+ end
111
+
82
112
  output_line("File name", file)
83
- output_line("File size", File.stat(file).size.to_s + " bytes")
113
+ output_line("File size", File.stat(file).size.to_s << " bytes")
84
114
  @auto_decrypt && INFO_KEYS.each do |name|
85
115
  next unless doc.trailer.info.key?(name)
86
116
  output_line(name.to_s, doc.trailer.info[name].to_s)
@@ -110,10 +140,29 @@ module HexaPDF
110
140
  else
111
141
  raise
112
142
  end
143
+ rescue HexaPDF::MalformedPDFError => e
144
+ $stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
145
+ $stderr.puts " #{e}"
146
+ end
147
+
148
+ # Use custom options if we are checking the PDF file for errors.
149
+ def pdf_options(password)
150
+ if @check_file
151
+ options = {decryption_opts: {password: password}, config: {}}
152
+ HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
153
+ options[:config]['parser.try_xref_reconstruction'] = true
154
+ options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
155
+ puts "WARNING: Parse error at position #{pos}: #{msg}"
156
+ false
157
+ end
158
+ options
159
+ else
160
+ super
161
+ end
113
162
  end
114
163
 
115
164
  def output_line(header, text) #:nodoc:
116
- puts((header + ":").ljust(COLUMN_WIDTH) << text)
165
+ puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
117
166
  end
118
167
 
119
168
  end
data/lib/hexapdf/cli/inspect.rb CHANGED
@@ -122,22 +122,22 @@ module HexaPDF
122
122
  case command
123
123
  when /^\d+(,\d+)?$/, 'o', 'object'
124
124
  arg = (command.start_with?('o') ? data.shift : command)
125
- obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
126
- if obj.data.stream && command_parser.verbosity_info?
125
+ obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
126
+ if obj&.data&.stream && command_parser.verbosity_info?
127
127
  $stderr.puts("Note: Object also has stream data")
128
128
  end
129
129
  serialize(obj.value, recursive: false) if obj
130
130
 
131
131
  when 'r', 'recursive'
132
132
  obj = if (obj = data.shift)
133
- pdf_object_from_string_reference(obj) rescue puts($!.message)
133
+ pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
134
134
  else
135
135
  @doc.trailer
136
136
  end
137
137
  serialize(obj.value, recursive: true) if obj
138
138
 
139
139
  when 's', 'stream', 'raw', 'raw-stream'
140
- if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
140
+ if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
141
141
  obj.kind_of?(HexaPDF::Stream)
142
142
  source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
143
143
  while source.alive? && (stream_data = source.resume)
@@ -148,7 +148,7 @@ module HexaPDF
148
148
  end
149
149
 
150
150
  when 'x', 'xref'
151
- if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
151
+ if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
152
152
  @doc.revisions.reverse_each do |rev|
153
153
  if (xref = rev.xref(obj))
154
154
  puts xref
@@ -178,6 +178,26 @@ module HexaPDF
178
178
  puts str
179
179
  end
180
180
 
181
+ when 'po', 'ps'
182
+ page_number_str = data.shift
183
+ unless page_number_str
184
+ $stderr.puts("Error: Missing PAGE argument to #{command}")
185
+ next
186
+ end
187
+ page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
188
+ unless page_number
189
+ $stderr.puts("Error: Invalid page number #{page_number_str}")
190
+ next
191
+ end
192
+ page = @doc.pages[page_number]
193
+ if command.start_with?('ps')
194
+ $stdout.write(page.contents)
195
+ else
196
+ puts "#{page.oid} #{page.gen} obj"
197
+ serialize(page.value, recursive: false)
198
+ puts "endobj"
199
+ end
200
+
181
201
  when 'pc', 'page-count'
182
202
  puts @doc.pages.count
183
203
 
@@ -217,9 +237,9 @@ module HexaPDF
217
237
  if str.nil?
218
238
  raise "Error: Missing argument object identifier OID[,GEN]"
219
239
  elsif !str.match?(/^\d+(,\d+)?$/)
220
- raise "Error: Invalid argument: Must be of form OID[,GEN]"
240
+ raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
221
241
  elsif !(obj = @doc.object(pdf_reference_from_string(str)))
222
- raise "Error: No object with the given object identifier found"
242
+ raise "Error: No object with the given object identifier '#{str}' found"
223
243
  else
224
244
  obj
225
245
  end
@@ -240,7 +260,7 @@ module HexaPDF
240
260
  puts "<<"
241
261
  (recursive ? val.sort : val).each do |k, v|
242
262
  next if v.nil? || (v.respond_to?(:null?) && v.null?)
243
- print ' ' * (indent + 1) + @serializer.serialize_symbol(k) + " "
263
+ print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
244
264
  serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
245
265
  puts
246
266
  end
@@ -283,6 +303,8 @@ module HexaPDF
283
303
  ["c[atalog]", "Print the catalog dictionary"],
284
304
  ["t[railer]", "Print the trailer dictionary"],
285
305
  ["p[ages] [RANGE]", "Print information about pages"],
306
+ ["po PAGE", "Print the page object"],
307
+ ["ps PAGE", "Print the content stream of the page"],
286
308
  ["pc | page-count", "Print the number of pages"],
287
309
  ["search REGEXP", "Print objects matching the pattern"],
288
310
  ["h[elp]", "Show the help"],
data/lib/hexapdf/cli/merge.rb CHANGED
@@ -122,7 +122,7 @@ module HexaPDF
122
122
 
123
123
  # Assemble pages
124
124
  target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
125
- page_tree = target.add(Type: :Pages)
125
+ page_tree = target.add({Type: :Pages})
126
126
  import_pages(page_tree)
127
127
  target.catalog[:Pages] = page_tree
128
128
  remove_unused_pages(target)
data/lib/hexapdf/configuration.rb CHANGED
@@ -334,6 +334,20 @@ module HexaPDF
334
334
  # The value needs to be an object that responds to \#call(document, message, position) and
335
335
  # returns +true+ if an error should be raised.
336
336
  #
337
+ # parser.try_xref_reconstruction::
338
+ # A boolean specifying whether non-recoverable parsing errors should lead to reconstructing the
339
+ # main cross-reference table.
340
+ #
341
+ # The reconstructed cross-reference table might make damaged files usable but there is no way
342
+ # to ensure that the reconstructed file is equal to the undamaged original file (though
343
+ # generally it works out).
344
+ #
345
+ # There is also the possibility that reconstructing doesn't work because the algorithm has to
346
+ # assume that the PDF was written in a certain way (which is recommended by the PDF
347
+ # specification).
348
+ #
349
+ # Defaults to +true+.
350
+ #
337
351
  # sorted_tree.max_leaf_node_size::
338
352
  # The maximum number of nodes that should be in a leaf node of a node tree.
339
353
  #
@@ -412,6 +426,7 @@ module HexaPDF
412
426
  'page.default_media_box' => :A4,
413
427
  'page.default_media_orientation' => :portrait,
414
428
  'parser.on_correctable_error' => proc { false },
429
+ 'parser.try_xref_reconstruction' => true,
415
430
  'sorted_tree.max_leaf_node_size' => 64,
416
431
  'style.layers_map' => {
417
432
  link: 'HexaPDF::Layout::Style::LinkLayer',
data/lib/hexapdf/content/graphic_object/arc.rb CHANGED
@@ -45,7 +45,7 @@ module HexaPDF
45
45
  # all either in clockwise or counterclockwise direction and optionally inclined in respect to
46
46
  # the x-axis.
47
47
  #
48
- # See: ELL - https://www.spaceroots.org/documents/ellipse/elliptical-arc.pdf
48
+ # See: ELL - https://spaceroots.org/documents/ellipse/elliptical-arc.pdf
49
49
  class Arc
50
50
 
51
51
  include HexaPDF::Utils::MathHelpers
@@ -202,8 +202,8 @@ module HexaPDF
202
202
  p2x_prime, p2y_prime = derivative_evaluate(eta2)
203
203
 
204
204
  result << [p2x, p2y,
205
- p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
206
- p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]]
205
+ {p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
206
+ p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]}]
207
207
  end
208
208
 
209
209
  result
data/lib/hexapdf/dictionary.rb CHANGED
@@ -97,7 +97,7 @@ module HexaPDF
97
97
  #
98
98
  # version:: Specifies the minimum version of the PDF specification needed for this value.
99
99
  def self.define_field(name, type:, required: false, default: nil, indirect: nil,
100
- allowed_values: nil, version: '1.2')
100
+ allowed_values: nil, version: '1.0')
101
101
  @fields ||= {}
102
102
  @fields[name] = Field.new(type, required: required, default: default, indirect: indirect,
103
103
  allowed_values: allowed_values, version: version)
@@ -163,7 +163,7 @@ module HexaPDF
163
163
  value[name] = field.default
164
164
  end
165
165
  value[name] = data = document.deref(data) if data.kind_of?(HexaPDF::Reference)
166
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
166
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
167
167
  data = data.value
168
168
  end
169
169
  if (result = field&.convert(data, document))
@@ -182,7 +182,7 @@ module HexaPDF
182
182
  raise ArgumentError, "Only Symbol (Name) keys are allowed to be used in PDF dictionaries"
183
183
  end
184
184
 
185
- if value[name].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
185
+ if value[name].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
186
186
  !data.kind_of?(HexaPDF::Reference)
187
187
  value[name].value = data
188
188
  else
@@ -273,7 +273,7 @@ module HexaPDF
273
273
  # Check that required fields are set
274
274
  if field.required? && obj.nil?
275
275
  yield("Required field #{name} is not set", field.default?)
276
- self[name] = obj = field.default
276
+ self[name] = obj = field.default if field.default?
277
277
  end
278
278
 
279
279
  # Check if the document version is set high enough
data/lib/hexapdf/dictionary_fields.rb CHANGED
@@ -151,17 +151,9 @@ module HexaPDF
151
151
  # Returns a duplicated default value, automatically taking unduplicatable classes into
152
152
  # account.
153
153
  def default
154
- duplicatable_default? ? @default.dup : @default
154
+ @default.dup
155
155
  end
156
156
 
157
- # Returns +true+ if the default value can safely be duplicated with #dup.
158
- def duplicatable_default?
159
- @duplicatable_default ||= HexaPDF::Object::NOT_DUPLICATABLE_CLASSES.none? do |klass|
160
- @default.kind_of?(klass)
161
- end
162
- end
163
- private :duplicatable_default?
164
-
165
157
  # Returns +true+ if the given object is valid for this field.
166
158
  def valid_object?(obj)
167
159
  type.any? {|t| obj.kind_of?(t) } ||
data/lib/hexapdf/document.rb CHANGED
@@ -69,15 +69,35 @@ module HexaPDF
69
69
 
70
70
  autoload(:Composer, 'hexapdf/composer')
71
71
 
72
+ # == HexaPDF::Document
73
+ #
72
74
  # Represents one PDF document.
73
75
  #
74
76
  # A PDF document consists of (indirect) objects, so the main job of this class is to provide
75
77
  # methods for working with these objects. However, since a PDF document may also be
76
78
  # incrementally updated and can therefore contain one or more revisions, there are also methods
77
- # to work with these revisions.
79
+ # for working with these revisions.
78
80
  #
79
81
  # Note: This class provides everything to work on PDF documents on a low-level basis. This means
80
- # that there are no convenience methods for higher PDF functionality whatsoever.
82
+ # that there are no convenience methods for higher PDF functionality. Those can be found in the
83
+ # objects linked from here, like #catalog.
84
+ #
85
+ # == Known Messages
86
+ #
87
+ # The document object provides a basic message dispatch system via #register_listener and
88
+ # #dispatch_message.
89
+ #
90
+ # Following are the messages that are used by HexaPDF itself:
91
+ #
92
+ # :complete_objects::
93
+ # This message is called before the first step of writing a document. Listeners should
94
+ # complete PDF objects that are missing some information.
95
+ #
96
+ # For example, the font system uses this message to complete the font objects with
97
+ # information that is only available once all the used glyphs are known.
98
+ #
99
+ # :before_write::
100
+ # This message is called before a document is actually serialized and written.
81
101
  class Document
82
102
 
83
103
  autoload(:Pages, 'hexapdf/document/pages')
@@ -400,11 +420,11 @@ module HexaPDF
400
420
  # object in the PDF document. The block may either accept only the object or the object and the
401
421
  # revision it is in.
402
422
  #
403
- # By default, only the current version of each object is returned which implies that each
404
- # object number is yielded exactly once. If the +current+ option is +false+, all stored
405
- # objects from newest to oldest are returned, not only the current version of each object.
423
+ # By default, only the current version of each object is returned which implies that each object
424
+ # number is yielded exactly once. If the +only_current+ option is +false+, all stored objects
425
+ # from newest to oldest are returned, not only the current version of each object.
406
426
  #
407
- # The +current+ option can make a difference because the document can contain multiple
427
+ # The +only_current+ option can make a difference because the document can contain multiple
408
428
  # revisions:
409
429
  #
410
430
  # * Multiple revisions may contain objects with the same object and generation numbers, e.g.
@@ -442,6 +462,9 @@ module HexaPDF
442
462
  end
443
463
 
444
464
  # Dispatches the message +name+ with the given arguments to all registered listeners.
465
+ #
466
+ # See the main Document documentation for an overview of messages that are used by HexaPDF
467
+ # itself.
445
468
  def dispatch_message(name, *args)
446
469
  @listeners[name]&.each {|obj| obj.call(*args) }
447
470
  end
@@ -594,13 +617,9 @@ module HexaPDF
594
617
  # If a block is given, it is called on validation problems.
595
618
  #
596
619
  # See HexaPDF::Object#validate for more information.
597
- def validate(auto_correct: true, only_loaded: false) #:yield: object, msg, correctable
598
- cur_obj = trailer
599
- block = (block_given? ? lambda {|msg, correctable| yield(cur_obj, msg, correctable) } : nil)
600
-
620
+ def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
601
621
  result = trailer.validate(auto_correct: auto_correct, &block)
602
622
  each(only_current: false, only_loaded: only_loaded) do |obj|
603
- cur_obj = obj
604
623
  result &&= obj.validate(auto_correct: auto_correct, &block)
605
624
  end
606
625
  result
@@ -643,7 +662,7 @@ module HexaPDF
643
662
  end
644
663
 
645
664
  if validate
646
- self.validate(auto_correct: true) do |obj, msg, correctable|
665
+ self.validate(auto_correct: true) do |msg, correctable, obj|
647
666
  next if correctable
648
667
  raise HexaPDF::Error, "Validation error for (#{obj.oid},#{obj.gen}): #{msg}"
649
668
  end