hexapdf 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 889b4bf1bc77da0a3fdfc62d2b5b09042aa1b5a567d5ed80ae382e6cdeb193f9
4
- data.tar.gz: 67f217de3dbd01653e9df4e8f8af7e8dba3745cd772e6d6ab930411ff3d1cfb3
3
+ metadata.gz: 77d9895ece62ee8b8df5afb5a44035868c6b33eb7b43a7cb5b85bd730bee56bc
4
+ data.tar.gz: 16b61502ef5c35df588c6a2fd53e1099b80f584276c07ec7a3c23343705ccb42
5
5
  SHA512:
6
- metadata.gz: 71affdceb736e0645c45b181a585b3a425135c0b22fba1daf28d89aaa6e73e5226f18a1e420fb75325653c87274f66664526d8ca55baaaa5251b4f822617b986
7
- data.tar.gz: 63aceaac41dd2ea797f92e7335a381bea5d1bdd2f7388c583431323e7ac9fae0855a404a84fbed70222130bd5eded126dae5385be2291d76c91021633d03a3bb
6
+ metadata.gz: 490fca7cfa535ebfab2af613dacf3ff9e9a6b0b2b76c865ceeb98a45d1cc7b668772dbd3a46ea00a5b82eb1374f36cb42b604754bbb1353e103cde726bc7e886
7
+ data.tar.gz: 1f85edaa9b2214218bb586d8c5409c9f741893fa0effdb60c64d294be231893ed96f1efa9db286d9a98f094caeb3fb272d318782dc7977adb8b252425d72cfb4
@@ -1,3 +1,41 @@
1
+ ## 0.13.0 - 2020-11-15
2
+
3
+ ### Added
4
+
5
+ * Cross-reference table reconstruction for damaged PDFs, controllable via the
6
+ new 'parser.try_xref_reconstruction' option
7
+ * Two new `hexapdf inspect` commands for showing page objects and page content
8
+ streams by page number
9
+ * Flag `--check` to the CLI command `hexapdf info` for checking a file for
10
+ parse and validation errors
11
+ * [HexaPDF::Type::AcroForm::Field#embedded_widget?] for checking if a widget is
12
+ embedded in the field object
13
+ * [HexaPDF::Type::AcroForm::Field#delete_widget] for deleting a widget
14
+ * [HexaPDF::PDFArray#delete] for deleting an object from a PDF array
15
+ * [HexaPDF::Type::Page#ancestor_nodes] for retrieving all ancestor page tree
16
+ nodes of a page
17
+ * [HexaPDF::Type::PageTreeNode#move_page] for moving a page to another index
18
+
19
+ ### Changed
20
+
21
+ * **Breaking change**: Overhauled document/object validation interfaces and
22
+ internals to be more similar and to allow for reporting of multiple validation
23
+ problems
24
+ * Validation of TrueType fonts to ignore missing fields if the font name
25
+ suggests that the font is one of the standard 14 PDF fonts
26
+ * Option `-p` of CLI command `hexapdf image2pdf` to also allow lowercase page
27
+ size names
28
+
29
+ ### Fixed
30
+
31
+ * Reporting of cross-reference section entry parsing error
32
+ * PDF version used by default for dictionary fields
33
+ * Error in CLI command `hexapdf inspect` when parsing an invalid object number
34
+ * Output of error messages in CLI command `hexapdf inspect` to go to `$stderr`
35
+ * Bug in [HexaPDF::Type::AcroForm::TextField] validation due to missing nil
36
+ handling
37
+
38
+
1
39
  ## 0.12.3 - 2020-08-22
2
40
 
3
41
  ### Changed
@@ -100,6 +100,7 @@ module HexaPDF
100
100
  def pdf_options(password)
101
101
  hash = {decryption_opts: {password: password}, config: {}}
102
102
  HexaPDF::GlobalConfiguration['filter.predictor.strict'] = command_parser.strict
103
+ hash[:config]['parser.try_xref_reconstruction'] = !command_parser.strict
103
104
  hash[:config]['parser.on_correctable_error'] =
104
105
  if command_parser.strict
105
106
  proc { true }
@@ -277,14 +278,15 @@ module HexaPDF
277
278
  #
278
279
  # See: #define_encryption_options
279
280
  def apply_encryption_options(doc)
280
- if @out_options.encryption == :add
281
+ case @out_options.encryption
282
+ when :add
281
283
  doc.encrypt(algorithm: @out_options.enc_algorithm,
282
284
  key_length: @out_options.enc_key_length,
283
285
  force_v4: @out_options.enc_force_v4,
284
286
  permissions: @out_options.enc_permissions,
285
287
  owner_password: @out_options.enc_owner_pwd,
286
288
  user_password: @out_options.enc_user_pwd)
287
- elsif @out_options.encryption == :remove
289
+ when :remove
288
290
  doc.encrypt(name: nil)
289
291
  end
290
292
  end
@@ -64,7 +64,8 @@ module HexaPDF
64
64
  orientation = :landscape
65
65
  page_size.delete_suffix!('-landscape')
66
66
  end
67
- HexaPDF::Type::Page.media_box(page_size.to_sym, orientation: orientation)
67
+ page_size = page_size.capitalize.to_sym
68
+ HexaPDF::Type::Page.media_box(page_size, orientation: orientation)
68
69
  end
69
70
  end
70
71
  options.on("--[no-]auto-rotate", "Automatically rotate pages based on image dimesions. " \
@@ -55,13 +55,21 @@ module HexaPDF
55
55
  long_desc(<<~EOF)
56
56
  This command extracts information from the Info dictionary of a PDF file as well
57
57
  as some other useful information like the used PDF version and encryption information.
58
+
59
+ If the --check option is specified, the PDF file will also be checked for parse and
60
+ validation errors. And if the process doesn't abort, HexaPDF is still able to handle the
61
+ file by correcting the errors.
58
62
  EOF
63
+ options.on("--check", "-c", "Check the PDF file for parse errors and validity") do |check|
64
+ @check_file = check
65
+ end
59
66
  options.on("--password PASSWORD", "-p", String,
60
67
  "The password for decryption. Use - for reading from standard input.") do |pwd|
61
68
  @password = (pwd == '-' ? read_password : pwd)
62
69
  end
63
70
  @password = nil
64
71
  @auto_decrypt = true
72
+ @check_file = false
65
73
  end
66
74
 
67
75
  def execute(file) #:nodoc:
@@ -79,8 +87,30 @@ module HexaPDF
79
87
  options = pdf_options(@password)
80
88
  options[:config]['document.auto_decrypt'] = @auto_decrypt
81
89
  HexaPDF::Document.open(file, **options) do |doc|
90
+ if @check_file
91
+ indirect_object = nil
92
+ validation_block = lambda do |msg, correctable, object|
93
+ object = indirect_object unless object.indirect? || object.type == :XXTrailer
94
+ object_type = if object.type == :XXTrailer
95
+ 'trailer'
96
+ elsif !object.type.to_s.start_with?("XX")
97
+ "object type #{object.type} (#{object.oid},#{object.gen})"
98
+ else
99
+ "object (#{object.oid},#{object.gen})"
100
+ end
101
+ object_type = "sub-object of #{object_type}" if object == indirect_object
102
+ puts "WARNING: Validation error for #{object_type}: #{msg} " \
103
+ "#{correctable ? '(correctable)' : ''}"
104
+ end
105
+ doc.trailer.validate(auto_correct: true, &validation_block)
106
+ doc.each(only_current: false, only_loaded: false) do |obj|
107
+ indirect_object = obj
108
+ obj.validate(auto_correct: true, &validation_block)
109
+ end
110
+ end
111
+
82
112
  output_line("File name", file)
83
- output_line("File size", File.stat(file).size.to_s + " bytes")
113
+ output_line("File size", File.stat(file).size.to_s << " bytes")
84
114
  @auto_decrypt && INFO_KEYS.each do |name|
85
115
  next unless doc.trailer.info.key?(name)
86
116
  output_line(name.to_s, doc.trailer.info[name].to_s)
@@ -110,10 +140,29 @@ module HexaPDF
110
140
  else
111
141
  raise
112
142
  end
143
+ rescue HexaPDF::MalformedPDFError => e
144
+ $stderr.puts "Error: PDF file #{file} is damaged and cannot be recovered"
145
+ $stderr.puts " #{e}"
146
+ end
147
+
148
+ # Use custom options if we are checking the PDF file for errors.
149
+ def pdf_options(password)
150
+ if @check_file
151
+ options = {decryption_opts: {password: password}, config: {}}
152
+ HexaPDF::GlobalConfiguration['filter.predictor.strict'] = false
153
+ options[:config]['parser.try_xref_reconstruction'] = true
154
+ options[:config]['parser.on_correctable_error'] = lambda do |_, msg, pos|
155
+ puts "WARNING: Parse error at position #{pos}: #{msg}"
156
+ false
157
+ end
158
+ options
159
+ else
160
+ super
161
+ end
113
162
  end
114
163
 
115
164
  def output_line(header, text) #:nodoc:
116
- puts((header + ":").ljust(COLUMN_WIDTH) << text)
165
+ puts(("#{header}:").ljust(COLUMN_WIDTH) << text)
117
166
  end
118
167
 
119
168
  end
@@ -122,22 +122,22 @@ module HexaPDF
122
122
  case command
123
123
  when /^\d+(,\d+)?$/, 'o', 'object'
124
124
  arg = (command.start_with?('o') ? data.shift : command)
125
- obj = pdf_object_from_string_reference(arg) rescue puts($!.message)
126
- if obj.data.stream && command_parser.verbosity_info?
125
+ obj = pdf_object_from_string_reference(arg) rescue $stderr.puts($!.message)
126
+ if obj&.data&.stream && command_parser.verbosity_info?
127
127
  $stderr.puts("Note: Object also has stream data")
128
128
  end
129
129
  serialize(obj.value, recursive: false) if obj
130
130
 
131
131
  when 'r', 'recursive'
132
132
  obj = if (obj = data.shift)
133
- pdf_object_from_string_reference(obj) rescue puts($!.message)
133
+ pdf_object_from_string_reference(obj) rescue $stderr.puts($!.message)
134
134
  else
135
135
  @doc.trailer
136
136
  end
137
137
  serialize(obj.value, recursive: true) if obj
138
138
 
139
139
  when 's', 'stream', 'raw', 'raw-stream'
140
- if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message)) &&
140
+ if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message)) &&
141
141
  obj.kind_of?(HexaPDF::Stream)
142
142
  source = (command.start_with?('raw') ? obj.stream_source : obj.stream_decoder)
143
143
  while source.alive? && (stream_data = source.resume)
@@ -148,7 +148,7 @@ module HexaPDF
148
148
  end
149
149
 
150
150
  when 'x', 'xref'
151
- if (obj = pdf_object_from_string_reference(data.shift) rescue puts($!.message))
151
+ if (obj = pdf_object_from_string_reference(data.shift) rescue $stderr.puts($!.message))
152
152
  @doc.revisions.reverse_each do |rev|
153
153
  if (xref = rev.xref(obj))
154
154
  puts xref
@@ -178,6 +178,26 @@ module HexaPDF
178
178
  puts str
179
179
  end
180
180
 
181
+ when 'po', 'ps'
182
+ page_number_str = data.shift
183
+ unless page_number_str
184
+ $stderr.puts("Error: Missing PAGE argument to #{command}")
185
+ next
186
+ end
187
+ page_number = parse_pages_specification(page_number_str, @doc.pages.count).first&.first
188
+ unless page_number
189
+ $stderr.puts("Error: Invalid page number #{page_number_str}")
190
+ next
191
+ end
192
+ page = @doc.pages[page_number]
193
+ if command.start_with?('ps')
194
+ $stdout.write(page.contents)
195
+ else
196
+ puts "#{page.oid} #{page.gen} obj"
197
+ serialize(page.value, recursive: false)
198
+ puts "endobj"
199
+ end
200
+
181
201
  when 'pc', 'page-count'
182
202
  puts @doc.pages.count
183
203
 
@@ -217,9 +237,9 @@ module HexaPDF
217
237
  if str.nil?
218
238
  raise "Error: Missing argument object identifier OID[,GEN]"
219
239
  elsif !str.match?(/^\d+(,\d+)?$/)
220
- raise "Error: Invalid argument: Must be of form OID[,GEN]"
240
+ raise "Error: Invalid argument: Must be of form OID[,GEN], not '#{str}'"
221
241
  elsif !(obj = @doc.object(pdf_reference_from_string(str)))
222
- raise "Error: No object with the given object identifier found"
242
+ raise "Error: No object with the given object identifier '#{str}' found"
223
243
  else
224
244
  obj
225
245
  end
@@ -240,7 +260,7 @@ module HexaPDF
240
260
  puts "<<"
241
261
  (recursive ? val.sort : val).each do |k, v|
242
262
  next if v.nil? || (v.respond_to?(:null?) && v.null?)
243
- print ' ' * (indent + 1) + @serializer.serialize_symbol(k) + " "
263
+ print '%s%s ' % [' ' * (indent + 1), @serializer.serialize_symbol(k)]
244
264
  serialize(v, recursive: recursive, seen: seen, indent: indent + 1)
245
265
  puts
246
266
  end
@@ -283,6 +303,8 @@ module HexaPDF
283
303
  ["c[atalog]", "Print the catalog dictionary"],
284
304
  ["t[railer]", "Print the trailer dictionary"],
285
305
  ["p[ages] [RANGE]", "Print information about pages"],
306
+ ["po PAGE", "Print the page object"],
307
+ ["ps PAGE", "Print the content stream of the page"],
286
308
  ["pc | page-count", "Print the number of pages"],
287
309
  ["search REGEXP", "Print objects matching the pattern"],
288
310
  ["h[elp]", "Show the help"],
@@ -122,7 +122,7 @@ module HexaPDF
122
122
 
123
123
  # Assemble pages
124
124
  target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
125
- page_tree = target.add(Type: :Pages)
125
+ page_tree = target.add({Type: :Pages})
126
126
  import_pages(page_tree)
127
127
  target.catalog[:Pages] = page_tree
128
128
  remove_unused_pages(target)
@@ -334,6 +334,20 @@ module HexaPDF
334
334
  # The value needs to be an object that responds to \#call(document, message, position) and
335
335
  # returns +true+ if an error should be raised.
336
336
  #
337
+ # parser.try_xref_reconstruction::
338
+ # A boolean specifying whether non-recoverable parsing errors should lead to reconstructing the
339
+ # main cross-reference table.
340
+ #
341
+ # The reconstructed cross-reference table might make damaged files usable but there is no way
342
+ # to ensure that the reconstructed file is equal to the undamaged original file (though
343
+ # generally it works out).
344
+ #
345
+ # There is also the possibility that reconstructing doesn't work because the algorithm has to
346
+ # assume that the PDF was written in a certain way (which is recommended by the PDF
347
+ # specification).
348
+ #
349
+ # Defaults to +true+.
350
+ #
337
351
  # sorted_tree.max_leaf_node_size::
338
352
  # The maximum number of nodes that should be in a leaf node of a node tree.
339
353
  #
@@ -412,6 +426,7 @@ module HexaPDF
412
426
  'page.default_media_box' => :A4,
413
427
  'page.default_media_orientation' => :portrait,
414
428
  'parser.on_correctable_error' => proc { false },
429
+ 'parser.try_xref_reconstruction' => true,
415
430
  'sorted_tree.max_leaf_node_size' => 64,
416
431
  'style.layers_map' => {
417
432
  link: 'HexaPDF::Layout::Style::LinkLayer',
@@ -45,7 +45,7 @@ module HexaPDF
45
45
  # all either in clockwise or counterclockwise direction and optionally inclined in respect to
46
46
  # the x-axis.
47
47
  #
48
- # See: ELL - https://www.spaceroots.org/documents/ellipse/elliptical-arc.pdf
48
+ # See: ELL - https://spaceroots.org/documents/ellipse/elliptical-arc.pdf
49
49
  class Arc
50
50
 
51
51
  include HexaPDF::Utils::MathHelpers
@@ -202,8 +202,8 @@ module HexaPDF
202
202
  p2x_prime, p2y_prime = derivative_evaluate(eta2)
203
203
 
204
204
  result << [p2x, p2y,
205
- p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
206
- p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]]
205
+ {p1: [p1x + alpha * p1x_prime, p1y + alpha * p1y_prime],
206
+ p2: [p2x - alpha * p2x_prime, p2y - alpha * p2y_prime]}]
207
207
  end
208
208
 
209
209
  result
@@ -97,7 +97,7 @@ module HexaPDF
97
97
  #
98
98
  # version:: Specifies the minimum version of the PDF specification needed for this value.
99
99
  def self.define_field(name, type:, required: false, default: nil, indirect: nil,
100
- allowed_values: nil, version: '1.2')
100
+ allowed_values: nil, version: '1.0')
101
101
  @fields ||= {}
102
102
  @fields[name] = Field.new(type, required: required, default: default, indirect: indirect,
103
103
  allowed_values: allowed_values, version: version)
@@ -163,7 +163,7 @@ module HexaPDF
163
163
  value[name] = field.default
164
164
  end
165
165
  value[name] = data = document.deref(data) if data.kind_of?(HexaPDF::Reference)
166
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
166
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
167
167
  data = data.value
168
168
  end
169
169
  if (result = field&.convert(data, document))
@@ -182,7 +182,7 @@ module HexaPDF
182
182
  raise ArgumentError, "Only Symbol (Name) keys are allowed to be used in PDF dictionaries"
183
183
  end
184
184
 
185
- if value[name].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
185
+ if value[name].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
186
186
  !data.kind_of?(HexaPDF::Reference)
187
187
  value[name].value = data
188
188
  else
@@ -273,7 +273,7 @@ module HexaPDF
273
273
  # Check that required fields are set
274
274
  if field.required? && obj.nil?
275
275
  yield("Required field #{name} is not set", field.default?)
276
- self[name] = obj = field.default
276
+ self[name] = obj = field.default if field.default?
277
277
  end
278
278
 
279
279
  # Check if the document version is set high enough
@@ -151,17 +151,9 @@ module HexaPDF
151
151
  # Returns a duplicated default value, automatically taking unduplicatable classes into
152
152
  # account.
153
153
  def default
154
- duplicatable_default? ? @default.dup : @default
154
+ @default.dup
155
155
  end
156
156
 
157
- # Returns +true+ if the default value can safely be duplicated with #dup.
158
- def duplicatable_default?
159
- @duplicatable_default ||= HexaPDF::Object::NOT_DUPLICATABLE_CLASSES.none? do |klass|
160
- @default.kind_of?(klass)
161
- end
162
- end
163
- private :duplicatable_default?
164
-
165
157
  # Returns +true+ if the given object is valid for this field.
166
158
  def valid_object?(obj)
167
159
  type.any? {|t| obj.kind_of?(t) } ||
@@ -69,15 +69,35 @@ module HexaPDF
69
69
 
70
70
  autoload(:Composer, 'hexapdf/composer')
71
71
 
72
+ # == HexaPDF::Document
73
+ #
72
74
  # Represents one PDF document.
73
75
  #
74
76
  # A PDF document consists of (indirect) objects, so the main job of this class is to provide
75
77
  # methods for working with these objects. However, since a PDF document may also be
76
78
  # incrementally updated and can therefore contain one or more revisions, there are also methods
77
- # to work with these revisions.
79
+ # for working with these revisions.
78
80
  #
79
81
  # Note: This class provides everything to work on PDF documents on a low-level basis. This means
80
- # that there are no convenience methods for higher PDF functionality whatsoever.
82
+ # that there are no convenience methods for higher PDF functionality. Those can be found in the
83
+ # objects linked from here, like #catalog.
84
+ #
85
+ # == Known Messages
86
+ #
87
+ # The document object provides a basic message dispatch system via #register_listener and
88
+ # #dispatch_message.
89
+ #
90
+ # Following are the messages that are used by HexaPDF itself:
91
+ #
92
+ # :complete_objects::
93
+ # This message is called before the first step of writing a document. Listeners should
94
+ # complete PDF objects that are missing some information.
95
+ #
96
+ # For example, the font system uses this message to complete the font objects with
97
+ # information that is only available once all the used glyphs are known.
98
+ #
99
+ # :before_write::
100
+ # This message is called before a document is actually serialized and written.
81
101
  class Document
82
102
 
83
103
  autoload(:Pages, 'hexapdf/document/pages')
@@ -400,11 +420,11 @@ module HexaPDF
400
420
  # object in the PDF document. The block may either accept only the object or the object and the
401
421
  # revision it is in.
402
422
  #
403
- # By default, only the current version of each object is returned which implies that each
404
- # object number is yielded exactly once. If the +current+ option is +false+, all stored
405
- # objects from newest to oldest are returned, not only the current version of each object.
423
+ # By default, only the current version of each object is returned which implies that each object
424
+ # number is yielded exactly once. If the +only_current+ option is +false+, all stored objects
425
+ # from newest to oldest are returned, not only the current version of each object.
406
426
  #
407
- # The +current+ option can make a difference because the document can contain multiple
427
+ # The +only_current+ option can make a difference because the document can contain multiple
408
428
  # revisions:
409
429
  #
410
430
  # * Multiple revisions may contain objects with the same object and generation numbers, e.g.
@@ -442,6 +462,9 @@ module HexaPDF
442
462
  end
443
463
 
444
464
  # Dispatches the message +name+ with the given arguments to all registered listeners.
465
+ #
466
+ # See the main Document documentation for an overview of messages that are used by HexaPDF
467
+ # itself.
445
468
  def dispatch_message(name, *args)
446
469
  @listeners[name]&.each {|obj| obj.call(*args) }
447
470
  end
@@ -594,13 +617,9 @@ module HexaPDF
594
617
  # If a block is given, it is called on validation problems.
595
618
  #
596
619
  # See HexaPDF::Object#validate for more information.
597
- def validate(auto_correct: true, only_loaded: false) #:yield: object, msg, correctable
598
- cur_obj = trailer
599
- block = (block_given? ? lambda {|msg, correctable| yield(cur_obj, msg, correctable) } : nil)
600
-
620
+ def validate(auto_correct: true, only_loaded: false, &block) #:yield: msg, correctable, object
601
621
  result = trailer.validate(auto_correct: auto_correct, &block)
602
622
  each(only_current: false, only_loaded: only_loaded) do |obj|
603
- cur_obj = obj
604
623
  result &&= obj.validate(auto_correct: auto_correct, &block)
605
624
  end
606
625
  result
@@ -643,7 +662,7 @@ module HexaPDF
643
662
  end
644
663
 
645
664
  if validate
646
- self.validate(auto_correct: true) do |obj, msg, correctable|
665
+ self.validate(auto_correct: true) do |msg, correctable, obj|
647
666
  next if correctable
648
667
  raise HexaPDF::Error, "Validation error for (#{obj.oid},#{obj.gen}): #{msg}"
649
668
  end