hexapdf 0.12.1 → 0.14.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +130 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/content/parser.rb +1 -1
  13. data/lib/hexapdf/dictionary.rb +9 -6
  14. data/lib/hexapdf/dictionary_fields.rb +1 -9
  15. data/lib/hexapdf/document.rb +41 -16
  16. data/lib/hexapdf/document/files.rb +0 -1
  17. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  18. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  19. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  20. data/lib/hexapdf/font/cmap.rb +1 -4
  21. data/lib/hexapdf/font/true_type/subsetter.rb +12 -3
  22. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  23. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  24. data/lib/hexapdf/font/true_type/table/post.rb +15 -10
  25. data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
  26. data/lib/hexapdf/font_loader/from_file.rb +18 -8
  27. data/lib/hexapdf/image_loader/png.rb +3 -2
  28. data/lib/hexapdf/importer.rb +3 -2
  29. data/lib/hexapdf/layout/line.rb +1 -1
  30. data/lib/hexapdf/layout/style.rb +23 -23
  31. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  32. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  33. data/lib/hexapdf/object.rb +52 -25
  34. data/lib/hexapdf/parser.rb +96 -4
  35. data/lib/hexapdf/pdf_array.rb +12 -5
  36. data/lib/hexapdf/revisions.rb +29 -21
  37. data/lib/hexapdf/serializer.rb +34 -8
  38. data/lib/hexapdf/task/optimize.rb +6 -4
  39. data/lib/hexapdf/tokenizer.rb +4 -3
  40. data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
  41. data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
  42. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  43. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  44. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  45. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  46. data/lib/hexapdf/type/actions/uri.rb +3 -2
  47. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  48. data/lib/hexapdf/type/catalog.rb +2 -2
  49. data/lib/hexapdf/type/cid_font.rb +1 -1
  50. data/lib/hexapdf/type/file_specification.rb +1 -1
  51. data/lib/hexapdf/type/font.rb +1 -1
  52. data/lib/hexapdf/type/font_simple.rb +4 -2
  53. data/lib/hexapdf/type/font_true_type.rb +6 -2
  54. data/lib/hexapdf/type/font_type0.rb +4 -4
  55. data/lib/hexapdf/type/form.rb +15 -2
  56. data/lib/hexapdf/type/image.rb +2 -2
  57. data/lib/hexapdf/type/page.rb +37 -13
  58. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  59. data/lib/hexapdf/type/resources.rb +1 -0
  60. data/lib/hexapdf/type/trailer.rb +2 -3
  61. data/lib/hexapdf/utils/object_hash.rb +0 -1
  62. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  63. data/lib/hexapdf/version.rb +1 -1
  64. data/test/hexapdf/common_tokenizer_tests.rb +6 -1
  65. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  66. data/test/hexapdf/content/test_canvas.rb +3 -3
  67. data/test/hexapdf/content/test_color_space.rb +1 -1
  68. data/test/hexapdf/encryption/test_aes.rb +4 -4
  69. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  70. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  71. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  72. data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
  73. data/test/hexapdf/font/true_type/test_subsetter.rb +5 -0
  74. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
  75. data/test/hexapdf/font_loader/test_from_file.rb +7 -0
  76. data/test/hexapdf/layout/test_style.rb +1 -1
  77. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  78. data/test/hexapdf/test_configuration.rb +2 -2
  79. data/test/hexapdf/test_dictionary.rb +8 -1
  80. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  81. data/test/hexapdf/test_document.rb +18 -10
  82. data/test/hexapdf/test_object.rb +71 -26
  83. data/test/hexapdf/test_parser.rb +171 -53
  84. data/test/hexapdf/test_pdf_array.rb +8 -1
  85. data/test/hexapdf/test_revisions.rb +35 -0
  86. data/test/hexapdf/test_writer.rb +2 -2
  87. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
  88. data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
  89. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  90. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  91. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  92. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  93. data/test/hexapdf/type/test_font_simple.rb +2 -1
  94. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  95. data/test/hexapdf/type/test_form.rb +26 -1
  96. data/test/hexapdf/type/test_page.rb +45 -7
  97. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  98. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  99. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  100. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  101. data/test/test_helper.rb +2 -0
  102. metadata +6 -11
@@ -162,9 +162,10 @@ module HexaPDF
162
162
  io.seek(length, IO::SEEK_CUR)
163
163
  end
164
164
  when 'tRNS' # PNG s11.3.2
165
- if @color_type == INDEXED
165
+ case @color_type
166
+ when INDEXED
166
167
  trns = io.read(length).unpack('C*')
167
- elsif @color_type == TRUECOLOR || @color_type == GREYSCALE
168
+ when TRUECOLOR, GREYSCALE
168
169
  dict[:Mask] = io.read(length).unpack('n*').map {|val| [val, val] }.flatten
169
170
  else
170
171
  io.seek(length, IO::SEEK_CUR)
@@ -90,7 +90,7 @@ module HexaPDF
90
90
  #
91
91
  # An error is raised if the object doesn't belong to the +source+ document.
92
92
  def import(object)
93
- mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
93
+ mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
94
94
  if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
95
  raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
96
  elsif mapped_object && mapped_object == @destination.object(mapped_object)
@@ -118,7 +118,8 @@ module HexaPDF
118
118
  if object.type == :Catalog || object.type == :Pages
119
119
  @mapper[object.data] = nil
120
120
  else
121
- obj = @mapper[object.data] = object.dup
121
+ obj = object.dup
122
+ @mapper[object.data] = NullableWeakRef.new(obj)
122
123
  obj.document = @destination.__getobj__
123
124
  obj.instance_variable_set(:@data, obj.data.dup)
124
125
  obj.data.oid = 0
@@ -198,7 +198,7 @@ module HexaPDF
198
198
  # Note: The cache is not cleared!
199
199
  def add(item)
200
200
  last = @items.last
201
- if last.class == item.class && item.kind_of?(TextFragment) && last.style == item.style
201
+ if last.instance_of?(item.class) && item.kind_of?(TextFragment) && last.style == item.style
202
202
  if last.items.frozen?
203
203
  @items[-1] = last = last.dup
204
204
  last.items = last.items.dup
@@ -524,7 +524,7 @@ module HexaPDF
524
524
  # Style.new(font_size: 15, align: :center, valign: center)
525
525
  def initialize(**properties)
526
526
  update(**properties)
527
- @scaled_item_widths = {}
527
+ @scaled_item_widths = {}.compare_by_identity
528
528
  end
529
529
 
530
530
  # Duplicates the complex properties that can be modified, as well as the cache.
@@ -883,41 +883,41 @@ module HexaPDF
883
883
  [:text_rise, 0],
884
884
  [:font_features, {}],
885
885
  [:text_rendering_mode, "Content::TextRenderingMode::FILL",
886
- setter: "Content::TextRenderingMode.normalize(value)"],
886
+ {setter: "Content::TextRenderingMode.normalize(value)"}],
887
887
  [:subscript, false,
888
- setter: "value; superscript(false) if superscript",
889
- valid_values: [true, false]],
888
+ {setter: "value; superscript(false) if superscript",
889
+ valid_values: [true, false]}],
890
890
  [:superscript, false,
891
- setter: "value; subscript(false) if subscript",
892
- valid_values: [true, false]],
893
- [:underline, false, valid_values: [true, false]],
894
- [:strikeout, false, valid_values: [true, false]],
891
+ {setter: "value; subscript(false) if subscript",
892
+ valid_values: [true, false]}],
893
+ [:underline, false, {valid_values: [true, false]}],
894
+ [:strikeout, false, {valid_values: [true, false]}],
895
895
  [:fill_color, "default_color"],
896
896
  [:fill_alpha, 1],
897
897
  [:stroke_color, "default_color"],
898
898
  [:stroke_alpha, 1],
899
899
  [:stroke_width, 1],
900
900
  [:stroke_cap_style, "Content::LineCapStyle::BUTT_CAP",
901
- setter: "Content::LineCapStyle.normalize(value)"],
901
+ {setter: "Content::LineCapStyle.normalize(value)"}],
902
902
  [:stroke_join_style, "Content::LineJoinStyle::MITER_JOIN",
903
- setter: "Content::LineJoinStyle.normalize(value)"],
903
+ {setter: "Content::LineJoinStyle.normalize(value)"}],
904
904
  [:stroke_miter_limit, 10.0],
905
905
  [:stroke_dash_pattern, "Content::LineDashPattern.new",
906
- setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"],
907
- [:align, :left, valid_values: [:left, :center, :right, :justify]],
908
- [:valign, :top, valid_values: [:top, :center, :bottom]],
906
+ {setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"}],
907
+ [:align, :left, {valid_values: [:left, :center, :right, :justify]}],
908
+ [:valign, :top, {valid_values: [:top, :center, :bottom]}],
909
909
  [:text_indent, 0],
910
910
  [:line_spacing, "LineSpacing.new(type: :single)",
911
- setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
- extra_args: ", extra_arg = nil"],
913
- [:last_line_gap, false, valid_values: [true, false]],
911
+ {setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
+ extra_args: ", extra_arg = nil"}],
913
+ [:last_line_gap, false, {valid_values: [true, false]}],
914
914
  [:background_color, nil],
915
- [:padding, "Quad.new(0)", setter: "Quad.new(value)"],
916
- [:margin, "Quad.new(0)", setter: "Quad.new(value)"],
917
- [:border, "Border.new", setter: "Border.new(**value)"],
918
- [:overlays, "Layers.new", setter: "Layers.new(value)"],
919
- [:underlays, "Layers.new", setter: "Layers.new(value)"],
920
- [:position, :default, valid_values: [:default, :float, :flow, :absolute]],
915
+ [:padding, "Quad.new(0)", {setter: "Quad.new(value)"}],
916
+ [:margin, "Quad.new(0)", {setter: "Quad.new(value)"}],
917
+ [:border, "Border.new", {setter: "Border.new(**value)"}],
918
+ [:overlays, "Layers.new", {setter: "Layers.new(value)"}],
919
+ [:underlays, "Layers.new", {setter: "Layers.new(value)"}],
920
+ [:position, :default, {valid_values: [:default, :float, :flow, :absolute]}],
921
921
  [:position_hint, nil],
922
922
  ].each do |name, default, options = {}|
923
923
  default = default.inspect unless default.kind_of?(String)
@@ -1075,7 +1075,7 @@ module HexaPDF
1075
1075
  # The item may be a (singleton) glyph object or an integer/float, i.e. items that can appear
1076
1076
  # inside a TextFragment.
1077
1077
  def scaled_item_width(item)
1078
- @scaled_item_widths[item.object_id] ||=
1078
+ @scaled_item_widths[item] ||=
1079
1079
  begin
1080
1080
  if item.kind_of?(Numeric)
1081
1081
  -item * scaled_font_size
@@ -388,7 +388,7 @@ module HexaPDF
388
388
  end
389
389
  when :penalty
390
390
  if item.penalty <= -Penalty::INFINITY
391
- add_box_item(item.item) if item.item
391
+ add_box_item(item.item) if item.width > 0
392
392
  break unless yield(create_unjustified_line, item)
393
393
  reset_after_line_break(index + 1)
394
394
  elsif item.penalty >= Penalty::INFINITY
@@ -458,7 +458,7 @@ module HexaPDF
458
458
  end
459
459
  when :penalty
460
460
  if item.penalty <= -Penalty::INFINITY
461
- add_box_item(item.item) if item.item
461
+ add_box_item(item.item) if item.width > 0
462
462
  break unless (action = yield(create_unjustified_line, item))
463
463
  reset_after_line_break_variable_width(index + 1, true, action)
464
464
  elsif item.penalty >= Penalty::INFINITY
@@ -68,9 +68,10 @@ module HexaPDF
68
68
  text_fragment.clear_cache
69
69
  end
70
70
  if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
71
- if font.font_type == :TrueType
71
+ case font.font_type
72
+ when :TrueType
72
73
  process_true_type_kerning(text_fragment)
73
- elsif font.font_type == :Type1
74
+ when :Type1
74
75
  process_type1_kerning(text_fragment)
75
76
  end
76
77
  text_fragment.clear_cache
@@ -122,9 +122,6 @@ module HexaPDF
122
122
 
123
123
  include Comparable
124
124
 
125
- # A list of classes whose objects cannot be duplicated.
126
- NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
127
-
128
125
  # :call-seq:
129
126
  # HexaPDF::Object.deep_copy(object) -> copy
130
127
  #
@@ -139,8 +136,6 @@ module HexaPDF
139
136
  (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
140
137
  when HexaPDF::Reference
141
138
  object
142
- when *NOT_DUPLICATABLE_CLASSES
143
- object
144
139
  else
145
140
  object.dup
146
141
  end
@@ -251,29 +246,31 @@ module HexaPDF
251
246
  end
252
247
 
253
248
  # :call-seq:
254
- # obj.validate(auto_correct: true) -> true or false
255
- # obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
249
+ # obj.validate(auto_correct: true) -> true or false
250
+ # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
256
251
  #
257
- # Validates the object and, optionally, corrects problems when the option +auto_correct+ is set.
258
- # The validation routine itself has to be implemented in the #perform_validation method - see
259
- # its documentation for more information.
252
+ # Validates the object, optionally corrects problems when the option +auto_correct+ is set and
253
+ # returns +true+ if the object is deemed valid and +false+ otherwise.
260
254
  #
261
255
  # If a block is given, it is called on validation problems with a problem description and
262
- # whether the problem is correctable.
256
+ # whether the problem is automatically correctable. The third argument to the block is usually
257
+ # this object but may be another object if during auto-correction a new object was created and
258
+ # validated.
263
259
  #
264
- # Returns +true+ if the object is deemed valid and +false+ otherwise.
260
+ # The validation routine itself has to be implemented in the #perform_validation method - see
261
+ # its documentation for more information.
265
262
  #
266
263
  # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
267
264
  # currently implement the full PDF spec. However, if the return value is +false+, there is
268
265
  # certainly a problem!
269
266
  def validate(auto_correct: true)
270
- catch do |catch_tag|
271
- perform_validation do |msg, correctable|
272
- yield(msg, correctable) if block_given?
273
- throw(catch_tag, false) unless auto_correct && correctable
274
- end
275
- true
267
+ result = true
268
+ perform_validation do |msg, correctable, object|
269
+ yield(msg, correctable, object || self) if block_given?
270
+ result = false unless correctable
271
+ return false unless auto_correct
276
272
  end
273
+ result
277
274
  end
278
275
 
279
276
  # Makes a deep copy of the source PDF object and resets the object identifier.
@@ -287,6 +284,28 @@ module HexaPDF
287
284
  obj
288
285
  end
289
286
 
287
+ # Caches and returns the given +value+ or the value of the block under the given cache key. If
288
+ # there is already a cached value for the key and +update+ is +false+, it is just returned.
289
+ #
290
+ # Set +update+ to +true+ to force an update of the cached value.
291
+ #
292
+ # This uses Document#cache internally.
293
+ def cache(key, value = Document::UNSET, update: false, &block)
294
+ document.cache(@data, key, value, update: update, &block)
295
+ end
296
+
297
+ # Returns +true+ if there is a cached value for the given key.
298
+ #
299
+ # This uses Document#cached? internally.
300
+ def cached?(key)
301
+ document.cached?(@data, key)
302
+ end
303
+
304
+ # Clears the cache for this object.
305
+ def clear_cache
306
+ document.clear_cache(@data)
307
+ end
308
+
290
309
  # Compares this object to another object.
291
310
  #
292
311
  # If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
@@ -339,17 +358,25 @@ module HexaPDF
339
358
  # are also performed!
340
359
  #
341
360
  # When the validation routine finds that the object is invalid, it has to yield a problem
342
- # description and whether the problem can be corrected. After yielding, the problem has to be
343
- # corrected which poses no problem because the #validate method makes sure that the yield only
344
- # returns if the problem is actually correctable and if it should be corrected.
361
+ # description and whether the problem can be corrected. An optional third argument may contain
362
+ # the object that gets validated if it is different from this object (may happen when
363
+ # auto-correction is used).
345
364
  #
346
- # Here is a sample validation routine for stream objects:
365
+ # After yielding, the problem has to be corrected if it is correctable. If it is not correctable
366
+ # and not correcting would lead to exceptions the method has to return early.
367
+ #
368
+ # Here is a sample validation routine for a dictionary object type:
347
369
  #
348
370
  # def perform_validation
349
371
  # super
350
- # unless value.kind_of?(Hash)
351
- # yield("A stream object needs a Hash as value")
352
- # self.value = {}
372
+ #
373
+ # if value[:SomeKey].length != 7
374
+ # yield("Length of /SomeKey is invalid")
375
+ # # No need to return early here because following check doesn't rely on /SomeKey
376
+ # end
377
+ #
378
+ # if value[:OtherKey] % 2 == 0
379
+ # yield("/OtherKey needs to contain an odd number of elements")
353
380
  # end
354
381
  # end
355
382
  def perform_validation(&block)
@@ -59,6 +59,7 @@ module HexaPDF
59
59
  @tokenizer = Tokenizer.new(io)
60
60
  @document = document
61
61
  @object_stream_data = {}
62
+ @reconstructed_revision = nil
62
63
  retrieve_pdf_header_offset_and_version
63
64
  end
64
65
 
@@ -86,6 +87,8 @@ module HexaPDF
86
87
  end
87
88
 
88
89
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
+ rescue HexaPDF::MalformedPDFError
91
+ reconstructed_revision.object(xref_entry)
89
92
  end
90
93
 
91
94
  # Parses the indirect object at the specified offset.
@@ -110,7 +113,15 @@ module HexaPDF
110
113
  maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
111
114
  object = nil
112
115
  else
113
- object = @tokenizer.next_object
116
+ begin
117
+ object = @tokenizer.next_object
118
+ rescue MalformedPDFError
119
+ # Handle often found invalid indirect object with missing whitespace after number
120
+ maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
121
+ force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
122
+ object = tok.to_i
123
+ @tokenizer.pos -= 6
124
+ end
114
125
  end
115
126
 
116
127
  tok = @tokenizer.next_token
@@ -235,14 +246,14 @@ module HexaPDF
235
246
  @tokenizer.skip_whitespace
236
247
  start.upto(start + number_of_entries - 1) do |oid|
237
248
  pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
238
- maybe_raise("Invalid cross-reference subsection entry", pos: @tokenizer.pos,
239
- force: matched_size == 20)
249
+ maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
250
+ force: !matched_size)
240
251
  end
241
252
  if xref.entry?(oid)
242
253
  next
243
254
  elsif type == 'n'
244
255
  if pos == 0 || gen > 65535
245
- maybe_raise("Invalid in use cross-reference entry in cross-reference section",
256
+ maybe_raise("Invalid in use cross-reference entry",
246
257
  pos: @tokenizer.pos)
247
258
  xref.add_free_entry(oid, gen)
248
259
  else
@@ -264,6 +275,27 @@ module HexaPDF
264
275
  raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
265
276
  end
266
277
 
278
+ unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
279
+ first_entry = xref[xref.oids[0]]
280
+ test_entry = xref[xref.oids[-1]]
281
+ @tokenizer.pos = test_entry.pos + @header_offset
282
+ test_oid = @tokenizer.next_token
283
+ first_oid = first_entry.oid
284
+
285
+ force_failure = !first_entry.free? || first_entry.gen != 65535 ||
286
+ !test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
287
+ maybe_raise("Main cross-reference section has invalid numbering",
288
+ pos: offset + @header_offset, force: force_failure)
289
+
290
+ new_xref = XRefSection.new
291
+ xref.oids.each do |oid|
292
+ entry = xref[oid]
293
+ entry.oid -= first_oid
294
+ new_xref.send(:[]=, entry.oid, entry.gen, entry)
295
+ end
296
+ xref = new_xref
297
+ end
298
+
267
299
  [xref, trailer]
268
300
  end
269
301
 
@@ -313,6 +345,11 @@ module HexaPDF
313
345
  @startxref_offset = lines[eof_index - 1].to_i
314
346
  end
315
347
 
348
+ # Returns the reconstructed revision.
349
+ def reconstructed_revision
350
+ @reconstructed_revision ||= reconstruct_revision
351
+ end
352
+
316
353
  # Returns the PDF version number that is stored in the file header.
317
354
  #
318
355
  # See: PDF1.7 s7.5.2
@@ -338,6 +375,61 @@ module HexaPDF
338
375
  @header_version = $1
339
376
  end
340
377
 
378
+ # Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
379
+ # file and returning a Revision object for loading the found objects.
380
+ #
381
+ # If the file contains multiple cross-reference sections, all objects will be put into a single
382
+ # cross-reference table, later objects overwriting prior ones.
383
+ def reconstruct_revision
384
+ raise unless @document.config['parser.try_xref_reconstruction']
385
+ msg = "#{$!} - trying cross-reference table reconstruction"
386
+ @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
387
+
388
+ xref = XRefSection.new
389
+ @tokenizer.pos = 0
390
+ while true
391
+ @tokenizer.skip_whitespace
392
+ pos = @tokenizer.pos
393
+ @tokenizer.scan_until(/(\n|\r\n?)+/)
394
+ next_new_line_pos = @tokenizer.pos
395
+ @tokenizer.pos = pos
396
+
397
+ token = @tokenizer.next_token rescue nil
398
+ if token.kind_of?(Integer)
399
+ gen = @tokenizer.next_token rescue nil
400
+ tok = @tokenizer.next_token rescue nil
401
+ if @tokenizer.pos > next_new_line_pos
402
+ @tokenizer.pos = next_new_line_pos
403
+ elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
404
+ xref.add_in_use_entry(token, gen, pos)
405
+ @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
406
+ end
407
+ elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
408
+ obj = @tokenizer.next_object rescue nil
409
+ # Use last trailer found in case of multiple revisions but use first trailer in case of
410
+ # linearized file.
411
+ trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
412
+ elsif token == Tokenizer::NO_MORE_TOKENS
413
+ break
414
+ else
415
+ @tokenizer.pos = next_new_line_pos
416
+ end
417
+ end
418
+
419
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
420
+ if !trailer || trailer.empty?
421
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
422
+ end
423
+
424
+ loader = lambda do |xref_entry|
425
+ obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
426
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
427
+ end
428
+
429
+ Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
430
+ loader: loader)
431
+ end
432
+
341
433
  # Raises a HexaPDF::MalformedPDFError with the given message and source position.
342
434
  def raise_malformed(msg, pos: nil)
343
435
  raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
@@ -66,7 +66,7 @@ module HexaPDF
66
66
  # subclasses of HexaPDF::Object are returned as is (it makes no sense, for example, to return
67
67
  # the hash that describes the Catalog instead of the Catalog object).
68
68
  def [](arg1, arg2 = nil)
69
- data = value[arg1, *arg2]
69
+ data = arg2 ? value[arg1, arg2] : value[arg1]
70
70
  return if data.nil?
71
71
 
72
72
  if arg2 || arg1.kind_of?(Range)
@@ -83,7 +83,7 @@ module HexaPDF
83
83
  # subclasses) and the given data has not (including subclasses), the data is stored inside the
84
84
  # HexaPDF::Object.
85
85
  def []=(index, data)
86
- if value[index].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
86
+ if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
87
87
  !data.kind_of?(HexaPDF::Reference)
88
88
  value[index].value = data
89
89
  else
@@ -113,6 +113,13 @@ module HexaPDF
113
113
  value.delete_at(index)
114
114
  end
115
115
 
116
+ # Deletes all values from the PDFArray that are equal to the given object.
117
+ #
118
+ # Returns the last deleted item, or +nil+ if no matching item is found.
119
+ def delete(object)
120
+ value.delete(object)
121
+ end
122
+
116
123
  # :call-seq:
117
124
  # array.slice!(index) -> obj or nil
118
125
  # array.slice!(start, length) -> new_array or nil
@@ -174,9 +181,9 @@ module HexaPDF
174
181
  self
175
182
  end
176
183
 
177
- # Returns a duplicate of the underlying array.
184
+ # Returns an array containing the preprocessed values (like in #[]).
178
185
  def to_ary
179
- value.dup
186
+ each.to_a
180
187
  end
181
188
 
182
189
  private
@@ -196,7 +203,7 @@ module HexaPDF
196
203
  data = document.deref(data)
197
204
  value[index] = data if index
198
205
  end
199
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
206
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
200
207
  data = data.value
201
208
  end
202
209
  data