hexapdf 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
@@ -117,7 +117,6 @@ module HexaPDF
117
117
 
118
118
  @document.pages.each do |page|
119
119
  page[:Annots]&.each do |annot|
120
- annot = @document.deref(annot)
121
120
  next unless annot[:Subtype] == :FileAttachment
122
121
  spec = @document.deref(annot[:FS])
123
122
  yield(spec) unless seen.key?(spec)
@@ -49,7 +49,7 @@ module HexaPDF
49
49
 
50
50
  # Creates a new FastARC4 object using the given encryption key.
51
51
  def initialize(key)
52
- @cipher = OpenSSL::Cipher::RC4.new
52
+ @cipher = OpenSSL::Cipher.new('rc4')
53
53
  @cipher.key_len = key.length
54
54
  @cipher.key = key
55
55
  end
@@ -72,6 +72,7 @@ module HexaPDF
72
72
  super
73
73
  unless [1, 2, 4, 5].include?(value[:V])
74
74
  yield("Value of /V is not one of 1, 2, 4 or 5", false)
75
+ return
75
76
  end
76
77
  if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
77
78
  value[:Length] > 128 || value[:Length] % 8 != 0)
@@ -69,6 +69,7 @@ module HexaPDF
69
69
  when 6
70
70
  if !key?(:OE) || !key?(:UE) || !key?(:Perms)
71
71
  yield("Value of /OE, /UE or /Perms is missing for dictionary revision 6", false)
72
+ return
72
73
  end
73
74
  if value[:U].length != 48 || value[:O].length != 48 || value[:UE].length != 32 ||
74
75
  value[:OE].length != 32 || value[:Perms].length != 16
@@ -100,10 +100,7 @@ module HexaPDF
100
100
  # The writing mode of the CMap: 0 for horizontal, 1 for vertical writing.
101
101
  attr_accessor :wmode
102
102
 
103
- attr_reader :codespace_ranges #: nodoc:
104
- attr_reader :cid_mapping # :nodoc:
105
- attr_reader :cid_range_mappings # :nodoc:
106
- attr_reader :unicode_mapping # :nodoc:
103
+ attr_reader :codespace_ranges, :cid_mapping, :cid_range_mappings, :unicode_mapping # :nodoc:
107
104
  protected :codespace_ranges, :cid_mapping, :cid_range_mappings, :unicode_mapping
108
105
 
109
106
  # Creates a new CMap object.
@@ -76,6 +76,7 @@ module HexaPDF
76
76
 
77
77
  # Apple Mac style information.
78
78
  attr_accessor :mac_style
79
+
79
80
  bit_field(:mac_style, {bold: 0, italic: 1, underline: 2, outline: 3, shadow: 4,
80
81
  condensed: 5, extended: 6})
81
82
 
@@ -65,6 +65,7 @@ module HexaPDF
65
65
 
66
66
  # Characteristics and properties of this font.
67
67
  attr_accessor :type
68
+
68
69
  bit_field(:type, {restricted_license_embedding: 1, preview_and_print_embedding: 2,
69
70
  editable_embedding: 3, no_subsetting: 8, bitmap_embedding_only: 9})
70
71
 
@@ -112,6 +113,7 @@ module HexaPDF
112
113
 
113
114
  # Information concerning the nature of the font patterns.
114
115
  attr_accessor :selection
116
+
115
117
  bit_field(:selection, {italic: 0, underscore: 1, negative: 2, outlined: 3, strikeout: 4,
116
118
  bold: 5, regular: 6, use_typo_metrics: 7, wws: 8, oblique: 9})
117
119
 
@@ -162,9 +162,10 @@ module HexaPDF
162
162
  io.seek(length, IO::SEEK_CUR)
163
163
  end
164
164
  when 'tRNS' # PNG s11.3.2
165
- if @color_type == INDEXED
165
+ case @color_type
166
+ when INDEXED
166
167
  trns = io.read(length).unpack('C*')
167
- elsif @color_type == TRUECOLOR || @color_type == GREYSCALE
168
+ when TRUECOLOR, GREYSCALE
168
169
  dict[:Mask] = io.read(length).unpack('n*').map {|val| [val, val] }.flatten
169
170
  else
170
171
  io.seek(length, IO::SEEK_CUR)
@@ -198,7 +198,7 @@ module HexaPDF
198
198
  # Note: The cache is not cleared!
199
199
  def add(item)
200
200
  last = @items.last
201
- if last.class == item.class && item.kind_of?(TextFragment) && last.style == item.style
201
+ if last.instance_of?(item.class) && item.kind_of?(TextFragment) && last.style == item.style
202
202
  if last.items.frozen?
203
203
  @items[-1] = last = last.dup
204
204
  last.items = last.items.dup
@@ -524,7 +524,7 @@ module HexaPDF
524
524
  # Style.new(font_size: 15, align: :center, valign: center)
525
525
  def initialize(**properties)
526
526
  update(**properties)
527
- @scaled_item_widths = {}
527
+ @scaled_item_widths = {}.compare_by_identity
528
528
  end
529
529
 
530
530
  # Duplicates the complex properties that can be modified, as well as the cache.
@@ -883,41 +883,41 @@ module HexaPDF
883
883
  [:text_rise, 0],
884
884
  [:font_features, {}],
885
885
  [:text_rendering_mode, "Content::TextRenderingMode::FILL",
886
- setter: "Content::TextRenderingMode.normalize(value)"],
886
+ {setter: "Content::TextRenderingMode.normalize(value)"}],
887
887
  [:subscript, false,
888
- setter: "value; superscript(false) if superscript",
889
- valid_values: [true, false]],
888
+ {setter: "value; superscript(false) if superscript",
889
+ valid_values: [true, false]}],
890
890
  [:superscript, false,
891
- setter: "value; subscript(false) if subscript",
892
- valid_values: [true, false]],
893
- [:underline, false, valid_values: [true, false]],
894
- [:strikeout, false, valid_values: [true, false]],
891
+ {setter: "value; subscript(false) if subscript",
892
+ valid_values: [true, false]}],
893
+ [:underline, false, {valid_values: [true, false]}],
894
+ [:strikeout, false, {valid_values: [true, false]}],
895
895
  [:fill_color, "default_color"],
896
896
  [:fill_alpha, 1],
897
897
  [:stroke_color, "default_color"],
898
898
  [:stroke_alpha, 1],
899
899
  [:stroke_width, 1],
900
900
  [:stroke_cap_style, "Content::LineCapStyle::BUTT_CAP",
901
- setter: "Content::LineCapStyle.normalize(value)"],
901
+ {setter: "Content::LineCapStyle.normalize(value)"}],
902
902
  [:stroke_join_style, "Content::LineJoinStyle::MITER_JOIN",
903
- setter: "Content::LineJoinStyle.normalize(value)"],
903
+ {setter: "Content::LineJoinStyle.normalize(value)"}],
904
904
  [:stroke_miter_limit, 10.0],
905
905
  [:stroke_dash_pattern, "Content::LineDashPattern.new",
906
- setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"],
907
- [:align, :left, valid_values: [:left, :center, :right, :justify]],
908
- [:valign, :top, valid_values: [:top, :center, :bottom]],
906
+ {setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"}],
907
+ [:align, :left, {valid_values: [:left, :center, :right, :justify]}],
908
+ [:valign, :top, {valid_values: [:top, :center, :bottom]}],
909
909
  [:text_indent, 0],
910
910
  [:line_spacing, "LineSpacing.new(type: :single)",
911
- setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
- extra_args: ", extra_arg = nil"],
913
- [:last_line_gap, false, valid_values: [true, false]],
911
+ {setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
+ extra_args: ", extra_arg = nil"}],
913
+ [:last_line_gap, false, {valid_values: [true, false]}],
914
914
  [:background_color, nil],
915
- [:padding, "Quad.new(0)", setter: "Quad.new(value)"],
916
- [:margin, "Quad.new(0)", setter: "Quad.new(value)"],
917
- [:border, "Border.new", setter: "Border.new(**value)"],
918
- [:overlays, "Layers.new", setter: "Layers.new(value)"],
919
- [:underlays, "Layers.new", setter: "Layers.new(value)"],
920
- [:position, :default, valid_values: [:default, :float, :flow, :absolute]],
915
+ [:padding, "Quad.new(0)", {setter: "Quad.new(value)"}],
916
+ [:margin, "Quad.new(0)", {setter: "Quad.new(value)"}],
917
+ [:border, "Border.new", {setter: "Border.new(**value)"}],
918
+ [:overlays, "Layers.new", {setter: "Layers.new(value)"}],
919
+ [:underlays, "Layers.new", {setter: "Layers.new(value)"}],
920
+ [:position, :default, {valid_values: [:default, :float, :flow, :absolute]}],
921
921
  [:position_hint, nil],
922
922
  ].each do |name, default, options = {}|
923
923
  default = default.inspect unless default.kind_of?(String)
@@ -1075,7 +1075,7 @@ module HexaPDF
1075
1075
  # The item may be a (singleton) glyph object or an integer/float, i.e. items that can appear
1076
1076
  # inside a TextFragment.
1077
1077
  def scaled_item_width(item)
1078
- @scaled_item_widths[item.object_id] ||=
1078
+ @scaled_item_widths[item] ||=
1079
1079
  begin
1080
1080
  if item.kind_of?(Numeric)
1081
1081
  -item * scaled_font_size
@@ -68,9 +68,10 @@ module HexaPDF
68
68
  text_fragment.clear_cache
69
69
  end
70
70
  if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
71
- if font.font_type == :TrueType
71
+ case font.font_type
72
+ when :TrueType
72
73
  process_true_type_kerning(text_fragment)
73
- elsif font.font_type == :Type1
74
+ when :Type1
74
75
  process_type1_kerning(text_fragment)
75
76
  end
76
77
  text_fragment.clear_cache
@@ -122,9 +122,6 @@ module HexaPDF
122
122
 
123
123
  include Comparable
124
124
 
125
- # A list of classes whose objects cannot be duplicated.
126
- NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
127
-
128
125
  # :call-seq:
129
126
  # HexaPDF::Object.deep_copy(object) -> copy
130
127
  #
@@ -139,8 +136,6 @@ module HexaPDF
139
136
  (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
140
137
  when HexaPDF::Reference
141
138
  object
142
- when *NOT_DUPLICATABLE_CLASSES
143
- object
144
139
  else
145
140
  object.dup
146
141
  end
@@ -251,29 +246,31 @@ module HexaPDF
251
246
  end
252
247
 
253
248
  # :call-seq:
254
- # obj.validate(auto_correct: true) -> true or false
255
- # obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
249
+ # obj.validate(auto_correct: true) -> true or false
250
+ # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
256
251
  #
257
- # Validates the object and, optionally, corrects problems when the option +auto_correct+ is set.
258
- # The validation routine itself has to be implemented in the #perform_validation method - see
259
- # its documentation for more information.
252
+ # Validates the object, optionally corrects problems when the option +auto_correct+ is set and
253
+ # returns +true+ if the object is deemed valid and +false+ otherwise.
260
254
  #
261
255
  # If a block is given, it is called on validation problems with a problem description and
262
- # whether the problem is correctable.
256
+ # whether the problem is automatically correctable. The third argument to the block is usually
257
+ # this object but may be another object if during auto-correction a new object was created and
258
+ # validated.
263
259
  #
264
- # Returns +true+ if the object is deemed valid and +false+ otherwise.
260
+ # The validation routine itself has to be implemented in the #perform_validation method - see
261
+ # its documentation for more information.
265
262
  #
266
263
  # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
267
264
  # currently implement the full PDF spec. However, if the return value is +false+, there is
268
265
  # certainly a problem!
269
266
  def validate(auto_correct: true)
270
- catch do |catch_tag|
271
- perform_validation do |msg, correctable|
272
- yield(msg, correctable) if block_given?
273
- throw(catch_tag, false) unless auto_correct && correctable
274
- end
275
- true
267
+ result = true
268
+ perform_validation do |msg, correctable, object|
269
+ yield(msg, correctable, object || self) if block_given?
270
+ result = false unless correctable
271
+ return false unless auto_correct
276
272
  end
273
+ result
277
274
  end
278
275
 
279
276
  # Makes a deep copy of the source PDF object and resets the object identifier.
@@ -339,17 +336,25 @@ module HexaPDF
339
336
  # are also performed!
340
337
  #
341
338
  # When the validation routine finds that the object is invalid, it has to yield a problem
342
- # description and whether the problem can be corrected. After yielding, the problem has to be
343
- # corrected which poses no problem because the #validate method makes sure that the yield only
344
- # returns if the problem is actually correctable and if it should be corrected.
339
+ # description and whether the problem can be corrected. An optional third argument may contain
340
+ # the object that gets validated if it is different from this object (may happen when
341
+ # auto-correction is used).
342
+ #
343
+ # After yielding, the problem has to be corrected if it is correctable. If it is not correctable
344
+ # and not correcting would lead to exceptions the method has to return early.
345
345
  #
346
- # Here is a sample validation routine for stream objects:
346
+ # Here is a sample validation routine for a dictionary object type:
347
347
  #
348
348
  # def perform_validation
349
349
  # super
350
- # unless value.kind_of?(Hash)
351
- # yield("A stream object needs a Hash as value")
352
- # self.value = {}
350
+ #
351
+ # if value[:SomeKey].length != 7
352
+ # yield("Length of /SomeKey is invalid")
353
+ # # No need to return early here because following check doesn't rely on /SomeKey
354
+ # end
355
+ #
356
+ # if value[:OtherKey] % 2 == 0
357
+ # yield("/OtherKey needs to contain an odd number of elements")
353
358
  # end
354
359
  # end
355
360
  def perform_validation(&block)
@@ -59,6 +59,7 @@ module HexaPDF
59
59
  @tokenizer = Tokenizer.new(io)
60
60
  @document = document
61
61
  @object_stream_data = {}
62
+ @reconstructed_revision = nil
62
63
  retrieve_pdf_header_offset_and_version
63
64
  end
64
65
 
@@ -86,6 +87,8 @@ module HexaPDF
86
87
  end
87
88
 
88
89
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
+ rescue HexaPDF::MalformedPDFError
91
+ reconstructed_revision.object(xref_entry)
89
92
  end
90
93
 
91
94
  # Parses the indirect object at the specified offset.
@@ -235,14 +238,14 @@ module HexaPDF
235
238
  @tokenizer.skip_whitespace
236
239
  start.upto(start + number_of_entries - 1) do |oid|
237
240
  pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
238
- maybe_raise("Invalid cross-reference subsection entry", pos: @tokenizer.pos,
239
- force: matched_size == 20)
241
+ maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
242
+ force: !matched_size)
240
243
  end
241
244
  if xref.entry?(oid)
242
245
  next
243
246
  elsif type == 'n'
244
247
  if pos == 0 || gen > 65535
245
- maybe_raise("Invalid in use cross-reference entry in cross-reference section",
248
+ maybe_raise("Invalid in use cross-reference entry",
246
249
  pos: @tokenizer.pos)
247
250
  xref.add_free_entry(oid, gen)
248
251
  else
@@ -313,6 +316,11 @@ module HexaPDF
313
316
  @startxref_offset = lines[eof_index - 1].to_i
314
317
  end
315
318
 
319
+ # Returns the reconstructed revision.
320
+ def reconstructed_revision
321
+ @reconstructed_revision ||= reconstruct_revision
322
+ end
323
+
316
324
  # Returns the PDF version number that is stored in the file header.
317
325
  #
318
326
  # See: PDF1.7 s7.5.2
@@ -338,6 +346,60 @@ module HexaPDF
338
346
  @header_version = $1
339
347
  end
340
348
 
349
+ # Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
350
+ # file and returning a Revision object for loading the found objects.
351
+ #
352
+ # If the file contains multiple cross-reference sections, all objects will be put into a single
353
+ # cross-reference table, later objects overwriting prior ones.
354
+ def reconstruct_revision
355
+ raise unless @document.config['parser.try_xref_reconstruction']
356
+ msg = "#{$!} - trying cross-reference table reconstruction"
357
+ @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
358
+
359
+ xref = XRefSection.new
360
+ @tokenizer.pos = 0
361
+ while true
362
+ pos = @tokenizer.pos
363
+ @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
364
+ next_new_line_pos = @tokenizer.pos
365
+ @tokenizer.pos = pos
366
+
367
+ token = @tokenizer.next_token rescue nil
368
+ if token.kind_of?(Integer)
369
+ gen = @tokenizer.next_token rescue nil
370
+ tok = @tokenizer.next_token rescue nil
371
+ if @tokenizer.pos > next_new_line_pos
372
+ @tokenizer.pos = next_new_line_pos
373
+ elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
374
+ xref.add_in_use_entry(token, gen, pos)
375
+ @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
376
+ end
377
+ elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
378
+ obj = @tokenizer.next_object rescue nil
379
+ # Use last trailer found in case of multiple revisions but use first trailer in case of
380
+ # linearized file.
381
+ trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
382
+ elsif token == Tokenizer::NO_MORE_TOKENS
383
+ break
384
+ else
385
+ @tokenizer.pos = next_new_line_pos
386
+ end
387
+ end
388
+
389
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
390
+ if !trailer || trailer.empty?
391
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
392
+ end
393
+
394
+ loader = lambda do |xref_entry|
395
+ obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
396
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
397
+ end
398
+
399
+ Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
400
+ loader: loader)
401
+ end
402
+
341
403
  # Raises a HexaPDF::MalformedPDFError with the given message and source position.
342
404
  def raise_malformed(msg, pos: nil)
343
405
  raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
@@ -83,7 +83,7 @@ module HexaPDF
83
83
  # subclasses) and the given data has not (including subclasses), the data is stored inside the
84
84
  # HexaPDF::Object.
85
85
  def []=(index, data)
86
- if value[index].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
86
+ if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
87
87
  !data.kind_of?(HexaPDF::Reference)
88
88
  value[index].value = data
89
89
  else
@@ -113,6 +113,13 @@ module HexaPDF
113
113
  value.delete_at(index)
114
114
  end
115
115
 
116
+ # Deletes all values from the PDFArray that are equal to the given object.
117
+ #
118
+ # Returns the last deleted item, or +nil+ if no matching item is found.
119
+ def delete(object)
120
+ value.delete(object)
121
+ end
122
+
116
123
  # :call-seq:
117
124
  # array.slice!(index) -> obj or nil
118
125
  # array.slice!(start, length) -> new_array or nil
@@ -196,7 +203,7 @@ module HexaPDF
196
203
  data = document.deref(data)
197
204
  value[index] = data if index
198
205
  end
199
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
206
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
200
207
  data = data.value
201
208
  end
202
209
  data
@@ -67,30 +67,38 @@ module HexaPDF
67
67
  object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
68
68
 
69
69
  revisions = []
70
- xref_section, trailer = parser.load_revision(parser.startxref_offset)
71
- revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
72
- xref_section: xref_section, loader: object_loader)
73
- seen_xref_offsets = {parser.startxref_offset => true}
74
-
75
- while (prev = revisions[0].trailer.value[:Prev]) &&
76
- !seen_xref_offsets.key?(prev)
77
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
78
- # should be direct. Adobe's POV is followed here. Same with :XRefStm.
79
- xref_section, trailer = parser.load_revision(prev)
80
- seen_xref_offsets[prev] = true
81
-
82
- stm = revisions[0].trailer.value[:XRefStm]
83
- if stm && !seen_xref_offsets.key?(stm)
84
- stm_xref_section, = parser.load_revision(stm)
85
- xref_section.merge!(stm_xref_section)
86
- seen_xref_offsets[stm] = true
70
+ begin
71
+ xref_section, trailer = parser.load_revision(parser.startxref_offset)
72
+ revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
73
+ xref_section: xref_section, loader: object_loader)
74
+ seen_xref_offsets = {parser.startxref_offset => true}
75
+
76
+ while (prev = revisions[0].trailer.value[:Prev]) &&
77
+ !seen_xref_offsets.key?(prev)
78
+ # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # should be direct. Adobe's POV is followed here. Same with :XRefStm.
80
+ xref_section, trailer = parser.load_revision(prev)
81
+ seen_xref_offsets[prev] = true
82
+
83
+ stm = revisions[0].trailer.value[:XRefStm]
84
+ if stm && !seen_xref_offsets.key?(stm)
85
+ stm_xref_section, = parser.load_revision(stm)
86
+ xref_section.merge!(stm_xref_section)
87
+ seen_xref_offsets[stm] = true
88
+ end
89
+
90
+ revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
91
+ xref_section: xref_section, loader: object_loader))
87
92
  end
88
-
89
- revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
90
- xref_section: xref_section, loader: object_loader))
93
+ rescue HexaPDF::MalformedPDFError
94
+ reconstructed_revision = parser.reconstructed_revision
95
+ unless revisions.empty?
96
+ reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
97
+ end
98
+ revisions << reconstructed_revision
91
99
  end
92
100
 
93
- document.version = parser.file_header_version
101
+ document.version = parser.file_header_version rescue '1.0'
94
102
  new(document, initial_revisions: revisions, parser: parser)
95
103
  end
96
104