hexapdf 0.12.3 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/hexapdf/cli/command.rb +4 -2
  4. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  5. data/lib/hexapdf/cli/info.rb +51 -2
  6. data/lib/hexapdf/cli/inspect.rb +30 -8
  7. data/lib/hexapdf/cli/merge.rb +1 -1
  8. data/lib/hexapdf/configuration.rb +15 -0
  9. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  10. data/lib/hexapdf/dictionary.rb +4 -4
  11. data/lib/hexapdf/dictionary_fields.rb +1 -9
  12. data/lib/hexapdf/document.rb +31 -12
  13. data/lib/hexapdf/document/files.rb +0 -1
  14. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  15. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  16. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  17. data/lib/hexapdf/font/cmap.rb +1 -4
  18. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  19. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  20. data/lib/hexapdf/image_loader/png.rb +3 -2
  21. data/lib/hexapdf/layout/line.rb +1 -1
  22. data/lib/hexapdf/layout/style.rb +23 -23
  23. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  24. data/lib/hexapdf/object.rb +30 -25
  25. data/lib/hexapdf/parser.rb +65 -3
  26. data/lib/hexapdf/pdf_array.rb +9 -2
  27. data/lib/hexapdf/revisions.rb +29 -21
  28. data/lib/hexapdf/serializer.rb +1 -1
  29. data/lib/hexapdf/task/optimize.rb +6 -4
  30. data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
  31. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  32. data/lib/hexapdf/type/acro_form/form.rb +6 -4
  33. data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
  34. data/lib/hexapdf/type/actions/uri.rb +3 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  36. data/lib/hexapdf/type/catalog.rb +2 -2
  37. data/lib/hexapdf/type/file_specification.rb +1 -1
  38. data/lib/hexapdf/type/font_simple.rb +3 -1
  39. data/lib/hexapdf/type/font_true_type.rb +6 -2
  40. data/lib/hexapdf/type/font_type0.rb +1 -1
  41. data/lib/hexapdf/type/form.rb +2 -1
  42. data/lib/hexapdf/type/image.rb +2 -2
  43. data/lib/hexapdf/type/page.rb +16 -7
  44. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  45. data/lib/hexapdf/type/resources.rb +1 -0
  46. data/lib/hexapdf/type/trailer.rb +2 -3
  47. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  48. data/lib/hexapdf/version.rb +1 -1
  49. data/test/hexapdf/common_tokenizer_tests.rb +2 -2
  50. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  51. data/test/hexapdf/content/test_canvas.rb +3 -3
  52. data/test/hexapdf/content/test_color_space.rb +1 -1
  53. data/test/hexapdf/encryption/test_aes.rb +4 -4
  54. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  55. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  56. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  57. data/test/hexapdf/layout/test_text_layouter.rb +3 -4
  58. data/test/hexapdf/test_configuration.rb +2 -2
  59. data/test/hexapdf/test_dictionary.rb +3 -1
  60. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  61. data/test/hexapdf/test_document.rb +4 -4
  62. data/test/hexapdf/test_object.rb +44 -26
  63. data/test/hexapdf/test_parser.rb +115 -55
  64. data/test/hexapdf/test_pdf_array.rb +7 -0
  65. data/test/hexapdf/test_revisions.rb +35 -0
  66. data/test/hexapdf/test_writer.rb +2 -2
  67. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
  68. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  69. data/test/hexapdf/type/acro_form/test_form.rb +4 -4
  70. data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
  71. data/test/hexapdf/type/test_font_simple.rb +2 -1
  72. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  73. data/test/hexapdf/type/test_form.rb +1 -1
  74. data/test/hexapdf/type/test_page.rb +8 -1
  75. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  76. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  77. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  78. metadata +5 -12
@@ -117,7 +117,6 @@ module HexaPDF
117
117
 
118
118
  @document.pages.each do |page|
119
119
  page[:Annots]&.each do |annot|
120
- annot = @document.deref(annot)
121
120
  next unless annot[:Subtype] == :FileAttachment
122
121
  spec = @document.deref(annot[:FS])
123
122
  yield(spec) unless seen.key?(spec)
@@ -49,7 +49,7 @@ module HexaPDF
49
49
 
50
50
  # Creates a new FastARC4 object using the given encryption key.
51
51
  def initialize(key)
52
- @cipher = OpenSSL::Cipher::RC4.new
52
+ @cipher = OpenSSL::Cipher.new('rc4')
53
53
  @cipher.key_len = key.length
54
54
  @cipher.key = key
55
55
  end
@@ -72,6 +72,7 @@ module HexaPDF
72
72
  super
73
73
  unless [1, 2, 4, 5].include?(value[:V])
74
74
  yield("Value of /V is not one of 1, 2, 4 or 5", false)
75
+ return
75
76
  end
76
77
  if value[:V] == 2 && (!key?(:Length) || value[:Length] < 40 ||
77
78
  value[:Length] > 128 || value[:Length] % 8 != 0)
@@ -69,6 +69,7 @@ module HexaPDF
69
69
  when 6
70
70
  if !key?(:OE) || !key?(:UE) || !key?(:Perms)
71
71
  yield("Value of /OE, /UE or /Perms is missing for dictionary revision 6", false)
72
+ return
72
73
  end
73
74
  if value[:U].length != 48 || value[:O].length != 48 || value[:UE].length != 32 ||
74
75
  value[:OE].length != 32 || value[:Perms].length != 16
@@ -100,10 +100,7 @@ module HexaPDF
100
100
  # The writing mode of the CMap: 0 for horizontal, 1 for vertical writing.
101
101
  attr_accessor :wmode
102
102
 
103
- attr_reader :codespace_ranges #: nodoc:
104
- attr_reader :cid_mapping # :nodoc:
105
- attr_reader :cid_range_mappings # :nodoc:
106
- attr_reader :unicode_mapping # :nodoc:
103
+ attr_reader :codespace_ranges, :cid_mapping, :cid_range_mappings, :unicode_mapping # :nodoc:
107
104
  protected :codespace_ranges, :cid_mapping, :cid_range_mappings, :unicode_mapping
108
105
 
109
106
  # Creates a new CMap object.
@@ -76,6 +76,7 @@ module HexaPDF
76
76
 
77
77
  # Apple Mac style information.
78
78
  attr_accessor :mac_style
79
+
79
80
  bit_field(:mac_style, {bold: 0, italic: 1, underline: 2, outline: 3, shadow: 4,
80
81
  condensed: 5, extended: 6})
81
82
 
@@ -65,6 +65,7 @@ module HexaPDF
65
65
 
66
66
  # Characteristics and properties of this font.
67
67
  attr_accessor :type
68
+
68
69
  bit_field(:type, {restricted_license_embedding: 1, preview_and_print_embedding: 2,
69
70
  editable_embedding: 3, no_subsetting: 8, bitmap_embedding_only: 9})
70
71
 
@@ -112,6 +113,7 @@ module HexaPDF
112
113
 
113
114
  # Information concerning the nature of the font patterns.
114
115
  attr_accessor :selection
116
+
115
117
  bit_field(:selection, {italic: 0, underscore: 1, negative: 2, outlined: 3, strikeout: 4,
116
118
  bold: 5, regular: 6, use_typo_metrics: 7, wws: 8, oblique: 9})
117
119
 
@@ -162,9 +162,10 @@ module HexaPDF
162
162
  io.seek(length, IO::SEEK_CUR)
163
163
  end
164
164
  when 'tRNS' # PNG s11.3.2
165
- if @color_type == INDEXED
165
+ case @color_type
166
+ when INDEXED
166
167
  trns = io.read(length).unpack('C*')
167
- elsif @color_type == TRUECOLOR || @color_type == GREYSCALE
168
+ when TRUECOLOR, GREYSCALE
168
169
  dict[:Mask] = io.read(length).unpack('n*').map {|val| [val, val] }.flatten
169
170
  else
170
171
  io.seek(length, IO::SEEK_CUR)
@@ -198,7 +198,7 @@ module HexaPDF
198
198
  # Note: The cache is not cleared!
199
199
  def add(item)
200
200
  last = @items.last
201
- if last.class == item.class && item.kind_of?(TextFragment) && last.style == item.style
201
+ if last.instance_of?(item.class) && item.kind_of?(TextFragment) && last.style == item.style
202
202
  if last.items.frozen?
203
203
  @items[-1] = last = last.dup
204
204
  last.items = last.items.dup
@@ -524,7 +524,7 @@ module HexaPDF
524
524
  # Style.new(font_size: 15, align: :center, valign: center)
525
525
  def initialize(**properties)
526
526
  update(**properties)
527
- @scaled_item_widths = {}
527
+ @scaled_item_widths = {}.compare_by_identity
528
528
  end
529
529
 
530
530
  # Duplicates the complex properties that can be modified, as well as the cache.
@@ -883,41 +883,41 @@ module HexaPDF
883
883
  [:text_rise, 0],
884
884
  [:font_features, {}],
885
885
  [:text_rendering_mode, "Content::TextRenderingMode::FILL",
886
- setter: "Content::TextRenderingMode.normalize(value)"],
886
+ {setter: "Content::TextRenderingMode.normalize(value)"}],
887
887
  [:subscript, false,
888
- setter: "value; superscript(false) if superscript",
889
- valid_values: [true, false]],
888
+ {setter: "value; superscript(false) if superscript",
889
+ valid_values: [true, false]}],
890
890
  [:superscript, false,
891
- setter: "value; subscript(false) if subscript",
892
- valid_values: [true, false]],
893
- [:underline, false, valid_values: [true, false]],
894
- [:strikeout, false, valid_values: [true, false]],
891
+ {setter: "value; subscript(false) if subscript",
892
+ valid_values: [true, false]}],
893
+ [:underline, false, {valid_values: [true, false]}],
894
+ [:strikeout, false, {valid_values: [true, false]}],
895
895
  [:fill_color, "default_color"],
896
896
  [:fill_alpha, 1],
897
897
  [:stroke_color, "default_color"],
898
898
  [:stroke_alpha, 1],
899
899
  [:stroke_width, 1],
900
900
  [:stroke_cap_style, "Content::LineCapStyle::BUTT_CAP",
901
- setter: "Content::LineCapStyle.normalize(value)"],
901
+ {setter: "Content::LineCapStyle.normalize(value)"}],
902
902
  [:stroke_join_style, "Content::LineJoinStyle::MITER_JOIN",
903
- setter: "Content::LineJoinStyle.normalize(value)"],
903
+ {setter: "Content::LineJoinStyle.normalize(value)"}],
904
904
  [:stroke_miter_limit, 10.0],
905
905
  [:stroke_dash_pattern, "Content::LineDashPattern.new",
906
- setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"],
907
- [:align, :left, valid_values: [:left, :center, :right, :justify]],
908
- [:valign, :top, valid_values: [:top, :center, :bottom]],
906
+ {setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"}],
907
+ [:align, :left, {valid_values: [:left, :center, :right, :justify]}],
908
+ [:valign, :top, {valid_values: [:top, :center, :bottom]}],
909
909
  [:text_indent, 0],
910
910
  [:line_spacing, "LineSpacing.new(type: :single)",
911
- setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
- extra_args: ", extra_arg = nil"],
913
- [:last_line_gap, false, valid_values: [true, false]],
911
+ {setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
912
+ extra_args: ", extra_arg = nil"}],
913
+ [:last_line_gap, false, {valid_values: [true, false]}],
914
914
  [:background_color, nil],
915
- [:padding, "Quad.new(0)", setter: "Quad.new(value)"],
916
- [:margin, "Quad.new(0)", setter: "Quad.new(value)"],
917
- [:border, "Border.new", setter: "Border.new(**value)"],
918
- [:overlays, "Layers.new", setter: "Layers.new(value)"],
919
- [:underlays, "Layers.new", setter: "Layers.new(value)"],
920
- [:position, :default, valid_values: [:default, :float, :flow, :absolute]],
915
+ [:padding, "Quad.new(0)", {setter: "Quad.new(value)"}],
916
+ [:margin, "Quad.new(0)", {setter: "Quad.new(value)"}],
917
+ [:border, "Border.new", {setter: "Border.new(**value)"}],
918
+ [:overlays, "Layers.new", {setter: "Layers.new(value)"}],
919
+ [:underlays, "Layers.new", {setter: "Layers.new(value)"}],
920
+ [:position, :default, {valid_values: [:default, :float, :flow, :absolute]}],
921
921
  [:position_hint, nil],
922
922
  ].each do |name, default, options = {}|
923
923
  default = default.inspect unless default.kind_of?(String)
@@ -1075,7 +1075,7 @@ module HexaPDF
1075
1075
  # The item may be a (singleton) glyph object or an integer/float, i.e. items that can appear
1076
1076
  # inside a TextFragment.
1077
1077
  def scaled_item_width(item)
1078
- @scaled_item_widths[item.object_id] ||=
1078
+ @scaled_item_widths[item] ||=
1079
1079
  begin
1080
1080
  if item.kind_of?(Numeric)
1081
1081
  -item * scaled_font_size
@@ -68,9 +68,10 @@ module HexaPDF
68
68
  text_fragment.clear_cache
69
69
  end
70
70
  if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
71
- if font.font_type == :TrueType
71
+ case font.font_type
72
+ when :TrueType
72
73
  process_true_type_kerning(text_fragment)
73
- elsif font.font_type == :Type1
74
+ when :Type1
74
75
  process_type1_kerning(text_fragment)
75
76
  end
76
77
  text_fragment.clear_cache
@@ -122,9 +122,6 @@ module HexaPDF
122
122
 
123
123
  include Comparable
124
124
 
125
- # A list of classes whose objects cannot be duplicated.
126
- NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
127
-
128
125
  # :call-seq:
129
126
  # HexaPDF::Object.deep_copy(object) -> copy
130
127
  #
@@ -139,8 +136,6 @@ module HexaPDF
139
136
  (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
140
137
  when HexaPDF::Reference
141
138
  object
142
- when *NOT_DUPLICATABLE_CLASSES
143
- object
144
139
  else
145
140
  object.dup
146
141
  end
@@ -251,29 +246,31 @@ module HexaPDF
251
246
  end
252
247
 
253
248
  # :call-seq:
254
- # obj.validate(auto_correct: true) -> true or false
255
- # obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
249
+ # obj.validate(auto_correct: true) -> true or false
250
+ # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
256
251
  #
257
- # Validates the object and, optionally, corrects problems when the option +auto_correct+ is set.
258
- # The validation routine itself has to be implemented in the #perform_validation method - see
259
- # its documentation for more information.
252
+ # Validates the object, optionally corrects problems when the option +auto_correct+ is set and
253
+ # returns +true+ if the object is deemed valid and +false+ otherwise.
260
254
  #
261
255
  # If a block is given, it is called on validation problems with a problem description and
262
- # whether the problem is correctable.
256
+ # whether the problem is automatically correctable. The third argument to the block is usually
257
+ # this object but may be another object if during auto-correction a new object was created and
258
+ # validated.
263
259
  #
264
- # Returns +true+ if the object is deemed valid and +false+ otherwise.
260
+ # The validation routine itself has to be implemented in the #perform_validation method - see
261
+ # its documentation for more information.
265
262
  #
266
263
  # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
267
264
  # currently implement the full PDF spec. However, if the return value is +false+, there is
268
265
  # certainly a problem!
269
266
  def validate(auto_correct: true)
270
- catch do |catch_tag|
271
- perform_validation do |msg, correctable|
272
- yield(msg, correctable) if block_given?
273
- throw(catch_tag, false) unless auto_correct && correctable
274
- end
275
- true
267
+ result = true
268
+ perform_validation do |msg, correctable, object|
269
+ yield(msg, correctable, object || self) if block_given?
270
+ result = false unless correctable
271
+ return false unless auto_correct
276
272
  end
273
+ result
277
274
  end
278
275
 
279
276
  # Makes a deep copy of the source PDF object and resets the object identifier.
@@ -339,17 +336,25 @@ module HexaPDF
339
336
  # are also performed!
340
337
  #
341
338
  # When the validation routine finds that the object is invalid, it has to yield a problem
342
- # description and whether the problem can be corrected. After yielding, the problem has to be
343
- # corrected which poses no problem because the #validate method makes sure that the yield only
344
- # returns if the problem is actually correctable and if it should be corrected.
339
+ # description and whether the problem can be corrected. An optional third argument may contain
340
+ # the object that gets validated if it is different from this object (may happen when
341
+ # auto-correction is used).
342
+ #
343
+ # After yielding, the problem has to be corrected if it is correctable. If it is not correctable
344
+ # and not correcting would lead to exceptions the method has to return early.
345
345
  #
346
- # Here is a sample validation routine for stream objects:
346
+ # Here is a sample validation routine for a dictionary object type:
347
347
  #
348
348
  # def perform_validation
349
349
  # super
350
- # unless value.kind_of?(Hash)
351
- # yield("A stream object needs a Hash as value")
352
- # self.value = {}
350
+ #
351
+ # if value[:SomeKey].length != 7
352
+ # yield("Length of /SomeKey is invalid")
353
+ # # No need to return early here because following check doesn't rely on /SomeKey
354
+ # end
355
+ #
356
+ # if value[:OtherKey] % 2 == 0
357
+ # yield("/OtherKey needs to contain an odd number of elements")
353
358
  # end
354
359
  # end
355
360
  def perform_validation(&block)
@@ -59,6 +59,7 @@ module HexaPDF
59
59
  @tokenizer = Tokenizer.new(io)
60
60
  @document = document
61
61
  @object_stream_data = {}
62
+ @reconstructed_revision = nil
62
63
  retrieve_pdf_header_offset_and_version
63
64
  end
64
65
 
@@ -86,6 +87,8 @@ module HexaPDF
86
87
  end
87
88
 
88
89
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
+ rescue HexaPDF::MalformedPDFError
91
+ reconstructed_revision.object(xref_entry)
89
92
  end
90
93
 
91
94
  # Parses the indirect object at the specified offset.
@@ -235,14 +238,14 @@ module HexaPDF
235
238
  @tokenizer.skip_whitespace
236
239
  start.upto(start + number_of_entries - 1) do |oid|
237
240
  pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
238
- maybe_raise("Invalid cross-reference subsection entry", pos: @tokenizer.pos,
239
- force: matched_size == 20)
241
+ maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
242
+ force: !matched_size)
240
243
  end
241
244
  if xref.entry?(oid)
242
245
  next
243
246
  elsif type == 'n'
244
247
  if pos == 0 || gen > 65535
245
- maybe_raise("Invalid in use cross-reference entry in cross-reference section",
248
+ maybe_raise("Invalid in use cross-reference entry",
246
249
  pos: @tokenizer.pos)
247
250
  xref.add_free_entry(oid, gen)
248
251
  else
@@ -313,6 +316,11 @@ module HexaPDF
313
316
  @startxref_offset = lines[eof_index - 1].to_i
314
317
  end
315
318
 
319
+ # Returns the reconstructed revision.
320
+ def reconstructed_revision
321
+ @reconstructed_revision ||= reconstruct_revision
322
+ end
323
+
316
324
  # Returns the PDF version number that is stored in the file header.
317
325
  #
318
326
  # See: PDF1.7 s7.5.2
@@ -338,6 +346,60 @@ module HexaPDF
338
346
  @header_version = $1
339
347
  end
340
348
 
349
+ # Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
350
+ # file and returning a Revision object for loading the found objects.
351
+ #
352
+ # If the file contains multiple cross-reference sections, all objects will be put into a single
353
+ # cross-reference table, later objects overwriting prior ones.
354
+ def reconstruct_revision
355
+ raise unless @document.config['parser.try_xref_reconstruction']
356
+ msg = "#{$!} - trying cross-reference table reconstruction"
357
+ @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
358
+
359
+ xref = XRefSection.new
360
+ @tokenizer.pos = 0
361
+ while true
362
+ pos = @tokenizer.pos
363
+ @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
364
+ next_new_line_pos = @tokenizer.pos
365
+ @tokenizer.pos = pos
366
+
367
+ token = @tokenizer.next_token rescue nil
368
+ if token.kind_of?(Integer)
369
+ gen = @tokenizer.next_token rescue nil
370
+ tok = @tokenizer.next_token rescue nil
371
+ if @tokenizer.pos > next_new_line_pos
372
+ @tokenizer.pos = next_new_line_pos
373
+ elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
374
+ xref.add_in_use_entry(token, gen, pos)
375
+ @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
376
+ end
377
+ elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
378
+ obj = @tokenizer.next_object rescue nil
379
+ # Use last trailer found in case of multiple revisions but use first trailer in case of
380
+ # linearized file.
381
+ trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
382
+ elsif token == Tokenizer::NO_MORE_TOKENS
383
+ break
384
+ else
385
+ @tokenizer.pos = next_new_line_pos
386
+ end
387
+ end
388
+
389
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
390
+ if !trailer || trailer.empty?
391
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
392
+ end
393
+
394
+ loader = lambda do |xref_entry|
395
+ obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
396
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
397
+ end
398
+
399
+ Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
400
+ loader: loader)
401
+ end
402
+
341
403
  # Raises a HexaPDF::MalformedPDFError with the given message and source position.
342
404
  def raise_malformed(msg, pos: nil)
343
405
  raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
@@ -83,7 +83,7 @@ module HexaPDF
83
83
  # subclasses) and the given data has not (including subclasses), the data is stored inside the
84
84
  # HexaPDF::Object.
85
85
  def []=(index, data)
86
- if value[index].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
86
+ if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
87
87
  !data.kind_of?(HexaPDF::Reference)
88
88
  value[index].value = data
89
89
  else
@@ -113,6 +113,13 @@ module HexaPDF
113
113
  value.delete_at(index)
114
114
  end
115
115
 
116
+ # Deletes all values from the PDFArray that are equal to the given object.
117
+ #
118
+ # Returns the last deleted item, or +nil+ if no matching item is found.
119
+ def delete(object)
120
+ value.delete(object)
121
+ end
122
+
116
123
  # :call-seq:
117
124
  # array.slice!(index) -> obj or nil
118
125
  # array.slice!(start, length) -> new_array or nil
@@ -196,7 +203,7 @@ module HexaPDF
196
203
  data = document.deref(data)
197
204
  value[index] = data if index
198
205
  end
199
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
206
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
200
207
  data = data.value
201
208
  end
202
209
  data
@@ -67,30 +67,38 @@ module HexaPDF
67
67
  object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
68
68
 
69
69
  revisions = []
70
- xref_section, trailer = parser.load_revision(parser.startxref_offset)
71
- revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
72
- xref_section: xref_section, loader: object_loader)
73
- seen_xref_offsets = {parser.startxref_offset => true}
74
-
75
- while (prev = revisions[0].trailer.value[:Prev]) &&
76
- !seen_xref_offsets.key?(prev)
77
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
78
- # should be direct. Adobe's POV is followed here. Same with :XRefStm.
79
- xref_section, trailer = parser.load_revision(prev)
80
- seen_xref_offsets[prev] = true
81
-
82
- stm = revisions[0].trailer.value[:XRefStm]
83
- if stm && !seen_xref_offsets.key?(stm)
84
- stm_xref_section, = parser.load_revision(stm)
85
- xref_section.merge!(stm_xref_section)
86
- seen_xref_offsets[stm] = true
70
+ begin
71
+ xref_section, trailer = parser.load_revision(parser.startxref_offset)
72
+ revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
73
+ xref_section: xref_section, loader: object_loader)
74
+ seen_xref_offsets = {parser.startxref_offset => true}
75
+
76
+ while (prev = revisions[0].trailer.value[:Prev]) &&
77
+ !seen_xref_offsets.key?(prev)
78
+ # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # should be direct. Adobe's POV is followed here. Same with :XRefStm.
80
+ xref_section, trailer = parser.load_revision(prev)
81
+ seen_xref_offsets[prev] = true
82
+
83
+ stm = revisions[0].trailer.value[:XRefStm]
84
+ if stm && !seen_xref_offsets.key?(stm)
85
+ stm_xref_section, = parser.load_revision(stm)
86
+ xref_section.merge!(stm_xref_section)
87
+ seen_xref_offsets[stm] = true
88
+ end
89
+
90
+ revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
91
+ xref_section: xref_section, loader: object_loader))
87
92
  end
88
-
89
- revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
90
- xref_section: xref_section, loader: object_loader))
93
+ rescue HexaPDF::MalformedPDFError
94
+ reconstructed_revision = parser.reconstructed_revision
95
+ unless revisions.empty?
96
+ reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
97
+ end
98
+ revisions << reconstructed_revision
91
99
  end
92
100
 
93
- document.version = parser.file_header_version
101
+ document.version = parser.file_header_version rescue '1.0'
94
102
  new(document, initial_revisions: revisions, parser: parser)
95
103
  end
96
104