hexapdf 0.12.1 → 0.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +130 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +9 -6
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +12 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +96 -4
- data/lib/hexapdf/pdf_array.rb +12 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +34 -8
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +5 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +171 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
@@ -162,9 +162,10 @@ module HexaPDF
|
|
162
162
|
io.seek(length, IO::SEEK_CUR)
|
163
163
|
end
|
164
164
|
when 'tRNS' # PNG s11.3.2
|
165
|
-
|
165
|
+
case @color_type
|
166
|
+
when INDEXED
|
166
167
|
trns = io.read(length).unpack('C*')
|
167
|
-
|
168
|
+
when TRUECOLOR, GREYSCALE
|
168
169
|
dict[:Mask] = io.read(length).unpack('n*').map {|val| [val, val] }.flatten
|
169
170
|
else
|
170
171
|
io.seek(length, IO::SEEK_CUR)
|
data/lib/hexapdf/importer.rb
CHANGED
@@ -90,7 +90,7 @@ module HexaPDF
|
|
90
90
|
#
|
91
91
|
# An error is raised if the object doesn't belong to the +source+ document.
|
92
92
|
def import(object)
|
93
|
-
mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
|
93
|
+
mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
|
94
94
|
if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
|
95
95
|
raise HexaPDF::Error, "Import error: Incorrect document object for importer"
|
96
96
|
elsif mapped_object && mapped_object == @destination.object(mapped_object)
|
@@ -118,7 +118,8 @@ module HexaPDF
|
|
118
118
|
if object.type == :Catalog || object.type == :Pages
|
119
119
|
@mapper[object.data] = nil
|
120
120
|
else
|
121
|
-
obj =
|
121
|
+
obj = object.dup
|
122
|
+
@mapper[object.data] = NullableWeakRef.new(obj)
|
122
123
|
obj.document = @destination.__getobj__
|
123
124
|
obj.instance_variable_set(:@data, obj.data.dup)
|
124
125
|
obj.data.oid = 0
|
data/lib/hexapdf/layout/line.rb
CHANGED
@@ -198,7 +198,7 @@ module HexaPDF
|
|
198
198
|
# Note: The cache is not cleared!
|
199
199
|
def add(item)
|
200
200
|
last = @items.last
|
201
|
-
if last.
|
201
|
+
if last.instance_of?(item.class) && item.kind_of?(TextFragment) && last.style == item.style
|
202
202
|
if last.items.frozen?
|
203
203
|
@items[-1] = last = last.dup
|
204
204
|
last.items = last.items.dup
|
data/lib/hexapdf/layout/style.rb
CHANGED
@@ -524,7 +524,7 @@ module HexaPDF
|
|
524
524
|
# Style.new(font_size: 15, align: :center, valign: :center)
|
525
525
|
def initialize(**properties)
|
526
526
|
update(**properties)
|
527
|
-
@scaled_item_widths = {}
|
527
|
+
@scaled_item_widths = {}.compare_by_identity
|
528
528
|
end
|
529
529
|
|
530
530
|
# Duplicates the complex properties that can be modified, as well as the cache.
|
@@ -883,41 +883,41 @@ module HexaPDF
|
|
883
883
|
[:text_rise, 0],
|
884
884
|
[:font_features, {}],
|
885
885
|
[:text_rendering_mode, "Content::TextRenderingMode::FILL",
|
886
|
-
setter: "Content::TextRenderingMode.normalize(value)"],
|
886
|
+
{setter: "Content::TextRenderingMode.normalize(value)"}],
|
887
887
|
[:subscript, false,
|
888
|
-
setter: "value; superscript(false) if superscript",
|
889
|
-
|
888
|
+
{setter: "value; superscript(false) if superscript",
|
889
|
+
valid_values: [true, false]}],
|
890
890
|
[:superscript, false,
|
891
|
-
setter: "value; subscript(false) if subscript",
|
892
|
-
|
893
|
-
[:underline, false, valid_values: [true, false]],
|
894
|
-
[:strikeout, false, valid_values: [true, false]],
|
891
|
+
{setter: "value; subscript(false) if subscript",
|
892
|
+
valid_values: [true, false]}],
|
893
|
+
[:underline, false, {valid_values: [true, false]}],
|
894
|
+
[:strikeout, false, {valid_values: [true, false]}],
|
895
895
|
[:fill_color, "default_color"],
|
896
896
|
[:fill_alpha, 1],
|
897
897
|
[:stroke_color, "default_color"],
|
898
898
|
[:stroke_alpha, 1],
|
899
899
|
[:stroke_width, 1],
|
900
900
|
[:stroke_cap_style, "Content::LineCapStyle::BUTT_CAP",
|
901
|
-
setter: "Content::LineCapStyle.normalize(value)"],
|
901
|
+
{setter: "Content::LineCapStyle.normalize(value)"}],
|
902
902
|
[:stroke_join_style, "Content::LineJoinStyle::MITER_JOIN",
|
903
|
-
setter: "Content::LineJoinStyle.normalize(value)"],
|
903
|
+
{setter: "Content::LineJoinStyle.normalize(value)"}],
|
904
904
|
[:stroke_miter_limit, 10.0],
|
905
905
|
[:stroke_dash_pattern, "Content::LineDashPattern.new",
|
906
|
-
setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"],
|
907
|
-
[:align, :left, valid_values: [:left, :center, :right, :justify]],
|
908
|
-
[:valign, :top, valid_values: [:top, :center, :bottom]],
|
906
|
+
{setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"}],
|
907
|
+
[:align, :left, {valid_values: [:left, :center, :right, :justify]}],
|
908
|
+
[:valign, :top, {valid_values: [:top, :center, :bottom]}],
|
909
909
|
[:text_indent, 0],
|
910
910
|
[:line_spacing, "LineSpacing.new(type: :single)",
|
911
|
-
setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
|
912
|
-
|
913
|
-
[:last_line_gap, false, valid_values: [true, false]],
|
911
|
+
{setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
|
912
|
+
extra_args: ", extra_arg = nil"}],
|
913
|
+
[:last_line_gap, false, {valid_values: [true, false]}],
|
914
914
|
[:background_color, nil],
|
915
|
-
[:padding, "Quad.new(0)", setter: "Quad.new(value)"],
|
916
|
-
[:margin, "Quad.new(0)", setter: "Quad.new(value)"],
|
917
|
-
[:border, "Border.new", setter: "Border.new(**value)"],
|
918
|
-
[:overlays, "Layers.new", setter: "Layers.new(value)"],
|
919
|
-
[:underlays, "Layers.new", setter: "Layers.new(value)"],
|
920
|
-
[:position, :default, valid_values: [:default, :float, :flow, :absolute]],
|
915
|
+
[:padding, "Quad.new(0)", {setter: "Quad.new(value)"}],
|
916
|
+
[:margin, "Quad.new(0)", {setter: "Quad.new(value)"}],
|
917
|
+
[:border, "Border.new", {setter: "Border.new(**value)"}],
|
918
|
+
[:overlays, "Layers.new", {setter: "Layers.new(value)"}],
|
919
|
+
[:underlays, "Layers.new", {setter: "Layers.new(value)"}],
|
920
|
+
[:position, :default, {valid_values: [:default, :float, :flow, :absolute]}],
|
921
921
|
[:position_hint, nil],
|
922
922
|
].each do |name, default, options = {}|
|
923
923
|
default = default.inspect unless default.kind_of?(String)
|
@@ -1075,7 +1075,7 @@ module HexaPDF
|
|
1075
1075
|
# The item may be a (singleton) glyph object or an integer/float, i.e. items that can appear
|
1076
1076
|
# inside a TextFragment.
|
1077
1077
|
def scaled_item_width(item)
|
1078
|
-
@scaled_item_widths[item
|
1078
|
+
@scaled_item_widths[item] ||=
|
1079
1079
|
begin
|
1080
1080
|
if item.kind_of?(Numeric)
|
1081
1081
|
-item * scaled_font_size
|
@@ -388,7 +388,7 @@ module HexaPDF
|
|
388
388
|
end
|
389
389
|
when :penalty
|
390
390
|
if item.penalty <= -Penalty::INFINITY
|
391
|
-
add_box_item(item.item) if item.
|
391
|
+
add_box_item(item.item) if item.width > 0
|
392
392
|
break unless yield(create_unjustified_line, item)
|
393
393
|
reset_after_line_break(index + 1)
|
394
394
|
elsif item.penalty >= Penalty::INFINITY
|
@@ -458,7 +458,7 @@ module HexaPDF
|
|
458
458
|
end
|
459
459
|
when :penalty
|
460
460
|
if item.penalty <= -Penalty::INFINITY
|
461
|
-
add_box_item(item.item) if item.
|
461
|
+
add_box_item(item.item) if item.width > 0
|
462
462
|
break unless (action = yield(create_unjustified_line, item))
|
463
463
|
reset_after_line_break_variable_width(index + 1, true, action)
|
464
464
|
elsif item.penalty >= Penalty::INFINITY
|
@@ -68,9 +68,10 @@ module HexaPDF
|
|
68
68
|
text_fragment.clear_cache
|
69
69
|
end
|
70
70
|
if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
|
71
|
-
|
71
|
+
case font.font_type
|
72
|
+
when :TrueType
|
72
73
|
process_true_type_kerning(text_fragment)
|
73
|
-
|
74
|
+
when :Type1
|
74
75
|
process_type1_kerning(text_fragment)
|
75
76
|
end
|
76
77
|
text_fragment.clear_cache
|
data/lib/hexapdf/object.rb
CHANGED
@@ -122,9 +122,6 @@ module HexaPDF
|
|
122
122
|
|
123
123
|
include Comparable
|
124
124
|
|
125
|
-
# A list of classes whose objects cannot be duplicated.
|
126
|
-
NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
|
127
|
-
|
128
125
|
# :call-seq:
|
129
126
|
# HexaPDF::Object.deep_copy(object) -> copy
|
130
127
|
#
|
@@ -139,8 +136,6 @@ module HexaPDF
|
|
139
136
|
(object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
|
140
137
|
when HexaPDF::Reference
|
141
138
|
object
|
142
|
-
when *NOT_DUPLICATABLE_CLASSES
|
143
|
-
object
|
144
139
|
else
|
145
140
|
object.dup
|
146
141
|
end
|
@@ -251,29 +246,31 @@ module HexaPDF
|
|
251
246
|
end
|
252
247
|
|
253
248
|
# :call-seq:
|
254
|
-
# obj.validate(auto_correct: true)
|
255
|
-
# obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
|
249
|
+
# obj.validate(auto_correct: true) -> true or false
|
250
|
+
# obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
|
256
251
|
#
|
257
|
-
# Validates the object
|
258
|
-
#
|
259
|
-
# its documentation for more information.
|
252
|
+
# Validates the object, optionally corrects problems when the option +auto_correct+ is set and
|
253
|
+
# returns +true+ if the object is deemed valid and +false+ otherwise.
|
260
254
|
#
|
261
255
|
# If a block is given, it is called on validation problems with a problem description and
|
262
|
-
# whether the problem is correctable.
|
256
|
+
# whether the problem is automatically correctable. The third argument to the block is usually
|
257
|
+
# this object but may be another object if during auto-correction a new object was created and
|
258
|
+
# validated.
|
263
259
|
#
|
264
|
-
#
|
260
|
+
# The validation routine itself has to be implemented in the #perform_validation method - see
|
261
|
+
# its documentation for more information.
|
265
262
|
#
|
266
263
|
# *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
|
267
264
|
# currently implement the full PDF spec. However, if the return value is +false+, there is
|
268
265
|
# certainly a problem!
|
269
266
|
def validate(auto_correct: true)
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
true
|
267
|
+
result = true
|
268
|
+
perform_validation do |msg, correctable, object|
|
269
|
+
yield(msg, correctable, object || self) if block_given?
|
270
|
+
result = false unless correctable
|
271
|
+
return false unless auto_correct
|
276
272
|
end
|
273
|
+
result
|
277
274
|
end
|
278
275
|
|
279
276
|
# Makes a deep copy of the source PDF object and resets the object identifier.
|
@@ -287,6 +284,28 @@ module HexaPDF
|
|
287
284
|
obj
|
288
285
|
end
|
289
286
|
|
287
|
+
# Caches and returns the given +value+ or the value of the block under the given cache key. If
|
288
|
+
# there is already a cached value for the key and +update+ is +false+, it is just returned.
|
289
|
+
#
|
290
|
+
# Set +update+ to +true+ to force an update of the cached value.
|
291
|
+
#
|
292
|
+
# This uses Document#cache internally.
|
293
|
+
def cache(key, value = Document::UNSET, update: false, &block)
|
294
|
+
document.cache(@data, key, value, update: update, &block)
|
295
|
+
end
|
296
|
+
|
297
|
+
# Returns +true+ if there is a cached value for the given key.
|
298
|
+
#
|
299
|
+
# This uses Document#cached? internally.
|
300
|
+
def cached?(key)
|
301
|
+
document.cached?(@data, key)
|
302
|
+
end
|
303
|
+
|
304
|
+
# Clears the cache for this object.
|
305
|
+
def clear_cache
|
306
|
+
document.clear_cache(@data)
|
307
|
+
end
|
308
|
+
|
290
309
|
# Compares this object to another object.
|
291
310
|
#
|
292
311
|
# If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
|
@@ -339,17 +358,25 @@ module HexaPDF
|
|
339
358
|
# are also performed!
|
340
359
|
#
|
341
360
|
# When the validation routine finds that the object is invalid, it has to yield a problem
|
342
|
-
# description and whether the problem can be corrected.
|
343
|
-
#
|
344
|
-
#
|
361
|
+
# description and whether the problem can be corrected. An optional third argument may contain
|
362
|
+
# the object that gets validated if it is different from this object (may happen when
|
363
|
+
# auto-correction is used).
|
345
364
|
#
|
346
|
-
#
|
365
|
+
# After yielding, the problem has to be corrected if it is correctable. If it is not correctable
|
366
|
+
# and not correcting would lead to exceptions the method has to return early.
|
367
|
+
#
|
368
|
+
# Here is a sample validation routine for a dictionary object type:
|
347
369
|
#
|
348
370
|
# def perform_validation
|
349
371
|
# super
|
350
|
-
#
|
351
|
-
#
|
352
|
-
#
|
372
|
+
#
|
373
|
+
# if value[:SomeKey].length != 7
|
374
|
+
# yield("Length of /SomeKey is invalid")
|
375
|
+
# # No need to return early here because following check doesn't rely on /SomeKey
|
376
|
+
# end
|
377
|
+
#
|
378
|
+
# if value[:OtherKey].length % 2 == 0
|
379
|
+
# yield("/OtherKey needs to contain an odd number of elements")
|
353
380
|
# end
|
354
381
|
# end
|
355
382
|
def perform_validation(&block)
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -59,6 +59,7 @@ module HexaPDF
|
|
59
59
|
@tokenizer = Tokenizer.new(io)
|
60
60
|
@document = document
|
61
61
|
@object_stream_data = {}
|
62
|
+
@reconstructed_revision = nil
|
62
63
|
retrieve_pdf_header_offset_and_version
|
63
64
|
end
|
64
65
|
|
@@ -86,6 +87,8 @@ module HexaPDF
|
|
86
87
|
end
|
87
88
|
|
88
89
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
90
|
+
rescue HexaPDF::MalformedPDFError
|
91
|
+
reconstructed_revision.object(xref_entry)
|
89
92
|
end
|
90
93
|
|
91
94
|
# Parses the indirect object at the specified offset.
|
@@ -110,7 +113,15 @@ module HexaPDF
|
|
110
113
|
maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
|
111
114
|
object = nil
|
112
115
|
else
|
113
|
-
|
116
|
+
begin
|
117
|
+
object = @tokenizer.next_object
|
118
|
+
rescue MalformedPDFError
|
119
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
120
|
+
maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
|
121
|
+
force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
|
122
|
+
object = tok.to_i
|
123
|
+
@tokenizer.pos -= 6
|
124
|
+
end
|
114
125
|
end
|
115
126
|
|
116
127
|
tok = @tokenizer.next_token
|
@@ -235,14 +246,14 @@ module HexaPDF
|
|
235
246
|
@tokenizer.skip_whitespace
|
236
247
|
start.upto(start + number_of_entries - 1) do |oid|
|
237
248
|
pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
|
238
|
-
maybe_raise("Invalid cross-reference
|
239
|
-
force: matched_size
|
249
|
+
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
250
|
+
force: !matched_size)
|
240
251
|
end
|
241
252
|
if xref.entry?(oid)
|
242
253
|
next
|
243
254
|
elsif type == 'n'
|
244
255
|
if pos == 0 || gen > 65535
|
245
|
-
maybe_raise("Invalid in use cross-reference entry
|
256
|
+
maybe_raise("Invalid in use cross-reference entry",
|
246
257
|
pos: @tokenizer.pos)
|
247
258
|
xref.add_free_entry(oid, gen)
|
248
259
|
else
|
@@ -264,6 +275,27 @@ module HexaPDF
|
|
264
275
|
raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
|
265
276
|
end
|
266
277
|
|
278
|
+
unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
|
279
|
+
first_entry = xref[xref.oids[0]]
|
280
|
+
test_entry = xref[xref.oids[-1]]
|
281
|
+
@tokenizer.pos = test_entry.pos + @header_offset
|
282
|
+
test_oid = @tokenizer.next_token
|
283
|
+
first_oid = first_entry.oid
|
284
|
+
|
285
|
+
force_failure = !first_entry.free? || first_entry.gen != 65535 ||
|
286
|
+
!test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
|
287
|
+
maybe_raise("Main cross-reference section has invalid numbering",
|
288
|
+
pos: offset + @header_offset, force: force_failure)
|
289
|
+
|
290
|
+
new_xref = XRefSection.new
|
291
|
+
xref.oids.each do |oid|
|
292
|
+
entry = xref[oid]
|
293
|
+
entry.oid -= first_oid
|
294
|
+
new_xref.send(:[]=, entry.oid, entry.gen, entry)
|
295
|
+
end
|
296
|
+
xref = new_xref
|
297
|
+
end
|
298
|
+
|
267
299
|
[xref, trailer]
|
268
300
|
end
|
269
301
|
|
@@ -313,6 +345,11 @@ module HexaPDF
|
|
313
345
|
@startxref_offset = lines[eof_index - 1].to_i
|
314
346
|
end
|
315
347
|
|
348
|
+
# Returns the reconstructed revision.
|
349
|
+
def reconstructed_revision
|
350
|
+
@reconstructed_revision ||= reconstruct_revision
|
351
|
+
end
|
352
|
+
|
316
353
|
# Returns the PDF version number that is stored in the file header.
|
317
354
|
#
|
318
355
|
# See: PDF1.7 s7.5.2
|
@@ -338,6 +375,61 @@ module HexaPDF
|
|
338
375
|
@header_version = $1
|
339
376
|
end
|
340
377
|
|
378
|
+
# Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
|
379
|
+
# file and returning a Revision object for loading the found objects.
|
380
|
+
#
|
381
|
+
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
382
|
+
# cross-reference table, later objects overwriting prior ones.
|
383
|
+
def reconstruct_revision
|
384
|
+
raise unless @document.config['parser.try_xref_reconstruction']
|
385
|
+
msg = "#{$!} - trying cross-reference table reconstruction"
|
386
|
+
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
387
|
+
|
388
|
+
xref = XRefSection.new
|
389
|
+
@tokenizer.pos = 0
|
390
|
+
while true
|
391
|
+
@tokenizer.skip_whitespace
|
392
|
+
pos = @tokenizer.pos
|
393
|
+
@tokenizer.scan_until(/(\n|\r\n?)+/)
|
394
|
+
next_new_line_pos = @tokenizer.pos
|
395
|
+
@tokenizer.pos = pos
|
396
|
+
|
397
|
+
token = @tokenizer.next_token rescue nil
|
398
|
+
if token.kind_of?(Integer)
|
399
|
+
gen = @tokenizer.next_token rescue nil
|
400
|
+
tok = @tokenizer.next_token rescue nil
|
401
|
+
if @tokenizer.pos > next_new_line_pos
|
402
|
+
@tokenizer.pos = next_new_line_pos
|
403
|
+
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
404
|
+
xref.add_in_use_entry(token, gen, pos)
|
405
|
+
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
406
|
+
end
|
407
|
+
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
408
|
+
obj = @tokenizer.next_object rescue nil
|
409
|
+
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
410
|
+
# linearized file.
|
411
|
+
trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
|
412
|
+
elsif token == Tokenizer::NO_MORE_TOKENS
|
413
|
+
break
|
414
|
+
else
|
415
|
+
@tokenizer.pos = next_new_line_pos
|
416
|
+
end
|
417
|
+
end
|
418
|
+
|
419
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
420
|
+
if !trailer || trailer.empty?
|
421
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
422
|
+
end
|
423
|
+
|
424
|
+
loader = lambda do |xref_entry|
|
425
|
+
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
426
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
427
|
+
end
|
428
|
+
|
429
|
+
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
430
|
+
loader: loader)
|
431
|
+
end
|
432
|
+
|
341
433
|
# Raises a HexaPDF::MalformedPDFError with the given message and source position.
|
342
434
|
def raise_malformed(msg, pos: nil)
|
343
435
|
raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
|
data/lib/hexapdf/pdf_array.rb
CHANGED
@@ -66,7 +66,7 @@ module HexaPDF
|
|
66
66
|
# subclasses of HexaPDF::Object are returned as is (it makes no sense, for example, to return
|
67
67
|
# the hash that describes the Catalog instead of the Catalog object).
|
68
68
|
def [](arg1, arg2 = nil)
|
69
|
-
data = value[arg1,
|
69
|
+
data = arg2 ? value[arg1, arg2] : value[arg1]
|
70
70
|
return if data.nil?
|
71
71
|
|
72
72
|
if arg2 || arg1.kind_of?(Range)
|
@@ -83,7 +83,7 @@ module HexaPDF
|
|
83
83
|
# subclasses) and the given data has not (including subclasses), the data is stored inside the
|
84
84
|
# HexaPDF::Object.
|
85
85
|
def []=(index, data)
|
86
|
-
if value[index].
|
86
|
+
if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
|
87
87
|
!data.kind_of?(HexaPDF::Reference)
|
88
88
|
value[index].value = data
|
89
89
|
else
|
@@ -113,6 +113,13 @@ module HexaPDF
|
|
113
113
|
value.delete_at(index)
|
114
114
|
end
|
115
115
|
|
116
|
+
# Deletes all values from the PDFArray that are equal to the given object.
|
117
|
+
#
|
118
|
+
# Returns the last deleted item, or +nil+ if no matching item is found.
|
119
|
+
def delete(object)
|
120
|
+
value.delete(object)
|
121
|
+
end
|
122
|
+
|
116
123
|
# :call-seq:
|
117
124
|
# array.slice!(index) -> obj or nil
|
118
125
|
# array.slice!(start, length) -> new_array or nil
|
@@ -174,9 +181,9 @@ module HexaPDF
|
|
174
181
|
self
|
175
182
|
end
|
176
183
|
|
177
|
-
# Returns
|
184
|
+
# Returns an array containing the preprocessed values (like in #[]).
|
178
185
|
def to_ary
|
179
|
-
|
186
|
+
each.to_a
|
180
187
|
end
|
181
188
|
|
182
189
|
private
|
@@ -196,7 +203,7 @@ module HexaPDF
|
|
196
203
|
data = document.deref(data)
|
197
204
|
value[index] = data if index
|
198
205
|
end
|
199
|
-
if data.
|
206
|
+
if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
|
200
207
|
data = data.value
|
201
208
|
end
|
202
209
|
data
|