hexapdf 0.12.3 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +132 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/dictionary.rb +12 -6
- data/lib/hexapdf/dictionary_fields.rb +2 -10
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/true_type/table/post.rb +15 -10
- data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
- data/lib/hexapdf/font_loader/from_file.rb +18 -8
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/importer.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +107 -7
- data/lib/hexapdf/pdf_array.rb +15 -5
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +37 -10
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +22 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
- data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +6 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +21 -12
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +5 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
- data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
- data/test/hexapdf/font_loader/test_from_file.rb +7 -0
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +8 -1
- data/test/hexapdf/test_dictionary_fields.rb +9 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +205 -51
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_serializer.rb +7 -0
- data/test/hexapdf/test_tokenizer.rb +28 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
- data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +8 -1
- data/test/hexapdf/type/test_page.rb +8 -1
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -12
|
@@ -162,9 +162,10 @@ module HexaPDF
|
|
|
162
162
|
io.seek(length, IO::SEEK_CUR)
|
|
163
163
|
end
|
|
164
164
|
when 'tRNS' # PNG s11.3.2
|
|
165
|
-
|
|
165
|
+
case @color_type
|
|
166
|
+
when INDEXED
|
|
166
167
|
trns = io.read(length).unpack('C*')
|
|
167
|
-
|
|
168
|
+
when TRUECOLOR, GREYSCALE
|
|
168
169
|
dict[:Mask] = io.read(length).unpack('n*').map {|val| [val, val] }.flatten
|
|
169
170
|
else
|
|
170
171
|
io.seek(length, IO::SEEK_CUR)
|
data/lib/hexapdf/importer.rb
CHANGED
|
@@ -90,7 +90,7 @@ module HexaPDF
|
|
|
90
90
|
#
|
|
91
91
|
# An error is raised if the object doesn't belong to the +source+ document.
|
|
92
92
|
def import(object)
|
|
93
|
-
mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
|
|
93
|
+
mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
|
|
94
94
|
if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
|
|
95
95
|
raise HexaPDF::Error, "Import error: Incorrect document object for importer"
|
|
96
96
|
elsif mapped_object && mapped_object == @destination.object(mapped_object)
|
|
@@ -118,7 +118,8 @@ module HexaPDF
|
|
|
118
118
|
if object.type == :Catalog || object.type == :Pages
|
|
119
119
|
@mapper[object.data] = nil
|
|
120
120
|
else
|
|
121
|
-
obj =
|
|
121
|
+
obj = object.dup
|
|
122
|
+
@mapper[object.data] = NullableWeakRef.new(obj)
|
|
122
123
|
obj.document = @destination.__getobj__
|
|
123
124
|
obj.instance_variable_set(:@data, obj.data.dup)
|
|
124
125
|
obj.data.oid = 0
|
data/lib/hexapdf/layout/line.rb
CHANGED
|
@@ -198,7 +198,7 @@ module HexaPDF
|
|
|
198
198
|
# Note: The cache is not cleared!
|
|
199
199
|
def add(item)
|
|
200
200
|
last = @items.last
|
|
201
|
-
if last.
|
|
201
|
+
if last.instance_of?(item.class) && item.kind_of?(TextFragment) && last.style == item.style
|
|
202
202
|
if last.items.frozen?
|
|
203
203
|
@items[-1] = last = last.dup
|
|
204
204
|
last.items = last.items.dup
|
data/lib/hexapdf/layout/style.rb
CHANGED
|
@@ -524,7 +524,7 @@ module HexaPDF
|
|
|
524
524
|
# Style.new(font_size: 15, align: :center, valign: center)
|
|
525
525
|
def initialize(**properties)
|
|
526
526
|
update(**properties)
|
|
527
|
-
@scaled_item_widths = {}
|
|
527
|
+
@scaled_item_widths = {}.compare_by_identity
|
|
528
528
|
end
|
|
529
529
|
|
|
530
530
|
# Duplicates the complex properties that can be modified, as well as the cache.
|
|
@@ -883,41 +883,41 @@ module HexaPDF
|
|
|
883
883
|
[:text_rise, 0],
|
|
884
884
|
[:font_features, {}],
|
|
885
885
|
[:text_rendering_mode, "Content::TextRenderingMode::FILL",
|
|
886
|
-
setter: "Content::TextRenderingMode.normalize(value)"],
|
|
886
|
+
{setter: "Content::TextRenderingMode.normalize(value)"}],
|
|
887
887
|
[:subscript, false,
|
|
888
|
-
setter: "value; superscript(false) if superscript",
|
|
889
|
-
|
|
888
|
+
{setter: "value; superscript(false) if superscript",
|
|
889
|
+
valid_values: [true, false]}],
|
|
890
890
|
[:superscript, false,
|
|
891
|
-
setter: "value; subscript(false) if subscript",
|
|
892
|
-
|
|
893
|
-
[:underline, false, valid_values: [true, false]],
|
|
894
|
-
[:strikeout, false, valid_values: [true, false]],
|
|
891
|
+
{setter: "value; subscript(false) if subscript",
|
|
892
|
+
valid_values: [true, false]}],
|
|
893
|
+
[:underline, false, {valid_values: [true, false]}],
|
|
894
|
+
[:strikeout, false, {valid_values: [true, false]}],
|
|
895
895
|
[:fill_color, "default_color"],
|
|
896
896
|
[:fill_alpha, 1],
|
|
897
897
|
[:stroke_color, "default_color"],
|
|
898
898
|
[:stroke_alpha, 1],
|
|
899
899
|
[:stroke_width, 1],
|
|
900
900
|
[:stroke_cap_style, "Content::LineCapStyle::BUTT_CAP",
|
|
901
|
-
setter: "Content::LineCapStyle.normalize(value)"],
|
|
901
|
+
{setter: "Content::LineCapStyle.normalize(value)"}],
|
|
902
902
|
[:stroke_join_style, "Content::LineJoinStyle::MITER_JOIN",
|
|
903
|
-
setter: "Content::LineJoinStyle.normalize(value)"],
|
|
903
|
+
{setter: "Content::LineJoinStyle.normalize(value)"}],
|
|
904
904
|
[:stroke_miter_limit, 10.0],
|
|
905
905
|
[:stroke_dash_pattern, "Content::LineDashPattern.new",
|
|
906
|
-
setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"],
|
|
907
|
-
[:align, :left, valid_values: [:left, :center, :right, :justify]],
|
|
908
|
-
[:valign, :top, valid_values: [:top, :center, :bottom]],
|
|
906
|
+
{setter: "Content::LineDashPattern.normalize(value, phase)", extra_args: ", phase = 0"}],
|
|
907
|
+
[:align, :left, {valid_values: [:left, :center, :right, :justify]}],
|
|
908
|
+
[:valign, :top, {valid_values: [:top, :center, :bottom]}],
|
|
909
909
|
[:text_indent, 0],
|
|
910
910
|
[:line_spacing, "LineSpacing.new(type: :single)",
|
|
911
|
-
setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
|
|
912
|
-
|
|
913
|
-
[:last_line_gap, false, valid_values: [true, false]],
|
|
911
|
+
{setter: "LineSpacing.new(**(value.kind_of?(Symbol) ? {type: value, value: extra_arg} : value))",
|
|
912
|
+
extra_args: ", extra_arg = nil"}],
|
|
913
|
+
[:last_line_gap, false, {valid_values: [true, false]}],
|
|
914
914
|
[:background_color, nil],
|
|
915
|
-
[:padding, "Quad.new(0)", setter: "Quad.new(value)"],
|
|
916
|
-
[:margin, "Quad.new(0)", setter: "Quad.new(value)"],
|
|
917
|
-
[:border, "Border.new", setter: "Border.new(**value)"],
|
|
918
|
-
[:overlays, "Layers.new", setter: "Layers.new(value)"],
|
|
919
|
-
[:underlays, "Layers.new", setter: "Layers.new(value)"],
|
|
920
|
-
[:position, :default, valid_values: [:default, :float, :flow, :absolute]],
|
|
915
|
+
[:padding, "Quad.new(0)", {setter: "Quad.new(value)"}],
|
|
916
|
+
[:margin, "Quad.new(0)", {setter: "Quad.new(value)"}],
|
|
917
|
+
[:border, "Border.new", {setter: "Border.new(**value)"}],
|
|
918
|
+
[:overlays, "Layers.new", {setter: "Layers.new(value)"}],
|
|
919
|
+
[:underlays, "Layers.new", {setter: "Layers.new(value)"}],
|
|
920
|
+
[:position, :default, {valid_values: [:default, :float, :flow, :absolute]}],
|
|
921
921
|
[:position_hint, nil],
|
|
922
922
|
].each do |name, default, options = {}|
|
|
923
923
|
default = default.inspect unless default.kind_of?(String)
|
|
@@ -1075,7 +1075,7 @@ module HexaPDF
|
|
|
1075
1075
|
# The item may be a (singleton) glyph object or an integer/float, i.e. items that can appear
|
|
1076
1076
|
# inside a TextFragment.
|
|
1077
1077
|
def scaled_item_width(item)
|
|
1078
|
-
@scaled_item_widths[item
|
|
1078
|
+
@scaled_item_widths[item] ||=
|
|
1079
1079
|
begin
|
|
1080
1080
|
if item.kind_of?(Numeric)
|
|
1081
1081
|
-item * scaled_font_size
|
|
@@ -388,7 +388,7 @@ module HexaPDF
|
|
|
388
388
|
end
|
|
389
389
|
when :penalty
|
|
390
390
|
if item.penalty <= -Penalty::INFINITY
|
|
391
|
-
add_box_item(item.item) if item.
|
|
391
|
+
add_box_item(item.item) if item.width > 0
|
|
392
392
|
break unless yield(create_unjustified_line, item)
|
|
393
393
|
reset_after_line_break(index + 1)
|
|
394
394
|
elsif item.penalty >= Penalty::INFINITY
|
|
@@ -458,7 +458,7 @@ module HexaPDF
|
|
|
458
458
|
end
|
|
459
459
|
when :penalty
|
|
460
460
|
if item.penalty <= -Penalty::INFINITY
|
|
461
|
-
add_box_item(item.item) if item.
|
|
461
|
+
add_box_item(item.item) if item.width > 0
|
|
462
462
|
break unless (action = yield(create_unjustified_line, item))
|
|
463
463
|
reset_after_line_break_variable_width(index + 1, true, action)
|
|
464
464
|
elsif item.penalty >= Penalty::INFINITY
|
|
@@ -68,9 +68,10 @@ module HexaPDF
|
|
|
68
68
|
text_fragment.clear_cache
|
|
69
69
|
end
|
|
70
70
|
if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
|
|
71
|
-
|
|
71
|
+
case font.font_type
|
|
72
|
+
when :TrueType
|
|
72
73
|
process_true_type_kerning(text_fragment)
|
|
73
|
-
|
|
74
|
+
when :Type1
|
|
74
75
|
process_type1_kerning(text_fragment)
|
|
75
76
|
end
|
|
76
77
|
text_fragment.clear_cache
|
data/lib/hexapdf/object.rb
CHANGED
|
@@ -122,9 +122,6 @@ module HexaPDF
|
|
|
122
122
|
|
|
123
123
|
include Comparable
|
|
124
124
|
|
|
125
|
-
# A list of classes whose objects cannot be duplicated.
|
|
126
|
-
NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
|
|
127
|
-
|
|
128
125
|
# :call-seq:
|
|
129
126
|
# HexaPDF::Object.deep_copy(object) -> copy
|
|
130
127
|
#
|
|
@@ -139,8 +136,6 @@ module HexaPDF
|
|
|
139
136
|
(object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
|
|
140
137
|
when HexaPDF::Reference
|
|
141
138
|
object
|
|
142
|
-
when *NOT_DUPLICATABLE_CLASSES
|
|
143
|
-
object
|
|
144
139
|
else
|
|
145
140
|
object.dup
|
|
146
141
|
end
|
|
@@ -251,29 +246,31 @@ module HexaPDF
|
|
|
251
246
|
end
|
|
252
247
|
|
|
253
248
|
# :call-seq:
|
|
254
|
-
# obj.validate(auto_correct: true)
|
|
255
|
-
# obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
|
|
249
|
+
# obj.validate(auto_correct: true) -> true or false
|
|
250
|
+
# obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
|
|
256
251
|
#
|
|
257
|
-
# Validates the object
|
|
258
|
-
#
|
|
259
|
-
# its documentation for more information.
|
|
252
|
+
# Validates the object, optionally corrects problems when the option +auto_correct+ is set and
|
|
253
|
+
# returns +true+ if the object is deemed valid and +false+ otherwise.
|
|
260
254
|
#
|
|
261
255
|
# If a block is given, it is called on validation problems with a problem description and
|
|
262
|
-
# whether the problem is correctable.
|
|
256
|
+
# whether the problem is automatically correctable. The third argument to the block is usually
|
|
257
|
+
# this object but may be another object if during auto-correction a new object was created and
|
|
258
|
+
# validated.
|
|
263
259
|
#
|
|
264
|
-
#
|
|
260
|
+
# The validation routine itself has to be implemented in the #perform_validation method - see
|
|
261
|
+
# its documentation for more information.
|
|
265
262
|
#
|
|
266
263
|
# *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
|
|
267
264
|
# currently implement the full PDF spec. However, if the return value is +false+, there is
|
|
268
265
|
# certainly a problem!
|
|
269
266
|
def validate(auto_correct: true)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
true
|
|
267
|
+
result = true
|
|
268
|
+
perform_validation do |msg, correctable, object|
|
|
269
|
+
yield(msg, correctable, object || self) if block_given?
|
|
270
|
+
result = false unless correctable
|
|
271
|
+
return false unless auto_correct
|
|
276
272
|
end
|
|
273
|
+
result
|
|
277
274
|
end
|
|
278
275
|
|
|
279
276
|
# Makes a deep copy of the source PDF object and resets the object identifier.
|
|
@@ -287,6 +284,28 @@ module HexaPDF
|
|
|
287
284
|
obj
|
|
288
285
|
end
|
|
289
286
|
|
|
287
|
+
# Caches and returns the given +value+ or the value of the block under the given cache key. If
|
|
288
|
+
# there is already a cached value for the key and +update+ is +false+, it is just returned.
|
|
289
|
+
#
|
|
290
|
+
# Set +update+ to +true+ to force an update of the cached value.
|
|
291
|
+
#
|
|
292
|
+
# This uses Document#cache internally.
|
|
293
|
+
def cache(key, value = Document::UNSET, update: false, &block)
|
|
294
|
+
document.cache(@data, key, value, update: update, &block)
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Returns +true+ if there is a cached value for the given key.
|
|
298
|
+
#
|
|
299
|
+
# This uses Document#cached? internally.
|
|
300
|
+
def cached?(key)
|
|
301
|
+
document.cached?(@data, key)
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# Clears the cache for this object.
|
|
305
|
+
def clear_cache
|
|
306
|
+
document.clear_cache(@data)
|
|
307
|
+
end
|
|
308
|
+
|
|
290
309
|
# Compares this object to another object.
|
|
291
310
|
#
|
|
292
311
|
# If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
|
|
@@ -339,17 +358,25 @@ module HexaPDF
|
|
|
339
358
|
# are also performed!
|
|
340
359
|
#
|
|
341
360
|
# When the validation routine finds that the object is invalid, it has to yield a problem
|
|
342
|
-
# description and whether the problem can be corrected.
|
|
343
|
-
#
|
|
344
|
-
#
|
|
361
|
+
# description and whether the problem can be corrected. An optional third argument may contain
|
|
362
|
+
# the object that gets validated if it is different from this object (may happen when
|
|
363
|
+
# auto-correction is used).
|
|
345
364
|
#
|
|
346
|
-
#
|
|
365
|
+
# After yielding, the problem has to be corrected if it is correctable. If it is not correctable
|
|
366
|
+
# and not correcting would lead to exceptions the method has to return early.
|
|
367
|
+
#
|
|
368
|
+
# Here is a sample validation routine for a dictionary object type:
|
|
347
369
|
#
|
|
348
370
|
# def perform_validation
|
|
349
371
|
# super
|
|
350
|
-
#
|
|
351
|
-
#
|
|
352
|
-
#
|
|
372
|
+
#
|
|
373
|
+
# if value[:SomeKey].length != 7
|
|
374
|
+
# yield("Length of /SomeKey is invalid")
|
|
375
|
+
# # No need to return early here because following check doesn't rely on /SomeKey
|
|
376
|
+
# end
|
|
377
|
+
#
|
|
378
|
+
# if value[:OtherKey] % 2 == 0
|
|
379
|
+
# yield("/OtherKey needs to contain an odd number of elements")
|
|
353
380
|
# end
|
|
354
381
|
# end
|
|
355
382
|
def perform_validation(&block)
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -59,6 +59,7 @@ module HexaPDF
|
|
|
59
59
|
@tokenizer = Tokenizer.new(io)
|
|
60
60
|
@document = document
|
|
61
61
|
@object_stream_data = {}
|
|
62
|
+
@reconstructed_revision = nil
|
|
62
63
|
retrieve_pdf_header_offset_and_version
|
|
63
64
|
end
|
|
64
65
|
|
|
@@ -71,7 +72,13 @@ module HexaPDF
|
|
|
71
72
|
obj, oid, gen, stream =
|
|
72
73
|
case xref_entry.type
|
|
73
74
|
when :in_use
|
|
74
|
-
|
|
75
|
+
if xref_entry.pos == 0 && xref_entry.oid != 0
|
|
76
|
+
# Handle seen-in-the-wild objects with invalid offset 0
|
|
77
|
+
maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
|
|
78
|
+
[nil, xref_entry.oid, xref_entry.gen, nil]
|
|
79
|
+
else
|
|
80
|
+
parse_indirect_object(xref_entry.pos)
|
|
81
|
+
end
|
|
75
82
|
when :free
|
|
76
83
|
[nil, xref_entry.oid, xref_entry.gen, nil]
|
|
77
84
|
when :compressed
|
|
@@ -82,10 +89,12 @@ module HexaPDF
|
|
|
82
89
|
|
|
83
90
|
if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
|
|
84
91
|
raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
|
|
85
|
-
|
|
92
|
+
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
|
|
86
93
|
end
|
|
87
94
|
|
|
88
95
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
96
|
+
rescue HexaPDF::MalformedPDFError
|
|
97
|
+
reconstructed_revision.object(xref_entry)
|
|
89
98
|
end
|
|
90
99
|
|
|
91
100
|
# Parses the indirect object at the specified offset.
|
|
@@ -110,7 +119,15 @@ module HexaPDF
|
|
|
110
119
|
maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
|
|
111
120
|
object = nil
|
|
112
121
|
else
|
|
113
|
-
|
|
122
|
+
begin
|
|
123
|
+
object = @tokenizer.next_object
|
|
124
|
+
rescue MalformedPDFError
|
|
125
|
+
# Handle often found invalid indirect object with missing whitespace after number
|
|
126
|
+
maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
|
|
127
|
+
force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
|
|
128
|
+
object = tok.to_i
|
|
129
|
+
@tokenizer.pos -= 6
|
|
130
|
+
end
|
|
114
131
|
end
|
|
115
132
|
|
|
116
133
|
tok = @tokenizer.next_token
|
|
@@ -122,7 +139,9 @@ module HexaPDF
|
|
|
122
139
|
tok1 = @tokenizer.next_byte
|
|
123
140
|
tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
|
|
124
141
|
if tok1 != 10 && tok1 != 13
|
|
125
|
-
|
|
142
|
+
tok2 = @tokenizer.next_byte
|
|
143
|
+
maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
|
|
144
|
+
force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
|
|
126
145
|
elsif tok1 == 13 && tok2 != 10
|
|
127
146
|
maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
|
|
128
147
|
pos: @tokenizer.pos)
|
|
@@ -235,14 +254,14 @@ module HexaPDF
|
|
|
235
254
|
@tokenizer.skip_whitespace
|
|
236
255
|
start.upto(start + number_of_entries - 1) do |oid|
|
|
237
256
|
pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
|
|
238
|
-
maybe_raise("Invalid cross-reference
|
|
239
|
-
force: matched_size
|
|
257
|
+
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
|
258
|
+
force: !matched_size)
|
|
240
259
|
end
|
|
241
260
|
if xref.entry?(oid)
|
|
242
261
|
next
|
|
243
262
|
elsif type == 'n'
|
|
244
263
|
if pos == 0 || gen > 65535
|
|
245
|
-
maybe_raise("Invalid in use cross-reference entry
|
|
264
|
+
maybe_raise("Invalid in use cross-reference entry",
|
|
246
265
|
pos: @tokenizer.pos)
|
|
247
266
|
xref.add_free_entry(oid, gen)
|
|
248
267
|
else
|
|
@@ -264,6 +283,27 @@ module HexaPDF
|
|
|
264
283
|
raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
|
|
265
284
|
end
|
|
266
285
|
|
|
286
|
+
unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
|
|
287
|
+
first_entry = xref[xref.oids[0]]
|
|
288
|
+
test_entry = xref[xref.oids[-1]]
|
|
289
|
+
@tokenizer.pos = test_entry.pos + @header_offset
|
|
290
|
+
test_oid = @tokenizer.next_token
|
|
291
|
+
first_oid = first_entry.oid
|
|
292
|
+
|
|
293
|
+
force_failure = !first_entry.free? || first_entry.gen != 65535 ||
|
|
294
|
+
!test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
|
|
295
|
+
maybe_raise("Main cross-reference section has invalid numbering",
|
|
296
|
+
pos: offset + @header_offset, force: force_failure)
|
|
297
|
+
|
|
298
|
+
new_xref = XRefSection.new
|
|
299
|
+
xref.oids.each do |oid|
|
|
300
|
+
entry = xref[oid]
|
|
301
|
+
entry.oid -= first_oid
|
|
302
|
+
new_xref.send(:[]=, entry.oid, entry.gen, entry)
|
|
303
|
+
end
|
|
304
|
+
xref = new_xref
|
|
305
|
+
end
|
|
306
|
+
|
|
267
307
|
[xref, trailer]
|
|
268
308
|
end
|
|
269
309
|
|
|
@@ -313,6 +353,11 @@ module HexaPDF
|
|
|
313
353
|
@startxref_offset = lines[eof_index - 1].to_i
|
|
314
354
|
end
|
|
315
355
|
|
|
356
|
+
# Returns the reconstructed revision.
|
|
357
|
+
def reconstructed_revision
|
|
358
|
+
@reconstructed_revision ||= reconstruct_revision
|
|
359
|
+
end
|
|
360
|
+
|
|
316
361
|
# Returns the PDF version number that is stored in the file header.
|
|
317
362
|
#
|
|
318
363
|
# See: PDF1.7 s7.5.2
|
|
@@ -338,6 +383,61 @@ module HexaPDF
|
|
|
338
383
|
@header_version = $1
|
|
339
384
|
end
|
|
340
385
|
|
|
386
|
+
# Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
|
|
387
|
+
# file and returning a Revision object for loading the found objects.
|
|
388
|
+
#
|
|
389
|
+
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
|
390
|
+
# cross-reference table, later objects overwriting prior ones.
|
|
391
|
+
def reconstruct_revision
|
|
392
|
+
raise unless @document.config['parser.try_xref_reconstruction']
|
|
393
|
+
msg = "#{$!} - trying cross-reference table reconstruction"
|
|
394
|
+
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
|
395
|
+
|
|
396
|
+
xref = XRefSection.new
|
|
397
|
+
@tokenizer.pos = 0
|
|
398
|
+
while true
|
|
399
|
+
@tokenizer.skip_whitespace
|
|
400
|
+
pos = @tokenizer.pos
|
|
401
|
+
@tokenizer.scan_until(/(\n|\r\n?)+|\z/)
|
|
402
|
+
next_new_line_pos = @tokenizer.pos
|
|
403
|
+
@tokenizer.pos = pos
|
|
404
|
+
|
|
405
|
+
token = @tokenizer.next_integer_or_keyword rescue nil
|
|
406
|
+
if token.kind_of?(Integer)
|
|
407
|
+
gen = @tokenizer.next_integer_or_keyword rescue nil
|
|
408
|
+
tok = @tokenizer.next_integer_or_keyword rescue nil
|
|
409
|
+
if @tokenizer.pos > next_new_line_pos
|
|
410
|
+
@tokenizer.pos = next_new_line_pos
|
|
411
|
+
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
|
412
|
+
xref.add_in_use_entry(token, gen, pos)
|
|
413
|
+
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
|
414
|
+
end
|
|
415
|
+
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
|
416
|
+
obj = @tokenizer.next_object rescue nil
|
|
417
|
+
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
|
418
|
+
# linearized file.
|
|
419
|
+
trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
|
|
420
|
+
elsif token == Tokenizer::NO_MORE_TOKENS
|
|
421
|
+
break
|
|
422
|
+
else
|
|
423
|
+
@tokenizer.pos = next_new_line_pos
|
|
424
|
+
end
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
|
428
|
+
if !trailer || trailer.empty?
|
|
429
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
loader = lambda do |xref_entry|
|
|
433
|
+
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
|
434
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
|
438
|
+
loader: loader)
|
|
439
|
+
end
|
|
440
|
+
|
|
341
441
|
# Raises a HexaPDF::MalformedPDFError with the given message and source position.
|
|
342
442
|
def raise_malformed(msg, pos: nil)
|
|
343
443
|
raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
|