hexapdf 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +126 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/encoding/base.rb +8 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +87 -3
- data/lib/hexapdf/pdf_array.rb +11 -4
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/encoding/test_base.rb +10 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +159 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
|
@@ -68,9 +68,10 @@ module HexaPDF
|
|
|
68
68
|
text_fragment.clear_cache
|
|
69
69
|
end
|
|
70
70
|
if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
|
|
71
|
-
|
|
71
|
+
case font.font_type
|
|
72
|
+
when :TrueType
|
|
72
73
|
process_true_type_kerning(text_fragment)
|
|
73
|
-
|
|
74
|
+
when :Type1
|
|
74
75
|
process_type1_kerning(text_fragment)
|
|
75
76
|
end
|
|
76
77
|
text_fragment.clear_cache
|
data/lib/hexapdf/object.rb
CHANGED
|
@@ -122,9 +122,6 @@ module HexaPDF
|
|
|
122
122
|
|
|
123
123
|
include Comparable
|
|
124
124
|
|
|
125
|
-
# A list of classes whose objects cannot be duplicated.
|
|
126
|
-
NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
|
|
127
|
-
|
|
128
125
|
# :call-seq:
|
|
129
126
|
# HexaPDF::Object.deep_copy(object) -> copy
|
|
130
127
|
#
|
|
@@ -139,8 +136,6 @@ module HexaPDF
|
|
|
139
136
|
(object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
|
|
140
137
|
when HexaPDF::Reference
|
|
141
138
|
object
|
|
142
|
-
when *NOT_DUPLICATABLE_CLASSES
|
|
143
|
-
object
|
|
144
139
|
else
|
|
145
140
|
object.dup
|
|
146
141
|
end
|
|
@@ -251,29 +246,31 @@ module HexaPDF
|
|
|
251
246
|
end
|
|
252
247
|
|
|
253
248
|
# :call-seq:
|
|
254
|
-
# obj.validate(auto_correct: true)
|
|
255
|
-
# obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
|
|
249
|
+
# obj.validate(auto_correct: true) -> true or false
|
|
250
|
+
# obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
|
|
256
251
|
#
|
|
257
|
-
# Validates the object
|
|
258
|
-
#
|
|
259
|
-
# its documentation for more information.
|
|
252
|
+
# Validates the object, optionally corrects problems when the option +auto_correct+ is set and
|
|
253
|
+
# returns +true+ if the object is deemed valid and +false+ otherwise.
|
|
260
254
|
#
|
|
261
255
|
# If a block is given, it is called on validation problems with a problem description and
|
|
262
|
-
# whether the problem is correctable.
|
|
256
|
+
# whether the problem is automatically correctable. The third argument to the block is usually
|
|
257
|
+
# this object but may be another object if during auto-correction a new object was created and
|
|
258
|
+
# validated.
|
|
263
259
|
#
|
|
264
|
-
#
|
|
260
|
+
# The validation routine itself has to be implemented in the #perform_validation method - see
|
|
261
|
+
# its documentation for more information.
|
|
265
262
|
#
|
|
266
263
|
# *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
|
|
267
264
|
# currently implement the full PDF spec. However, if the return value is +false+, there is
|
|
268
265
|
# certainly a problem!
|
|
269
266
|
def validate(auto_correct: true)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
true
|
|
267
|
+
result = true
|
|
268
|
+
perform_validation do |msg, correctable, object|
|
|
269
|
+
yield(msg, correctable, object || self) if block_given?
|
|
270
|
+
result = false unless correctable
|
|
271
|
+
return false unless auto_correct
|
|
276
272
|
end
|
|
273
|
+
result
|
|
277
274
|
end
|
|
278
275
|
|
|
279
276
|
# Makes a deep copy of the source PDF object and resets the object identifier.
|
|
@@ -287,6 +284,28 @@ module HexaPDF
|
|
|
287
284
|
obj
|
|
288
285
|
end
|
|
289
286
|
|
|
287
|
+
# Caches and returns the given +value+ or the value of the block under the given cache key. If
|
|
288
|
+
# there is already a cached value for the key and +update+ is +false+, it is just returned.
|
|
289
|
+
#
|
|
290
|
+
# Set +update+ to +true+ to force an update of the cached value.
|
|
291
|
+
#
|
|
292
|
+
# This uses Document#cache internally.
|
|
293
|
+
def cache(key, value = Document::UNSET, update: false, &block)
|
|
294
|
+
document.cache(@data, key, value, update: update, &block)
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Returns +true+ if there is a cached value for the given key.
|
|
298
|
+
#
|
|
299
|
+
# This uses Document#cached? internally.
|
|
300
|
+
def cached?(key)
|
|
301
|
+
document.cached?(@data, key)
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# Clears the cache for this object.
|
|
305
|
+
def clear_cache
|
|
306
|
+
document.clear_cache(@data)
|
|
307
|
+
end
|
|
308
|
+
|
|
290
309
|
# Compares this object to another object.
|
|
291
310
|
#
|
|
292
311
|
# If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
|
|
@@ -339,17 +358,25 @@ module HexaPDF
|
|
|
339
358
|
# are also performed!
|
|
340
359
|
#
|
|
341
360
|
# When the validation routine finds that the object is invalid, it has to yield a problem
|
|
342
|
-
# description and whether the problem can be corrected.
|
|
343
|
-
#
|
|
344
|
-
#
|
|
361
|
+
# description and whether the problem can be corrected. An optional third argument may contain
|
|
362
|
+
# the object that gets validated if it is different from this object (may happen when
|
|
363
|
+
# auto-correction is used).
|
|
345
364
|
#
|
|
346
|
-
#
|
|
365
|
+
# After yielding, the problem has to be corrected if it is correctable. If it is not correctable
|
|
366
|
+
# and not correcting would lead to exceptions the method has to return early.
|
|
367
|
+
#
|
|
368
|
+
# Here is a sample validation routine for a dictionary object type:
|
|
347
369
|
#
|
|
348
370
|
# def perform_validation
|
|
349
371
|
# super
|
|
350
|
-
#
|
|
351
|
-
#
|
|
352
|
-
#
|
|
372
|
+
#
|
|
373
|
+
# if value[:SomeKey].length != 7
|
|
374
|
+
# yield("Length of /SomeKey is invalid")
|
|
375
|
+
# # No need to return early here because following check doesn't rely on /SomeKey
|
|
376
|
+
# end
|
|
377
|
+
#
|
|
378
|
+
# if value[:OtherKey] % 2 == 0
|
|
379
|
+
# yield("/OtherKey needs to contain an odd number of elements")
|
|
353
380
|
# end
|
|
354
381
|
# end
|
|
355
382
|
def perform_validation(&block)
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -59,6 +59,7 @@ module HexaPDF
|
|
|
59
59
|
@tokenizer = Tokenizer.new(io)
|
|
60
60
|
@document = document
|
|
61
61
|
@object_stream_data = {}
|
|
62
|
+
@reconstructed_revision = nil
|
|
62
63
|
retrieve_pdf_header_offset_and_version
|
|
63
64
|
end
|
|
64
65
|
|
|
@@ -86,6 +87,8 @@ module HexaPDF
|
|
|
86
87
|
end
|
|
87
88
|
|
|
88
89
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
90
|
+
rescue HexaPDF::MalformedPDFError
|
|
91
|
+
reconstructed_revision.object(xref_entry)
|
|
89
92
|
end
|
|
90
93
|
|
|
91
94
|
# Parses the indirect object at the specified offset.
|
|
@@ -235,14 +238,14 @@ module HexaPDF
|
|
|
235
238
|
@tokenizer.skip_whitespace
|
|
236
239
|
start.upto(start + number_of_entries - 1) do |oid|
|
|
237
240
|
pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
|
|
238
|
-
maybe_raise("Invalid cross-reference
|
|
239
|
-
force: matched_size
|
|
241
|
+
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
|
242
|
+
force: !matched_size)
|
|
240
243
|
end
|
|
241
244
|
if xref.entry?(oid)
|
|
242
245
|
next
|
|
243
246
|
elsif type == 'n'
|
|
244
247
|
if pos == 0 || gen > 65535
|
|
245
|
-
maybe_raise("Invalid in use cross-reference entry
|
|
248
|
+
maybe_raise("Invalid in use cross-reference entry",
|
|
246
249
|
pos: @tokenizer.pos)
|
|
247
250
|
xref.add_free_entry(oid, gen)
|
|
248
251
|
else
|
|
@@ -264,6 +267,27 @@ module HexaPDF
|
|
|
264
267
|
raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
|
|
265
268
|
end
|
|
266
269
|
|
|
270
|
+
unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
|
|
271
|
+
first_entry = xref[xref.oids[0]]
|
|
272
|
+
test_entry = xref[xref.oids[-1]]
|
|
273
|
+
@tokenizer.pos = test_entry.pos + @header_offset
|
|
274
|
+
test_oid = @tokenizer.next_token
|
|
275
|
+
first_oid = first_entry.oid
|
|
276
|
+
|
|
277
|
+
force_failure = !first_entry.free? || first_entry.gen != 65535 ||
|
|
278
|
+
!test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
|
|
279
|
+
maybe_raise("Main cross-reference section has invalid numbering",
|
|
280
|
+
pos: offset + @header_offset, force: force_failure)
|
|
281
|
+
|
|
282
|
+
new_xref = XRefSection.new
|
|
283
|
+
xref.oids.each do |oid|
|
|
284
|
+
entry = xref[oid]
|
|
285
|
+
entry.oid -= first_oid
|
|
286
|
+
new_xref.send(:[]=, entry.oid, entry.gen, entry)
|
|
287
|
+
end
|
|
288
|
+
xref = new_xref
|
|
289
|
+
end
|
|
290
|
+
|
|
267
291
|
[xref, trailer]
|
|
268
292
|
end
|
|
269
293
|
|
|
@@ -313,6 +337,11 @@ module HexaPDF
|
|
|
313
337
|
@startxref_offset = lines[eof_index - 1].to_i
|
|
314
338
|
end
|
|
315
339
|
|
|
340
|
+
# Returns the reconstructed revision.
|
|
341
|
+
def reconstructed_revision
|
|
342
|
+
@reconstructed_revision ||= reconstruct_revision
|
|
343
|
+
end
|
|
344
|
+
|
|
316
345
|
# Returns the PDF version number that is stored in the file header.
|
|
317
346
|
#
|
|
318
347
|
# See: PDF1.7 s7.5.2
|
|
@@ -338,6 +367,61 @@ module HexaPDF
|
|
|
338
367
|
@header_version = $1
|
|
339
368
|
end
|
|
340
369
|
|
|
370
|
+
# Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
|
|
371
|
+
# file and returning a Revision object for loading the found objects.
|
|
372
|
+
#
|
|
373
|
+
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
|
374
|
+
# cross-reference table, later objects overwriting prior ones.
|
|
375
|
+
def reconstruct_revision
|
|
376
|
+
raise unless @document.config['parser.try_xref_reconstruction']
|
|
377
|
+
msg = "#{$!} - trying cross-reference table reconstruction"
|
|
378
|
+
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
|
379
|
+
|
|
380
|
+
xref = XRefSection.new
|
|
381
|
+
@tokenizer.pos = 0
|
|
382
|
+
while true
|
|
383
|
+
@tokenizer.skip_whitespace
|
|
384
|
+
pos = @tokenizer.pos
|
|
385
|
+
@tokenizer.scan_until(/(\n|\r\n?)+/)
|
|
386
|
+
next_new_line_pos = @tokenizer.pos
|
|
387
|
+
@tokenizer.pos = pos
|
|
388
|
+
|
|
389
|
+
token = @tokenizer.next_token rescue nil
|
|
390
|
+
if token.kind_of?(Integer)
|
|
391
|
+
gen = @tokenizer.next_token rescue nil
|
|
392
|
+
tok = @tokenizer.next_token rescue nil
|
|
393
|
+
if @tokenizer.pos > next_new_line_pos
|
|
394
|
+
@tokenizer.pos = next_new_line_pos
|
|
395
|
+
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
|
396
|
+
xref.add_in_use_entry(token, gen, pos)
|
|
397
|
+
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
|
398
|
+
end
|
|
399
|
+
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
|
400
|
+
obj = @tokenizer.next_object rescue nil
|
|
401
|
+
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
|
402
|
+
# linearized file.
|
|
403
|
+
trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
|
|
404
|
+
elsif token == Tokenizer::NO_MORE_TOKENS
|
|
405
|
+
break
|
|
406
|
+
else
|
|
407
|
+
@tokenizer.pos = next_new_line_pos
|
|
408
|
+
end
|
|
409
|
+
end
|
|
410
|
+
|
|
411
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
|
412
|
+
if !trailer || trailer.empty?
|
|
413
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
|
414
|
+
end
|
|
415
|
+
|
|
416
|
+
loader = lambda do |xref_entry|
|
|
417
|
+
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
|
418
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
|
422
|
+
loader: loader)
|
|
423
|
+
end
|
|
424
|
+
|
|
341
425
|
# Raises a HexaPDF::MalformedPDFError with the given message and source position.
|
|
342
426
|
def raise_malformed(msg, pos: nil)
|
|
343
427
|
raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
|
data/lib/hexapdf/pdf_array.rb
CHANGED
|
@@ -83,7 +83,7 @@ module HexaPDF
|
|
|
83
83
|
# subclasses) and the given data has not (including subclasses), the data is stored inside the
|
|
84
84
|
# HexaPDF::Object.
|
|
85
85
|
def []=(index, data)
|
|
86
|
-
if value[index].
|
|
86
|
+
if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
|
|
87
87
|
!data.kind_of?(HexaPDF::Reference)
|
|
88
88
|
value[index].value = data
|
|
89
89
|
else
|
|
@@ -113,6 +113,13 @@ module HexaPDF
|
|
|
113
113
|
value.delete_at(index)
|
|
114
114
|
end
|
|
115
115
|
|
|
116
|
+
# Deletes all values from the PDFArray that are equal to the given object.
|
|
117
|
+
#
|
|
118
|
+
# Returns the last deleted item, or +nil+ if no matching item is found.
|
|
119
|
+
def delete(object)
|
|
120
|
+
value.delete(object)
|
|
121
|
+
end
|
|
122
|
+
|
|
116
123
|
# :call-seq:
|
|
117
124
|
# array.slice!(index) -> obj or nil
|
|
118
125
|
# array.slice!(start, length) -> new_array or nil
|
|
@@ -174,9 +181,9 @@ module HexaPDF
|
|
|
174
181
|
self
|
|
175
182
|
end
|
|
176
183
|
|
|
177
|
-
# Returns
|
|
184
|
+
# Returns an array containing the preprocessed values (like in #[]).
|
|
178
185
|
def to_ary
|
|
179
|
-
|
|
186
|
+
each.to_a
|
|
180
187
|
end
|
|
181
188
|
|
|
182
189
|
private
|
|
@@ -196,7 +203,7 @@ module HexaPDF
|
|
|
196
203
|
data = document.deref(data)
|
|
197
204
|
value[index] = data if index
|
|
198
205
|
end
|
|
199
|
-
if data.
|
|
206
|
+
if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
|
|
200
207
|
data = data.value
|
|
201
208
|
end
|
|
202
209
|
data
|
data/lib/hexapdf/revisions.rb
CHANGED
|
@@ -67,30 +67,38 @@ module HexaPDF
|
|
|
67
67
|
object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
|
|
68
68
|
|
|
69
69
|
revisions = []
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
70
|
+
begin
|
|
71
|
+
xref_section, trailer = parser.load_revision(parser.startxref_offset)
|
|
72
|
+
revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
|
|
73
|
+
xref_section: xref_section, loader: object_loader)
|
|
74
|
+
seen_xref_offsets = {parser.startxref_offset => true}
|
|
75
|
+
|
|
76
|
+
while (prev = revisions[0].trailer.value[:Prev]) &&
|
|
77
|
+
!seen_xref_offsets.key?(prev)
|
|
78
|
+
# PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
|
|
79
|
+
# should be direct. Adobe's POV is followed here. Same with :XRefStm.
|
|
80
|
+
xref_section, trailer = parser.load_revision(prev)
|
|
81
|
+
seen_xref_offsets[prev] = true
|
|
82
|
+
|
|
83
|
+
stm = revisions[0].trailer.value[:XRefStm]
|
|
84
|
+
if stm && !seen_xref_offsets.key?(stm)
|
|
85
|
+
stm_xref_section, = parser.load_revision(stm)
|
|
86
|
+
xref_section.merge!(stm_xref_section)
|
|
87
|
+
seen_xref_offsets[stm] = true
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
|
|
91
|
+
xref_section: xref_section, loader: object_loader))
|
|
87
92
|
end
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
93
|
+
rescue HexaPDF::MalformedPDFError
|
|
94
|
+
reconstructed_revision = parser.reconstructed_revision
|
|
95
|
+
unless revisions.empty?
|
|
96
|
+
reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
|
|
97
|
+
end
|
|
98
|
+
revisions << reconstructed_revision
|
|
91
99
|
end
|
|
92
100
|
|
|
93
|
-
document.version = parser.file_header_version
|
|
101
|
+
document.version = parser.file_header_version rescue '1.0'
|
|
94
102
|
new(document, initial_revisions: revisions, parser: parser)
|
|
95
103
|
end
|
|
96
104
|
|
data/lib/hexapdf/serializer.rb
CHANGED
|
@@ -129,9 +129,10 @@ module HexaPDF
|
|
|
129
129
|
xref_stream = false
|
|
130
130
|
objects_to_delete = []
|
|
131
131
|
rev.each do |obj|
|
|
132
|
-
|
|
132
|
+
case obj.type
|
|
133
|
+
when :ObjStm
|
|
133
134
|
objects_to_delete << obj
|
|
134
|
-
|
|
135
|
+
when :XRef
|
|
135
136
|
xref_stream = true
|
|
136
137
|
objects_to_delete << obj if xref_streams == :delete
|
|
137
138
|
else
|
|
@@ -150,9 +151,10 @@ module HexaPDF
|
|
|
150
151
|
objstms = [doc.wrap({Type: :ObjStm})]
|
|
151
152
|
old_objstms = []
|
|
152
153
|
rev.each do |obj|
|
|
153
|
-
|
|
154
|
+
case obj.type
|
|
155
|
+
when :XRef
|
|
154
156
|
xref_stream = true
|
|
155
|
-
|
|
157
|
+
when :ObjStm
|
|
156
158
|
old_objstms << obj
|
|
157
159
|
end
|
|
158
160
|
delete_fields_with_defaults(obj)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
|
@@ -249,17 +249,18 @@ module HexaPDF
|
|
|
249
249
|
#
|
|
250
250
|
# See: PDF1.7 s7.3.3
|
|
251
251
|
def parse_number
|
|
252
|
-
|
|
252
|
+
val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
|
|
253
|
+
if val.match?(/\A[+-]?\d++(?!\.)\z/)
|
|
253
254
|
tmp = val.to_i
|
|
254
255
|
# Handle object references, see PDF1.7 s7.3.10
|
|
255
256
|
prepare_string_scanner(10)
|
|
256
257
|
tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
|
|
257
258
|
tmp
|
|
258
|
-
elsif
|
|
259
|
+
elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
|
|
259
260
|
val << '0' if val.getbyte(-1) == 46 # dot '.'
|
|
260
261
|
Float(val)
|
|
261
262
|
else
|
|
262
|
-
|
|
263
|
+
TOKEN_CACHE[val] # val is keyword
|
|
263
264
|
end
|
|
264
265
|
end
|
|
265
266
|
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
require 'hexapdf/error'
|
|
38
38
|
require 'hexapdf/layout/style'
|
|
39
39
|
require 'hexapdf/layout/text_fragment'
|
|
40
|
+
require 'hexapdf/layout/text_layouter'
|
|
40
41
|
|
|
41
42
|
module HexaPDF
|
|
42
43
|
module Type
|
|
@@ -80,14 +81,8 @@ module HexaPDF
|
|
|
80
81
|
else
|
|
81
82
|
raise HexaPDF::Error, "Unsupported button field type"
|
|
82
83
|
end
|
|
83
|
-
when :Tx
|
|
84
|
+
when :Tx, :Ch
|
|
84
85
|
create_text_appearances
|
|
85
|
-
when :Ch
|
|
86
|
-
if @field.combo_box?
|
|
87
|
-
create_text_appearances
|
|
88
|
-
else
|
|
89
|
-
raise HexaPDF::Error, "List box not supported yet"
|
|
90
|
-
end
|
|
91
86
|
else
|
|
92
87
|
raise HexaPDF::Error, "Unsupported field type #{@field.field_type}"
|
|
93
88
|
end
|
|
@@ -206,6 +201,10 @@ module HexaPDF
|
|
|
206
201
|
# * The font, font size and font color are taken from the associated field's default
|
|
207
202
|
# appearance string. See VariableTextField.
|
|
208
203
|
#
|
|
204
|
+
# If the font is not usable by HexaPDF (which may be due to a variety of reasons, e.g. no
|
|
205
|
+
# associated information in the form's default resources), the font specified by the
|
|
206
|
+
# configuration option +acro_form.fallback_font+ will be used.
|
|
207
|
+
#
|
|
209
208
|
# * The widget's rectangle /Rect must be defined. If the height is zero, it is auto-sized
|
|
210
209
|
# based on the font size. If additionally the font size is zero, a font size of
|
|
211
210
|
# +acro_form.default_font_size+ is used. If the width is zero, the
|
|
@@ -222,7 +221,7 @@ module HexaPDF
|
|
|
222
221
|
def create_text_appearances
|
|
223
222
|
font_name, font_size = @field.parse_default_appearance_string
|
|
224
223
|
default_resources = @document.acro_form.default_resources
|
|
225
|
-
font = default_resources.font(font_name).font_wrapper
|
|
224
|
+
font = default_resources.font(font_name).font_wrapper rescue nil
|
|
226
225
|
unless font
|
|
227
226
|
fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
|
|
228
227
|
if fallback_font_name
|
|
@@ -245,38 +244,35 @@ module HexaPDF
|
|
|
245
244
|
rect.height = style.scaled_y_max - style.scaled_y_min + 2 * padding
|
|
246
245
|
end
|
|
247
246
|
|
|
248
|
-
form = (@widget[:AP] ||= {})[:N]
|
|
249
|
-
|
|
247
|
+
form = (@widget[:AP] ||= {})[:N] ||= @document.add({Type: :XObject, Subtype: :Form})
|
|
248
|
+
form.value.replace({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
|
|
249
|
+
form.contents = ''
|
|
250
250
|
form[:Resources] = HexaPDF::Object.deep_copy(default_resources)
|
|
251
251
|
|
|
252
252
|
canvas = form.canvas
|
|
253
253
|
apply_background_and_border(border_style, canvas)
|
|
254
254
|
style.font_size = calculate_font_size(font, font_size, rect, border_style)
|
|
255
|
+
style.clear_cache
|
|
255
256
|
|
|
256
257
|
canvas.marked_content_sequence(:Tx) do
|
|
257
|
-
if
|
|
258
|
+
if @field.field_value || @field.concrete_field_type == :list_box
|
|
258
259
|
canvas.save_graphics_state do
|
|
259
260
|
canvas.rectangle(padding, padding, rect.width - 2 * padding,
|
|
260
261
|
rect.height - 2 * padding).clip_path.end_path
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
end
|
|
269
|
-
cap_height = font.wrapped_font.cap_height * font.scaling_factor / 1000.0 *
|
|
270
|
-
style.font_size
|
|
271
|
-
y = padding + (rect.height - 2 * padding - cap_height) / 2.0
|
|
272
|
-
y = padding - style.scaled_font_descender if y < 0
|
|
273
|
-
fragment.draw(canvas, x, y)
|
|
262
|
+
if @field.concrete_field_type == :multiline_text_field
|
|
263
|
+
draw_multiline_text(canvas, rect, style, padding)
|
|
264
|
+
elsif @field.concrete_field_type == :list_box
|
|
265
|
+
draw_list_box(canvas, rect, style, padding)
|
|
266
|
+
else
|
|
267
|
+
draw_single_line_text(canvas, rect, style, padding)
|
|
268
|
+
end
|
|
274
269
|
end
|
|
275
270
|
end
|
|
276
271
|
end
|
|
277
272
|
end
|
|
278
273
|
|
|
279
274
|
alias create_combo_box_appearances create_text_appearances
|
|
275
|
+
alias create_list_box_appearances create_text_appearances
|
|
280
276
|
|
|
281
277
|
private
|
|
282
278
|
|
|
@@ -337,6 +333,13 @@ module HexaPDF
|
|
|
337
333
|
canvas.circle(rect.width / 2.0, rect.height / 2.0, [width / 2.0, height / 2.0].min)
|
|
338
334
|
else
|
|
339
335
|
canvas.rectangle(offset, offset, width, height)
|
|
336
|
+
if @field.concrete_field_type == :comb_text_field
|
|
337
|
+
cell_width = rect.width.to_f / @field[:MaxLen]
|
|
338
|
+
1.upto(@field[:MaxLen] - 1) do |i|
|
|
339
|
+
canvas.line(i * cell_width, border_style.width,
|
|
340
|
+
i * cell_width, border_style.width + height)
|
|
341
|
+
end
|
|
342
|
+
end
|
|
340
343
|
end
|
|
341
344
|
end
|
|
342
345
|
canvas.stroke
|
|
@@ -381,14 +384,115 @@ module HexaPDF
|
|
|
381
384
|
end
|
|
382
385
|
end
|
|
383
386
|
|
|
387
|
+
# Draws a single line of text inside the widget's rectangle.
|
|
388
|
+
def draw_single_line_text(canvas, rect, style, padding)
|
|
389
|
+
value = @field.field_value
|
|
390
|
+
fragment = HexaPDF::Layout::TextFragment.create(value, style)
|
|
391
|
+
|
|
392
|
+
if @field.concrete_field_type == :comb_text_field
|
|
393
|
+
unless @field.key?(:MaxLen)
|
|
394
|
+
raise HexaPDF::Error, "Missing or invalid dictionary field /MaxLen for comb text field"
|
|
395
|
+
end
|
|
396
|
+
new_items = []
|
|
397
|
+
cell_width = rect.width.to_f / @field[:MaxLen]
|
|
398
|
+
scaled_cell_width = cell_width / style.scaled_font_size.to_f
|
|
399
|
+
fragment.items.each_cons(2) do |a, b|
|
|
400
|
+
new_items << a << -(scaled_cell_width - a.width / 2.0 - b.width / 2.0)
|
|
401
|
+
end
|
|
402
|
+
new_items << fragment.items.last
|
|
403
|
+
fragment.items.replace(new_items)
|
|
404
|
+
fragment.clear_cache
|
|
405
|
+
# Adobe always seems to add 1 to the first offset...
|
|
406
|
+
x_offset = 1 + (cell_width - style.scaled_item_width(fragment.items[0])) / 2.0
|
|
407
|
+
x = case @field.text_alignment
|
|
408
|
+
when :left then x_offset
|
|
409
|
+
when :right then x_offset + cell_width * (@field[:MaxLen] - value.length)
|
|
410
|
+
when :center then x_offset + cell_width * ((@field[:MaxLen] - value.length) / 2)
|
|
411
|
+
end
|
|
412
|
+
else
|
|
413
|
+
# Adobe seems to be left/right-aligning based on twice the border width
|
|
414
|
+
x = case @field.text_alignment
|
|
415
|
+
when :left then 2 * padding
|
|
416
|
+
when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
|
|
417
|
+
when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
|
|
418
|
+
end
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
# Adobe seems to be vertically centering based on the cap height, if enough space is
|
|
422
|
+
# available
|
|
423
|
+
cap_height = style.font.wrapped_font.cap_height * style.font.scaling_factor / 1000.0 *
|
|
424
|
+
style.font_size
|
|
425
|
+
y = padding + (rect.height - 2 * padding - cap_height) / 2.0
|
|
426
|
+
y = padding - style.scaled_font_descender if y < 0
|
|
427
|
+
fragment.draw(canvas, x, y)
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
# Draws multiple lines of text inside the widget's rectangle.
|
|
431
|
+
def draw_multiline_text(canvas, rect, style, padding)
|
|
432
|
+
items = [Layout::TextFragment.create(@field.field_value, style)]
|
|
433
|
+
layouter = Layout::TextLayouter.new(style)
|
|
434
|
+
layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
|
|
435
|
+
|
|
436
|
+
result = nil
|
|
437
|
+
if style.font_size == 0 # need to auto-size text
|
|
438
|
+
style.font_size = 12 # Adobe seems to use this as starting point
|
|
439
|
+
style.clear_cache
|
|
440
|
+
loop do
|
|
441
|
+
result = layouter.fit(items, rect.width - 4 * padding, rect.height - 4 * padding)
|
|
442
|
+
break if result.status == :success || style.font_size <= 4 # don't make text too small
|
|
443
|
+
style.font_size -= 1
|
|
444
|
+
style.clear_cache
|
|
445
|
+
end
|
|
446
|
+
else
|
|
447
|
+
result = layouter.fit(items, rect.width - 4 * padding, 2**20)
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
unless result.lines.empty?
|
|
451
|
+
result.draw(canvas, 2 * padding, rect.height - 2 * padding - result.lines[0].height / 2.0)
|
|
452
|
+
end
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
# Draws the visible option items of the list box in the widget's rectangle.
|
|
456
|
+
def draw_list_box(canvas, rect, style, padding)
|
|
457
|
+
option_items = @field.option_items
|
|
458
|
+
top_index = @field.list_box_top_index
|
|
459
|
+
items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
|
|
460
|
+
|
|
461
|
+
indices = @field[:I] || []
|
|
462
|
+
value_indices = [@field.field_value].flatten.compact.map {|val| option_items.index(val) }
|
|
463
|
+
indices = value_indices if indices != value_indices
|
|
464
|
+
|
|
465
|
+
layouter = Layout::TextLayouter.new(style)
|
|
466
|
+
layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
|
|
467
|
+
result = layouter.fit(items, rect.width - 4 * padding, rect.height)
|
|
468
|
+
|
|
469
|
+
unless result.lines.empty?
|
|
470
|
+
top_gap = style.line_spacing.gap(result.lines[0], result.lines[0])
|
|
471
|
+
line_height = style.line_spacing.baseline_distance(result.lines[0], result.lines[0])
|
|
472
|
+
canvas.fill_color(153, 193, 218) # Adobe's color for selection highlighting
|
|
473
|
+
indices.map! {|i| rect.height - padding - (i - top_index + 1) * line_height }.each do |y|
|
|
474
|
+
next if y + line_height > rect.height || y + line_height < padding
|
|
475
|
+
canvas.rectangle(padding, y, rect.width - 2 * padding, line_height)
|
|
476
|
+
end
|
|
477
|
+
canvas.fill if canvas.graphics_object == :path
|
|
478
|
+
result.draw(canvas, 2 * padding, rect.height - padding - top_gap)
|
|
479
|
+
end
|
|
480
|
+
end
|
|
481
|
+
|
|
384
482
|
# Calculates the font size for text fields based on the font and font size of the default
|
|
385
483
|
# appearance string, the annotation rectangle and the border style.
|
|
386
484
|
def calculate_font_size(font, font_size, rect, border_style)
|
|
387
485
|
if font_size == 0
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
486
|
+
if @field.concrete_field_type == :multiline_text_field
|
|
487
|
+
0 # Handled by multiline drawing code
|
|
488
|
+
elsif @field.concrete_field_type == :list_box
|
|
489
|
+
12 # Seems to be Adobe's default
|
|
490
|
+
else
|
|
491
|
+
unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
|
|
492
|
+
font.scaling_factor / 1000.0
|
|
493
|
+
# The constant factor was found empirically by checking what Adobe Reader etc. do
|
|
494
|
+
(rect.height - 2 * border_style.width) / unit_font_size * 0.83
|
|
495
|
+
end
|
|
392
496
|
else
|
|
393
497
|
font_size
|
|
394
498
|
end
|