hexapdf 0.12.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +126 -0
- data/examples/019-acro_form.rb +41 -4
- data/lib/hexapdf/cli/command.rb +4 -2
- data/lib/hexapdf/cli/image2pdf.rb +2 -1
- data/lib/hexapdf/cli/info.rb +51 -2
- data/lib/hexapdf/cli/inspect.rb +30 -8
- data/lib/hexapdf/cli/merge.rb +1 -1
- data/lib/hexapdf/cli/split.rb +74 -14
- data/lib/hexapdf/configuration.rb +15 -0
- data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
- data/lib/hexapdf/content/parser.rb +1 -1
- data/lib/hexapdf/dictionary.rb +4 -4
- data/lib/hexapdf/dictionary_fields.rb +1 -9
- data/lib/hexapdf/document.rb +41 -16
- data/lib/hexapdf/document/files.rb +0 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -0
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
- data/lib/hexapdf/font/cmap.rb +1 -4
- data/lib/hexapdf/font/encoding/base.rb +8 -0
- data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
- data/lib/hexapdf/font/true_type/table/head.rb +1 -0
- data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +3 -2
- data/lib/hexapdf/layout/line.rb +1 -1
- data/lib/hexapdf/layout/style.rb +23 -23
- data/lib/hexapdf/layout/text_layouter.rb +2 -2
- data/lib/hexapdf/layout/text_shaper.rb +3 -2
- data/lib/hexapdf/object.rb +52 -25
- data/lib/hexapdf/parser.rb +87 -3
- data/lib/hexapdf/pdf_array.rb +11 -4
- data/lib/hexapdf/revisions.rb +29 -21
- data/lib/hexapdf/serializer.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +6 -4
- data/lib/hexapdf/tokenizer.rb +4 -3
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
- data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
- data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
- data/lib/hexapdf/type/acro_form/field.rb +35 -5
- data/lib/hexapdf/type/acro_form/form.rb +139 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
- data/lib/hexapdf/type/actions/uri.rb +3 -2
- data/lib/hexapdf/type/annotations/widget.rb +3 -4
- data/lib/hexapdf/type/catalog.rb +2 -2
- data/lib/hexapdf/type/cid_font.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +4 -2
- data/lib/hexapdf/type/font_true_type.rb +6 -2
- data/lib/hexapdf/type/font_type0.rb +4 -4
- data/lib/hexapdf/type/form.rb +15 -2
- data/lib/hexapdf/type/image.rb +2 -2
- data/lib/hexapdf/type/page.rb +37 -13
- data/lib/hexapdf/type/page_tree_node.rb +29 -5
- data/lib/hexapdf/type/resources.rb +1 -0
- data/lib/hexapdf/type/trailer.rb +2 -3
- data/lib/hexapdf/utils/object_hash.rb +0 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/common_tokenizer_tests.rb +6 -1
- data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
- data/test/hexapdf/content/test_canvas.rb +3 -3
- data/test/hexapdf/content/test_color_space.rb +1 -1
- data/test/hexapdf/encryption/test_aes.rb +4 -4
- data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
- data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
- data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
- data/test/hexapdf/font/encoding/test_base.rb +10 -0
- data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
- data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
- data/test/hexapdf/layout/test_style.rb +1 -1
- data/test/hexapdf/layout/test_text_layouter.rb +12 -5
- data/test/hexapdf/test_configuration.rb +2 -2
- data/test/hexapdf/test_dictionary.rb +3 -1
- data/test/hexapdf/test_dictionary_fields.rb +2 -2
- data/test/hexapdf/test_document.rb +18 -10
- data/test/hexapdf/test_object.rb +71 -26
- data/test/hexapdf/test_parser.rb +159 -53
- data/test/hexapdf/test_pdf_array.rb +8 -1
- data/test/hexapdf/test_revisions.rb +35 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
- data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
- data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
- data/test/hexapdf/type/acro_form/test_field.rb +39 -0
- data/test/hexapdf/type/acro_form/test_form.rb +87 -15
- data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
- data/test/hexapdf/type/test_font_simple.rb +2 -1
- data/test/hexapdf/type/test_font_true_type.rb +6 -0
- data/test/hexapdf/type/test_form.rb +26 -1
- data/test/hexapdf/type/test_page.rb +45 -7
- data/test/hexapdf/type/test_page_tree_node.rb +42 -0
- data/test/hexapdf/utils/test_bit_field.rb +2 -0
- data/test/hexapdf/utils/test_object_hash.rb +5 -0
- data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
- data/test/test_helper.rb +2 -0
- metadata +6 -11
@@ -68,9 +68,10 @@ module HexaPDF
|
|
68
68
|
text_fragment.clear_cache
|
69
69
|
end
|
70
70
|
if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
|
71
|
-
|
71
|
+
case font.font_type
|
72
|
+
when :TrueType
|
72
73
|
process_true_type_kerning(text_fragment)
|
73
|
-
|
74
|
+
when :Type1
|
74
75
|
process_type1_kerning(text_fragment)
|
75
76
|
end
|
76
77
|
text_fragment.clear_cache
|
data/lib/hexapdf/object.rb
CHANGED
@@ -122,9 +122,6 @@ module HexaPDF
|
|
122
122
|
|
123
123
|
include Comparable
|
124
124
|
|
125
|
-
# A list of classes whose objects cannot be duplicated.
|
126
|
-
NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
|
127
|
-
|
128
125
|
# :call-seq:
|
129
126
|
# HexaPDF::Object.deep_copy(object) -> copy
|
130
127
|
#
|
@@ -139,8 +136,6 @@ module HexaPDF
|
|
139
136
|
(object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
|
140
137
|
when HexaPDF::Reference
|
141
138
|
object
|
142
|
-
when *NOT_DUPLICATABLE_CLASSES
|
143
|
-
object
|
144
139
|
else
|
145
140
|
object.dup
|
146
141
|
end
|
@@ -251,29 +246,31 @@ module HexaPDF
|
|
251
246
|
end
|
252
247
|
|
253
248
|
# :call-seq:
|
254
|
-
# obj.validate(auto_correct: true)
|
255
|
-
# obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
|
249
|
+
# obj.validate(auto_correct: true) -> true or false
|
250
|
+
# obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
|
256
251
|
#
|
257
|
-
# Validates the object
|
258
|
-
#
|
259
|
-
# its documentation for more information.
|
252
|
+
# Validates the object, optionally corrects problems when the option +auto_correct+ is set and
|
253
|
+
# returns +true+ if the object is deemed valid and +false+ otherwise.
|
260
254
|
#
|
261
255
|
# If a block is given, it is called on validation problems with a problem description and
|
262
|
-
# whether the problem is correctable.
|
256
|
+
# whether the problem is automatically correctable. The third argument to the block is usually
|
257
|
+
# this object but may be another object if during auto-correction a new object was created and
|
258
|
+
# validated.
|
263
259
|
#
|
264
|
-
#
|
260
|
+
# The validation routine itself has to be implemented in the #perform_validation method - see
|
261
|
+
# its documentation for more information.
|
265
262
|
#
|
266
263
|
# *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
|
267
264
|
# currently implement the full PDF spec. However, if the return value is +false+, there is
|
268
265
|
# certainly a problem!
|
269
266
|
def validate(auto_correct: true)
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
true
|
267
|
+
result = true
|
268
|
+
perform_validation do |msg, correctable, object|
|
269
|
+
yield(msg, correctable, object || self) if block_given?
|
270
|
+
result = false unless correctable
|
271
|
+
return false unless auto_correct
|
276
272
|
end
|
273
|
+
result
|
277
274
|
end
|
278
275
|
|
279
276
|
# Makes a deep copy of the source PDF object and resets the object identifier.
|
@@ -287,6 +284,28 @@ module HexaPDF
|
|
287
284
|
obj
|
288
285
|
end
|
289
286
|
|
287
|
+
# Caches and returns the given +value+ or the value of the block under the given cache key. If
|
288
|
+
# there is already a cached value for the key and +update+ is +false+, it is just returned.
|
289
|
+
#
|
290
|
+
# Set +update+ to +true+ to force an update of the cached value.
|
291
|
+
#
|
292
|
+
# This uses Document#cache internally.
|
293
|
+
def cache(key, value = Document::UNSET, update: false, &block)
|
294
|
+
document.cache(@data, key, value, update: update, &block)
|
295
|
+
end
|
296
|
+
|
297
|
+
# Returns +true+ if there is a cached value for the given key.
|
298
|
+
#
|
299
|
+
# This uses Document#cached? internally.
|
300
|
+
def cached?(key)
|
301
|
+
document.cached?(@data, key)
|
302
|
+
end
|
303
|
+
|
304
|
+
# Clears the cache for this object.
|
305
|
+
def clear_cache
|
306
|
+
document.clear_cache(@data)
|
307
|
+
end
|
308
|
+
|
290
309
|
# Compares this object to another object.
|
291
310
|
#
|
292
311
|
# If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
|
@@ -339,17 +358,25 @@ module HexaPDF
|
|
339
358
|
# are also performed!
|
340
359
|
#
|
341
360
|
# When the validation routine finds that the object is invalid, it has to yield a problem
|
342
|
-
# description and whether the problem can be corrected.
|
343
|
-
#
|
344
|
-
#
|
361
|
+
# description and whether the problem can be corrected. An optional third argument may contain
|
362
|
+
# the object that gets validated if it is different from this object (may happen when
|
363
|
+
# auto-correction is used).
|
345
364
|
#
|
346
|
-
#
|
365
|
+
# After yielding, the problem has to be corrected if it is correctable. If it is not correctable
|
366
|
+
# and not correcting would lead to exceptions the method has to return early.
|
367
|
+
#
|
368
|
+
# Here is a sample validation routine for a dictionary object type:
|
347
369
|
#
|
348
370
|
# def perform_validation
|
349
371
|
# super
|
350
|
-
#
|
351
|
-
#
|
352
|
-
#
|
372
|
+
#
|
373
|
+
# if value[:SomeKey].length != 7
|
374
|
+
# yield("Length of /SomeKey is invalid")
|
375
|
+
# # No need to return early here because following check doesn't rely on /SomeKey
|
376
|
+
# end
|
377
|
+
#
|
378
|
+
# if value[:OtherKey] % 2 == 0
|
379
|
+
# yield("/OtherKey needs to contain an odd number of elements")
|
353
380
|
# end
|
354
381
|
# end
|
355
382
|
def perform_validation(&block)
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -59,6 +59,7 @@ module HexaPDF
|
|
59
59
|
@tokenizer = Tokenizer.new(io)
|
60
60
|
@document = document
|
61
61
|
@object_stream_data = {}
|
62
|
+
@reconstructed_revision = nil
|
62
63
|
retrieve_pdf_header_offset_and_version
|
63
64
|
end
|
64
65
|
|
@@ -86,6 +87,8 @@ module HexaPDF
|
|
86
87
|
end
|
87
88
|
|
88
89
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
90
|
+
rescue HexaPDF::MalformedPDFError
|
91
|
+
reconstructed_revision.object(xref_entry)
|
89
92
|
end
|
90
93
|
|
91
94
|
# Parses the indirect object at the specified offset.
|
@@ -235,14 +238,14 @@ module HexaPDF
|
|
235
238
|
@tokenizer.skip_whitespace
|
236
239
|
start.upto(start + number_of_entries - 1) do |oid|
|
237
240
|
pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
|
238
|
-
maybe_raise("Invalid cross-reference
|
239
|
-
force: matched_size
|
241
|
+
maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
|
242
|
+
force: !matched_size)
|
240
243
|
end
|
241
244
|
if xref.entry?(oid)
|
242
245
|
next
|
243
246
|
elsif type == 'n'
|
244
247
|
if pos == 0 || gen > 65535
|
245
|
-
maybe_raise("Invalid in use cross-reference entry
|
248
|
+
maybe_raise("Invalid in use cross-reference entry",
|
246
249
|
pos: @tokenizer.pos)
|
247
250
|
xref.add_free_entry(oid, gen)
|
248
251
|
else
|
@@ -264,6 +267,27 @@ module HexaPDF
|
|
264
267
|
raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
|
265
268
|
end
|
266
269
|
|
270
|
+
unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
|
271
|
+
first_entry = xref[xref.oids[0]]
|
272
|
+
test_entry = xref[xref.oids[-1]]
|
273
|
+
@tokenizer.pos = test_entry.pos + @header_offset
|
274
|
+
test_oid = @tokenizer.next_token
|
275
|
+
first_oid = first_entry.oid
|
276
|
+
|
277
|
+
force_failure = !first_entry.free? || first_entry.gen != 65535 ||
|
278
|
+
!test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
|
279
|
+
maybe_raise("Main cross-reference section has invalid numbering",
|
280
|
+
pos: offset + @header_offset, force: force_failure)
|
281
|
+
|
282
|
+
new_xref = XRefSection.new
|
283
|
+
xref.oids.each do |oid|
|
284
|
+
entry = xref[oid]
|
285
|
+
entry.oid -= first_oid
|
286
|
+
new_xref.send(:[]=, entry.oid, entry.gen, entry)
|
287
|
+
end
|
288
|
+
xref = new_xref
|
289
|
+
end
|
290
|
+
|
267
291
|
[xref, trailer]
|
268
292
|
end
|
269
293
|
|
@@ -313,6 +337,11 @@ module HexaPDF
|
|
313
337
|
@startxref_offset = lines[eof_index - 1].to_i
|
314
338
|
end
|
315
339
|
|
340
|
+
# Returns the reconstructed revision.
|
341
|
+
def reconstructed_revision
|
342
|
+
@reconstructed_revision ||= reconstruct_revision
|
343
|
+
end
|
344
|
+
|
316
345
|
# Returns the PDF version number that is stored in the file header.
|
317
346
|
#
|
318
347
|
# See: PDF1.7 s7.5.2
|
@@ -338,6 +367,61 @@ module HexaPDF
|
|
338
367
|
@header_version = $1
|
339
368
|
end
|
340
369
|
|
370
|
+
# Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
|
371
|
+
# file and returning a Revision object for loading the found objects.
|
372
|
+
#
|
373
|
+
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
374
|
+
# cross-reference table, later objects overwriting prior ones.
|
375
|
+
def reconstruct_revision
|
376
|
+
raise unless @document.config['parser.try_xref_reconstruction']
|
377
|
+
msg = "#{$!} - trying cross-reference table reconstruction"
|
378
|
+
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
379
|
+
|
380
|
+
xref = XRefSection.new
|
381
|
+
@tokenizer.pos = 0
|
382
|
+
while true
|
383
|
+
@tokenizer.skip_whitespace
|
384
|
+
pos = @tokenizer.pos
|
385
|
+
@tokenizer.scan_until(/(\n|\r\n?)+/)
|
386
|
+
next_new_line_pos = @tokenizer.pos
|
387
|
+
@tokenizer.pos = pos
|
388
|
+
|
389
|
+
token = @tokenizer.next_token rescue nil
|
390
|
+
if token.kind_of?(Integer)
|
391
|
+
gen = @tokenizer.next_token rescue nil
|
392
|
+
tok = @tokenizer.next_token rescue nil
|
393
|
+
if @tokenizer.pos > next_new_line_pos
|
394
|
+
@tokenizer.pos = next_new_line_pos
|
395
|
+
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
396
|
+
xref.add_in_use_entry(token, gen, pos)
|
397
|
+
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
398
|
+
end
|
399
|
+
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
400
|
+
obj = @tokenizer.next_object rescue nil
|
401
|
+
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
402
|
+
# linearized file.
|
403
|
+
trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
|
404
|
+
elsif token == Tokenizer::NO_MORE_TOKENS
|
405
|
+
break
|
406
|
+
else
|
407
|
+
@tokenizer.pos = next_new_line_pos
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
412
|
+
if !trailer || trailer.empty?
|
413
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
414
|
+
end
|
415
|
+
|
416
|
+
loader = lambda do |xref_entry|
|
417
|
+
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
418
|
+
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
419
|
+
end
|
420
|
+
|
421
|
+
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
422
|
+
loader: loader)
|
423
|
+
end
|
424
|
+
|
341
425
|
# Raises a HexaPDF::MalformedPDFError with the given message and source position.
|
342
426
|
def raise_malformed(msg, pos: nil)
|
343
427
|
raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
|
data/lib/hexapdf/pdf_array.rb
CHANGED
@@ -83,7 +83,7 @@ module HexaPDF
|
|
83
83
|
# subclasses) and the given data has not (including subclasses), the data is stored inside the
|
84
84
|
# HexaPDF::Object.
|
85
85
|
def []=(index, data)
|
86
|
-
if value[index].
|
86
|
+
if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
|
87
87
|
!data.kind_of?(HexaPDF::Reference)
|
88
88
|
value[index].value = data
|
89
89
|
else
|
@@ -113,6 +113,13 @@ module HexaPDF
|
|
113
113
|
value.delete_at(index)
|
114
114
|
end
|
115
115
|
|
116
|
+
# Deletes all values from the PDFArray that are equal to the given object.
|
117
|
+
#
|
118
|
+
# Returns the last deleted item, or +nil+ if no matching item is found.
|
119
|
+
def delete(object)
|
120
|
+
value.delete(object)
|
121
|
+
end
|
122
|
+
|
116
123
|
# :call-seq:
|
117
124
|
# array.slice!(index) -> obj or nil
|
118
125
|
# array.slice!(start, length) -> new_array or nil
|
@@ -174,9 +181,9 @@ module HexaPDF
|
|
174
181
|
self
|
175
182
|
end
|
176
183
|
|
177
|
-
# Returns
|
184
|
+
# Returns an array containing the preprocessed values (like in #[]).
|
178
185
|
def to_ary
|
179
|
-
|
186
|
+
each.to_a
|
180
187
|
end
|
181
188
|
|
182
189
|
private
|
@@ -196,7 +203,7 @@ module HexaPDF
|
|
196
203
|
data = document.deref(data)
|
197
204
|
value[index] = data if index
|
198
205
|
end
|
199
|
-
if data.
|
206
|
+
if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
|
200
207
|
data = data.value
|
201
208
|
end
|
202
209
|
data
|
data/lib/hexapdf/revisions.rb
CHANGED
@@ -67,30 +67,38 @@ module HexaPDF
|
|
67
67
|
object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
|
68
68
|
|
69
69
|
revisions = []
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
70
|
+
begin
|
71
|
+
xref_section, trailer = parser.load_revision(parser.startxref_offset)
|
72
|
+
revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
|
73
|
+
xref_section: xref_section, loader: object_loader)
|
74
|
+
seen_xref_offsets = {parser.startxref_offset => true}
|
75
|
+
|
76
|
+
while (prev = revisions[0].trailer.value[:Prev]) &&
|
77
|
+
!seen_xref_offsets.key?(prev)
|
78
|
+
# PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
|
79
|
+
# should be direct. Adobe's POV is followed here. Same with :XRefStm.
|
80
|
+
xref_section, trailer = parser.load_revision(prev)
|
81
|
+
seen_xref_offsets[prev] = true
|
82
|
+
|
83
|
+
stm = revisions[0].trailer.value[:XRefStm]
|
84
|
+
if stm && !seen_xref_offsets.key?(stm)
|
85
|
+
stm_xref_section, = parser.load_revision(stm)
|
86
|
+
xref_section.merge!(stm_xref_section)
|
87
|
+
seen_xref_offsets[stm] = true
|
88
|
+
end
|
89
|
+
|
90
|
+
revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
|
91
|
+
xref_section: xref_section, loader: object_loader))
|
87
92
|
end
|
88
|
-
|
89
|
-
|
90
|
-
|
93
|
+
rescue HexaPDF::MalformedPDFError
|
94
|
+
reconstructed_revision = parser.reconstructed_revision
|
95
|
+
unless revisions.empty?
|
96
|
+
reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
|
97
|
+
end
|
98
|
+
revisions << reconstructed_revision
|
91
99
|
end
|
92
100
|
|
93
|
-
document.version = parser.file_header_version
|
101
|
+
document.version = parser.file_header_version rescue '1.0'
|
94
102
|
new(document, initial_revisions: revisions, parser: parser)
|
95
103
|
end
|
96
104
|
|
data/lib/hexapdf/serializer.rb
CHANGED
@@ -129,9 +129,10 @@ module HexaPDF
|
|
129
129
|
xref_stream = false
|
130
130
|
objects_to_delete = []
|
131
131
|
rev.each do |obj|
|
132
|
-
|
132
|
+
case obj.type
|
133
|
+
when :ObjStm
|
133
134
|
objects_to_delete << obj
|
134
|
-
|
135
|
+
when :XRef
|
135
136
|
xref_stream = true
|
136
137
|
objects_to_delete << obj if xref_streams == :delete
|
137
138
|
else
|
@@ -150,9 +151,10 @@ module HexaPDF
|
|
150
151
|
objstms = [doc.wrap({Type: :ObjStm})]
|
151
152
|
old_objstms = []
|
152
153
|
rev.each do |obj|
|
153
|
-
|
154
|
+
case obj.type
|
155
|
+
when :XRef
|
154
156
|
xref_stream = true
|
155
|
-
|
157
|
+
when :ObjStm
|
156
158
|
old_objstms << obj
|
157
159
|
end
|
158
160
|
delete_fields_with_defaults(obj)
|
data/lib/hexapdf/tokenizer.rb
CHANGED
@@ -249,17 +249,18 @@ module HexaPDF
|
|
249
249
|
#
|
250
250
|
# See: PDF1.7 s7.3.3
|
251
251
|
def parse_number
|
252
|
-
|
252
|
+
val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
|
253
|
+
if val.match?(/\A[+-]?\d++(?!\.)\z/)
|
253
254
|
tmp = val.to_i
|
254
255
|
# Handle object references, see PDF1.7 s7.3.10
|
255
256
|
prepare_string_scanner(10)
|
256
257
|
tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
|
257
258
|
tmp
|
258
|
-
elsif
|
259
|
+
elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
|
259
260
|
val << '0' if val.getbyte(-1) == 46 # dot '.'
|
260
261
|
Float(val)
|
261
262
|
else
|
262
|
-
|
263
|
+
TOKEN_CACHE[val] # val is keyword
|
263
264
|
end
|
264
265
|
end
|
265
266
|
|
@@ -37,6 +37,7 @@
|
|
37
37
|
require 'hexapdf/error'
|
38
38
|
require 'hexapdf/layout/style'
|
39
39
|
require 'hexapdf/layout/text_fragment'
|
40
|
+
require 'hexapdf/layout/text_layouter'
|
40
41
|
|
41
42
|
module HexaPDF
|
42
43
|
module Type
|
@@ -80,14 +81,8 @@ module HexaPDF
|
|
80
81
|
else
|
81
82
|
raise HexaPDF::Error, "Unsupported button field type"
|
82
83
|
end
|
83
|
-
when :Tx
|
84
|
+
when :Tx, :Ch
|
84
85
|
create_text_appearances
|
85
|
-
when :Ch
|
86
|
-
if @field.combo_box?
|
87
|
-
create_text_appearances
|
88
|
-
else
|
89
|
-
raise HexaPDF::Error, "List box not supported yet"
|
90
|
-
end
|
91
86
|
else
|
92
87
|
raise HexaPDF::Error, "Unsupported field type #{@field.field_type}"
|
93
88
|
end
|
@@ -206,6 +201,10 @@ module HexaPDF
|
|
206
201
|
# * The font, font size and font color are taken from the associated field's default
|
207
202
|
# appearance string. See VariableTextField.
|
208
203
|
#
|
204
|
+
# If the font is not usable by HexaPDF (which may be due to a variety of reasons, e.g. no
|
205
|
+
# associated information in the form's default resources), the font specified by the
|
206
|
+
# configuration option +acro_form.fallback_font+ will be used.
|
207
|
+
#
|
209
208
|
# * The widget's rectangle /Rect must be defined. If the height is zero, it is auto-sized
|
210
209
|
# based on the font size. If additionally the font size is zero, a font size of
|
211
210
|
# +acro_form.default_font_size+ is used. If the width is zero, the
|
@@ -222,7 +221,7 @@ module HexaPDF
|
|
222
221
|
def create_text_appearances
|
223
222
|
font_name, font_size = @field.parse_default_appearance_string
|
224
223
|
default_resources = @document.acro_form.default_resources
|
225
|
-
font = default_resources.font(font_name).font_wrapper
|
224
|
+
font = default_resources.font(font_name).font_wrapper rescue nil
|
226
225
|
unless font
|
227
226
|
fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
|
228
227
|
if fallback_font_name
|
@@ -245,38 +244,35 @@ module HexaPDF
|
|
245
244
|
rect.height = style.scaled_y_max - style.scaled_y_min + 2 * padding
|
246
245
|
end
|
247
246
|
|
248
|
-
form = (@widget[:AP] ||= {})[:N]
|
249
|
-
|
247
|
+
form = (@widget[:AP] ||= {})[:N] ||= @document.add({Type: :XObject, Subtype: :Form})
|
248
|
+
form.value.replace({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
|
249
|
+
form.contents = ''
|
250
250
|
form[:Resources] = HexaPDF::Object.deep_copy(default_resources)
|
251
251
|
|
252
252
|
canvas = form.canvas
|
253
253
|
apply_background_and_border(border_style, canvas)
|
254
254
|
style.font_size = calculate_font_size(font, font_size, rect, border_style)
|
255
|
+
style.clear_cache
|
255
256
|
|
256
257
|
canvas.marked_content_sequence(:Tx) do
|
257
|
-
if
|
258
|
+
if @field.field_value || @field.concrete_field_type == :list_box
|
258
259
|
canvas.save_graphics_state do
|
259
260
|
canvas.rectangle(padding, padding, rect.width - 2 * padding,
|
260
261
|
rect.height - 2 * padding).clip_path.end_path
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
end
|
269
|
-
cap_height = font.wrapped_font.cap_height * font.scaling_factor / 1000.0 *
|
270
|
-
style.font_size
|
271
|
-
y = padding + (rect.height - 2 * padding - cap_height) / 2.0
|
272
|
-
y = padding - style.scaled_font_descender if y < 0
|
273
|
-
fragment.draw(canvas, x, y)
|
262
|
+
if @field.concrete_field_type == :multiline_text_field
|
263
|
+
draw_multiline_text(canvas, rect, style, padding)
|
264
|
+
elsif @field.concrete_field_type == :list_box
|
265
|
+
draw_list_box(canvas, rect, style, padding)
|
266
|
+
else
|
267
|
+
draw_single_line_text(canvas, rect, style, padding)
|
268
|
+
end
|
274
269
|
end
|
275
270
|
end
|
276
271
|
end
|
277
272
|
end
|
278
273
|
|
279
274
|
alias create_combo_box_appearances create_text_appearances
|
275
|
+
alias create_list_box_appearances create_text_appearances
|
280
276
|
|
281
277
|
private
|
282
278
|
|
@@ -337,6 +333,13 @@ module HexaPDF
|
|
337
333
|
canvas.circle(rect.width / 2.0, rect.height / 2.0, [width / 2.0, height / 2.0].min)
|
338
334
|
else
|
339
335
|
canvas.rectangle(offset, offset, width, height)
|
336
|
+
if @field.concrete_field_type == :comb_text_field
|
337
|
+
cell_width = rect.width.to_f / @field[:MaxLen]
|
338
|
+
1.upto(@field[:MaxLen] - 1) do |i|
|
339
|
+
canvas.line(i * cell_width, border_style.width,
|
340
|
+
i * cell_width, border_style.width + height)
|
341
|
+
end
|
342
|
+
end
|
340
343
|
end
|
341
344
|
end
|
342
345
|
canvas.stroke
|
@@ -381,14 +384,115 @@ module HexaPDF
|
|
381
384
|
end
|
382
385
|
end
|
383
386
|
|
387
|
+
# Draws a single line of text inside the widget's rectangle.
|
388
|
+
def draw_single_line_text(canvas, rect, style, padding)
|
389
|
+
value = @field.field_value
|
390
|
+
fragment = HexaPDF::Layout::TextFragment.create(value, style)
|
391
|
+
|
392
|
+
if @field.concrete_field_type == :comb_text_field
|
393
|
+
unless @field.key?(:MaxLen)
|
394
|
+
raise HexaPDF::Error, "Missing or invalid dictionary field /MaxLen for comb text field"
|
395
|
+
end
|
396
|
+
new_items = []
|
397
|
+
cell_width = rect.width.to_f / @field[:MaxLen]
|
398
|
+
scaled_cell_width = cell_width / style.scaled_font_size.to_f
|
399
|
+
fragment.items.each_cons(2) do |a, b|
|
400
|
+
new_items << a << -(scaled_cell_width - a.width / 2.0 - b.width / 2.0)
|
401
|
+
end
|
402
|
+
new_items << fragment.items.last
|
403
|
+
fragment.items.replace(new_items)
|
404
|
+
fragment.clear_cache
|
405
|
+
# Adobe always seems to add 1 to the first offset...
|
406
|
+
x_offset = 1 + (cell_width - style.scaled_item_width(fragment.items[0])) / 2.0
|
407
|
+
x = case @field.text_alignment
|
408
|
+
when :left then x_offset
|
409
|
+
when :right then x_offset + cell_width * (@field[:MaxLen] - value.length)
|
410
|
+
when :center then x_offset + cell_width * ((@field[:MaxLen] - value.length) / 2)
|
411
|
+
end
|
412
|
+
else
|
413
|
+
# Adobe seems to be left/right-aligning based on twice the border width
|
414
|
+
x = case @field.text_alignment
|
415
|
+
when :left then 2 * padding
|
416
|
+
when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
|
417
|
+
when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
|
418
|
+
end
|
419
|
+
end
|
420
|
+
|
421
|
+
# Adobe seems to be vertically centering based on the cap height, if enough space is
|
422
|
+
# available
|
423
|
+
cap_height = style.font.wrapped_font.cap_height * style.font.scaling_factor / 1000.0 *
|
424
|
+
style.font_size
|
425
|
+
y = padding + (rect.height - 2 * padding - cap_height) / 2.0
|
426
|
+
y = padding - style.scaled_font_descender if y < 0
|
427
|
+
fragment.draw(canvas, x, y)
|
428
|
+
end
|
429
|
+
|
430
|
+
# Draws multiple lines of text inside the widget's rectangle.
|
431
|
+
def draw_multiline_text(canvas, rect, style, padding)
|
432
|
+
items = [Layout::TextFragment.create(@field.field_value, style)]
|
433
|
+
layouter = Layout::TextLayouter.new(style)
|
434
|
+
layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
|
435
|
+
|
436
|
+
result = nil
|
437
|
+
if style.font_size == 0 # need to auto-size text
|
438
|
+
style.font_size = 12 # Adobe seems to use this as starting point
|
439
|
+
style.clear_cache
|
440
|
+
loop do
|
441
|
+
result = layouter.fit(items, rect.width - 4 * padding, rect.height - 4 * padding)
|
442
|
+
break if result.status == :success || style.font_size <= 4 # don't make text too small
|
443
|
+
style.font_size -= 1
|
444
|
+
style.clear_cache
|
445
|
+
end
|
446
|
+
else
|
447
|
+
result = layouter.fit(items, rect.width - 4 * padding, 2**20)
|
448
|
+
end
|
449
|
+
|
450
|
+
unless result.lines.empty?
|
451
|
+
result.draw(canvas, 2 * padding, rect.height - 2 * padding - result.lines[0].height / 2.0)
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
# Draws the visible option items of the list box in the widget's rectangle.
|
456
|
+
def draw_list_box(canvas, rect, style, padding)
|
457
|
+
option_items = @field.option_items
|
458
|
+
top_index = @field.list_box_top_index
|
459
|
+
items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
|
460
|
+
|
461
|
+
indices = @field[:I] || []
|
462
|
+
value_indices = [@field.field_value].flatten.compact.map {|val| option_items.index(val) }
|
463
|
+
indices = value_indices if indices != value_indices
|
464
|
+
|
465
|
+
layouter = Layout::TextLayouter.new(style)
|
466
|
+
layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
|
467
|
+
result = layouter.fit(items, rect.width - 4 * padding, rect.height)
|
468
|
+
|
469
|
+
unless result.lines.empty?
|
470
|
+
top_gap = style.line_spacing.gap(result.lines[0], result.lines[0])
|
471
|
+
line_height = style.line_spacing.baseline_distance(result.lines[0], result.lines[0])
|
472
|
+
canvas.fill_color(153, 193, 218) # Adobe's color for selection highlighting
|
473
|
+
indices.map! {|i| rect.height - padding - (i - top_index + 1) * line_height }.each do |y|
|
474
|
+
next if y + line_height > rect.height || y + line_height < padding
|
475
|
+
canvas.rectangle(padding, y, rect.width - 2 * padding, line_height)
|
476
|
+
end
|
477
|
+
canvas.fill if canvas.graphics_object == :path
|
478
|
+
result.draw(canvas, 2 * padding, rect.height - padding - top_gap)
|
479
|
+
end
|
480
|
+
end
|
481
|
+
|
384
482
|
# Calculates the font size for text fields based on the font and font size of the default
|
385
483
|
# appearance string, the annotation rectangle and the border style.
|
386
484
|
def calculate_font_size(font, font_size, rect, border_style)
|
387
485
|
if font_size == 0
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
486
|
+
if @field.concrete_field_type == :multiline_text_field
|
487
|
+
0 # Handled by multiline drawing code
|
488
|
+
elsif @field.concrete_field_type == :list_box
|
489
|
+
12 # Seems to be Adobe's default
|
490
|
+
else
|
491
|
+
unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
|
492
|
+
font.scaling_factor / 1000.0
|
493
|
+
# The constant factor was found empirically by checking what Adobe Reader etc. do
|
494
|
+
(rect.height - 2 * border_style.width) / unit_font_size * 0.83
|
495
|
+
end
|
392
496
|
else
|
393
497
|
font_size
|
394
498
|
end
|