hexapdf 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +126 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/content/parser.rb +1 -1
  13. data/lib/hexapdf/dictionary.rb +4 -4
  14. data/lib/hexapdf/dictionary_fields.rb +1 -9
  15. data/lib/hexapdf/document.rb +41 -16
  16. data/lib/hexapdf/document/files.rb +0 -1
  17. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  18. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  19. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  20. data/lib/hexapdf/font/cmap.rb +1 -4
  21. data/lib/hexapdf/font/encoding/base.rb +8 -0
  22. data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
  23. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  24. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  25. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/layout/line.rb +1 -1
  28. data/lib/hexapdf/layout/style.rb +23 -23
  29. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  30. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  31. data/lib/hexapdf/object.rb +52 -25
  32. data/lib/hexapdf/parser.rb +87 -3
  33. data/lib/hexapdf/pdf_array.rb +11 -4
  34. data/lib/hexapdf/revisions.rb +29 -21
  35. data/lib/hexapdf/serializer.rb +1 -1
  36. data/lib/hexapdf/task/optimize.rb +6 -4
  37. data/lib/hexapdf/tokenizer.rb +4 -3
  38. data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
  39. data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
  40. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  41. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  42. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  43. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  44. data/lib/hexapdf/type/actions/uri.rb +3 -2
  45. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  46. data/lib/hexapdf/type/catalog.rb +2 -2
  47. data/lib/hexapdf/type/cid_font.rb +1 -1
  48. data/lib/hexapdf/type/file_specification.rb +1 -1
  49. data/lib/hexapdf/type/font.rb +1 -1
  50. data/lib/hexapdf/type/font_simple.rb +4 -2
  51. data/lib/hexapdf/type/font_true_type.rb +6 -2
  52. data/lib/hexapdf/type/font_type0.rb +4 -4
  53. data/lib/hexapdf/type/form.rb +15 -2
  54. data/lib/hexapdf/type/image.rb +2 -2
  55. data/lib/hexapdf/type/page.rb +37 -13
  56. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  57. data/lib/hexapdf/type/resources.rb +1 -0
  58. data/lib/hexapdf/type/trailer.rb +2 -3
  59. data/lib/hexapdf/utils/object_hash.rb +0 -1
  60. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  61. data/lib/hexapdf/version.rb +1 -1
  62. data/test/hexapdf/common_tokenizer_tests.rb +6 -1
  63. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  64. data/test/hexapdf/content/test_canvas.rb +3 -3
  65. data/test/hexapdf/content/test_color_space.rb +1 -1
  66. data/test/hexapdf/encryption/test_aes.rb +4 -4
  67. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  68. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  69. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  70. data/test/hexapdf/font/encoding/test_base.rb +10 -0
  71. data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
  72. data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
  73. data/test/hexapdf/layout/test_style.rb +1 -1
  74. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  75. data/test/hexapdf/test_configuration.rb +2 -2
  76. data/test/hexapdf/test_dictionary.rb +3 -1
  77. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  78. data/test/hexapdf/test_document.rb +18 -10
  79. data/test/hexapdf/test_object.rb +71 -26
  80. data/test/hexapdf/test_parser.rb +159 -53
  81. data/test/hexapdf/test_pdf_array.rb +8 -1
  82. data/test/hexapdf/test_revisions.rb +35 -0
  83. data/test/hexapdf/test_writer.rb +2 -2
  84. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
  85. data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
  86. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  87. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  88. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  89. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  90. data/test/hexapdf/type/test_font_simple.rb +2 -1
  91. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  92. data/test/hexapdf/type/test_form.rb +26 -1
  93. data/test/hexapdf/type/test_page.rb +45 -7
  94. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  95. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  96. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  97. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  98. data/test/test_helper.rb +2 -0
  99. metadata +6 -11
@@ -68,9 +68,10 @@ module HexaPDF
68
68
  text_fragment.clear_cache
69
69
  end
70
70
  if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
71
- if font.font_type == :TrueType
71
+ case font.font_type
72
+ when :TrueType
72
73
  process_true_type_kerning(text_fragment)
73
- elsif font.font_type == :Type1
74
+ when :Type1
74
75
  process_type1_kerning(text_fragment)
75
76
  end
76
77
  text_fragment.clear_cache
@@ -122,9 +122,6 @@ module HexaPDF
122
122
 
123
123
  include Comparable
124
124
 
125
- # A list of classes whose objects cannot be duplicated.
126
- NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
127
-
128
125
  # :call-seq:
129
126
  # HexaPDF::Object.deep_copy(object) -> copy
130
127
  #
@@ -139,8 +136,6 @@ module HexaPDF
139
136
  (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
140
137
  when HexaPDF::Reference
141
138
  object
142
- when *NOT_DUPLICATABLE_CLASSES
143
- object
144
139
  else
145
140
  object.dup
146
141
  end
@@ -251,29 +246,31 @@ module HexaPDF
251
246
  end
252
247
 
253
248
  # :call-seq:
254
- # obj.validate(auto_correct: true) -> true or false
255
- # obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
249
+ # obj.validate(auto_correct: true) -> true or false
250
+ # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
256
251
  #
257
- # Validates the object and, optionally, corrects problems when the option +auto_correct+ is set.
258
- # The validation routine itself has to be implemented in the #perform_validation method - see
259
- # its documentation for more information.
252
+ # Validates the object, optionally corrects problems when the option +auto_correct+ is set and
253
+ # returns +true+ if the object is deemed valid and +false+ otherwise.
260
254
  #
261
255
  # If a block is given, it is called on validation problems with a problem description and
262
- # whether the problem is correctable.
256
+ # whether the problem is automatically correctable. The third argument to the block is usually
257
+ # this object but may be another object if during auto-correction a new object was created and
258
+ # validated.
263
259
  #
264
- # Returns +true+ if the object is deemed valid and +false+ otherwise.
260
+ # The validation routine itself has to be implemented in the #perform_validation method - see
261
+ # its documentation for more information.
265
262
  #
266
263
  # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
267
264
  # currently implement the full PDF spec. However, if the return value is +false+, there is
268
265
  # certainly a problem!
269
266
  def validate(auto_correct: true)
270
- catch do |catch_tag|
271
- perform_validation do |msg, correctable|
272
- yield(msg, correctable) if block_given?
273
- throw(catch_tag, false) unless auto_correct && correctable
274
- end
275
- true
267
+ result = true
268
+ perform_validation do |msg, correctable, object|
269
+ yield(msg, correctable, object || self) if block_given?
270
+ result = false unless correctable
271
+ return false unless auto_correct
276
272
  end
273
+ result
277
274
  end
278
275
 
279
276
  # Makes a deep copy of the source PDF object and resets the object identifier.
@@ -287,6 +284,28 @@ module HexaPDF
287
284
  obj
288
285
  end
289
286
 
287
+ # Caches and returns the given +value+ or the value of the block under the given cache key. If
288
+ # there is already a cached value for the key and +update+ is +false+, it is just returned.
289
+ #
290
+ # Set +update+ to +true+ to force an update of the cached value.
291
+ #
292
+ # This uses Document#cache internally.
293
+ def cache(key, value = Document::UNSET, update: false, &block)
294
+ document.cache(@data, key, value, update: update, &block)
295
+ end
296
+
297
+ # Returns +true+ if there is a cached value for the given key.
298
+ #
299
+ # This uses Document#cached? internally.
300
+ def cached?(key)
301
+ document.cached?(@data, key)
302
+ end
303
+
304
+ # Clears the cache for this object.
305
+ def clear_cache
306
+ document.clear_cache(@data)
307
+ end
308
+
290
309
  # Compares this object to another object.
291
310
  #
292
311
  # If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
@@ -339,17 +358,25 @@ module HexaPDF
339
358
  # are also performed!
340
359
  #
341
360
  # When the validation routine finds that the object is invalid, it has to yield a problem
342
- # description and whether the problem can be corrected. After yielding, the problem has to be
343
- # corrected which poses no problem because the #validate method makes sure that the yield only
344
- # returns if the problem is actually correctable and if it should be corrected.
361
+ # description and whether the problem can be corrected. An optional third argument may contain
362
+ # the object that gets validated if it is different from this object (may happen when
363
+ # auto-correction is used).
345
364
  #
346
- # Here is a sample validation routine for stream objects:
365
+ # After yielding, the problem has to be corrected if it is correctable. If it is not correctable
366
+ # and not correcting would lead to exceptions the method has to return early.
367
+ #
368
+ # Here is a sample validation routine for a dictionary object type:
347
369
  #
348
370
  # def perform_validation
349
371
  # super
350
- # unless value.kind_of?(Hash)
351
- # yield("A stream object needs a Hash as value")
352
- # self.value = {}
372
+ #
373
+ # if value[:SomeKey].length != 7
374
+ # yield("Length of /SomeKey is invalid")
375
+ # # No need to return early here because following check doesn't rely on /SomeKey
376
+ # end
377
+ #
378
+ # if value[:OtherKey] % 2 == 0
379
+ # yield("/OtherKey needs to contain an odd number of elements")
353
380
  # end
354
381
  # end
355
382
  def perform_validation(&block)
@@ -59,6 +59,7 @@ module HexaPDF
59
59
  @tokenizer = Tokenizer.new(io)
60
60
  @document = document
61
61
  @object_stream_data = {}
62
+ @reconstructed_revision = nil
62
63
  retrieve_pdf_header_offset_and_version
63
64
  end
64
65
 
@@ -86,6 +87,8 @@ module HexaPDF
86
87
  end
87
88
 
88
89
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
+ rescue HexaPDF::MalformedPDFError
91
+ reconstructed_revision.object(xref_entry)
89
92
  end
90
93
 
91
94
  # Parses the indirect object at the specified offset.
@@ -235,14 +238,14 @@ module HexaPDF
235
238
  @tokenizer.skip_whitespace
236
239
  start.upto(start + number_of_entries - 1) do |oid|
237
240
  pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
238
- maybe_raise("Invalid cross-reference subsection entry", pos: @tokenizer.pos,
239
- force: matched_size == 20)
241
+ maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
242
+ force: !matched_size)
240
243
  end
241
244
  if xref.entry?(oid)
242
245
  next
243
246
  elsif type == 'n'
244
247
  if pos == 0 || gen > 65535
245
- maybe_raise("Invalid in use cross-reference entry in cross-reference section",
248
+ maybe_raise("Invalid in use cross-reference entry",
246
249
  pos: @tokenizer.pos)
247
250
  xref.add_free_entry(oid, gen)
248
251
  else
@@ -264,6 +267,27 @@ module HexaPDF
264
267
  raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
265
268
  end
266
269
 
270
+ unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
271
+ first_entry = xref[xref.oids[0]]
272
+ test_entry = xref[xref.oids[-1]]
273
+ @tokenizer.pos = test_entry.pos + @header_offset
274
+ test_oid = @tokenizer.next_token
275
+ first_oid = first_entry.oid
276
+
277
+ force_failure = !first_entry.free? || first_entry.gen != 65535 ||
278
+ !test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
279
+ maybe_raise("Main cross-reference section has invalid numbering",
280
+ pos: offset + @header_offset, force: force_failure)
281
+
282
+ new_xref = XRefSection.new
283
+ xref.oids.each do |oid|
284
+ entry = xref[oid]
285
+ entry.oid -= first_oid
286
+ new_xref.send(:[]=, entry.oid, entry.gen, entry)
287
+ end
288
+ xref = new_xref
289
+ end
290
+
267
291
  [xref, trailer]
268
292
  end
269
293
 
@@ -313,6 +337,11 @@ module HexaPDF
313
337
  @startxref_offset = lines[eof_index - 1].to_i
314
338
  end
315
339
 
340
+ # Returns the reconstructed revision.
341
+ def reconstructed_revision
342
+ @reconstructed_revision ||= reconstruct_revision
343
+ end
344
+
316
345
  # Returns the PDF version number that is stored in the file header.
317
346
  #
318
347
  # See: PDF1.7 s7.5.2
@@ -338,6 +367,61 @@ module HexaPDF
338
367
  @header_version = $1
339
368
  end
340
369
 
370
+ # Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
371
+ # file and returning a Revision object for loading the found objects.
372
+ #
373
+ # If the file contains multiple cross-reference sections, all objects will be put into a single
374
+ # cross-reference table, later objects overwriting prior ones.
375
+ def reconstruct_revision
376
+ raise unless @document.config['parser.try_xref_reconstruction']
377
+ msg = "#{$!} - trying cross-reference table reconstruction"
378
+ @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
379
+
380
+ xref = XRefSection.new
381
+ @tokenizer.pos = 0
382
+ while true
383
+ @tokenizer.skip_whitespace
384
+ pos = @tokenizer.pos
385
+ @tokenizer.scan_until(/(\n|\r\n?)+/)
386
+ next_new_line_pos = @tokenizer.pos
387
+ @tokenizer.pos = pos
388
+
389
+ token = @tokenizer.next_token rescue nil
390
+ if token.kind_of?(Integer)
391
+ gen = @tokenizer.next_token rescue nil
392
+ tok = @tokenizer.next_token rescue nil
393
+ if @tokenizer.pos > next_new_line_pos
394
+ @tokenizer.pos = next_new_line_pos
395
+ elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
396
+ xref.add_in_use_entry(token, gen, pos)
397
+ @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
398
+ end
399
+ elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
400
+ obj = @tokenizer.next_object rescue nil
401
+ # Use last trailer found in case of multiple revisions but use first trailer in case of
402
+ # linearized file.
403
+ trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
404
+ elsif token == Tokenizer::NO_MORE_TOKENS
405
+ break
406
+ else
407
+ @tokenizer.pos = next_new_line_pos
408
+ end
409
+ end
410
+
411
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
412
+ if !trailer || trailer.empty?
413
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
414
+ end
415
+
416
+ loader = lambda do |xref_entry|
417
+ obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
418
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
419
+ end
420
+
421
+ Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
422
+ loader: loader)
423
+ end
424
+
341
425
  # Raises a HexaPDF::MalformedPDFError with the given message and source position.
342
426
  def raise_malformed(msg, pos: nil)
343
427
  raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
@@ -83,7 +83,7 @@ module HexaPDF
83
83
  # subclasses) and the given data has not (including subclasses), the data is stored inside the
84
84
  # HexaPDF::Object.
85
85
  def []=(index, data)
86
- if value[index].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
86
+ if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
87
87
  !data.kind_of?(HexaPDF::Reference)
88
88
  value[index].value = data
89
89
  else
@@ -113,6 +113,13 @@ module HexaPDF
113
113
  value.delete_at(index)
114
114
  end
115
115
 
116
+ # Deletes all values from the PDFArray that are equal to the given object.
117
+ #
118
+ # Returns the last deleted item, or +nil+ if no matching item is found.
119
+ def delete(object)
120
+ value.delete(object)
121
+ end
122
+
116
123
  # :call-seq:
117
124
  # array.slice!(index) -> obj or nil
118
125
  # array.slice!(start, length) -> new_array or nil
@@ -174,9 +181,9 @@ module HexaPDF
174
181
  self
175
182
  end
176
183
 
177
- # Returns a duplicate of the underlying array.
184
+ # Returns an array containing the preprocessed values (like in #[]).
178
185
  def to_ary
179
- value.dup
186
+ each.to_a
180
187
  end
181
188
 
182
189
  private
@@ -196,7 +203,7 @@ module HexaPDF
196
203
  data = document.deref(data)
197
204
  value[index] = data if index
198
205
  end
199
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
206
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
200
207
  data = data.value
201
208
  end
202
209
  data
@@ -67,30 +67,38 @@ module HexaPDF
67
67
  object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
68
68
 
69
69
  revisions = []
70
- xref_section, trailer = parser.load_revision(parser.startxref_offset)
71
- revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
72
- xref_section: xref_section, loader: object_loader)
73
- seen_xref_offsets = {parser.startxref_offset => true}
74
-
75
- while (prev = revisions[0].trailer.value[:Prev]) &&
76
- !seen_xref_offsets.key?(prev)
77
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
78
- # should be direct. Adobe's POV is followed here. Same with :XRefStm.
79
- xref_section, trailer = parser.load_revision(prev)
80
- seen_xref_offsets[prev] = true
81
-
82
- stm = revisions[0].trailer.value[:XRefStm]
83
- if stm && !seen_xref_offsets.key?(stm)
84
- stm_xref_section, = parser.load_revision(stm)
85
- xref_section.merge!(stm_xref_section)
86
- seen_xref_offsets[stm] = true
70
+ begin
71
+ xref_section, trailer = parser.load_revision(parser.startxref_offset)
72
+ revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
73
+ xref_section: xref_section, loader: object_loader)
74
+ seen_xref_offsets = {parser.startxref_offset => true}
75
+
76
+ while (prev = revisions[0].trailer.value[:Prev]) &&
77
+ !seen_xref_offsets.key?(prev)
78
+ # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # should be direct. Adobe's POV is followed here. Same with :XRefStm.
80
+ xref_section, trailer = parser.load_revision(prev)
81
+ seen_xref_offsets[prev] = true
82
+
83
+ stm = revisions[0].trailer.value[:XRefStm]
84
+ if stm && !seen_xref_offsets.key?(stm)
85
+ stm_xref_section, = parser.load_revision(stm)
86
+ xref_section.merge!(stm_xref_section)
87
+ seen_xref_offsets[stm] = true
88
+ end
89
+
90
+ revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
91
+ xref_section: xref_section, loader: object_loader))
87
92
  end
88
-
89
- revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
90
- xref_section: xref_section, loader: object_loader))
93
+ rescue HexaPDF::MalformedPDFError
94
+ reconstructed_revision = parser.reconstructed_revision
95
+ unless revisions.empty?
96
+ reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
97
+ end
98
+ revisions << reconstructed_revision
91
99
  end
92
100
 
93
- document.version = parser.file_header_version
101
+ document.version = parser.file_header_version rescue '1.0'
94
102
  new(document, initial_revisions: revisions, parser: parser)
95
103
  end
96
104
 
@@ -243,7 +243,7 @@ module HexaPDF
243
243
  else
244
244
  obj.dup
245
245
  end
246
- obj.gsub!(/[\(\)\\\r]/n, STRING_ESCAPE_MAP)
246
+ obj.gsub!(/[()\\\r]/n, STRING_ESCAPE_MAP)
247
247
  "(#{obj})"
248
248
  end
249
249
 
@@ -129,9 +129,10 @@ module HexaPDF
129
129
  xref_stream = false
130
130
  objects_to_delete = []
131
131
  rev.each do |obj|
132
- if obj.type == :ObjStm
132
+ case obj.type
133
+ when :ObjStm
133
134
  objects_to_delete << obj
134
- elsif obj.type == :XRef
135
+ when :XRef
135
136
  xref_stream = true
136
137
  objects_to_delete << obj if xref_streams == :delete
137
138
  else
@@ -150,9 +151,10 @@ module HexaPDF
150
151
  objstms = [doc.wrap({Type: :ObjStm})]
151
152
  old_objstms = []
152
153
  rev.each do |obj|
153
- if obj.type == :XRef
154
+ case obj.type
155
+ when :XRef
154
156
  xref_stream = true
155
- elsif obj.type == :ObjStm
157
+ when :ObjStm
156
158
  old_objstms << obj
157
159
  end
158
160
  delete_fields_with_defaults(obj)
@@ -249,17 +249,18 @@ module HexaPDF
249
249
  #
250
250
  # See: PDF1.7 s7.3.3
251
251
  def parse_number
252
- if (val = @ss.scan(/[+-]?\d++(?!\.)/))
252
+ val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
253
+ if val.match?(/\A[+-]?\d++(?!\.)\z/)
253
254
  tmp = val.to_i
254
255
  # Handle object references, see PDF1.7 s7.3.10
255
256
  prepare_string_scanner(10)
256
257
  tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
257
258
  tmp
258
- elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
259
+ elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
259
260
  val << '0' if val.getbyte(-1) == 46 # dot '.'
260
261
  Float(val)
261
262
  else
262
- parse_keyword
263
+ TOKEN_CACHE[val] # val is keyword
263
264
  end
264
265
  end
265
266
 
@@ -37,6 +37,7 @@
37
37
  require 'hexapdf/error'
38
38
  require 'hexapdf/layout/style'
39
39
  require 'hexapdf/layout/text_fragment'
40
+ require 'hexapdf/layout/text_layouter'
40
41
 
41
42
  module HexaPDF
42
43
  module Type
@@ -80,14 +81,8 @@ module HexaPDF
80
81
  else
81
82
  raise HexaPDF::Error, "Unsupported button field type"
82
83
  end
83
- when :Tx
84
+ when :Tx, :Ch
84
85
  create_text_appearances
85
- when :Ch
86
- if @field.combo_box?
87
- create_text_appearances
88
- else
89
- raise HexaPDF::Error, "List box not supported yet"
90
- end
91
86
  else
92
87
  raise HexaPDF::Error, "Unsupported field type #{@field.field_type}"
93
88
  end
@@ -206,6 +201,10 @@ module HexaPDF
206
201
  # * The font, font size and font color are taken from the associated field's default
207
202
  # appearance string. See VariableTextField.
208
203
  #
204
+ # If the font is not usable by HexaPDF (which may be due to a variety of reasons, e.g. no
205
+ # associated information in the form's default resources), the font specified by the
206
+ # configuration option +acro_form.fallback_font+ will be used.
207
+ #
209
208
  # * The widget's rectangle /Rect must be defined. If the height is zero, it is auto-sized
210
209
  # based on the font size. If additionally the font size is zero, a font size of
211
210
  # +acro_form.default_font_size+ is used. If the width is zero, the
@@ -222,7 +221,7 @@ module HexaPDF
222
221
  def create_text_appearances
223
222
  font_name, font_size = @field.parse_default_appearance_string
224
223
  default_resources = @document.acro_form.default_resources
225
- font = default_resources.font(font_name).font_wrapper
224
+ font = default_resources.font(font_name).font_wrapper rescue nil
226
225
  unless font
227
226
  fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
228
227
  if fallback_font_name
@@ -245,38 +244,35 @@ module HexaPDF
245
244
  rect.height = style.scaled_y_max - style.scaled_y_min + 2 * padding
246
245
  end
247
246
 
248
- form = (@widget[:AP] ||= {})[:N] = @document.add({Type: :XObject, Subtype: :Form,
249
- BBox: [0, 0, rect.width, rect.height]})
247
+ form = (@widget[:AP] ||= {})[:N] ||= @document.add({Type: :XObject, Subtype: :Form})
248
+ form.value.replace({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
249
+ form.contents = ''
250
250
  form[:Resources] = HexaPDF::Object.deep_copy(default_resources)
251
251
 
252
252
  canvas = form.canvas
253
253
  apply_background_and_border(border_style, canvas)
254
254
  style.font_size = calculate_font_size(font, font_size, rect, border_style)
255
+ style.clear_cache
255
256
 
256
257
  canvas.marked_content_sequence(:Tx) do
257
- if (value = @field.field_value)
258
+ if @field.field_value || @field.concrete_field_type == :list_box
258
259
  canvas.save_graphics_state do
259
260
  canvas.rectangle(padding, padding, rect.width - 2 * padding,
260
261
  rect.height - 2 * padding).clip_path.end_path
261
- fragment = HexaPDF::Layout::TextFragment.create(value, style)
262
- # Adobe seems to be left/right-aligning based on twice the border width and
263
- # vertically centering based on the cap height, if enough space is available
264
- x = case @field.text_alignment
265
- when :left then 2 * padding
266
- when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
267
- when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
268
- end
269
- cap_height = font.wrapped_font.cap_height * font.scaling_factor / 1000.0 *
270
- style.font_size
271
- y = padding + (rect.height - 2 * padding - cap_height) / 2.0
272
- y = padding - style.scaled_font_descender if y < 0
273
- fragment.draw(canvas, x, y)
262
+ if @field.concrete_field_type == :multiline_text_field
263
+ draw_multiline_text(canvas, rect, style, padding)
264
+ elsif @field.concrete_field_type == :list_box
265
+ draw_list_box(canvas, rect, style, padding)
266
+ else
267
+ draw_single_line_text(canvas, rect, style, padding)
268
+ end
274
269
  end
275
270
  end
276
271
  end
277
272
  end
278
273
 
279
274
  alias create_combo_box_appearances create_text_appearances
275
+ alias create_list_box_appearances create_text_appearances
280
276
 
281
277
  private
282
278
 
@@ -337,6 +333,13 @@ module HexaPDF
337
333
  canvas.circle(rect.width / 2.0, rect.height / 2.0, [width / 2.0, height / 2.0].min)
338
334
  else
339
335
  canvas.rectangle(offset, offset, width, height)
336
+ if @field.concrete_field_type == :comb_text_field
337
+ cell_width = rect.width.to_f / @field[:MaxLen]
338
+ 1.upto(@field[:MaxLen] - 1) do |i|
339
+ canvas.line(i * cell_width, border_style.width,
340
+ i * cell_width, border_style.width + height)
341
+ end
342
+ end
340
343
  end
341
344
  end
342
345
  canvas.stroke
@@ -381,14 +384,115 @@ module HexaPDF
381
384
  end
382
385
  end
383
386
 
387
+ # Draws a single line of text inside the widget's rectangle.
388
+ def draw_single_line_text(canvas, rect, style, padding)
389
+ value = @field.field_value
390
+ fragment = HexaPDF::Layout::TextFragment.create(value, style)
391
+
392
+ if @field.concrete_field_type == :comb_text_field
393
+ unless @field.key?(:MaxLen)
394
+ raise HexaPDF::Error, "Missing or invalid dictionary field /MaxLen for comb text field"
395
+ end
396
+ new_items = []
397
+ cell_width = rect.width.to_f / @field[:MaxLen]
398
+ scaled_cell_width = cell_width / style.scaled_font_size.to_f
399
+ fragment.items.each_cons(2) do |a, b|
400
+ new_items << a << -(scaled_cell_width - a.width / 2.0 - b.width / 2.0)
401
+ end
402
+ new_items << fragment.items.last
403
+ fragment.items.replace(new_items)
404
+ fragment.clear_cache
405
+ # Adobe always seems to add 1 to the first offset...
406
+ x_offset = 1 + (cell_width - style.scaled_item_width(fragment.items[0])) / 2.0
407
+ x = case @field.text_alignment
408
+ when :left then x_offset
409
+ when :right then x_offset + cell_width * (@field[:MaxLen] - value.length)
410
+ when :center then x_offset + cell_width * ((@field[:MaxLen] - value.length) / 2)
411
+ end
412
+ else
413
+ # Adobe seems to be left/right-aligning based on twice the border width
414
+ x = case @field.text_alignment
415
+ when :left then 2 * padding
416
+ when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
417
+ when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
418
+ end
419
+ end
420
+
421
+ # Adobe seems to be vertically centering based on the cap height, if enough space is
422
+ # available
423
+ cap_height = style.font.wrapped_font.cap_height * style.font.scaling_factor / 1000.0 *
424
+ style.font_size
425
+ y = padding + (rect.height - 2 * padding - cap_height) / 2.0
426
+ y = padding - style.scaled_font_descender if y < 0
427
+ fragment.draw(canvas, x, y)
428
+ end
429
+
430
+ # Draws multiple lines of text inside the widget's rectangle.
431
+ def draw_multiline_text(canvas, rect, style, padding)
432
+ items = [Layout::TextFragment.create(@field.field_value, style)]
433
+ layouter = Layout::TextLayouter.new(style)
434
+ layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
435
+
436
+ result = nil
437
+ if style.font_size == 0 # need to auto-size text
438
+ style.font_size = 12 # Adobe seems to use this as starting point
439
+ style.clear_cache
440
+ loop do
441
+ result = layouter.fit(items, rect.width - 4 * padding, rect.height - 4 * padding)
442
+ break if result.status == :success || style.font_size <= 4 # don't make text too small
443
+ style.font_size -= 1
444
+ style.clear_cache
445
+ end
446
+ else
447
+ result = layouter.fit(items, rect.width - 4 * padding, 2**20)
448
+ end
449
+
450
+ unless result.lines.empty?
451
+ result.draw(canvas, 2 * padding, rect.height - 2 * padding - result.lines[0].height / 2.0)
452
+ end
453
+ end
454
+
455
+ # Draws the visible option items of the list box in the widget's rectangle.
456
+ def draw_list_box(canvas, rect, style, padding)
457
+ option_items = @field.option_items
458
+ top_index = @field.list_box_top_index
459
+ items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
460
+
461
+ indices = @field[:I] || []
462
+ value_indices = [@field.field_value].flatten.compact.map {|val| option_items.index(val) }
463
+ indices = value_indices if indices != value_indices
464
+
465
+ layouter = Layout::TextLayouter.new(style)
466
+ layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
467
+ result = layouter.fit(items, rect.width - 4 * padding, rect.height)
468
+
469
+ unless result.lines.empty?
470
+ top_gap = style.line_spacing.gap(result.lines[0], result.lines[0])
471
+ line_height = style.line_spacing.baseline_distance(result.lines[0], result.lines[0])
472
+ canvas.fill_color(153, 193, 218) # Adobe's color for selection highlighting
473
+ indices.map! {|i| rect.height - padding - (i - top_index + 1) * line_height }.each do |y|
474
+ next if y + line_height > rect.height || y + line_height < padding
475
+ canvas.rectangle(padding, y, rect.width - 2 * padding, line_height)
476
+ end
477
+ canvas.fill if canvas.graphics_object == :path
478
+ result.draw(canvas, 2 * padding, rect.height - padding - top_gap)
479
+ end
480
+ end
481
+
384
482
  # Calculates the font size for text fields based on the font and font size of the default
385
483
  # appearance string, the annotation rectangle and the border style.
386
484
  def calculate_font_size(font, font_size, rect, border_style)
387
485
  if font_size == 0
388
- unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
389
- font.scaling_factor / 1000.0
390
- # The constant factor was found empirically by checking what Adobe Reader etc. do
391
- (rect.height - 2 * border_style.width) / unit_font_size * 0.83
486
+ if @field.concrete_field_type == :multiline_text_field
487
+ 0 # Handled by multiline drawing code
488
+ elsif @field.concrete_field_type == :list_box
489
+ 12 # Seems to be Adobe's default
490
+ else
491
+ unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
492
+ font.scaling_factor / 1000.0
493
+ # The constant factor was found empirically by checking what Adobe Reader etc. do
494
+ (rect.height - 2 * border_style.width) / unit_font_size * 0.83
495
+ end
392
496
  else
393
497
  font_size
394
498
  end