hexapdf 0.12.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +126 -0
  3. data/examples/019-acro_form.rb +41 -4
  4. data/lib/hexapdf/cli/command.rb +4 -2
  5. data/lib/hexapdf/cli/image2pdf.rb +2 -1
  6. data/lib/hexapdf/cli/info.rb +51 -2
  7. data/lib/hexapdf/cli/inspect.rb +30 -8
  8. data/lib/hexapdf/cli/merge.rb +1 -1
  9. data/lib/hexapdf/cli/split.rb +74 -14
  10. data/lib/hexapdf/configuration.rb +15 -0
  11. data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
  12. data/lib/hexapdf/content/parser.rb +1 -1
  13. data/lib/hexapdf/dictionary.rb +4 -4
  14. data/lib/hexapdf/dictionary_fields.rb +1 -9
  15. data/lib/hexapdf/document.rb +41 -16
  16. data/lib/hexapdf/document/files.rb +0 -1
  17. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  18. data/lib/hexapdf/encryption/security_handler.rb +1 -0
  19. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
  20. data/lib/hexapdf/font/cmap.rb +1 -4
  21. data/lib/hexapdf/font/encoding/base.rb +8 -0
  22. data/lib/hexapdf/font/encoding/difference_encoding.rb +6 -0
  23. data/lib/hexapdf/font/true_type/table/head.rb +1 -0
  24. data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
  25. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  26. data/lib/hexapdf/image_loader/png.rb +3 -2
  27. data/lib/hexapdf/layout/line.rb +1 -1
  28. data/lib/hexapdf/layout/style.rb +23 -23
  29. data/lib/hexapdf/layout/text_layouter.rb +2 -2
  30. data/lib/hexapdf/layout/text_shaper.rb +3 -2
  31. data/lib/hexapdf/object.rb +52 -25
  32. data/lib/hexapdf/parser.rb +87 -3
  33. data/lib/hexapdf/pdf_array.rb +11 -4
  34. data/lib/hexapdf/revisions.rb +29 -21
  35. data/lib/hexapdf/serializer.rb +1 -1
  36. data/lib/hexapdf/task/optimize.rb +6 -4
  37. data/lib/hexapdf/tokenizer.rb +4 -3
  38. data/lib/hexapdf/type/acro_form/appearance_generator.rb +132 -28
  39. data/lib/hexapdf/type/acro_form/button_field.rb +21 -13
  40. data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
  41. data/lib/hexapdf/type/acro_form/field.rb +35 -5
  42. data/lib/hexapdf/type/acro_form/form.rb +139 -14
  43. data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
  44. data/lib/hexapdf/type/actions/uri.rb +3 -2
  45. data/lib/hexapdf/type/annotations/widget.rb +3 -4
  46. data/lib/hexapdf/type/catalog.rb +2 -2
  47. data/lib/hexapdf/type/cid_font.rb +1 -1
  48. data/lib/hexapdf/type/file_specification.rb +1 -1
  49. data/lib/hexapdf/type/font.rb +1 -1
  50. data/lib/hexapdf/type/font_simple.rb +4 -2
  51. data/lib/hexapdf/type/font_true_type.rb +6 -2
  52. data/lib/hexapdf/type/font_type0.rb +4 -4
  53. data/lib/hexapdf/type/form.rb +15 -2
  54. data/lib/hexapdf/type/image.rb +2 -2
  55. data/lib/hexapdf/type/page.rb +37 -13
  56. data/lib/hexapdf/type/page_tree_node.rb +29 -5
  57. data/lib/hexapdf/type/resources.rb +1 -0
  58. data/lib/hexapdf/type/trailer.rb +2 -3
  59. data/lib/hexapdf/utils/object_hash.rb +0 -1
  60. data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
  61. data/lib/hexapdf/version.rb +1 -1
  62. data/test/hexapdf/common_tokenizer_tests.rb +6 -1
  63. data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
  64. data/test/hexapdf/content/test_canvas.rb +3 -3
  65. data/test/hexapdf/content/test_color_space.rb +1 -1
  66. data/test/hexapdf/encryption/test_aes.rb +4 -4
  67. data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
  68. data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
  69. data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
  70. data/test/hexapdf/font/encoding/test_base.rb +10 -0
  71. data/test/hexapdf/font/encoding/test_difference_encoding.rb +8 -0
  72. data/test/hexapdf/font/test_type1_wrapper.rb +4 -3
  73. data/test/hexapdf/layout/test_style.rb +1 -1
  74. data/test/hexapdf/layout/test_text_layouter.rb +12 -5
  75. data/test/hexapdf/test_configuration.rb +2 -2
  76. data/test/hexapdf/test_dictionary.rb +3 -1
  77. data/test/hexapdf/test_dictionary_fields.rb +2 -2
  78. data/test/hexapdf/test_document.rb +18 -10
  79. data/test/hexapdf/test_object.rb +71 -26
  80. data/test/hexapdf/test_parser.rb +159 -53
  81. data/test/hexapdf/test_pdf_array.rb +8 -1
  82. data/test/hexapdf/test_revisions.rb +35 -0
  83. data/test/hexapdf/test_writer.rb +2 -2
  84. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +296 -38
  85. data/test/hexapdf/type/acro_form/test_button_field.rb +22 -2
  86. data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
  87. data/test/hexapdf/type/acro_form/test_field.rb +39 -0
  88. data/test/hexapdf/type/acro_form/test_form.rb +87 -15
  89. data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
  90. data/test/hexapdf/type/test_font_simple.rb +2 -1
  91. data/test/hexapdf/type/test_font_true_type.rb +6 -0
  92. data/test/hexapdf/type/test_form.rb +26 -1
  93. data/test/hexapdf/type/test_page.rb +45 -7
  94. data/test/hexapdf/type/test_page_tree_node.rb +42 -0
  95. data/test/hexapdf/utils/test_bit_field.rb +2 -0
  96. data/test/hexapdf/utils/test_object_hash.rb +5 -0
  97. data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
  98. data/test/test_helper.rb +2 -0
  99. metadata +6 -11
@@ -68,9 +68,10 @@ module HexaPDF
68
68
  text_fragment.clear_cache
69
69
  end
70
70
  if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
71
- if font.font_type == :TrueType
71
+ case font.font_type
72
+ when :TrueType
72
73
  process_true_type_kerning(text_fragment)
73
- elsif font.font_type == :Type1
74
+ when :Type1
74
75
  process_type1_kerning(text_fragment)
75
76
  end
76
77
  text_fragment.clear_cache
@@ -122,9 +122,6 @@ module HexaPDF
122
122
 
123
123
  include Comparable
124
124
 
125
- # A list of classes whose objects cannot be duplicated.
126
- NOT_DUPLICATABLE_CLASSES = [NilClass, FalseClass, TrueClass, Symbol, Integer, Float].freeze
127
-
128
125
  # :call-seq:
129
126
  # HexaPDF::Object.deep_copy(object) -> copy
130
127
  #
@@ -139,8 +136,6 @@ module HexaPDF
139
136
  (object.indirect? || object.must_be_indirect? ? object : deep_copy(object.value))
140
137
  when HexaPDF::Reference
141
138
  object
142
- when *NOT_DUPLICATABLE_CLASSES
143
- object
144
139
  else
145
140
  object.dup
146
141
  end
@@ -251,29 +246,31 @@ module HexaPDF
251
246
  end
252
247
 
253
248
  # :call-seq:
254
- # obj.validate(auto_correct: true) -> true or false
255
- # obj.validate(auto_correct: true) {|msg, correctable| block } -> true or false
249
+ # obj.validate(auto_correct: true) -> true or false
250
+ # obj.validate(auto_correct: true) {|msg, correctable, obj| block } -> true or false
256
251
  #
257
- # Validates the object and, optionally, corrects problems when the option +auto_correct+ is set.
258
- # The validation routine itself has to be implemented in the #perform_validation method - see
259
- # its documentation for more information.
252
+ # Validates the object, optionally corrects problems when the option +auto_correct+ is set and
253
+ # returns +true+ if the object is deemed valid and +false+ otherwise.
260
254
  #
261
255
  # If a block is given, it is called on validation problems with a problem description and
262
- # whether the problem is correctable.
256
+ # whether the problem is automatically correctable. The third argument to the block is usually
257
+ # this object but may be another object if during auto-correction a new object was created and
258
+ # validated.
263
259
  #
264
- # Returns +true+ if the object is deemed valid and +false+ otherwise.
260
+ # The validation routine itself has to be implemented in the #perform_validation method - see
261
+ # its documentation for more information.
265
262
  #
266
263
  # *Note*: Even if the return value is +true+ there may be problems since HexaPDF doesn't
267
264
  # currently implement the full PDF spec. However, if the return value is +false+, there is
268
265
  # certainly a problem!
269
266
  def validate(auto_correct: true)
270
- catch do |catch_tag|
271
- perform_validation do |msg, correctable|
272
- yield(msg, correctable) if block_given?
273
- throw(catch_tag, false) unless auto_correct && correctable
274
- end
275
- true
267
+ result = true
268
+ perform_validation do |msg, correctable, object|
269
+ yield(msg, correctable, object || self) if block_given?
270
+ result = false unless correctable
271
+ return false unless auto_correct
276
272
  end
273
+ result
277
274
  end
278
275
 
279
276
  # Makes a deep copy of the source PDF object and resets the object identifier.
@@ -287,6 +284,28 @@ module HexaPDF
287
284
  obj
288
285
  end
289
286
 
287
+ # Caches and returns the given +value+ or the value of the block under the given cache key. If
288
+ # there is already a cached value for the key and +update+ is +false+, it is just returned.
289
+ #
290
+ # Set +update+ to +true+ to force an update of the cached value.
291
+ #
292
+ # This uses Document#cache internally.
293
+ def cache(key, value = Document::UNSET, update: false, &block)
294
+ document.cache(@data, key, value, update: update, &block)
295
+ end
296
+
297
+ # Returns +true+ if there is a cached value for the given key.
298
+ #
299
+ # This uses Document#cached? internally.
300
+ def cached?(key)
301
+ document.cached?(@data, key)
302
+ end
303
+
304
+ # Clears the cache for this object.
305
+ def clear_cache
306
+ document.clear_cache(@data)
307
+ end
308
+
290
309
  # Compares this object to another object.
291
310
  #
292
311
  # If the other object does not respond to +oid+ or +gen+, +nil+ is returned. Otherwise objects
@@ -339,17 +358,25 @@ module HexaPDF
339
358
  # are also performed!
340
359
  #
341
360
  # When the validation routine finds that the object is invalid, it has to yield a problem
342
- # description and whether the problem can be corrected. After yielding, the problem has to be
343
- # corrected which poses no problem because the #validate method makes sure that the yield only
344
- # returns if the problem is actually correctable and if it should be corrected.
361
+ # description and whether the problem can be corrected. An optional third argument may contain
362
+ # the object that gets validated if it is different from this object (may happen when
363
+ # auto-correction is used).
345
364
  #
346
- # Here is a sample validation routine for stream objects:
365
+ # After yielding, the problem has to be corrected if it is correctable. If it is not correctable
366
+ # and not correcting would lead to exceptions the method has to return early.
367
+ #
368
+ # Here is a sample validation routine for a dictionary object type:
347
369
  #
348
370
  # def perform_validation
349
371
  # super
350
- # unless value.kind_of?(Hash)
351
- # yield("A stream object needs a Hash as value")
352
- # self.value = {}
372
+ #
373
+ # if value[:SomeKey].length != 7
374
+ # yield("Length of /SomeKey is invalid")
375
+ # # No need to return early here because following check doesn't rely on /SomeKey
376
+ # end
377
+ #
378
+ # if value[:OtherKey] % 2 == 0
379
+ # yield("/OtherKey needs to contain an odd number of elements")
353
380
  # end
354
381
  # end
355
382
  def perform_validation(&block)
@@ -59,6 +59,7 @@ module HexaPDF
59
59
  @tokenizer = Tokenizer.new(io)
60
60
  @document = document
61
61
  @object_stream_data = {}
62
+ @reconstructed_revision = nil
62
63
  retrieve_pdf_header_offset_and_version
63
64
  end
64
65
 
@@ -86,6 +87,8 @@ module HexaPDF
86
87
  end
87
88
 
88
89
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
+ rescue HexaPDF::MalformedPDFError
91
+ reconstructed_revision.object(xref_entry)
89
92
  end
90
93
 
91
94
  # Parses the indirect object at the specified offset.
@@ -235,14 +238,14 @@ module HexaPDF
235
238
  @tokenizer.skip_whitespace
236
239
  start.upto(start + number_of_entries - 1) do |oid|
237
240
  pos, gen, type = @tokenizer.next_xref_entry do |matched_size|
238
- maybe_raise("Invalid cross-reference subsection entry", pos: @tokenizer.pos,
239
- force: matched_size == 20)
241
+ maybe_raise("Invalid cross-reference entry", pos: @tokenizer.pos,
242
+ force: !matched_size)
240
243
  end
241
244
  if xref.entry?(oid)
242
245
  next
243
246
  elsif type == 'n'
244
247
  if pos == 0 || gen > 65535
245
- maybe_raise("Invalid in use cross-reference entry in cross-reference section",
248
+ maybe_raise("Invalid in use cross-reference entry",
246
249
  pos: @tokenizer.pos)
247
250
  xref.add_free_entry(oid, gen)
248
251
  else
@@ -264,6 +267,27 @@ module HexaPDF
264
267
  raise_malformed("Trailer is #{trailer.class} instead of dictionary ", pos: @tokenizer.pos)
265
268
  end
266
269
 
270
+ unless trailer[:Prev] || xref.max_oid == 0 || xref.entry?(0)
271
+ first_entry = xref[xref.oids[0]]
272
+ test_entry = xref[xref.oids[-1]]
273
+ @tokenizer.pos = test_entry.pos + @header_offset
274
+ test_oid = @tokenizer.next_token
275
+ first_oid = first_entry.oid
276
+
277
+ force_failure = !first_entry.free? || first_entry.gen != 65535 ||
278
+ !test_oid.kind_of?(Integer) || xref.oids[-1] - test_oid != first_oid
279
+ maybe_raise("Main cross-reference section has invalid numbering",
280
+ pos: offset + @header_offset, force: force_failure)
281
+
282
+ new_xref = XRefSection.new
283
+ xref.oids.each do |oid|
284
+ entry = xref[oid]
285
+ entry.oid -= first_oid
286
+ new_xref.send(:[]=, entry.oid, entry.gen, entry)
287
+ end
288
+ xref = new_xref
289
+ end
290
+
267
291
  [xref, trailer]
268
292
  end
269
293
 
@@ -313,6 +337,11 @@ module HexaPDF
313
337
  @startxref_offset = lines[eof_index - 1].to_i
314
338
  end
315
339
 
340
+ # Returns the reconstructed revision.
341
+ def reconstructed_revision
342
+ @reconstructed_revision ||= reconstruct_revision
343
+ end
344
+
316
345
  # Returns the PDF version number that is stored in the file header.
317
346
  #
318
347
  # See: PDF1.7 s7.5.2
@@ -338,6 +367,61 @@ module HexaPDF
338
367
  @header_version = $1
339
368
  end
340
369
 
370
+ # Tries to reconstruct the PDF document's main cross-reference table by serially parsing the
371
+ # file and returning a Revision object for loading the found objects.
372
+ #
373
+ # If the file contains multiple cross-reference sections, all objects will be put into a single
374
+ # cross-reference table, later objects overwriting prior ones.
375
+ def reconstruct_revision
376
+ raise unless @document.config['parser.try_xref_reconstruction']
377
+ msg = "#{$!} - trying cross-reference table reconstruction"
378
+ @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
379
+
380
+ xref = XRefSection.new
381
+ @tokenizer.pos = 0
382
+ while true
383
+ @tokenizer.skip_whitespace
384
+ pos = @tokenizer.pos
385
+ @tokenizer.scan_until(/(\n|\r\n?)+/)
386
+ next_new_line_pos = @tokenizer.pos
387
+ @tokenizer.pos = pos
388
+
389
+ token = @tokenizer.next_token rescue nil
390
+ if token.kind_of?(Integer)
391
+ gen = @tokenizer.next_token rescue nil
392
+ tok = @tokenizer.next_token rescue nil
393
+ if @tokenizer.pos > next_new_line_pos
394
+ @tokenizer.pos = next_new_line_pos
395
+ elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
396
+ xref.add_in_use_entry(token, gen, pos)
397
+ @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
398
+ end
399
+ elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
400
+ obj = @tokenizer.next_object rescue nil
401
+ # Use last trailer found in case of multiple revisions but use first trailer in case of
402
+ # linearized file.
403
+ trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
404
+ elsif token == Tokenizer::NO_MORE_TOKENS
405
+ break
406
+ else
407
+ @tokenizer.pos = next_new_line_pos
408
+ end
409
+ end
410
+
411
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
412
+ if !trailer || trailer.empty?
413
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
414
+ end
415
+
416
+ loader = lambda do |xref_entry|
417
+ obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
418
+ @document.wrap(obj, oid: oid, gen: gen, stream: stream)
419
+ end
420
+
421
+ Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
422
+ loader: loader)
423
+ end
424
+
341
425
  # Raises a HexaPDF::MalformedPDFError with the given message and source position.
342
426
  def raise_malformed(msg, pos: nil)
343
427
  raise HexaPDF::MalformedPDFError.new(msg, pos: pos)
@@ -83,7 +83,7 @@ module HexaPDF
83
83
  # subclasses) and the given data has not (including subclasses), the data is stored inside the
84
84
  # HexaPDF::Object.
85
85
  def []=(index, data)
86
- if value[index].class == HexaPDF::Object && !data.kind_of?(HexaPDF::Object) &&
86
+ if value[index].instance_of?(HexaPDF::Object) && !data.kind_of?(HexaPDF::Object) &&
87
87
  !data.kind_of?(HexaPDF::Reference)
88
88
  value[index].value = data
89
89
  else
@@ -113,6 +113,13 @@ module HexaPDF
113
113
  value.delete_at(index)
114
114
  end
115
115
 
116
+ # Deletes all values from the PDFArray that are equal to the given object.
117
+ #
118
+ # Returns the last deleted item, or +nil+ if no matching item is found.
119
+ def delete(object)
120
+ value.delete(object)
121
+ end
122
+
116
123
  # :call-seq:
117
124
  # array.slice!(index) -> obj or nil
118
125
  # array.slice!(start, length) -> new_array or nil
@@ -174,9 +181,9 @@ module HexaPDF
174
181
  self
175
182
  end
176
183
 
177
- # Returns a duplicate of the underlying array.
184
+ # Returns an array containing the preprocessed values (like in #[]).
178
185
  def to_ary
179
- value.dup
186
+ each.to_a
180
187
  end
181
188
 
182
189
  private
@@ -196,7 +203,7 @@ module HexaPDF
196
203
  data = document.deref(data)
197
204
  value[index] = data if index
198
205
  end
199
- if data.class == HexaPDF::Object || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
206
+ if data.instance_of?(HexaPDF::Object) || (data.kind_of?(HexaPDF::Object) && data.value.nil?)
200
207
  data = data.value
201
208
  end
202
209
  data
@@ -67,30 +67,38 @@ module HexaPDF
67
67
  object_loader = lambda {|xref_entry| parser.load_object(xref_entry) }
68
68
 
69
69
  revisions = []
70
- xref_section, trailer = parser.load_revision(parser.startxref_offset)
71
- revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
72
- xref_section: xref_section, loader: object_loader)
73
- seen_xref_offsets = {parser.startxref_offset => true}
74
-
75
- while (prev = revisions[0].trailer.value[:Prev]) &&
76
- !seen_xref_offsets.key?(prev)
77
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
78
- # should be direct. Adobe's POV is followed here. Same with :XRefStm.
79
- xref_section, trailer = parser.load_revision(prev)
80
- seen_xref_offsets[prev] = true
81
-
82
- stm = revisions[0].trailer.value[:XRefStm]
83
- if stm && !seen_xref_offsets.key?(stm)
84
- stm_xref_section, = parser.load_revision(stm)
85
- xref_section.merge!(stm_xref_section)
86
- seen_xref_offsets[stm] = true
70
+ begin
71
+ xref_section, trailer = parser.load_revision(parser.startxref_offset)
72
+ revisions << Revision.new(document.wrap(trailer, type: :XXTrailer),
73
+ xref_section: xref_section, loader: object_loader)
74
+ seen_xref_offsets = {parser.startxref_offset => true}
75
+
76
+ while (prev = revisions[0].trailer.value[:Prev]) &&
77
+ !seen_xref_offsets.key?(prev)
78
+ # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # should be direct. Adobe's POV is followed here. Same with :XRefStm.
80
+ xref_section, trailer = parser.load_revision(prev)
81
+ seen_xref_offsets[prev] = true
82
+
83
+ stm = revisions[0].trailer.value[:XRefStm]
84
+ if stm && !seen_xref_offsets.key?(stm)
85
+ stm_xref_section, = parser.load_revision(stm)
86
+ xref_section.merge!(stm_xref_section)
87
+ seen_xref_offsets[stm] = true
88
+ end
89
+
90
+ revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
91
+ xref_section: xref_section, loader: object_loader))
87
92
  end
88
-
89
- revisions.unshift(Revision.new(document.wrap(trailer, type: :XXTrailer),
90
- xref_section: xref_section, loader: object_loader))
93
+ rescue HexaPDF::MalformedPDFError
94
+ reconstructed_revision = parser.reconstructed_revision
95
+ unless revisions.empty?
96
+ reconstructed_revision.trailer.data.value = revisions.last.trailer.data.value
97
+ end
98
+ revisions << reconstructed_revision
91
99
  end
92
100
 
93
- document.version = parser.file_header_version
101
+ document.version = parser.file_header_version rescue '1.0'
94
102
  new(document, initial_revisions: revisions, parser: parser)
95
103
  end
96
104
 
@@ -243,7 +243,7 @@ module HexaPDF
243
243
  else
244
244
  obj.dup
245
245
  end
246
- obj.gsub!(/[\(\)\\\r]/n, STRING_ESCAPE_MAP)
246
+ obj.gsub!(/[()\\\r]/n, STRING_ESCAPE_MAP)
247
247
  "(#{obj})"
248
248
  end
249
249
 
@@ -129,9 +129,10 @@ module HexaPDF
129
129
  xref_stream = false
130
130
  objects_to_delete = []
131
131
  rev.each do |obj|
132
- if obj.type == :ObjStm
132
+ case obj.type
133
+ when :ObjStm
133
134
  objects_to_delete << obj
134
- elsif obj.type == :XRef
135
+ when :XRef
135
136
  xref_stream = true
136
137
  objects_to_delete << obj if xref_streams == :delete
137
138
  else
@@ -150,9 +151,10 @@ module HexaPDF
150
151
  objstms = [doc.wrap({Type: :ObjStm})]
151
152
  old_objstms = []
152
153
  rev.each do |obj|
153
- if obj.type == :XRef
154
+ case obj.type
155
+ when :XRef
154
156
  xref_stream = true
155
- elsif obj.type == :ObjStm
157
+ when :ObjStm
156
158
  old_objstms << obj
157
159
  end
158
160
  delete_fields_with_defaults(obj)
@@ -249,17 +249,18 @@ module HexaPDF
249
249
  #
250
250
  # See: PDF1.7 s7.3.3
251
251
  def parse_number
252
- if (val = @ss.scan(/[+-]?\d++(?!\.)/))
252
+ val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
253
+ if val.match?(/\A[+-]?\d++(?!\.)\z/)
253
254
  tmp = val.to_i
254
255
  # Handle object references, see PDF1.7 s7.3.10
255
256
  prepare_string_scanner(10)
256
257
  tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
257
258
  tmp
258
- elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
259
+ elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
259
260
  val << '0' if val.getbyte(-1) == 46 # dot '.'
260
261
  Float(val)
261
262
  else
262
- parse_keyword
263
+ TOKEN_CACHE[val] # val is keyword
263
264
  end
264
265
  end
265
266
 
@@ -37,6 +37,7 @@
37
37
  require 'hexapdf/error'
38
38
  require 'hexapdf/layout/style'
39
39
  require 'hexapdf/layout/text_fragment'
40
+ require 'hexapdf/layout/text_layouter'
40
41
 
41
42
  module HexaPDF
42
43
  module Type
@@ -80,14 +81,8 @@ module HexaPDF
80
81
  else
81
82
  raise HexaPDF::Error, "Unsupported button field type"
82
83
  end
83
- when :Tx
84
+ when :Tx, :Ch
84
85
  create_text_appearances
85
- when :Ch
86
- if @field.combo_box?
87
- create_text_appearances
88
- else
89
- raise HexaPDF::Error, "List box not supported yet"
90
- end
91
86
  else
92
87
  raise HexaPDF::Error, "Unsupported field type #{@field.field_type}"
93
88
  end
@@ -206,6 +201,10 @@ module HexaPDF
206
201
  # * The font, font size and font color are taken from the associated field's default
207
202
  # appearance string. See VariableTextField.
208
203
  #
204
+ # If the font is not usable by HexaPDF (which may be due to a variety of reasons, e.g. no
205
+ # associated information in the form's default resources), the font specified by the
206
+ # configuration option +acro_form.fallback_font+ will be used.
207
+ #
209
208
  # * The widget's rectangle /Rect must be defined. If the height is zero, it is auto-sized
210
209
  # based on the font size. If additionally the font size is zero, a font size of
211
210
  # +acro_form.default_font_size+ is used. If the width is zero, the
@@ -222,7 +221,7 @@ module HexaPDF
222
221
  def create_text_appearances
223
222
  font_name, font_size = @field.parse_default_appearance_string
224
223
  default_resources = @document.acro_form.default_resources
225
- font = default_resources.font(font_name).font_wrapper
224
+ font = default_resources.font(font_name).font_wrapper rescue nil
226
225
  unless font
227
226
  fallback_font_name, fallback_font_options = @document.config['acro_form.fallback_font']
228
227
  if fallback_font_name
@@ -245,38 +244,35 @@ module HexaPDF
245
244
  rect.height = style.scaled_y_max - style.scaled_y_min + 2 * padding
246
245
  end
247
246
 
248
- form = (@widget[:AP] ||= {})[:N] = @document.add({Type: :XObject, Subtype: :Form,
249
- BBox: [0, 0, rect.width, rect.height]})
247
+ form = (@widget[:AP] ||= {})[:N] ||= @document.add({Type: :XObject, Subtype: :Form})
248
+ form.value.replace({Type: :XObject, Subtype: :Form, BBox: [0, 0, rect.width, rect.height]})
249
+ form.contents = ''
250
250
  form[:Resources] = HexaPDF::Object.deep_copy(default_resources)
251
251
 
252
252
  canvas = form.canvas
253
253
  apply_background_and_border(border_style, canvas)
254
254
  style.font_size = calculate_font_size(font, font_size, rect, border_style)
255
+ style.clear_cache
255
256
 
256
257
  canvas.marked_content_sequence(:Tx) do
257
- if (value = @field.field_value)
258
+ if @field.field_value || @field.concrete_field_type == :list_box
258
259
  canvas.save_graphics_state do
259
260
  canvas.rectangle(padding, padding, rect.width - 2 * padding,
260
261
  rect.height - 2 * padding).clip_path.end_path
261
- fragment = HexaPDF::Layout::TextFragment.create(value, style)
262
- # Adobe seems to be left/right-aligning based on twice the border width and
263
- # vertically centering based on the cap height, if enough space is available
264
- x = case @field.text_alignment
265
- when :left then 2 * padding
266
- when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
267
- when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
268
- end
269
- cap_height = font.wrapped_font.cap_height * font.scaling_factor / 1000.0 *
270
- style.font_size
271
- y = padding + (rect.height - 2 * padding - cap_height) / 2.0
272
- y = padding - style.scaled_font_descender if y < 0
273
- fragment.draw(canvas, x, y)
262
+ if @field.concrete_field_type == :multiline_text_field
263
+ draw_multiline_text(canvas, rect, style, padding)
264
+ elsif @field.concrete_field_type == :list_box
265
+ draw_list_box(canvas, rect, style, padding)
266
+ else
267
+ draw_single_line_text(canvas, rect, style, padding)
268
+ end
274
269
  end
275
270
  end
276
271
  end
277
272
  end
278
273
 
279
274
  alias create_combo_box_appearances create_text_appearances
275
+ alias create_list_box_appearances create_text_appearances
280
276
 
281
277
  private
282
278
 
@@ -337,6 +333,13 @@ module HexaPDF
337
333
  canvas.circle(rect.width / 2.0, rect.height / 2.0, [width / 2.0, height / 2.0].min)
338
334
  else
339
335
  canvas.rectangle(offset, offset, width, height)
336
+ if @field.concrete_field_type == :comb_text_field
337
+ cell_width = rect.width.to_f / @field[:MaxLen]
338
+ 1.upto(@field[:MaxLen] - 1) do |i|
339
+ canvas.line(i * cell_width, border_style.width,
340
+ i * cell_width, border_style.width + height)
341
+ end
342
+ end
340
343
  end
341
344
  end
342
345
  canvas.stroke
@@ -381,14 +384,115 @@ module HexaPDF
381
384
  end
382
385
  end
383
386
 
387
+ # Draws a single line of text inside the widget's rectangle.
388
+ def draw_single_line_text(canvas, rect, style, padding)
389
+ value = @field.field_value
390
+ fragment = HexaPDF::Layout::TextFragment.create(value, style)
391
+
392
+ if @field.concrete_field_type == :comb_text_field
393
+ unless @field.key?(:MaxLen)
394
+ raise HexaPDF::Error, "Missing or invalid dictionary field /MaxLen for comb text field"
395
+ end
396
+ new_items = []
397
+ cell_width = rect.width.to_f / @field[:MaxLen]
398
+ scaled_cell_width = cell_width / style.scaled_font_size.to_f
399
+ fragment.items.each_cons(2) do |a, b|
400
+ new_items << a << -(scaled_cell_width - a.width / 2.0 - b.width / 2.0)
401
+ end
402
+ new_items << fragment.items.last
403
+ fragment.items.replace(new_items)
404
+ fragment.clear_cache
405
+ # Adobe always seems to add 1 to the first offset...
406
+ x_offset = 1 + (cell_width - style.scaled_item_width(fragment.items[0])) / 2.0
407
+ x = case @field.text_alignment
408
+ when :left then x_offset
409
+ when :right then x_offset + cell_width * (@field[:MaxLen] - value.length)
410
+ when :center then x_offset + cell_width * ((@field[:MaxLen] - value.length) / 2)
411
+ end
412
+ else
413
+ # Adobe seems to be left/right-aligning based on twice the border width
414
+ x = case @field.text_alignment
415
+ when :left then 2 * padding
416
+ when :right then [rect.width - 2 * padding - fragment.width, 2 * padding].max
417
+ when :center then [(rect.width - fragment.width) / 2.0, 2 * padding].max
418
+ end
419
+ end
420
+
421
+ # Adobe seems to be vertically centering based on the cap height, if enough space is
422
+ # available
423
+ cap_height = style.font.wrapped_font.cap_height * style.font.scaling_factor / 1000.0 *
424
+ style.font_size
425
+ y = padding + (rect.height - 2 * padding - cap_height) / 2.0
426
+ y = padding - style.scaled_font_descender if y < 0
427
+ fragment.draw(canvas, x, y)
428
+ end
429
+
430
+ # Draws multiple lines of text inside the widget's rectangle.
431
+ def draw_multiline_text(canvas, rect, style, padding)
432
+ items = [Layout::TextFragment.create(@field.field_value, style)]
433
+ layouter = Layout::TextLayouter.new(style)
434
+ layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
435
+
436
+ result = nil
437
+ if style.font_size == 0 # need to auto-size text
438
+ style.font_size = 12 # Adobe seems to use this as starting point
439
+ style.clear_cache
440
+ loop do
441
+ result = layouter.fit(items, rect.width - 4 * padding, rect.height - 4 * padding)
442
+ break if result.status == :success || style.font_size <= 4 # don't make text too small
443
+ style.font_size -= 1
444
+ style.clear_cache
445
+ end
446
+ else
447
+ result = layouter.fit(items, rect.width - 4 * padding, 2**20)
448
+ end
449
+
450
+ unless result.lines.empty?
451
+ result.draw(canvas, 2 * padding, rect.height - 2 * padding - result.lines[0].height / 2.0)
452
+ end
453
+ end
454
+
455
+ # Draws the visible option items of the list box in the widget's rectangle.
456
+ def draw_list_box(canvas, rect, style, padding)
457
+ option_items = @field.option_items
458
+ top_index = @field.list_box_top_index
459
+ items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
460
+
461
+ indices = @field[:I] || []
462
+ value_indices = [@field.field_value].flatten.compact.map {|val| option_items.index(val) }
463
+ indices = value_indices if indices != value_indices
464
+
465
+ layouter = Layout::TextLayouter.new(style)
466
+ layouter.style.align(@field.text_alignment).line_spacing(:proportional, 1.25)
467
+ result = layouter.fit(items, rect.width - 4 * padding, rect.height)
468
+
469
+ unless result.lines.empty?
470
+ top_gap = style.line_spacing.gap(result.lines[0], result.lines[0])
471
+ line_height = style.line_spacing.baseline_distance(result.lines[0], result.lines[0])
472
+ canvas.fill_color(153, 193, 218) # Adobe's color for selection highlighting
473
+ indices.map! {|i| rect.height - padding - (i - top_index + 1) * line_height }.each do |y|
474
+ next if y + line_height > rect.height || y + line_height < padding
475
+ canvas.rectangle(padding, y, rect.width - 2 * padding, line_height)
476
+ end
477
+ canvas.fill if canvas.graphics_object == :path
478
+ result.draw(canvas, 2 * padding, rect.height - padding - top_gap)
479
+ end
480
+ end
481
+
384
482
  # Calculates the font size for text fields based on the font and font size of the default
385
483
  # appearance string, the annotation rectangle and the border style.
386
484
  def calculate_font_size(font, font_size, rect, border_style)
387
485
  if font_size == 0
388
- unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
389
- font.scaling_factor / 1000.0
390
- # The constant factor was found empirically by checking what Adobe Reader etc. do
391
- (rect.height - 2 * border_style.width) / unit_font_size * 0.83
486
+ if @field.concrete_field_type == :multiline_text_field
487
+ 0 # Handled by multiline drawing code
488
+ elsif @field.concrete_field_type == :list_box
489
+ 12 # Seems to be Adobe's default
490
+ else
491
+ unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
492
+ font.scaling_factor / 1000.0
493
+ # The constant factor was found empirically by checking what Adobe Reader etc. do
494
+ (rect.height - 2 * border_style.width) / unit_font_size * 0.83
495
+ end
392
496
  else
393
497
  font_size
394
498
  end