hexapdf 0.32.1 → 0.33.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +76 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/cli.rb +4 -0
  22. data/lib/hexapdf/composer.rb +104 -52
  23. data/lib/hexapdf/configuration.rb +44 -39
  24. data/lib/hexapdf/content/canvas.rb +393 -267
  25. data/lib/hexapdf/content/color_space.rb +72 -25
  26. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  27. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  28. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  29. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  30. data/lib/hexapdf/content/graphic_object.rb +6 -7
  31. data/lib/hexapdf/content/graphics_state.rb +54 -45
  32. data/lib/hexapdf/content/operator.rb +52 -54
  33. data/lib/hexapdf/content/parser.rb +2 -2
  34. data/lib/hexapdf/content/processor.rb +15 -15
  35. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  36. data/lib/hexapdf/content.rb +5 -0
  37. data/lib/hexapdf/dictionary.rb +6 -5
  38. data/lib/hexapdf/dictionary_fields.rb +42 -14
  39. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  40. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  41. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  42. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  43. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  44. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  45. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  46. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  47. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  48. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  49. data/lib/hexapdf/digital_signature.rb +7 -2
  50. data/lib/hexapdf/document/destinations.rb +12 -11
  51. data/lib/hexapdf/document/files.rb +1 -1
  52. data/lib/hexapdf/document/fonts.rb +1 -1
  53. data/lib/hexapdf/document/layout.rb +167 -39
  54. data/lib/hexapdf/document/pages.rb +3 -2
  55. data/lib/hexapdf/document.rb +89 -55
  56. data/lib/hexapdf/encryption/aes.rb +5 -5
  57. data/lib/hexapdf/encryption/arc4.rb +1 -1
  58. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  59. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  60. data/lib/hexapdf/encryption/identity.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  62. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  63. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  64. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  65. data/lib/hexapdf/encryption.rb +7 -2
  66. data/lib/hexapdf/error.rb +18 -0
  67. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  68. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  69. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  70. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  71. data/lib/hexapdf/filter/pass_through.rb +1 -1
  72. data/lib/hexapdf/filter/predictor.rb +1 -1
  73. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  74. data/lib/hexapdf/filter.rb +55 -6
  75. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  76. data/lib/hexapdf/font/cmap.rb +1 -1
  77. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  79. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  80. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  82. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  83. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  84. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  85. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  86. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  87. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  88. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  89. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  90. data/lib/hexapdf/font_loader.rb +3 -0
  91. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  92. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  93. data/lib/hexapdf/image_loader/png.rb +2 -2
  94. data/lib/hexapdf/image_loader.rb +1 -1
  95. data/lib/hexapdf/importer.rb +13 -0
  96. data/lib/hexapdf/layout/box.rb +9 -2
  97. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  98. data/lib/hexapdf/layout/column_box.rb +18 -4
  99. data/lib/hexapdf/layout/frame.rb +30 -12
  100. data/lib/hexapdf/layout/image_box.rb +5 -0
  101. data/lib/hexapdf/layout/inline_box.rb +1 -0
  102. data/lib/hexapdf/layout/list_box.rb +17 -1
  103. data/lib/hexapdf/layout/page_style.rb +4 -4
  104. data/lib/hexapdf/layout/style.rb +18 -3
  105. data/lib/hexapdf/layout/table_box.rb +682 -0
  106. data/lib/hexapdf/layout/text_box.rb +5 -3
  107. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  108. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  109. data/lib/hexapdf/layout.rb +1 -0
  110. data/lib/hexapdf/name_tree_node.rb +1 -1
  111. data/lib/hexapdf/number_tree_node.rb +1 -1
  112. data/lib/hexapdf/object.rb +18 -7
  113. data/lib/hexapdf/parser.rb +8 -8
  114. data/lib/hexapdf/pdf_array.rb +1 -1
  115. data/lib/hexapdf/rectangle.rb +1 -1
  116. data/lib/hexapdf/reference.rb +1 -1
  117. data/lib/hexapdf/revision.rb +1 -1
  118. data/lib/hexapdf/revisions.rb +3 -3
  119. data/lib/hexapdf/serializer.rb +15 -15
  120. data/lib/hexapdf/stream.rb +4 -2
  121. data/lib/hexapdf/tokenizer.rb +14 -14
  122. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  123. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  125. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  126. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  127. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  128. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  130. data/lib/hexapdf/type/acro_form.rb +1 -1
  131. data/lib/hexapdf/type/action.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  133. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  134. data/lib/hexapdf/type/actions/launch.rb +1 -1
  135. data/lib/hexapdf/type/actions/uri.rb +1 -1
  136. data/lib/hexapdf/type/actions.rb +1 -1
  137. data/lib/hexapdf/type/annotation.rb +3 -3
  138. data/lib/hexapdf/type/annotations/link.rb +1 -1
  139. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  140. data/lib/hexapdf/type/annotations/text.rb +1 -1
  141. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  142. data/lib/hexapdf/type/annotations.rb +1 -1
  143. data/lib/hexapdf/type/catalog.rb +1 -1
  144. data/lib/hexapdf/type/cid_font.rb +3 -3
  145. data/lib/hexapdf/type/embedded_file.rb +1 -1
  146. data/lib/hexapdf/type/file_specification.rb +2 -2
  147. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  148. data/lib/hexapdf/type/font_simple.rb +2 -2
  149. data/lib/hexapdf/type/font_type0.rb +3 -3
  150. data/lib/hexapdf/type/font_type3.rb +1 -1
  151. data/lib/hexapdf/type/form.rb +1 -1
  152. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  153. data/lib/hexapdf/type/icon_fit.rb +1 -1
  154. data/lib/hexapdf/type/image.rb +1 -1
  155. data/lib/hexapdf/type/info.rb +1 -1
  156. data/lib/hexapdf/type/mark_information.rb +1 -1
  157. data/lib/hexapdf/type/names.rb +2 -2
  158. data/lib/hexapdf/type/object_stream.rb +7 -3
  159. data/lib/hexapdf/type/outline.rb +1 -1
  160. data/lib/hexapdf/type/outline_item.rb +1 -1
  161. data/lib/hexapdf/type/page.rb +19 -10
  162. data/lib/hexapdf/type/page_label.rb +1 -1
  163. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  164. data/lib/hexapdf/type/resources.rb +1 -1
  165. data/lib/hexapdf/type/trailer.rb +2 -2
  166. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  167. data/lib/hexapdf/type/xref_stream.rb +2 -2
  168. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  169. data/lib/hexapdf/version.rb +1 -1
  170. data/lib/hexapdf/writer.rb +4 -4
  171. data/lib/hexapdf/xref_section.rb +2 -2
  172. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  173. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  174. data/test/hexapdf/content/test_canvas.rb +0 -1
  175. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  176. data/test/hexapdf/document/test_files.rb +2 -2
  177. data/test/hexapdf/document/test_layout.rb +98 -0
  178. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  179. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  180. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  181. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  182. data/test/hexapdf/layout/test_box.rb +1 -1
  183. data/test/hexapdf/layout/test_column_box.rb +65 -21
  184. data/test/hexapdf/layout/test_frame.rb +14 -14
  185. data/test/hexapdf/layout/test_image_box.rb +4 -0
  186. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  187. data/test/hexapdf/layout/test_list_box.rb +40 -6
  188. data/test/hexapdf/layout/test_page_style.rb +3 -2
  189. data/test/hexapdf/layout/test_style.rb +50 -0
  190. data/test/hexapdf/layout/test_table_box.rb +722 -0
  191. data/test/hexapdf/layout/test_text_box.rb +18 -0
  192. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  193. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  194. data/test/hexapdf/test_document.rb +1 -0
  195. data/test/hexapdf/test_filter.rb +8 -0
  196. data/test/hexapdf/test_importer.rb +9 -0
  197. data/test/hexapdf/test_object.rb +16 -5
  198. data/test/hexapdf/test_parser.rb +1 -1
  199. data/test/hexapdf/test_stream.rb +7 -0
  200. data/test/hexapdf/test_writer.rb +3 -3
  201. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  202. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  203. data/test/hexapdf/type/test_object_stream.rb +9 -3
  204. data/test/hexapdf/type/test_page.rb +18 -4
  205. metadata +17 -8
@@ -74,13 +74,14 @@ module HexaPDF
74
74
 
75
75
  @width = @height = 0
76
76
  @result = if style.position == :flow
77
- @tl.fit(@items, frame.width_specification, frame.shape.bbox.height)
77
+ @tl.fit(@items, frame.width_specification, frame.shape.bbox.height,
78
+ apply_first_text_indent: !split_box?)
78
79
  else
79
80
  @width = reserved_width
80
81
  @height = reserved_height
81
82
  width = (@initial_width > 0 ? @initial_width : available_width) - @width
82
83
  height = (@initial_height > 0 ? @initial_height : available_height) - @height
83
- @tl.fit(@items, width, height)
84
+ @tl.fit(@items, width, height, apply_first_text_indent: !split_box?)
84
85
  end
85
86
  @width += if @initial_width > 0 || style.align == :center || style.align == :right
86
87
  width
@@ -103,7 +104,8 @@ module HexaPDF
103
104
  def split(available_width, available_height, frame)
104
105
  fit(available_width, available_height, frame) unless @result
105
106
 
106
- if style.position != :flow && (@width > available_width || @height > available_height)
107
+ if style.position != :flow && (float_compare(@width, available_width) > 0 ||
108
+ float_compare(@height, available_height) > 0)
107
109
  [nil, self]
108
110
  elsif @result.remaining_items.empty?
109
111
  [self]
@@ -259,7 +259,7 @@ module HexaPDF
259
259
  # The width of the text fragment.
260
260
  #
261
261
  # It is the sum of the widths of its items and is calculated by using the algorithm presented
262
- # in PDF1.7 s9.4.4. By using kerning values as the first and/or last items, the text contained
262
+ # in PDF2.0 s9.4.4. By using kerning values as the first and/or last items, the text contained
263
263
  # in the fragment may spill over the left and/or right boundary.
264
264
  def width
265
265
  @width ||= @items.sum {|item| style.scaled_item_width(item) }
@@ -51,7 +51,8 @@ module HexaPDF
51
51
  # * Existing line breaking characters inside of TextFragment objects are respected when fitting
52
52
  # text. If this is not wanted, they have to be removed beforehand.
53
53
  #
54
- # * The first line may be indented by setting Style#text_indent which may also be negative.
54
+ # * The first line of each paragraph may be indented by setting Style#text_indent which may also
55
+ # be negative.
55
56
  #
56
57
  # * Text can be fitted into arbitrarily shaped areas, even containing holes.
57
58
  #
@@ -658,7 +659,7 @@ module HexaPDF
658
659
  end
659
660
 
660
661
  # :call-seq:
661
- # text_layouter.fit(items, width, height) -> result
662
+ # text_layouter.fit(items, width, height, apply_first_text_indent: true) -> result
662
663
  #
663
664
  # Fits the items into the given area and returns a Result object with all the information.
664
665
  #
@@ -693,7 +694,14 @@ module HexaPDF
693
694
  # The text segmentation algorithm specified via #style is applied to the items in case they
694
695
  # are not already in segmented form. This also means that Result#remaining_items always
695
696
  # contains segmented items.
696
- def fit(items, width, height)
697
+ #
698
+ # Optional arguments:
699
+ #
700
+ # +apply_first_text_indent+::
701
+ # Specifies whether style.text_indent should be applied to the first line. This should be
702
+ # set to +false+ if the items start with a continuation of a paragraph instead of starting
703
+ # a new paragraph (e.g. after a page break).
704
+ def fit(items, width, height, apply_first_text_indent: true)
697
705
  unless items.empty? || items[0].respond_to?(:type)
698
706
  items = style.text_segmentation_algorithm.call(items)
699
707
  end
@@ -704,7 +712,7 @@ module HexaPDF
704
712
  rest = items
705
713
 
706
714
  # processing state variables
707
- indent = style.text_indent
715
+ indent = apply_first_text_indent ? style.text_indent : 0
708
716
  line_fragments = []
709
717
  line_height = 0
710
718
  previous_line = nil
@@ -57,6 +57,7 @@ module HexaPDF
57
57
  autoload(:ColumnBox, 'hexapdf/layout/column_box')
58
58
  autoload(:ListBox, 'hexapdf/layout/list_box')
59
59
  autoload(:PageStyle, 'hexapdf/layout/page_style')
60
+ autoload(:TableBox, 'hexapdf/layout/table_box')
60
61
 
61
62
  end
62
63
 
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # HexaPDF::Utils::SortedTreeNode) to add or retrieve entries. They ensure that the name tree stays
56
56
  # valid.
57
57
  #
58
- # See: PDF1.7 s7.9.6
58
+ # See: PDF2.0 s7.9.6
59
59
  class NameTreeNode < Dictionary
60
60
 
61
61
  include Utils::SortedTreeNode
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # Number trees are similar to name trees but use integers as keys instead of strings. See
45
45
  # HexaPDF::NameTreeNode for a more detailed explanation.
46
46
  #
47
- # See: PDF1.7 s7.9.7, HexaPDF::NameTreeNode
47
+ # See: PDF2.0 s7.9.7, HexaPDF::NameTreeNode
48
48
  class NumberTreeNode < Dictionary
49
49
 
50
50
  include Utils::SortedTreeNode
@@ -117,7 +117,7 @@ module HexaPDF
117
117
  #
118
118
  # See: HexaPDF::Dictionary, HexaPDF::Stream, HexaPDF::Reference, HexaPDF::Document
119
119
  #
120
- # See: PDF1.7 s7.3.10, s7.3.8
120
+ # See: PDF2.0 s7.3.10, s7.3.8
121
121
  class Object
122
122
 
123
123
  include Comparable
@@ -143,18 +143,27 @@ module HexaPDF
143
143
 
144
144
  # Makes sure that the object itself as well as all nested values are direct objects.
145
145
  #
146
+ # The +document+ argument needs to contain the Document instance to which +object+ belongs so
147
+ # that references can be correctly resolved.
148
+ #
146
149
  # If an indirect object is found, it is turned into a direct object and the indirect object is
147
150
  # deleted from the document.
148
- def self.make_direct(object)
151
+ def self.make_direct(object, document)
149
152
  if object.kind_of?(HexaPDF::Object) && object.indirect?
153
+ raise HexaPDF::Error, "Can't make a stream object a direct object" if object.data.stream
150
154
  object_to_delete = object
151
155
  object = object.value
152
156
  object_to_delete.document.delete(object_to_delete)
153
157
  end
154
- if object.kind_of?(Hash)
155
- object.transform_values! {|val| make_direct(val) }
156
- elsif object.kind_of?(Array)
157
- object.map! {|val| make_direct(val) }
158
+ case object
159
+ when HexaPDF::Object
160
+ object.data.value = make_direct(object.data.value, document)
161
+ when Hash
162
+ object.transform_values! {|val| make_direct(val, document) }
163
+ when Array
164
+ object.map! {|val| make_direct(val, document) }
165
+ when Reference
166
+ object = make_direct(document.object(object), document)
158
167
  end
159
168
  object
160
169
  end
@@ -255,7 +264,7 @@ module HexaPDF
255
264
  # type.
256
265
  #
257
266
  # However, the Type and Subtype fields can easily be used for this. Subclasses for PDF objects
258
- # that don't have such fields may use a unique name that has to begin with XX (see PDF1.7 sE.2)
267
+ # that don't have such fields may use a unique name that has to begin with XX (see PDF2.0 sE.2)
259
268
  # and therefore doesn't clash with names defined by the PDF specification.
260
269
  #
261
270
  # For basic objects this always returns +:Unknown+.
@@ -297,6 +306,8 @@ module HexaPDF
297
306
  end
298
307
 
299
308
  # Makes a deep copy of the source PDF object and resets the object identifier.
309
+ #
310
+ # Note that indirect references are *not* copied! If that is also needed, use Importer::copy.
300
311
  def deep_copy
301
312
  obj = dup
302
313
  obj.instance_variable_set(:@data, @data.dup)
@@ -41,11 +41,11 @@ require 'hexapdf/xref_section'
41
41
 
42
42
  module HexaPDF
43
43
 
44
- # Parses an IO stream according to PDF1.7 to get at the contained objects.
44
+ # Parses an IO stream according to PDF2.0 to get at the contained objects.
45
45
  #
46
46
  # This class also contains higher-level methods for getting indirect objects and revisions.
47
47
  #
48
- # See: PDF1.7 s7
48
+ # See: PDF2.0 s7
49
49
  class Parser
50
50
 
51
51
  # The IO stream which is parsed.
@@ -125,7 +125,7 @@ module HexaPDF
125
125
  #
126
126
  # Returns an array containing [object, oid, gen, stream].
127
127
  #
128
- # See: PDF1.7 s7.3.10, s7.3.8
128
+ # See: PDF2.0 s7.3.10, s7.3.8
129
129
  def parse_indirect_object(offset = nil)
130
130
  @tokenizer.pos = offset + @header_offset if offset
131
131
  oid = @tokenizer.next_token
@@ -267,7 +267,7 @@ module HexaPDF
267
267
  #
268
268
  # This method can only parse cross-reference sections, not cross-reference streams!
269
269
  #
270
- # See: PDF1.7 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
270
+ # See: PDF2.0 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
271
271
  def parse_xref_section_and_trailer(offset)
272
272
  @tokenizer.pos = offset + @header_offset
273
273
  token = @tokenizer.next_token
@@ -346,7 +346,7 @@ module HexaPDF
346
346
  #
347
347
  # If strict parsing is disabled, the whole file is searched for the offset.
348
348
  #
349
- # See: PDF1.7 s7.5.5, ADB1.7 sH.3-3.4.4
349
+ # See: PDF2.0 s7.5.5, ADB1.7 sH.3-3.4.4
350
350
  def startxref_offset
351
351
  return @startxref_offset if defined?(@startxref_offset)
352
352
 
@@ -397,7 +397,7 @@ module HexaPDF
397
397
 
398
398
  # Returns the PDF version number that is stored in the file header.
399
399
  #
400
- # See: PDF1.7 s7.5.2
400
+ # See: PDF2.0 s7.5.2
401
401
  def file_header_version
402
402
  unless @header_version
403
403
  raise_malformed("PDF file header is missing or corrupt", pos: 0)
@@ -413,7 +413,7 @@ module HexaPDF
413
413
  # restriction so that the header may appear in the first 1024 bytes. We follow the Adobe
414
414
  # convention.
415
415
  #
416
- # See: PDF1.7 s7.5.2, ADB1.7 sH.3-3.4.1
416
+ # See: PDF2.0 s7.5.2, ADB1.7 sH.3-3.4.1
417
417
  def retrieve_pdf_header_offset_and_version
418
418
  @io.seek(0)
419
419
  @header_offset = (@io.read(1024) || '').index(/%PDF-(\d\.\d)/) || 0
@@ -458,7 +458,7 @@ module HexaPDF
458
458
  linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
459
459
  @tokenizer.pos = pos
460
460
  end
461
- @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
461
+ @tokenizer.scan_until(/\bendobj\b/)
462
462
  end
463
463
  elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
464
464
  obj = @tokenizer.next_object rescue nil
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # #[] method. Therefore not all Array methods are implemented - use the #value directly if other
45
45
  # methods are needed.
46
46
  #
47
- # See: PDF1.7 s7.3.6
47
+ # See: PDF2.0 s7.3.6
48
48
  class PDFArray < HexaPDF::Object
49
49
 
50
50
  include Enumerable
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  # where +left+ is the bottom left x-coordinate, +bottom+ is the bottom left y-coordinate, +right+
52
52
  # is the top right x-coordinate and +top+ is the top right y-coordinate.
53
53
  #
54
- # See: PDF1.7 s7.9.5
54
+ # See: PDF2.0 s7.9.5
55
55
  class Rectangle < HexaPDF::PDFArray
56
56
 
57
57
  # Returns the x-coordinate of the bottom-left corner.
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  # keys. Furthermore the implementation is compatible to the one of Object, i.e. the hash of a
51
51
  # Reference object is the same as the hash of an indirect Object.
52
52
  #
53
- # See: PDF1.7 s7.3.10, Object
53
+ # See: PDF2.0 s7.3.10, Object
54
54
  class Reference
55
55
 
56
56
  include Comparable
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  # If a revision doesn't have an associated cross-reference section, it wasn't created from a PDF
49
49
  # file.
50
50
  #
51
- # See: PDF1.7 s7.5.6, Revisions
51
+ # See: PDF2.0 s7.5.6, Revisions
52
52
  class Revision
53
53
 
54
54
  include Enumerable
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # this should only be done if one is familiar with the inner workings of HexaPDF. Otherwise it is
56
56
  # best to use the convenience methods of this class to create, access or delete indirect objects.
57
57
  #
58
- # See: PDF1.7 s7.5.6, HexaPDF::Revision
58
+ # See: PDF2.0 s7.5.6, HexaPDF::Revision
59
59
  class Revisions
60
60
 
61
61
  class << self
@@ -76,7 +76,7 @@ module HexaPDF
76
76
  seen_xref_offsets = {}
77
77
 
78
78
  while offset && !seen_xref_offsets.key?(offset)
79
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # PDF2.0 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
80
80
  # should be direct. Adobe's POV is followed here. Same with :XRefStm.
81
81
  xref_section, trailer = parser.load_revision(offset)
82
82
  seen_xref_offsets[offset] = true
@@ -167,7 +167,7 @@ module HexaPDF
167
167
  # For references to unknown objects, +nil+ is returned but free objects are represented by a
168
168
  # PDF Null object, not by +nil+!
169
169
  #
170
- # See: PDF1.7 s7.3.9
170
+ # See: PDF2.0 s7.3.9
171
171
  def object(ref)
172
172
  i = @revisions.size - 1
173
173
  while i >= 0
@@ -79,7 +79,7 @@ module HexaPDF
79
79
  #
80
80
  # If no serialization method for a specific class is found, the ancestors classes are tried.
81
81
  #
82
- # See: PDF1.7 s7.3
82
+ # See: PDF2.0 s7.3
83
83
  class Serializer
84
84
 
85
85
  # The encrypter to use for encrypting strings and streams. If +nil+, strings and streams are not
@@ -163,21 +163,21 @@ module HexaPDF
163
163
 
164
164
  # Serializes the +nil+ value.
165
165
  #
166
- # See: PDF1.7 s7.3.9
166
+ # See: PDF2.0 s7.3.9
167
167
  def serialize_nilclass(_obj)
168
168
  "null"
169
169
  end
170
170
 
171
171
  # Serializes the +true+ value.
172
172
  #
173
- # See: PDF1.7 s7.3.2
173
+ # See: PDF2.0 s7.3.2
174
174
  def serialize_trueclass(_obj)
175
175
  "true"
176
176
  end
177
177
 
178
178
  # Serializes the +false+ value.
179
179
  #
180
- # See: PDF1.7 s7.3.2
180
+ # See: PDF2.0 s7.3.2
181
181
  def serialize_falseclass(_obj)
182
182
  "false"
183
183
  end
@@ -187,21 +187,21 @@ module HexaPDF
187
187
  # This method should be used for cases where it is known that the object is either an Integer
188
188
  # or a Float.
189
189
  #
190
- # See: PDF1.7 s7.3.3
190
+ # See: PDF2.0 s7.3.3
191
191
  def serialize_numeric(obj)
192
192
  obj.kind_of?(Integer) ? obj.to_s : serialize_float(obj)
193
193
  end
194
194
 
195
195
  # Serializes an Integer object.
196
196
  #
197
- # See: PDF1.7 s7.3.3
197
+ # See: PDF2.0 s7.3.3
198
198
  def serialize_integer(obj)
199
199
  obj.to_s
200
200
  end
201
201
 
202
202
  # Serializes a Float object.
203
203
  #
204
- # See: PDF1.7 s7.3.3
204
+ # See: PDF2.0 s7.3.3
205
205
  def serialize_float(obj)
206
206
  if -0.0001 < obj && obj < 0.0001 && obj != 0
207
207
  sprintf("%.6f", obj)
@@ -215,7 +215,7 @@ module HexaPDF
215
215
  # The regexp matches all characters that need to be escaped and the substs hash contains the
216
216
  # mapping from these characters to their escaped form.
217
217
  #
218
- # See PDF1.7 s7.3.5
218
+ # See PDF2.0 s7.3.5
219
219
  NAME_SUBSTS = {} # :nodoc:
220
220
  [0..32, 127..255, Tokenizer::DELIMITER.bytes, Tokenizer::WHITESPACE.bytes, [35]].each do |a|
221
221
  a.each {|c| NAME_SUBSTS[c.chr] = "##{c.to_s(16).rjust(2, '0')}" }
@@ -225,7 +225,7 @@ module HexaPDF
225
225
 
226
226
  # Serializes a Symbol object (i.e. a PDF name object).
227
227
  #
228
- # See: PDF1.7 s7.3.5
228
+ # See: PDF2.0 s7.3.5
229
229
  def serialize_symbol(obj)
230
230
  NAME_CACHE[obj] ||=
231
231
  begin
@@ -240,7 +240,7 @@ module HexaPDF
240
240
 
241
241
  # Serializes an Array object.
242
242
  #
243
- # See: PDF1.7 s7.3.6
243
+ # See: PDF2.0 s7.3.6
244
244
  def serialize_array(obj)
245
245
  str = +"["
246
246
  index = 0
@@ -256,7 +256,7 @@ module HexaPDF
256
256
 
257
257
  # Serializes a Hash object (i.e. a PDF dictionary object).
258
258
  #
259
- # See: PDF1.7 s7.3.7
259
+ # See: PDF2.0 s7.3.7
260
260
  def serialize_hash(obj)
261
261
  str = +"<<"
262
262
  obj.each do |k, v|
@@ -274,7 +274,7 @@ module HexaPDF
274
274
 
275
275
  # Serializes a String object.
276
276
  #
277
- # See: PDF1.7 s7.3.4
277
+ # See: PDF2.0 s7.3.4
278
278
  def serialize_string(obj)
279
279
  obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
280
280
  encrypter.encrypt_string(obj, @object)
@@ -294,7 +294,7 @@ module HexaPDF
294
294
  # The ISO PDF specification differs in respect to the supported date format. When converting
295
295
  # to a date string, a format suitable for both is output.
296
296
  #
297
- # See: PDF1.7 s7.9.4, ADB1.7 3.8.3
297
+ # See: PDF2.0 s7.9.4, ADB1.7 3.8.3
298
298
  def serialize_time(obj)
299
299
  zone = obj.strftime("%z'")
300
300
  if zone == "+0000'"
@@ -330,14 +330,14 @@ module HexaPDF
330
330
  end
331
331
  end
332
332
 
333
- # See: PDF1.7 s7.3.10
333
+ # See: PDF2.0 s7.3.10
334
334
  def serialize_hexapdf_reference(obj)
335
335
  "#{obj.oid} #{obj.gen} R"
336
336
  end
337
337
 
338
338
  # Serializes the streams dictionary and its stream.
339
339
  #
340
- # See: PDF1.7 s7.3.8
340
+ # See: PDF2.0 s7.3.8
341
341
  def serialize_hexapdf_stream(obj)
342
342
  if !obj.indirect?
343
343
  raise HexaPDF::Error, "Can't serialize PDF stream without object identifier"
@@ -88,7 +88,9 @@ module HexaPDF
88
88
 
89
89
  # Returns a Fiber for getting at the data of the stream represented by this object.
90
90
  def fiber(chunk_size = 0)
91
- if @source.kind_of?(Proc)
91
+ if @source.kind_of?(FiberDoubleForString)
92
+ @source.dup
93
+ elsif @source.kind_of?(Proc)
92
94
  FiberWithLength.new(@length, &@source)
93
95
  elsif @source.kind_of?(String)
94
96
  HexaPDF::Filter.source_from_file(@source, pos: @offset || 0, length: @length || -1,
@@ -134,7 +136,7 @@ module HexaPDF
134
136
  #
135
137
  # Note that support for external streams (/F, /FFilter, /FDecodeParms) is not yet implemented!
136
138
  #
137
- # See: PDF1.7 s7.3.8, Dictionary
139
+ # See: PDF2.0 s7.3.8, Dictionary
138
140
  class Stream < Dictionary
139
141
 
140
142
  define_field :Length, type: Integer # not required, will be auto-filled when writing
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # Tokenizes the content of an IO object following the PDF rules.
44
44
  #
45
- # See: PDF1.7 s7.2
45
+ # See: PDF2.0 s7.2
46
46
  class Tokenizer
47
47
 
48
48
  # Represents a keyword in a PDF file.
@@ -61,12 +61,12 @@ module HexaPDF
61
61
 
62
62
  # Characters defined as whitespace.
63
63
  #
64
- # See: PDF1.7 s7.2.2
64
+ # See: PDF2.0 s7.2.2
65
65
  WHITESPACE = " \n\r\0\t\f"
66
66
 
67
67
  # Characters defined as delimiters.
68
68
  #
69
- # See: PDF1.7 s7.2.2
69
+ # See: PDF2.0 s7.2.2
70
70
  DELIMITER = "()<>{}/[]%"
71
71
 
72
72
  WHITESPACE_MULTI_RE = /[#{WHITESPACE}]+/ # :nodoc:
@@ -171,7 +171,7 @@ module HexaPDF
171
171
  # If the +allow_end_array_token+ argument is +true+, the ']' token is permitted to facilitate
172
172
  # the use of this method during array parsing.
173
173
  #
174
- # See: PDF1.7 s7.3
174
+ # See: PDF2.0 s7.3
175
175
  def next_object(allow_end_array_token: false, allow_keyword: false)
176
176
  token = next_token
177
177
 
@@ -231,7 +231,7 @@ module HexaPDF
231
231
  # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
232
232
  # problem is recoverable.
233
233
  #
234
- # See: PDF1.7 7.5.4
234
+ # See: PDF2.0 7.5.4
235
235
  def next_xref_entry #:yield: recoverable
236
236
  prepare_string_scanner(20)
237
237
  if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
@@ -242,7 +242,7 @@ module HexaPDF
242
242
 
243
243
  # Skips all whitespace at the current position.
244
244
  #
245
- # See: PDF1.7 s7.2.2
245
+ # See: PDF2.0 s7.2.2
246
246
  def skip_whitespace
247
247
  prepare_string_scanner
248
248
  prepare_string_scanner while @ss.skip(WHITESPACE_MULTI_RE)
@@ -268,7 +268,7 @@ module HexaPDF
268
268
 
269
269
  # Parses the keyword at the current position.
270
270
  #
271
- # See: PDF1.7 s7.2
271
+ # See: PDF2.0 s7.2
272
272
  def parse_keyword
273
273
  str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
274
274
  TOKEN_CACHE[str]
@@ -278,12 +278,12 @@ module HexaPDF
278
278
 
279
279
  # Parses the number (integer or real) at the current position.
280
280
  #
281
- # See: PDF1.7 s7.3.3
281
+ # See: PDF2.0 s7.3.3
282
282
  def parse_number
283
283
  val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
284
284
  if val.match?(/\A[+-]?\d++(?!\.)\z/)
285
285
  tmp = val.to_i
286
- # Handle object references, see PDF1.7 s7.3.10
286
+ # Handle object references, see PDF2.0 s7.3.10
287
287
  prepare_string_scanner(10)
288
288
  if @ss.scan(REFERENCE_RE)
289
289
  tmp = if tmp > 0
@@ -315,7 +315,7 @@ module HexaPDF
315
315
 
316
316
  # Parses the literal string at the current position.
317
317
  #
318
- # See: PDF1.7 s7.3.4.2
318
+ # See: PDF2.0 s7.3.4.2
319
319
  def parse_literal_string
320
320
  @ss.pos += 1
321
321
  str = "".b
@@ -358,7 +358,7 @@ module HexaPDF
358
358
 
359
359
  # Parses the hex string at the current position.
360
360
  #
361
- # See: PDF1.7 s7.3.4.3
361
+ # See: PDF2.0 s7.3.4.3
362
362
  def parse_hex_string
363
363
  @ss.pos += 1
364
364
  data = scan_until(/(?=>)/)
@@ -373,7 +373,7 @@ module HexaPDF
373
373
 
374
374
  # Parses the name at the current position.
375
375
  #
376
- # See: PDF1.7 s7.3.5
376
+ # See: PDF2.0 s7.3.5
377
377
  def parse_name
378
378
  @ss.pos += 1
379
379
  str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
@@ -389,7 +389,7 @@ module HexaPDF
389
389
  #
390
390
  # It is assumed that the initial '[' has already been scanned.
391
391
  #
392
- # See: PDF1.7 s7.3.6
392
+ # See: PDF2.0 s7.3.6
393
393
  def parse_array
394
394
  result = []
395
395
  while true
@@ -408,7 +408,7 @@ module HexaPDF
408
408
  #
409
409
  # It is assumed that the initial '<<' has already been scanned.
410
410
  #
411
- # See: PDF1.7 s7.3.7
411
+ # See: PDF2.0 s7.3.7
412
412
  def parse_dictionary
413
413
  result = {}
414
414
  while true
@@ -61,7 +61,7 @@ module HexaPDF
61
61
  # By subclassing and overriding the necessary methods it is possible to define custom
62
62
  # appearances.
63
63
  #
64
- # See: PDF1.7 s12.5.5, s12.7
64
+ # See: PDF2.0 s12.5.5, s12.7
65
65
  class AppearanceGenerator
66
66
 
67
67
  # Creates a new instance for the given +widget+.
@@ -200,7 +200,7 @@ module HexaPDF
200
200
  def create_text_appearances
201
201
  default_resources = @document.acro_form.default_resources
202
202
  font, font_size, font_color = retrieve_font_information(default_resources)
203
- style = HexaPDF::Layout::Style.new(font: font, fill_color: font_color)
203
+ style = HexaPDF::Layout::Style.new(font: font, font_size: font_size, fill_color: font_color)
204
204
  border_style = @widget.border_style
205
205
  padding = [1, border_style.width].max
206
206
 
@@ -226,8 +226,6 @@ module HexaPDF
226
226
 
227
227
  canvas = form.canvas
228
228
  apply_background_and_border(border_style, canvas)
229
- style.font_size = calculate_font_size(font, font_size, height, border_style)
230
- style.clear_cache
231
229
 
232
230
  canvas.marked_content_sequence(:Tx) do
233
231
  if @field.field_value || @field.concrete_field_type == :list_box
@@ -362,6 +360,7 @@ module HexaPDF
362
360
  def draw_single_line_text(canvas, width, height, style, padding)
363
361
  value, text_color = apply_javascript_formatting(@field.field_value)
364
362
  style.fill_color = text_color if text_color
363
+ calculate_and_apply_font_size(value, style, width, height, padding)
365
364
  fragment = HexaPDF::Layout::TextFragment.create(value, style)
366
365
 
367
366
  if @field.concrete_field_type == :comb_text_field
@@ -431,6 +430,11 @@ module HexaPDF
431
430
 
432
431
  # Draws the visible option items of the list box in the widget's rectangle.
433
432
  def draw_list_box(canvas, width, height, style, padding)
433
+ if style.font_size == 0
434
+ style.font_size = 12 # Seems to be Adobe's default
435
+ style.clear_cache
436
+ end
437
+
434
438
  option_items = @field.option_items
435
439
  top_index = @field.list_box_top_index
436
440
  items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
@@ -475,24 +479,20 @@ module HexaPDF
475
479
  [font, font_size, font_color]
476
480
  end
477
481
 
478
- # Calculates the font size for text fields based on the font and font size of the default
479
- # appearance string, the annotation rectangle's height and the border style.
480
- def calculate_font_size(font, font_size, height, border_style)
481
- if font_size == 0
482
- case @field.concrete_field_type
483
- when :multiline_text_field
484
- 0 # Handled by multiline drawing code
485
- when :list_box
486
- 12 # Seems to be Adobe's default
487
- else
488
- unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
489
- font.scaling_factor / 1000.0
490
- # The constant factor was found empirically by checking what Adobe Reader etc. do
491
- (height - 2 * border_style.width) / unit_font_size * 0.83
492
- end
493
- else
494
- font_size
495
- end
482
+ # Calculates the font size for single line text fields using auto-sizing, based on the font
483
+ # and font size of the default appearance string, the annotation rectangle's height and
484
+ # width and the given padding. The font size is then applied to the provided style object.
485
+ def calculate_and_apply_font_size(value, style, width, height, padding)
486
+ return if style.font_size != 0
487
+
488
+ font = style.font
489
+ unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
490
+ font.scaling_factor / 1000.0
491
+ # The constant factor was found empirically by checking what Adobe Reader etc. do
492
+ style.font_size = (height - 2 * padding) / unit_font_size * 0.85
493
+ fragment = HexaPDF::Layout::TextFragment.create(value, style)
494
+ style.font_size = [style.font_size, style.font_size * (width - 4 * padding) / fragment.width].min
495
+ style.clear_cache
496
496
  end
497
497
 
498
498
  # Handles Javascript formatting routines for single-line text fields.
@@ -81,7 +81,7 @@ module HexaPDF
81
81
  # :radios_in_unison:: A group of radio buttons with the same value for the on state will turn
82
82
  # on or off in unison.
83
83
  #
84
- # See: PDF1.7 s12.7.4.2
84
+ # See: PDF2.0 s12.7.4.2
85
85
  class ButtonField < Field
86
86
 
87
87
  define_type :XXAcroFormField