hexapdf 0.32.2 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +63 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/lib/hexapdf/cli/command.rb +5 -2
  19. data/lib/hexapdf/cli/form.rb +5 -5
  20. data/lib/hexapdf/cli/inspect.rb +3 -3
  21. data/lib/hexapdf/composer.rb +104 -52
  22. data/lib/hexapdf/configuration.rb +44 -39
  23. data/lib/hexapdf/content/canvas.rb +393 -267
  24. data/lib/hexapdf/content/color_space.rb +72 -25
  25. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  26. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  27. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  28. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  29. data/lib/hexapdf/content/graphic_object.rb +6 -7
  30. data/lib/hexapdf/content/graphics_state.rb +54 -45
  31. data/lib/hexapdf/content/operator.rb +52 -54
  32. data/lib/hexapdf/content/parser.rb +2 -2
  33. data/lib/hexapdf/content/processor.rb +15 -15
  34. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  35. data/lib/hexapdf/content.rb +5 -0
  36. data/lib/hexapdf/dictionary.rb +6 -5
  37. data/lib/hexapdf/dictionary_fields.rb +42 -14
  38. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  39. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  40. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  41. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  42. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  43. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  44. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  45. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  46. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  47. data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
  48. data/lib/hexapdf/digital_signature.rb +7 -2
  49. data/lib/hexapdf/document/destinations.rb +12 -11
  50. data/lib/hexapdf/document/files.rb +1 -1
  51. data/lib/hexapdf/document/fonts.rb +1 -1
  52. data/lib/hexapdf/document/layout.rb +167 -39
  53. data/lib/hexapdf/document/pages.rb +3 -2
  54. data/lib/hexapdf/document.rb +89 -55
  55. data/lib/hexapdf/encryption/aes.rb +5 -5
  56. data/lib/hexapdf/encryption/arc4.rb +1 -1
  57. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  58. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  59. data/lib/hexapdf/encryption/identity.rb +1 -1
  60. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  61. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  62. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  63. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  64. data/lib/hexapdf/encryption.rb +7 -2
  65. data/lib/hexapdf/error.rb +18 -0
  66. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  67. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  68. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  69. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  70. data/lib/hexapdf/filter/pass_through.rb +1 -1
  71. data/lib/hexapdf/filter/predictor.rb +1 -1
  72. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  73. data/lib/hexapdf/filter.rb +55 -6
  74. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  75. data/lib/hexapdf/font/cmap.rb +1 -1
  76. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  77. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  78. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  79. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  80. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  82. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  83. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  84. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  85. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  86. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  87. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  88. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  89. data/lib/hexapdf/font_loader.rb +3 -0
  90. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  91. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  92. data/lib/hexapdf/image_loader/png.rb +2 -2
  93. data/lib/hexapdf/image_loader.rb +1 -1
  94. data/lib/hexapdf/importer.rb +13 -0
  95. data/lib/hexapdf/layout/box.rb +9 -2
  96. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  97. data/lib/hexapdf/layout/column_box.rb +18 -4
  98. data/lib/hexapdf/layout/frame.rb +30 -12
  99. data/lib/hexapdf/layout/image_box.rb +5 -0
  100. data/lib/hexapdf/layout/inline_box.rb +1 -0
  101. data/lib/hexapdf/layout/list_box.rb +17 -1
  102. data/lib/hexapdf/layout/page_style.rb +4 -4
  103. data/lib/hexapdf/layout/style.rb +18 -3
  104. data/lib/hexapdf/layout/table_box.rb +682 -0
  105. data/lib/hexapdf/layout/text_box.rb +5 -3
  106. data/lib/hexapdf/layout/text_fragment.rb +1 -1
  107. data/lib/hexapdf/layout/text_layouter.rb +12 -4
  108. data/lib/hexapdf/layout.rb +1 -0
  109. data/lib/hexapdf/name_tree_node.rb +1 -1
  110. data/lib/hexapdf/number_tree_node.rb +1 -1
  111. data/lib/hexapdf/object.rb +18 -7
  112. data/lib/hexapdf/parser.rb +7 -7
  113. data/lib/hexapdf/pdf_array.rb +1 -1
  114. data/lib/hexapdf/rectangle.rb +1 -1
  115. data/lib/hexapdf/reference.rb +1 -1
  116. data/lib/hexapdf/revision.rb +1 -1
  117. data/lib/hexapdf/revisions.rb +3 -3
  118. data/lib/hexapdf/serializer.rb +15 -15
  119. data/lib/hexapdf/stream.rb +4 -2
  120. data/lib/hexapdf/tokenizer.rb +14 -14
  121. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  122. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  123. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  124. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  125. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  126. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  127. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  128. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  129. data/lib/hexapdf/type/acro_form.rb +1 -1
  130. data/lib/hexapdf/type/action.rb +1 -1
  131. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  132. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  133. data/lib/hexapdf/type/actions/launch.rb +1 -1
  134. data/lib/hexapdf/type/actions/uri.rb +1 -1
  135. data/lib/hexapdf/type/actions.rb +1 -1
  136. data/lib/hexapdf/type/annotation.rb +3 -3
  137. data/lib/hexapdf/type/annotations/link.rb +1 -1
  138. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  139. data/lib/hexapdf/type/annotations/text.rb +1 -1
  140. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  141. data/lib/hexapdf/type/annotations.rb +1 -1
  142. data/lib/hexapdf/type/catalog.rb +1 -1
  143. data/lib/hexapdf/type/cid_font.rb +3 -3
  144. data/lib/hexapdf/type/embedded_file.rb +1 -1
  145. data/lib/hexapdf/type/file_specification.rb +2 -2
  146. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  147. data/lib/hexapdf/type/font_simple.rb +2 -2
  148. data/lib/hexapdf/type/font_type0.rb +3 -3
  149. data/lib/hexapdf/type/font_type3.rb +1 -1
  150. data/lib/hexapdf/type/form.rb +1 -1
  151. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  152. data/lib/hexapdf/type/icon_fit.rb +1 -1
  153. data/lib/hexapdf/type/image.rb +1 -1
  154. data/lib/hexapdf/type/info.rb +1 -1
  155. data/lib/hexapdf/type/mark_information.rb +1 -1
  156. data/lib/hexapdf/type/names.rb +2 -2
  157. data/lib/hexapdf/type/object_stream.rb +2 -1
  158. data/lib/hexapdf/type/outline.rb +1 -1
  159. data/lib/hexapdf/type/outline_item.rb +1 -1
  160. data/lib/hexapdf/type/page.rb +19 -10
  161. data/lib/hexapdf/type/page_label.rb +1 -1
  162. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  163. data/lib/hexapdf/type/resources.rb +1 -1
  164. data/lib/hexapdf/type/trailer.rb +2 -2
  165. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  166. data/lib/hexapdf/type/xref_stream.rb +2 -2
  167. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  168. data/lib/hexapdf/version.rb +1 -1
  169. data/lib/hexapdf/writer.rb +4 -4
  170. data/lib/hexapdf/xref_section.rb +2 -2
  171. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  172. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  173. data/test/hexapdf/content/test_canvas.rb +0 -1
  174. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  175. data/test/hexapdf/document/test_files.rb +2 -2
  176. data/test/hexapdf/document/test_layout.rb +98 -0
  177. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  178. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  179. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  180. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  181. data/test/hexapdf/layout/test_box.rb +1 -1
  182. data/test/hexapdf/layout/test_column_box.rb +65 -21
  183. data/test/hexapdf/layout/test_frame.rb +14 -14
  184. data/test/hexapdf/layout/test_image_box.rb +4 -0
  185. data/test/hexapdf/layout/test_inline_box.rb +5 -0
  186. data/test/hexapdf/layout/test_list_box.rb +40 -6
  187. data/test/hexapdf/layout/test_page_style.rb +3 -2
  188. data/test/hexapdf/layout/test_style.rb +50 -0
  189. data/test/hexapdf/layout/test_table_box.rb +722 -0
  190. data/test/hexapdf/layout/test_text_box.rb +18 -0
  191. data/test/hexapdf/layout/test_text_layouter.rb +4 -0
  192. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  193. data/test/hexapdf/test_document.rb +1 -0
  194. data/test/hexapdf/test_filter.rb +8 -0
  195. data/test/hexapdf/test_importer.rb +9 -0
  196. data/test/hexapdf/test_object.rb +16 -5
  197. data/test/hexapdf/test_stream.rb +7 -0
  198. data/test/hexapdf/test_writer.rb +3 -3
  199. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  200. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  201. data/test/hexapdf/type/test_page.rb +18 -4
  202. metadata +17 -8
@@ -74,13 +74,14 @@ module HexaPDF
74
74
 
75
75
  @width = @height = 0
76
76
  @result = if style.position == :flow
77
- @tl.fit(@items, frame.width_specification, frame.shape.bbox.height)
77
+ @tl.fit(@items, frame.width_specification, frame.shape.bbox.height,
78
+ apply_first_text_indent: !split_box?)
78
79
  else
79
80
  @width = reserved_width
80
81
  @height = reserved_height
81
82
  width = (@initial_width > 0 ? @initial_width : available_width) - @width
82
83
  height = (@initial_height > 0 ? @initial_height : available_height) - @height
83
- @tl.fit(@items, width, height)
84
+ @tl.fit(@items, width, height, apply_first_text_indent: !split_box?)
84
85
  end
85
86
  @width += if @initial_width > 0 || style.align == :center || style.align == :right
86
87
  width
@@ -103,7 +104,8 @@ module HexaPDF
103
104
  def split(available_width, available_height, frame)
104
105
  fit(available_width, available_height, frame) unless @result
105
106
 
106
- if style.position != :flow && (@width > available_width || @height > available_height)
107
+ if style.position != :flow && (float_compare(@width, available_width) > 0 ||
108
+ float_compare(@height, available_height) > 0)
107
109
  [nil, self]
108
110
  elsif @result.remaining_items.empty?
109
111
  [self]
@@ -259,7 +259,7 @@ module HexaPDF
259
259
  # The width of the text fragment.
260
260
  #
261
261
  # It is the sum of the widths of its items and is calculated by using the algorithm presented
262
- # in PDF1.7 s9.4.4. By using kerning values as the first and/or last items, the text contained
262
+ # in PDF2.0 s9.4.4. By using kerning values as the first and/or last items, the text contained
263
263
  # in the fragment may spill over the left and/or right boundary.
264
264
  def width
265
265
  @width ||= @items.sum {|item| style.scaled_item_width(item) }
@@ -51,7 +51,8 @@ module HexaPDF
51
51
  # * Existing line breaking characters inside of TextFragment objects are respected when fitting
52
52
  # text. If this is not wanted, they have to be removed beforehand.
53
53
  #
54
- # * The first line may be indented by setting Style#text_indent which may also be negative.
54
+ # * The first line of each paragraph may be indented by setting Style#text_indent which may also
55
+ # be negative.
55
56
  #
56
57
  # * Text can be fitted into arbitrarily shaped areas, even containing holes.
57
58
  #
@@ -658,7 +659,7 @@ module HexaPDF
658
659
  end
659
660
 
660
661
  # :call-seq:
661
- # text_layouter.fit(items, width, height) -> result
662
+ # text_layouter.fit(items, width, height, apply_first_text_indent: true) -> result
662
663
  #
663
664
  # Fits the items into the given area and returns a Result object with all the information.
664
665
  #
@@ -693,7 +694,14 @@ module HexaPDF
693
694
  # The text segmentation algorithm specified via #style is applied to the items in case they
694
695
  # are not already in segmented form. This also means that Result#remaining_items always
695
696
  # contains segmented items.
696
- def fit(items, width, height)
697
+ #
698
+ # Optional arguments:
699
+ #
700
+ # +apply_first_text_indent+::
701
+ # Specifies whether style.text_indent should be applied to the first line. This should be
702
+ # set to +false+ if the items start with a continuation of a paragraph instead of starting
703
+ # a new paragraph (e.g. after a page break).
704
+ def fit(items, width, height, apply_first_text_indent: true)
697
705
  unless items.empty? || items[0].respond_to?(:type)
698
706
  items = style.text_segmentation_algorithm.call(items)
699
707
  end
@@ -704,7 +712,7 @@ module HexaPDF
704
712
  rest = items
705
713
 
706
714
  # processing state variables
707
- indent = style.text_indent
715
+ indent = apply_first_text_indent ? style.text_indent : 0
708
716
  line_fragments = []
709
717
  line_height = 0
710
718
  previous_line = nil
@@ -57,6 +57,7 @@ module HexaPDF
57
57
  autoload(:ColumnBox, 'hexapdf/layout/column_box')
58
58
  autoload(:ListBox, 'hexapdf/layout/list_box')
59
59
  autoload(:PageStyle, 'hexapdf/layout/page_style')
60
+ autoload(:TableBox, 'hexapdf/layout/table_box')
60
61
 
61
62
  end
62
63
 
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # HexaPDF::Utils::SortedTreeNode) to add or retrieve entries. They ensure that the name tree stays
56
56
  # valid.
57
57
  #
58
- # See: PDF1.7 s7.9.6
58
+ # See: PDF2.0 s7.9.6
59
59
  class NameTreeNode < Dictionary
60
60
 
61
61
  include Utils::SortedTreeNode
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # Number trees are similar to name trees but use integers as keys instead of strings. See
45
45
  # HexaPDF::NameTreeNode for a more detailed explanation.
46
46
  #
47
- # See: PDF1.7 s7.9.7, HexaPDF::NameTreeNode
47
+ # See: PDF2.0 s7.9.7, HexaPDF::NameTreeNode
48
48
  class NumberTreeNode < Dictionary
49
49
 
50
50
  include Utils::SortedTreeNode
@@ -117,7 +117,7 @@ module HexaPDF
117
117
  #
118
118
  # See: HexaPDF::Dictionary, HexaPDF::Stream, HexaPDF::Reference, HexaPDF::Document
119
119
  #
120
- # See: PDF1.7 s7.3.10, s7.3.8
120
+ # See: PDF2.0 s7.3.10, s7.3.8
121
121
  class Object
122
122
 
123
123
  include Comparable
@@ -143,18 +143,27 @@ module HexaPDF
143
143
 
144
144
  # Makes sure that the object itself as well as all nested values are direct objects.
145
145
  #
146
+ # The +document+ argument needs to contain the Document instance to which +object+ belongs so
147
+ # that references can be correctly resolved.
148
+ #
146
149
  # If an indirect object is found, it is turned into a direct object and the indirect object is
147
150
  # deleted from the document.
148
- def self.make_direct(object)
151
+ def self.make_direct(object, document)
149
152
  if object.kind_of?(HexaPDF::Object) && object.indirect?
153
+ raise HexaPDF::Error, "Can't make a stream object a direct object" if object.data.stream
150
154
  object_to_delete = object
151
155
  object = object.value
152
156
  object_to_delete.document.delete(object_to_delete)
153
157
  end
154
- if object.kind_of?(Hash)
155
- object.transform_values! {|val| make_direct(val) }
156
- elsif object.kind_of?(Array)
157
- object.map! {|val| make_direct(val) }
158
+ case object
159
+ when HexaPDF::Object
160
+ object.data.value = make_direct(object.data.value, document)
161
+ when Hash
162
+ object.transform_values! {|val| make_direct(val, document) }
163
+ when Array
164
+ object.map! {|val| make_direct(val, document) }
165
+ when Reference
166
+ object = make_direct(document.object(object), document)
158
167
  end
159
168
  object
160
169
  end
@@ -255,7 +264,7 @@ module HexaPDF
255
264
  # type.
256
265
  #
257
266
  # However, the Type and Subtype fields can easily be used for this. Subclasses for PDF objects
258
- # that don't have such fields may use a unique name that has to begin with XX (see PDF1.7 sE.2)
267
+ # that don't have such fields may use a unique name that has to begin with XX (see PDF2.0 sE.2)
259
268
  # and therefore doesn't clash with names defined by the PDF specification.
260
269
  #
261
270
  # For basic objects this always returns +:Unknown+.
@@ -297,6 +306,8 @@ module HexaPDF
297
306
  end
298
307
 
299
308
  # Makes a deep copy of the source PDF object and resets the object identifier.
309
+ #
310
+ # Note that indirect references are *not* copied! If that is also needed, use Importer::copy.
300
311
  def deep_copy
301
312
  obj = dup
302
313
  obj.instance_variable_set(:@data, @data.dup)
@@ -41,11 +41,11 @@ require 'hexapdf/xref_section'
41
41
 
42
42
  module HexaPDF
43
43
 
44
- # Parses an IO stream according to PDF1.7 to get at the contained objects.
44
+ # Parses an IO stream according to PDF2.0 to get at the contained objects.
45
45
  #
46
46
  # This class also contains higher-level methods for getting indirect objects and revisions.
47
47
  #
48
- # See: PDF1.7 s7
48
+ # See: PDF2.0 s7
49
49
  class Parser
50
50
 
51
51
  # The IO stream which is parsed.
@@ -125,7 +125,7 @@ module HexaPDF
125
125
  #
126
126
  # Returns an array containing [object, oid, gen, stream].
127
127
  #
128
- # See: PDF1.7 s7.3.10, s7.3.8
128
+ # See: PDF2.0 s7.3.10, s7.3.8
129
129
  def parse_indirect_object(offset = nil)
130
130
  @tokenizer.pos = offset + @header_offset if offset
131
131
  oid = @tokenizer.next_token
@@ -267,7 +267,7 @@ module HexaPDF
267
267
  #
268
268
  # This method can only parse cross-reference sections, not cross-reference streams!
269
269
  #
270
- # See: PDF1.7 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
270
+ # See: PDF2.0 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
271
271
  def parse_xref_section_and_trailer(offset)
272
272
  @tokenizer.pos = offset + @header_offset
273
273
  token = @tokenizer.next_token
@@ -346,7 +346,7 @@ module HexaPDF
346
346
  #
347
347
  # If strict parsing is disabled, the whole file is searched for the offset.
348
348
  #
349
- # See: PDF1.7 s7.5.5, ADB1.7 sH.3-3.4.4
349
+ # See: PDF2.0 s7.5.5, ADB1.7 sH.3-3.4.4
350
350
  def startxref_offset
351
351
  return @startxref_offset if defined?(@startxref_offset)
352
352
 
@@ -397,7 +397,7 @@ module HexaPDF
397
397
 
398
398
  # Returns the PDF version number that is stored in the file header.
399
399
  #
400
- # See: PDF1.7 s7.5.2
400
+ # See: PDF2.0 s7.5.2
401
401
  def file_header_version
402
402
  unless @header_version
403
403
  raise_malformed("PDF file header is missing or corrupt", pos: 0)
@@ -413,7 +413,7 @@ module HexaPDF
413
413
  # restriction so that the header may appear in the first 1024 bytes. We follow the Adobe
414
414
  # convention.
415
415
  #
416
- # See: PDF1.7 s7.5.2, ADB1.7 sH.3-3.4.1
416
+ # See: PDF2.0 s7.5.2, ADB1.7 sH.3-3.4.1
417
417
  def retrieve_pdf_header_offset_and_version
418
418
  @io.seek(0)
419
419
  @header_offset = (@io.read(1024) || '').index(/%PDF-(\d\.\d)/) || 0
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # #[] method. Therefore not all Array methods are implemented - use the #value directly if other
45
45
  # methods are needed.
46
46
  #
47
- # See: PDF1.7 s7.3.6
47
+ # See: PDF2.0 s7.3.6
48
48
  class PDFArray < HexaPDF::Object
49
49
 
50
50
  include Enumerable
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  # where +left+ is the bottom left x-coordinate, +bottom+ is the bottom left y-coordinate, +right+
52
52
  # is the top right x-coordinate and +top+ is the top right y-coordinate.
53
53
  #
54
- # See: PDF1.7 s7.9.5
54
+ # See: PDF2.0 s7.9.5
55
55
  class Rectangle < HexaPDF::PDFArray
56
56
 
57
57
  # Returns the x-coordinate of the bottom-left corner.
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  # keys. Furthermore the implementation is compatible to the one of Object, i.e. the hash of a
51
51
  # Reference object is the same as the hash of an indirect Object.
52
52
  #
53
- # See: PDF1.7 s7.3.10, Object
53
+ # See: PDF2.0 s7.3.10, Object
54
54
  class Reference
55
55
 
56
56
  include Comparable
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  # If a revision doesn't have an associated cross-reference section, it wasn't created from a PDF
49
49
  # file.
50
50
  #
51
- # See: PDF1.7 s7.5.6, Revisions
51
+ # See: PDF2.0 s7.5.6, Revisions
52
52
  class Revision
53
53
 
54
54
  include Enumerable
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # this should only be done if one is familiar with the inner workings of HexaPDF. Otherwise it is
56
56
  # best to use the convenience methods of this class to create, access or delete indirect objects.
57
57
  #
58
- # See: PDF1.7 s7.5.6, HexaPDF::Revision
58
+ # See: PDF2.0 s7.5.6, HexaPDF::Revision
59
59
  class Revisions
60
60
 
61
61
  class << self
@@ -76,7 +76,7 @@ module HexaPDF
76
76
  seen_xref_offsets = {}
77
77
 
78
78
  while offset && !seen_xref_offsets.key?(offset)
79
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # PDF2.0 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
80
80
  # should be direct. Adobe's POV is followed here. Same with :XRefStm.
81
81
  xref_section, trailer = parser.load_revision(offset)
82
82
  seen_xref_offsets[offset] = true
@@ -167,7 +167,7 @@ module HexaPDF
167
167
  # For references to unknown objects, +nil+ is returned but free objects are represented by a
168
168
  # PDF Null object, not by +nil+!
169
169
  #
170
- # See: PDF1.7 s7.3.9
170
+ # See: PDF2.0 s7.3.9
171
171
  def object(ref)
172
172
  i = @revisions.size - 1
173
173
  while i >= 0
@@ -79,7 +79,7 @@ module HexaPDF
79
79
  #
80
80
  # If no serialization method for a specific class is found, the ancestors classes are tried.
81
81
  #
82
- # See: PDF1.7 s7.3
82
+ # See: PDF2.0 s7.3
83
83
  class Serializer
84
84
 
85
85
  # The encrypter to use for encrypting strings and streams. If +nil+, strings and streams are not
@@ -163,21 +163,21 @@ module HexaPDF
163
163
 
164
164
  # Serializes the +nil+ value.
165
165
  #
166
- # See: PDF1.7 s7.3.9
166
+ # See: PDF2.0 s7.3.9
167
167
  def serialize_nilclass(_obj)
168
168
  "null"
169
169
  end
170
170
 
171
171
  # Serializes the +true+ value.
172
172
  #
173
- # See: PDF1.7 s7.3.2
173
+ # See: PDF2.0 s7.3.2
174
174
  def serialize_trueclass(_obj)
175
175
  "true"
176
176
  end
177
177
 
178
178
  # Serializes the +false+ value.
179
179
  #
180
- # See: PDF1.7 s7.3.2
180
+ # See: PDF2.0 s7.3.2
181
181
  def serialize_falseclass(_obj)
182
182
  "false"
183
183
  end
@@ -187,21 +187,21 @@ module HexaPDF
187
187
  # This method should be used for cases where it is known that the object is either an Integer
188
188
  # or a Float.
189
189
  #
190
- # See: PDF1.7 s7.3.3
190
+ # See: PDF2.0 s7.3.3
191
191
  def serialize_numeric(obj)
192
192
  obj.kind_of?(Integer) ? obj.to_s : serialize_float(obj)
193
193
  end
194
194
 
195
195
  # Serializes an Integer object.
196
196
  #
197
- # See: PDF1.7 s7.3.3
197
+ # See: PDF2.0 s7.3.3
198
198
  def serialize_integer(obj)
199
199
  obj.to_s
200
200
  end
201
201
 
202
202
  # Serializes a Float object.
203
203
  #
204
- # See: PDF1.7 s7.3.3
204
+ # See: PDF2.0 s7.3.3
205
205
  def serialize_float(obj)
206
206
  if -0.0001 < obj && obj < 0.0001 && obj != 0
207
207
  sprintf("%.6f", obj)
@@ -215,7 +215,7 @@ module HexaPDF
215
215
  # The regexp matches all characters that need to be escaped and the substs hash contains the
216
216
  # mapping from these characters to their escaped form.
217
217
  #
218
- # See PDF1.7 s7.3.5
218
+ # See PDF2.0 s7.3.5
219
219
  NAME_SUBSTS = {} # :nodoc:
220
220
  [0..32, 127..255, Tokenizer::DELIMITER.bytes, Tokenizer::WHITESPACE.bytes, [35]].each do |a|
221
221
  a.each {|c| NAME_SUBSTS[c.chr] = "##{c.to_s(16).rjust(2, '0')}" }
@@ -225,7 +225,7 @@ module HexaPDF
225
225
 
226
226
  # Serializes a Symbol object (i.e. a PDF name object).
227
227
  #
228
- # See: PDF1.7 s7.3.5
228
+ # See: PDF2.0 s7.3.5
229
229
  def serialize_symbol(obj)
230
230
  NAME_CACHE[obj] ||=
231
231
  begin
@@ -240,7 +240,7 @@ module HexaPDF
240
240
 
241
241
  # Serializes an Array object.
242
242
  #
243
- # See: PDF1.7 s7.3.6
243
+ # See: PDF2.0 s7.3.6
244
244
  def serialize_array(obj)
245
245
  str = +"["
246
246
  index = 0
@@ -256,7 +256,7 @@ module HexaPDF
256
256
 
257
257
  # Serializes a Hash object (i.e. a PDF dictionary object).
258
258
  #
259
- # See: PDF1.7 s7.3.7
259
+ # See: PDF2.0 s7.3.7
260
260
  def serialize_hash(obj)
261
261
  str = +"<<"
262
262
  obj.each do |k, v|
@@ -274,7 +274,7 @@ module HexaPDF
274
274
 
275
275
  # Serializes a String object.
276
276
  #
277
- # See: PDF1.7 s7.3.4
277
+ # See: PDF2.0 s7.3.4
278
278
  def serialize_string(obj)
279
279
  obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
280
280
  encrypter.encrypt_string(obj, @object)
@@ -294,7 +294,7 @@ module HexaPDF
294
294
  # The ISO PDF specification differs in respect to the supported date format. When converting
295
295
  # to a date string, a format suitable for both is output.
296
296
  #
297
- # See: PDF1.7 s7.9.4, ADB1.7 3.8.3
297
+ # See: PDF2.0 s7.9.4, ADB1.7 3.8.3
298
298
  def serialize_time(obj)
299
299
  zone = obj.strftime("%z'")
300
300
  if zone == "+0000'"
@@ -330,14 +330,14 @@ module HexaPDF
330
330
  end
331
331
  end
332
332
 
333
- # See: PDF1.7 s7.3.10
333
+ # See: PDF2.0 s7.3.10
334
334
  def serialize_hexapdf_reference(obj)
335
335
  "#{obj.oid} #{obj.gen} R"
336
336
  end
337
337
 
338
338
  # Serializes the streams dictionary and its stream.
339
339
  #
340
- # See: PDF1.7 s7.3.8
340
+ # See: PDF2.0 s7.3.8
341
341
  def serialize_hexapdf_stream(obj)
342
342
  if !obj.indirect?
343
343
  raise HexaPDF::Error, "Can't serialize PDF stream without object identifier"
@@ -88,7 +88,9 @@ module HexaPDF
88
88
 
89
89
  # Returns a Fiber for getting at the data of the stream represented by this object.
90
90
  def fiber(chunk_size = 0)
91
- if @source.kind_of?(Proc)
91
+ if @source.kind_of?(FiberDoubleForString)
92
+ @source.dup
93
+ elsif @source.kind_of?(Proc)
92
94
  FiberWithLength.new(@length, &@source)
93
95
  elsif @source.kind_of?(String)
94
96
  HexaPDF::Filter.source_from_file(@source, pos: @offset || 0, length: @length || -1,
@@ -134,7 +136,7 @@ module HexaPDF
134
136
  #
135
137
  # Note that support for external streams (/F, /FFilter, /FDecodeParms) is not yet implemented!
136
138
  #
137
- # See: PDF1.7 s7.3.8, Dictionary
139
+ # See: PDF2.0 s7.3.8, Dictionary
138
140
  class Stream < Dictionary
139
141
 
140
142
  define_field :Length, type: Integer # not required, will be auto-filled when writing
@@ -42,7 +42,7 @@ module HexaPDF
42
42
 
43
43
  # Tokenizes the content of an IO object following the PDF rules.
44
44
  #
45
- # See: PDF1.7 s7.2
45
+ # See: PDF2.0 s7.2
46
46
  class Tokenizer
47
47
 
48
48
  # Represents a keyword in a PDF file.
@@ -61,12 +61,12 @@ module HexaPDF
61
61
 
62
62
  # Characters defined as whitespace.
63
63
  #
64
- # See: PDF1.7 s7.2.2
64
+ # See: PDF2.0 s7.2.2
65
65
  WHITESPACE = " \n\r\0\t\f"
66
66
 
67
67
  # Characters defined as delimiters.
68
68
  #
69
- # See: PDF1.7 s7.2.2
69
+ # See: PDF2.0 s7.2.2
70
70
  DELIMITER = "()<>{}/[]%"
71
71
 
72
72
  WHITESPACE_MULTI_RE = /[#{WHITESPACE}]+/ # :nodoc:
@@ -171,7 +171,7 @@ module HexaPDF
171
171
  # If the +allow_end_array_token+ argument is +true+, the ']' token is permitted to facilitate
172
172
  # the use of this method during array parsing.
173
173
  #
174
- # See: PDF1.7 s7.3
174
+ # See: PDF2.0 s7.3
175
175
  def next_object(allow_end_array_token: false, allow_keyword: false)
176
176
  token = next_token
177
177
 
@@ -231,7 +231,7 @@ module HexaPDF
231
231
  # If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
232
232
  # problem is recoverable.
233
233
  #
234
- # See: PDF1.7 7.5.4
234
+ # See: PDF2.0 7.5.4
235
235
  def next_xref_entry #:yield: recoverable
236
236
  prepare_string_scanner(20)
237
237
  if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
@@ -242,7 +242,7 @@ module HexaPDF
242
242
 
243
243
  # Skips all whitespace at the current position.
244
244
  #
245
- # See: PDF1.7 s7.2.2
245
+ # See: PDF2.0 s7.2.2
246
246
  def skip_whitespace
247
247
  prepare_string_scanner
248
248
  prepare_string_scanner while @ss.skip(WHITESPACE_MULTI_RE)
@@ -268,7 +268,7 @@ module HexaPDF
268
268
 
269
269
  # Parses the keyword at the current position.
270
270
  #
271
- # See: PDF1.7 s7.2
271
+ # See: PDF2.0 s7.2
272
272
  def parse_keyword
273
273
  str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
274
274
  TOKEN_CACHE[str]
@@ -278,12 +278,12 @@ module HexaPDF
278
278
 
279
279
  # Parses the number (integer or real) at the current position.
280
280
  #
281
- # See: PDF1.7 s7.3.3
281
+ # See: PDF2.0 s7.3.3
282
282
  def parse_number
283
283
  val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
284
284
  if val.match?(/\A[+-]?\d++(?!\.)\z/)
285
285
  tmp = val.to_i
286
- # Handle object references, see PDF1.7 s7.3.10
286
+ # Handle object references, see PDF2.0 s7.3.10
287
287
  prepare_string_scanner(10)
288
288
  if @ss.scan(REFERENCE_RE)
289
289
  tmp = if tmp > 0
@@ -315,7 +315,7 @@ module HexaPDF
315
315
 
316
316
  # Parses the literal string at the current position.
317
317
  #
318
- # See: PDF1.7 s7.3.4.2
318
+ # See: PDF2.0 s7.3.4.2
319
319
  def parse_literal_string
320
320
  @ss.pos += 1
321
321
  str = "".b
@@ -358,7 +358,7 @@ module HexaPDF
358
358
 
359
359
  # Parses the hex string at the current position.
360
360
  #
361
- # See: PDF1.7 s7.3.4.3
361
+ # See: PDF2.0 s7.3.4.3
362
362
  def parse_hex_string
363
363
  @ss.pos += 1
364
364
  data = scan_until(/(?=>)/)
@@ -373,7 +373,7 @@ module HexaPDF
373
373
 
374
374
  # Parses the name at the current position.
375
375
  #
376
- # See: PDF1.7 s7.3.5
376
+ # See: PDF2.0 s7.3.5
377
377
  def parse_name
378
378
  @ss.pos += 1
379
379
  str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
@@ -389,7 +389,7 @@ module HexaPDF
389
389
  #
390
390
  # It is assumed that the initial '[' has already been scanned.
391
391
  #
392
- # See: PDF1.7 s7.3.6
392
+ # See: PDF2.0 s7.3.6
393
393
  def parse_array
394
394
  result = []
395
395
  while true
@@ -408,7 +408,7 @@ module HexaPDF
408
408
  #
409
409
  # It is assumed that the initial '<<' has already been scanned.
410
410
  #
411
- # See: PDF1.7 s7.3.7
411
+ # See: PDF2.0 s7.3.7
412
412
  def parse_dictionary
413
413
  result = {}
414
414
  while true
@@ -61,7 +61,7 @@ module HexaPDF
61
61
  # By subclassing and overriding the necessary methods it is possible to define custom
62
62
  # appearances.
63
63
  #
64
- # See: PDF1.7 s12.5.5, s12.7
64
+ # See: PDF2.0 s12.5.5, s12.7
65
65
  class AppearanceGenerator
66
66
 
67
67
  # Creates a new instance for the given +widget+.
@@ -200,7 +200,7 @@ module HexaPDF
200
200
  def create_text_appearances
201
201
  default_resources = @document.acro_form.default_resources
202
202
  font, font_size, font_color = retrieve_font_information(default_resources)
203
- style = HexaPDF::Layout::Style.new(font: font, fill_color: font_color)
203
+ style = HexaPDF::Layout::Style.new(font: font, font_size: font_size, fill_color: font_color)
204
204
  border_style = @widget.border_style
205
205
  padding = [1, border_style.width].max
206
206
 
@@ -226,8 +226,6 @@ module HexaPDF
226
226
 
227
227
  canvas = form.canvas
228
228
  apply_background_and_border(border_style, canvas)
229
- style.font_size = calculate_font_size(font, font_size, height, border_style)
230
- style.clear_cache
231
229
 
232
230
  canvas.marked_content_sequence(:Tx) do
233
231
  if @field.field_value || @field.concrete_field_type == :list_box
@@ -362,6 +360,7 @@ module HexaPDF
362
360
  def draw_single_line_text(canvas, width, height, style, padding)
363
361
  value, text_color = apply_javascript_formatting(@field.field_value)
364
362
  style.fill_color = text_color if text_color
363
+ calculate_and_apply_font_size(value, style, width, height, padding)
365
364
  fragment = HexaPDF::Layout::TextFragment.create(value, style)
366
365
 
367
366
  if @field.concrete_field_type == :comb_text_field
@@ -431,6 +430,11 @@ module HexaPDF
431
430
 
432
431
  # Draws the visible option items of the list box in the widget's rectangle.
433
432
  def draw_list_box(canvas, width, height, style, padding)
433
+ if style.font_size == 0
434
+ style.font_size = 12 # Seems to be Adobe's default
435
+ style.clear_cache
436
+ end
437
+
434
438
  option_items = @field.option_items
435
439
  top_index = @field.list_box_top_index
436
440
  items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
@@ -475,24 +479,20 @@ module HexaPDF
475
479
  [font, font_size, font_color]
476
480
  end
477
481
 
478
- # Calculates the font size for text fields based on the font and font size of the default
479
- # appearance string, the annotation rectangle's height and the border style.
480
- def calculate_font_size(font, font_size, height, border_style)
481
- if font_size == 0
482
- case @field.concrete_field_type
483
- when :multiline_text_field
484
- 0 # Handled by multiline drawing code
485
- when :list_box
486
- 12 # Seems to be Adobe's default
487
- else
488
- unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
489
- font.scaling_factor / 1000.0
490
- # The constant factor was found empirically by checking what Adobe Reader etc. do
491
- (height - 2 * border_style.width) / unit_font_size * 0.83
492
- end
493
- else
494
- font_size
495
- end
482
+ # Calculates the font size for single line text fields using auto-sizing, based on the font
483
+ # and font size of the default appearance string, the annotation rectangle's height and
484
+ # width and the given padding. The font size is then applied to the provided style object.
485
+ def calculate_and_apply_font_size(value, style, width, height, padding)
486
+ return if style.font_size != 0
487
+
488
+ font = style.font
489
+ unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
490
+ font.scaling_factor / 1000.0
491
+ # The constant factor was found empirically by checking what Adobe Reader etc. do
492
+ style.font_size = (height - 2 * padding) / unit_font_size * 0.85
493
+ fragment = HexaPDF::Layout::TextFragment.create(value, style)
494
+ style.font_size = [style.font_size, style.font_size * (width - 4 * padding) / fragment.width].min
495
+ style.clear_cache
496
496
  end
497
497
 
498
498
  # Handles Javascript formatting routines for single-line text fields.
@@ -81,7 +81,7 @@ module HexaPDF
81
81
  # :radios_in_unison:: A group of radio buttons with the same value for the on state will turn
82
82
  # on or off in unison.
83
83
  #
84
- # See: PDF1.7 s12.7.4.2
84
+ # See: PDF2.0 s12.7.5.2
85
85
  class ButtonField < Field
86
86
 
87
87
  define_type :XXAcroFormField
@@ -66,7 +66,7 @@ module HexaPDF
66
66
  # :commit_on_sel_change:: If set, a new value should be committed as soon as a selection is
67
67
  # made.
68
68
  #
69
- # See: PDF1.7 s12.7.4.4
69
+ # See: PDF2.0 s12.7.5.4
70
70
  class ChoiceField < VariableTextField
71
71
 
72
72
  define_type :XXAcroFormField