hexapdf 0.32.2 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +104 -1
  3. data/README.md +9 -0
  4. data/examples/002-graphics.rb +15 -17
  5. data/examples/003-arcs.rb +9 -9
  6. data/examples/009-text_layouter_alignment.rb +1 -1
  7. data/examples/010-text_layouter_inline_boxes.rb +2 -2
  8. data/examples/011-text_layouter_line_wrapping.rb +1 -1
  9. data/examples/012-text_layouter_styling.rb +7 -7
  10. data/examples/013-text_layouter_shapes.rb +1 -1
  11. data/examples/014-text_in_polygon.rb +1 -1
  12. data/examples/015-boxes.rb +8 -7
  13. data/examples/016-frame_automatic_box_placement.rb +2 -2
  14. data/examples/017-frame_text_flow.rb +2 -1
  15. data/examples/018-composer.rb +1 -1
  16. data/examples/020-column_box.rb +2 -1
  17. data/examples/025-table_box.rb +46 -0
  18. data/examples/026-optional_content.rb +55 -0
  19. data/examples/027-composer_optional_content.rb +83 -0
  20. data/lib/hexapdf/cli/command.rb +12 -3
  21. data/lib/hexapdf/cli/fonts.rb +1 -1
  22. data/lib/hexapdf/cli/form.rb +5 -5
  23. data/lib/hexapdf/cli/inspect.rb +5 -7
  24. data/lib/hexapdf/composer.rb +106 -53
  25. data/lib/hexapdf/configuration.rb +65 -40
  26. data/lib/hexapdf/content/canvas.rb +445 -267
  27. data/lib/hexapdf/content/color_space.rb +72 -25
  28. data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
  29. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
  30. data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
  31. data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
  32. data/lib/hexapdf/content/graphic_object.rb +6 -7
  33. data/lib/hexapdf/content/graphics_state.rb +54 -45
  34. data/lib/hexapdf/content/operator.rb +54 -54
  35. data/lib/hexapdf/content/parser.rb +2 -2
  36. data/lib/hexapdf/content/processor.rb +15 -15
  37. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  38. data/lib/hexapdf/content.rb +5 -0
  39. data/lib/hexapdf/dictionary.rb +7 -5
  40. data/lib/hexapdf/dictionary_fields.rb +43 -16
  41. data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
  42. data/lib/hexapdf/digital_signature/handler.rb +1 -1
  43. data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
  44. data/lib/hexapdf/digital_signature/signature.rb +6 -6
  45. data/lib/hexapdf/digital_signature/signatures.rb +13 -12
  46. data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
  47. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
  48. data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
  49. data/lib/hexapdf/digital_signature/signing.rb +4 -0
  50. data/lib/hexapdf/digital_signature/verification_result.rb +3 -4
  51. data/lib/hexapdf/digital_signature.rb +7 -2
  52. data/lib/hexapdf/document/destinations.rb +12 -11
  53. data/lib/hexapdf/document/files.rb +1 -1
  54. data/lib/hexapdf/document/fonts.rb +1 -1
  55. data/lib/hexapdf/document/layout.rb +170 -39
  56. data/lib/hexapdf/document/pages.rb +4 -3
  57. data/lib/hexapdf/document.rb +96 -55
  58. data/lib/hexapdf/encryption/aes.rb +5 -5
  59. data/lib/hexapdf/encryption/arc4.rb +1 -1
  60. data/lib/hexapdf/encryption/fast_aes.rb +2 -2
  61. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  62. data/lib/hexapdf/encryption/identity.rb +1 -1
  63. data/lib/hexapdf/encryption/ruby_aes.rb +11 -21
  64. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  65. data/lib/hexapdf/encryption/security_handler.rb +31 -24
  66. data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
  67. data/lib/hexapdf/encryption.rb +7 -2
  68. data/lib/hexapdf/error.rb +18 -0
  69. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  70. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  71. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  72. data/lib/hexapdf/filter/lzw_decode.rb +1 -1
  73. data/lib/hexapdf/filter/pass_through.rb +1 -1
  74. data/lib/hexapdf/filter/predictor.rb +1 -1
  75. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  76. data/lib/hexapdf/filter.rb +55 -6
  77. data/lib/hexapdf/font/cmap/parser.rb +2 -2
  78. data/lib/hexapdf/font/cmap.rb +1 -1
  79. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  80. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  81. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
  82. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  83. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  84. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
  85. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  86. data/lib/hexapdf/font/invalid_glyph.rb +3 -0
  87. data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
  88. data/lib/hexapdf/font/type1_wrapper.rb +19 -4
  89. data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
  90. data/lib/hexapdf/font_loader/from_file.rb +5 -5
  91. data/lib/hexapdf/font_loader/standard14.rb +3 -3
  92. data/lib/hexapdf/font_loader.rb +3 -0
  93. data/lib/hexapdf/image_loader/jpeg.rb +2 -2
  94. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  95. data/lib/hexapdf/image_loader/png.rb +2 -2
  96. data/lib/hexapdf/image_loader.rb +1 -1
  97. data/lib/hexapdf/importer.rb +13 -0
  98. data/lib/hexapdf/layout/box.rb +32 -5
  99. data/lib/hexapdf/layout/box_fitter.rb +2 -2
  100. data/lib/hexapdf/layout/column_box.rb +20 -5
  101. data/lib/hexapdf/layout/frame.rb +53 -18
  102. data/lib/hexapdf/layout/image_box.rb +5 -0
  103. data/lib/hexapdf/layout/inline_box.rb +21 -9
  104. data/lib/hexapdf/layout/list_box.rb +50 -20
  105. data/lib/hexapdf/layout/page_style.rb +6 -5
  106. data/lib/hexapdf/layout/style.rb +64 -9
  107. data/lib/hexapdf/layout/table_box.rb +684 -0
  108. data/lib/hexapdf/layout/text_box.rb +12 -3
  109. data/lib/hexapdf/layout/text_fragment.rb +29 -3
  110. data/lib/hexapdf/layout/text_layouter.rb +32 -8
  111. data/lib/hexapdf/layout.rb +1 -0
  112. data/lib/hexapdf/name_tree_node.rb +1 -1
  113. data/lib/hexapdf/number_tree_node.rb +1 -1
  114. data/lib/hexapdf/object.rb +18 -7
  115. data/lib/hexapdf/parser.rb +7 -7
  116. data/lib/hexapdf/pdf_array.rb +1 -1
  117. data/lib/hexapdf/rectangle.rb +1 -1
  118. data/lib/hexapdf/reference.rb +1 -1
  119. data/lib/hexapdf/revision.rb +1 -1
  120. data/lib/hexapdf/revisions.rb +3 -3
  121. data/lib/hexapdf/serializer.rb +15 -15
  122. data/lib/hexapdf/stream.rb +5 -4
  123. data/lib/hexapdf/tokenizer.rb +14 -14
  124. data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
  125. data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
  126. data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
  127. data/lib/hexapdf/type/acro_form/field.rb +2 -2
  128. data/lib/hexapdf/type/acro_form/form.rb +1 -1
  129. data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
  130. data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
  131. data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
  132. data/lib/hexapdf/type/acro_form.rb +1 -1
  133. data/lib/hexapdf/type/action.rb +1 -1
  134. data/lib/hexapdf/type/actions/go_to.rb +1 -1
  135. data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
  136. data/lib/hexapdf/type/actions/launch.rb +1 -1
  137. data/lib/hexapdf/type/actions/set_ocg_state.rb +86 -0
  138. data/lib/hexapdf/type/actions/uri.rb +1 -1
  139. data/lib/hexapdf/type/actions.rb +2 -1
  140. data/lib/hexapdf/type/annotation.rb +3 -3
  141. data/lib/hexapdf/type/annotations/link.rb +1 -1
  142. data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
  143. data/lib/hexapdf/type/annotations/text.rb +2 -3
  144. data/lib/hexapdf/type/annotations/widget.rb +2 -2
  145. data/lib/hexapdf/type/annotations.rb +1 -1
  146. data/lib/hexapdf/type/catalog.rb +11 -2
  147. data/lib/hexapdf/type/cid_font.rb +18 -4
  148. data/lib/hexapdf/type/embedded_file.rb +1 -1
  149. data/lib/hexapdf/type/file_specification.rb +2 -2
  150. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  151. data/lib/hexapdf/type/font_simple.rb +2 -2
  152. data/lib/hexapdf/type/font_type0.rb +3 -3
  153. data/lib/hexapdf/type/font_type3.rb +1 -1
  154. data/lib/hexapdf/type/form.rb +76 -6
  155. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  156. data/lib/hexapdf/type/icon_fit.rb +1 -1
  157. data/lib/hexapdf/type/image.rb +1 -1
  158. data/lib/hexapdf/type/info.rb +1 -1
  159. data/lib/hexapdf/type/mark_information.rb +1 -1
  160. data/lib/hexapdf/type/names.rb +2 -2
  161. data/lib/hexapdf/type/object_stream.rb +2 -1
  162. data/lib/hexapdf/type/optional_content_configuration.rb +170 -0
  163. data/lib/hexapdf/type/optional_content_group.rb +370 -0
  164. data/lib/hexapdf/type/optional_content_membership.rb +63 -0
  165. data/lib/hexapdf/type/optional_content_properties.rb +158 -0
  166. data/lib/hexapdf/type/outline.rb +1 -1
  167. data/lib/hexapdf/type/outline_item.rb +1 -1
  168. data/lib/hexapdf/type/page.rb +46 -21
  169. data/lib/hexapdf/type/page_label.rb +5 -9
  170. data/lib/hexapdf/type/page_tree_node.rb +1 -1
  171. data/lib/hexapdf/type/resources.rb +1 -1
  172. data/lib/hexapdf/type/trailer.rb +2 -2
  173. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  174. data/lib/hexapdf/type/xref_stream.rb +2 -2
  175. data/lib/hexapdf/type.rb +4 -0
  176. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -2
  177. data/lib/hexapdf/version.rb +1 -1
  178. data/lib/hexapdf/writer.rb +4 -4
  179. data/lib/hexapdf/xref_section.rb +2 -2
  180. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
  181. data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
  182. data/test/hexapdf/content/test_canvas.rb +49 -1
  183. data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
  184. data/test/hexapdf/document/test_files.rb +2 -2
  185. data/test/hexapdf/document/test_layout.rb +105 -2
  186. data/test/hexapdf/document/test_pages.rb +6 -6
  187. data/test/hexapdf/encryption/test_security_handler.rb +12 -11
  188. data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
  189. data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
  190. data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
  191. data/test/hexapdf/layout/test_box.rb +14 -5
  192. data/test/hexapdf/layout/test_column_box.rb +65 -21
  193. data/test/hexapdf/layout/test_frame.rb +27 -15
  194. data/test/hexapdf/layout/test_image_box.rb +4 -0
  195. data/test/hexapdf/layout/test_inline_box.rb +17 -3
  196. data/test/hexapdf/layout/test_list_box.rb +84 -33
  197. data/test/hexapdf/layout/test_page_style.rb +3 -2
  198. data/test/hexapdf/layout/test_style.rb +60 -0
  199. data/test/hexapdf/layout/test_table_box.rb +728 -0
  200. data/test/hexapdf/layout/test_text_box.rb +26 -0
  201. data/test/hexapdf/layout/test_text_fragment.rb +33 -0
  202. data/test/hexapdf/layout/test_text_layouter.rb +36 -5
  203. data/test/hexapdf/test_composer.rb +10 -0
  204. data/test/hexapdf/test_dictionary.rb +10 -0
  205. data/test/hexapdf/test_dictionary_fields.rb +4 -1
  206. data/test/hexapdf/test_document.rb +5 -0
  207. data/test/hexapdf/test_filter.rb +8 -0
  208. data/test/hexapdf/test_importer.rb +9 -0
  209. data/test/hexapdf/test_object.rb +16 -5
  210. data/test/hexapdf/test_stream.rb +7 -0
  211. data/test/hexapdf/test_writer.rb +3 -3
  212. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
  213. data/test/hexapdf/type/acro_form/test_form.rb +4 -3
  214. data/test/hexapdf/type/actions/test_set_ocg_state.rb +40 -0
  215. data/test/hexapdf/type/test_catalog.rb +11 -0
  216. data/test/hexapdf/type/test_form.rb +119 -0
  217. data/test/hexapdf/type/test_optional_content_configuration.rb +112 -0
  218. data/test/hexapdf/type/test_optional_content_group.rb +158 -0
  219. data/test/hexapdf/type/test_optional_content_properties.rb +109 -0
  220. data/test/hexapdf/type/test_page.rb +20 -6
  221. metadata +28 -8
@@ -54,6 +54,13 @@ module HexaPDF
54
54
  @result = nil
55
55
  end
56
56
 
57
+ # Returns the text that will be drawn.
58
+ #
59
+ # This will ignore any inline boxes or kerning values.
60
+ def text
61
+ @items.map {|item| item.kind_of?(TextFragment) ? item.text : '' }.join
62
+ end
63
+
57
64
  # Returns +true+ as the 'position' style property value :flow is supported.
58
65
  def supports_position_flow?
59
66
  true
@@ -74,13 +81,14 @@ module HexaPDF
74
81
 
75
82
  @width = @height = 0
76
83
  @result = if style.position == :flow
77
- @tl.fit(@items, frame.width_specification, frame.shape.bbox.height)
84
+ @tl.fit(@items, frame.width_specification, frame.shape.bbox.height,
85
+ apply_first_text_indent: !split_box?, frame: frame)
78
86
  else
79
87
  @width = reserved_width
80
88
  @height = reserved_height
81
89
  width = (@initial_width > 0 ? @initial_width : available_width) - @width
82
90
  height = (@initial_height > 0 ? @initial_height : available_height) - @height
83
- @tl.fit(@items, width, height)
91
+ @tl.fit(@items, width, height, apply_first_text_indent: !split_box?, frame: frame)
84
92
  end
85
93
  @width += if @initial_width > 0 || style.align == :center || style.align == :right
86
94
  width
@@ -103,7 +111,8 @@ module HexaPDF
103
111
  def split(available_width, available_height, frame)
104
112
  fit(available_width, available_height, frame) unless @result
105
113
 
106
- if style.position != :flow && (@width > available_width || @height > available_height)
114
+ if style.position != :flow && (float_compare(@width, available_width) > 0 ||
115
+ float_compare(@height, available_height) > 0)
107
116
  [nil, self]
108
117
  elsif @result.remaining_items.empty?
109
118
  [self]
@@ -111,6 +111,11 @@ module HexaPDF
111
111
  @properties = properties
112
112
  end
113
113
 
114
+ # Returns the text of the fragment.
115
+ def text
116
+ items.reject {|i| i.kind_of?(Numeric) }.map(&:str).join
117
+ end
118
+
114
119
  # Creates a new TextFragment with the same style and custom properties as this one but with
115
120
  # the given +items+.
116
121
  def dup_attributes(items)
@@ -259,7 +264,7 @@ module HexaPDF
259
264
  # The width of the text fragment.
260
265
  #
261
266
  # It is the sum of the widths of its items and is calculated by using the algorithm presented
262
- # in PDF1.7 s9.4.4. By using kerning values as the first and/or last items, the text contained
267
+ # in PDF2.0 s9.4.4. By using kerning values as the first and/or last items, the text contained
263
268
  # in the fragment may spill over the left and/or right boundary.
264
269
  def width
265
270
  @width ||= @items.sum {|item| style.scaled_item_width(item) }
@@ -283,6 +288,28 @@ module HexaPDF
283
288
  :text
284
289
  end
285
290
 
291
+ # Creates a new text fragment that repeats this fragment's items and applies the necessary
292
+ # spacing so that the returned text fragment fills the given +width+ completely.
293
+ #
294
+ # If the given +width+ is less than the fragment's width, +self+ is returned.
295
+ def fill_horizontal!(width)
296
+ return self if width < self.width
297
+
298
+ factor, rest = width.divmod(self.width)
299
+ items = @items * factor
300
+ rest = @items.inject(rest) do |available_width, item|
301
+ new_available_width = available_width - style.scaled_item_width(item)
302
+ break available_width if new_available_width < 0
303
+ items << item
304
+ new_available_width
305
+ end
306
+
307
+ spacing = rest / (items.size - 1)
308
+ new_style = @style.dup.update(character_spacing: spacing)
309
+ items << spacing / new_style.scaled_font_size # correct spacing after last item
310
+ self.class.new(items, new_style, properties: @properties.dup)
311
+ end
312
+
286
313
  # Clears all cached values.
287
314
  #
288
315
  # This method needs to be called if the fragment's items or attributes are changed!
@@ -293,8 +320,7 @@ module HexaPDF
293
320
 
294
321
  # :nodoc:
295
322
  def inspect
296
- "#<#{self.class.name} #{items.reject {|i| i.kind_of?(Numeric) }.map(&:str).join.inspect} " \
297
- "#{items.inspect}>"
323
+ "#<#{self.class.name} #{text.inspect} #{items.inspect}>"
298
324
  end
299
325
 
300
326
  private
@@ -51,7 +51,8 @@ module HexaPDF
51
51
  # * Existing line breaking characters inside of TextFragment objects are respected when fitting
52
52
  # text. If this is not wanted, they have to be removed beforehand.
53
53
  #
54
- # * The first line may be indented by setting Style#text_indent which may also be negative.
54
+ # * The first line of each paragraph may be indented by setting Style#text_indent which may also
55
+ # be negative.
55
56
  #
56
57
  # * Text can be fitted into arbitrarily shaped areas, even containing holes.
57
58
  #
@@ -339,8 +340,8 @@ module HexaPDF
339
340
  # current start of the line index should be stored for later use.
340
341
  #
341
342
  # After the algorithm is finished, it returns the unused items.
342
- def self.call(items, width_block, &block)
343
- obj = new(items, width_block)
343
+ def self.call(items, width_block, frame, &block)
344
+ obj = new(items, width_block, frame)
344
345
  if width_block.arity == 1
345
346
  obj.variable_width_wrapping(&block)
346
347
  else
@@ -352,9 +353,10 @@ module HexaPDF
352
353
 
353
354
  # Creates a new line wrapping object that arranges the +items+ on lines with the given
354
355
  # width.
355
- def initialize(items, width_block)
356
+ def initialize(items, width_block, frame)
356
357
  @items = items
357
358
  @width_block = width_block
359
+ @frame = frame
358
360
  @line_items = []
359
361
  @width = 0
360
362
  @glue_items = []
@@ -362,6 +364,7 @@ module HexaPDF
362
364
  @last_breakpoint_index = 0
363
365
  @last_breakpoint_line_items_index = 0
364
366
  @break_prohibited_state = false
367
+ @fill_horizontal = false
365
368
 
366
369
  @height_calc = Line::HeightCalculator.new
367
370
  @line = DummyLine.new(0, 0)
@@ -504,9 +507,11 @@ module HexaPDF
504
507
  #
505
508
  # Returns +true+ if the item could be added and +false+ otherwise.
506
509
  def add_box_item(item)
510
+ item.fit_wrapped_box(@frame&.context) if item.kind_of?(InlineBox)
507
511
  return false unless @width + item.width <= @available_width
508
512
  @line_items.concat(@glue_items).push(item)
509
513
  @width += item.width
514
+ @fill_horizontal ||= item.style.fill_horizontal
510
515
  @glue_items.clear
511
516
  true
512
517
  end
@@ -546,6 +551,17 @@ module HexaPDF
546
551
 
547
552
  # Creates a Line object from the current line items.
548
553
  def create_line
554
+ if @fill_horizontal
555
+ rest_width = @available_width - @width
556
+ indices = []
557
+ @line_items.each_with_index do |item, index|
558
+ next unless item.style.fill_horizontal
559
+ indices << [index, item.style.fill_horizontal]
560
+ rest_width += item.width
561
+ end
562
+ unit_width = rest_width / indices.sum(&:last)
563
+ indices.each {|i, count| @line_items[i] = @line_items[i].fill_horizontal!(unit_width * count) }
564
+ end
549
565
  Line.new(@line_items)
550
566
  end
551
567
 
@@ -565,6 +581,7 @@ module HexaPDF
565
581
  @last_breakpoint_index = index
566
582
  @last_breakpoint_line_items_index = 0
567
583
  @break_prohibited_state = false
584
+ @fill_horizontal = false
568
585
  @available_width = @width_block.call(@line)
569
586
  end
570
587
 
@@ -658,7 +675,7 @@ module HexaPDF
658
675
  end
659
676
 
660
677
  # :call-seq:
661
- # text_layouter.fit(items, width, height) -> result
678
+ # text_layouter.fit(items, width, height, apply_first_text_indent: true) -> result
662
679
  #
663
680
  # Fits the items into the given area and returns a Result object with all the information.
664
681
  #
@@ -693,7 +710,14 @@ module HexaPDF
693
710
  # The text segmentation algorithm specified via #style is applied to the items in case they
694
711
  # are not already in segmented form. This also means that Result#remaining_items always
695
712
  # contains segmented items.
696
- def fit(items, width, height)
713
+ #
714
+ # Optional arguments:
715
+ #
716
+ # +apply_first_text_indent+::
717
+ # Specifies whether style.text_indent should be applied to the first line. This should be
718
+ # set to +false+ if the items start with a continuation of a paragraph instead of starting
719
+ # a new paragraph (e.g. after a page break).
720
+ def fit(items, width, height, apply_first_text_indent: true, frame: nil)
697
721
  unless items.empty? || items[0].respond_to?(:type)
698
722
  items = style.text_segmentation_algorithm.call(items)
699
723
  end
@@ -704,7 +728,7 @@ module HexaPDF
704
728
  rest = items
705
729
 
706
730
  # processing state variables
707
- indent = style.text_indent
731
+ indent = apply_first_text_indent ? style.text_indent : 0
708
732
  line_fragments = []
709
733
  line_height = 0
710
734
  previous_line = nil
@@ -757,7 +781,7 @@ module HexaPDF
757
781
  too_wide_box = nil
758
782
  line_height = 0
759
783
 
760
- rest = style.text_line_wrapping_algorithm.call(rest, width_block) do |line, item|
784
+ rest = style.text_line_wrapping_algorithm.call(rest, width_block, frame) do |line, item|
761
785
  # make sure empty lines broken by mandatory paragraph breaks are not empty
762
786
  line << TextFragment.new([], style) if item&.type != :box && line.items.empty?
763
787
 
@@ -57,6 +57,7 @@ module HexaPDF
57
57
  autoload(:ColumnBox, 'hexapdf/layout/column_box')
58
58
  autoload(:ListBox, 'hexapdf/layout/list_box')
59
59
  autoload(:PageStyle, 'hexapdf/layout/page_style')
60
+ autoload(:TableBox, 'hexapdf/layout/table_box')
60
61
 
61
62
  end
62
63
 
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # HexaPDF::Utils::SortedTreeNode) to add or retrieve entries. They ensure that the name tree stays
56
56
  # valid.
57
57
  #
58
- # See: PDF1.7 s7.9.6
58
+ # See: PDF2.0 s7.9.6
59
59
  class NameTreeNode < Dictionary
60
60
 
61
61
  include Utils::SortedTreeNode
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # Number trees are similar to name trees but use integers as keys instead of strings. See
45
45
  # HexaPDF::NameTreeNode for a more detailed explanation.
46
46
  #
47
- # See: PDF1.7 s7.9.7, HexaPDF::NameTreeNode
47
+ # See: PDF2.0 s7.9.7, HexaPDF::NameTreeNode
48
48
  class NumberTreeNode < Dictionary
49
49
 
50
50
  include Utils::SortedTreeNode
@@ -117,7 +117,7 @@ module HexaPDF
117
117
  #
118
118
  # See: HexaPDF::Dictionary, HexaPDF::Stream, HexaPDF::Reference, HexaPDF::Document
119
119
  #
120
- # See: PDF1.7 s7.3.10, s7.3.8
120
+ # See: PDF2.0 s7.3.10, s7.3.8
121
121
  class Object
122
122
 
123
123
  include Comparable
@@ -143,18 +143,27 @@ module HexaPDF
143
143
 
144
144
  # Makes sure that the object itself as well as all nested values are direct objects.
145
145
  #
146
+ # The +document+ argument needs to contain the Document instance to which +object+ belongs so
147
+ # that references can be correctly resolved.
148
+ #
146
149
  # If an indirect object is found, it is turned into a direct object and the indirect object is
147
150
  # deleted from the document.
148
- def self.make_direct(object)
151
+ def self.make_direct(object, document)
149
152
  if object.kind_of?(HexaPDF::Object) && object.indirect?
153
+ raise HexaPDF::Error, "Can't make a stream object a direct object" if object.data.stream
150
154
  object_to_delete = object
151
155
  object = object.value
152
156
  object_to_delete.document.delete(object_to_delete)
153
157
  end
154
- if object.kind_of?(Hash)
155
- object.transform_values! {|val| make_direct(val) }
156
- elsif object.kind_of?(Array)
157
- object.map! {|val| make_direct(val) }
158
+ case object
159
+ when HexaPDF::Object
160
+ object.data.value = make_direct(object.data.value, document)
161
+ when Hash
162
+ object.transform_values! {|val| make_direct(val, document) }
163
+ when Array
164
+ object.map! {|val| make_direct(val, document) }
165
+ when Reference
166
+ object = make_direct(document.object(object), document)
158
167
  end
159
168
  object
160
169
  end
@@ -255,7 +264,7 @@ module HexaPDF
255
264
  # type.
256
265
  #
257
266
  # However, the Type and Subtype fields can easily be used for this. Subclasses for PDF objects
258
- # that don't have such fields may use a unique name that has to begin with XX (see PDF1.7 sE.2)
267
+ # that don't have such fields may use a unique name that has to begin with XX (see PDF2.0 sE.2)
259
268
  # and therefore doesn't clash with names defined by the PDF specification.
260
269
  #
261
270
  # For basic objects this always returns +:Unknown+.
@@ -297,6 +306,8 @@ module HexaPDF
297
306
  end
298
307
 
299
308
  # Makes a deep copy of the source PDF object and resets the object identifier.
309
+ #
310
+ # Note that indirect references are *not* copied! If that is also needed, use Importer::copy.
300
311
  def deep_copy
301
312
  obj = dup
302
313
  obj.instance_variable_set(:@data, @data.dup)
@@ -41,11 +41,11 @@ require 'hexapdf/xref_section'
41
41
 
42
42
  module HexaPDF
43
43
 
44
- # Parses an IO stream according to PDF1.7 to get at the contained objects.
44
+ # Parses an IO stream according to PDF2.0 to get at the contained objects.
45
45
  #
46
46
  # This class also contains higher-level methods for getting indirect objects and revisions.
47
47
  #
48
- # See: PDF1.7 s7
48
+ # See: PDF2.0 s7
49
49
  class Parser
50
50
 
51
51
  # The IO stream which is parsed.
@@ -125,7 +125,7 @@ module HexaPDF
125
125
  #
126
126
  # Returns an array containing [object, oid, gen, stream].
127
127
  #
128
- # See: PDF1.7 s7.3.10, s7.3.8
128
+ # See: PDF2.0 s7.3.10, s7.3.8
129
129
  def parse_indirect_object(offset = nil)
130
130
  @tokenizer.pos = offset + @header_offset if offset
131
131
  oid = @tokenizer.next_token
@@ -267,7 +267,7 @@ module HexaPDF
267
267
  #
268
268
  # This method can only parse cross-reference sections, not cross-reference streams!
269
269
  #
270
- # See: PDF1.7 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
270
+ # See: PDF2.0 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
271
271
  def parse_xref_section_and_trailer(offset)
272
272
  @tokenizer.pos = offset + @header_offset
273
273
  token = @tokenizer.next_token
@@ -346,7 +346,7 @@ module HexaPDF
346
346
  #
347
347
  # If strict parsing is disabled, the whole file is searched for the offset.
348
348
  #
349
- # See: PDF1.7 s7.5.5, ADB1.7 sH.3-3.4.4
349
+ # See: PDF2.0 s7.5.5, ADB1.7 sH.3-3.4.4
350
350
  def startxref_offset
351
351
  return @startxref_offset if defined?(@startxref_offset)
352
352
 
@@ -397,7 +397,7 @@ module HexaPDF
397
397
 
398
398
  # Returns the PDF version number that is stored in the file header.
399
399
  #
400
- # See: PDF1.7 s7.5.2
400
+ # See: PDF2.0 s7.5.2
401
401
  def file_header_version
402
402
  unless @header_version
403
403
  raise_malformed("PDF file header is missing or corrupt", pos: 0)
@@ -413,7 +413,7 @@ module HexaPDF
413
413
  # restriction so that the header may appear in the first 1024 bytes. We follow the Adobe
414
414
  # convention.
415
415
  #
416
- # See: PDF1.7 s7.5.2, ADB1.7 sH.3-3.4.1
416
+ # See: PDF2.0 s7.5.2, ADB1.7 sH.3-3.4.1
417
417
  def retrieve_pdf_header_offset_and_version
418
418
  @io.seek(0)
419
419
  @header_offset = (@io.read(1024) || '').index(/%PDF-(\d\.\d)/) || 0
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # #[] method. Therefore not all Array methods are implemented - use the #value directly if other
45
45
  # methods are needed.
46
46
  #
47
- # See: PDF1.7 s7.3.6
47
+ # See: PDF2.0 s7.3.6
48
48
  class PDFArray < HexaPDF::Object
49
49
 
50
50
  include Enumerable
@@ -51,7 +51,7 @@ module HexaPDF
51
51
  # where +left+ is the bottom left x-coordinate, +bottom+ is the bottom left y-coordinate, +right+
52
52
  # is the top right x-coordinate and +top+ is the top right y-coordinate.
53
53
  #
54
- # See: PDF1.7 s7.9.5
54
+ # See: PDF2.0 s7.9.5
55
55
  class Rectangle < HexaPDF::PDFArray
56
56
 
57
57
  # Returns the x-coordinate of the bottom-left corner.
@@ -50,7 +50,7 @@ module HexaPDF
50
50
  # keys. Furthermore the implementation is compatible to the one of Object, i.e. the hash of a
51
51
  # Reference object is the same as the hash of an indirect Object.
52
52
  #
53
- # See: PDF1.7 s7.3.10, Object
53
+ # See: PDF2.0 s7.3.10, Object
54
54
  class Reference
55
55
 
56
56
  include Comparable
@@ -48,7 +48,7 @@ module HexaPDF
48
48
  # If a revision doesn't have an associated cross-reference section, it wasn't created from a PDF
49
49
  # file.
50
50
  #
51
- # See: PDF1.7 s7.5.6, Revisions
51
+ # See: PDF2.0 s7.5.6, Revisions
52
52
  class Revision
53
53
 
54
54
  include Enumerable
@@ -55,7 +55,7 @@ module HexaPDF
55
55
  # this should only be done if one is familiar with the inner workings of HexaPDF. Otherwise it is
56
56
  # best to use the convenience methods of this class to create, access or delete indirect objects.
57
57
  #
58
- # See: PDF1.7 s7.5.6, HexaPDF::Revision
58
+ # See: PDF2.0 s7.5.6, HexaPDF::Revision
59
59
  class Revisions
60
60
 
61
61
  class << self
@@ -76,7 +76,7 @@ module HexaPDF
76
76
  seen_xref_offsets = {}
77
77
 
78
78
  while offset && !seen_xref_offsets.key?(offset)
79
- # PDF1.7 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
79
+ # PDF2.0 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
80
80
  # should be direct. Adobe's POV is followed here. Same with :XRefStm.
81
81
  xref_section, trailer = parser.load_revision(offset)
82
82
  seen_xref_offsets[offset] = true
@@ -167,7 +167,7 @@ module HexaPDF
167
167
  # For references to unknown objects, +nil+ is returned but free objects are represented by a
168
168
  # PDF Null object, not by +nil+!
169
169
  #
170
- # See: PDF1.7 s7.3.9
170
+ # See: PDF2.0 s7.3.9
171
171
  def object(ref)
172
172
  i = @revisions.size - 1
173
173
  while i >= 0
@@ -79,7 +79,7 @@ module HexaPDF
79
79
  #
80
80
  # If no serialization method for a specific class is found, the ancestors classes are tried.
81
81
  #
82
- # See: PDF1.7 s7.3
82
+ # See: PDF2.0 s7.3
83
83
  class Serializer
84
84
 
85
85
  # The encrypter to use for encrypting strings and streams. If +nil+, strings and streams are not
@@ -163,21 +163,21 @@ module HexaPDF
163
163
 
164
164
  # Serializes the +nil+ value.
165
165
  #
166
- # See: PDF1.7 s7.3.9
166
+ # See: PDF2.0 s7.3.9
167
167
  def serialize_nilclass(_obj)
168
168
  "null"
169
169
  end
170
170
 
171
171
  # Serializes the +true+ value.
172
172
  #
173
- # See: PDF1.7 s7.3.2
173
+ # See: PDF2.0 s7.3.2
174
174
  def serialize_trueclass(_obj)
175
175
  "true"
176
176
  end
177
177
 
178
178
  # Serializes the +false+ value.
179
179
  #
180
- # See: PDF1.7 s7.3.2
180
+ # See: PDF2.0 s7.3.2
181
181
  def serialize_falseclass(_obj)
182
182
  "false"
183
183
  end
@@ -187,21 +187,21 @@ module HexaPDF
187
187
  # This method should be used for cases where it is known that the object is either an Integer
188
188
  # or a Float.
189
189
  #
190
- # See: PDF1.7 s7.3.3
190
+ # See: PDF2.0 s7.3.3
191
191
  def serialize_numeric(obj)
192
192
  obj.kind_of?(Integer) ? obj.to_s : serialize_float(obj)
193
193
  end
194
194
 
195
195
  # Serializes an Integer object.
196
196
  #
197
- # See: PDF1.7 s7.3.3
197
+ # See: PDF2.0 s7.3.3
198
198
  def serialize_integer(obj)
199
199
  obj.to_s
200
200
  end
201
201
 
202
202
  # Serializes a Float object.
203
203
  #
204
- # See: PDF1.7 s7.3.3
204
+ # See: PDF2.0 s7.3.3
205
205
  def serialize_float(obj)
206
206
  if -0.0001 < obj && obj < 0.0001 && obj != 0
207
207
  sprintf("%.6f", obj)
@@ -215,7 +215,7 @@ module HexaPDF
215
215
  # The regexp matches all characters that need to be escaped and the substs hash contains the
216
216
  # mapping from these characters to their escaped form.
217
217
  #
218
- # See PDF1.7 s7.3.5
218
+ # See PDF2.0 s7.3.5
219
219
  NAME_SUBSTS = {} # :nodoc:
220
220
  [0..32, 127..255, Tokenizer::DELIMITER.bytes, Tokenizer::WHITESPACE.bytes, [35]].each do |a|
221
221
  a.each {|c| NAME_SUBSTS[c.chr] = "##{c.to_s(16).rjust(2, '0')}" }
@@ -225,7 +225,7 @@ module HexaPDF
225
225
 
226
226
  # Serializes a Symbol object (i.e. a PDF name object).
227
227
  #
228
- # See: PDF1.7 s7.3.5
228
+ # See: PDF2.0 s7.3.5
229
229
  def serialize_symbol(obj)
230
230
  NAME_CACHE[obj] ||=
231
231
  begin
@@ -240,7 +240,7 @@ module HexaPDF
240
240
 
241
241
  # Serializes an Array object.
242
242
  #
243
- # See: PDF1.7 s7.3.6
243
+ # See: PDF2.0 s7.3.6
244
244
  def serialize_array(obj)
245
245
  str = +"["
246
246
  index = 0
@@ -256,7 +256,7 @@ module HexaPDF
256
256
 
257
257
  # Serializes a Hash object (i.e. a PDF dictionary object).
258
258
  #
259
- # See: PDF1.7 s7.3.7
259
+ # See: PDF2.0 s7.3.7
260
260
  def serialize_hash(obj)
261
261
  str = +"<<"
262
262
  obj.each do |k, v|
@@ -274,7 +274,7 @@ module HexaPDF
274
274
 
275
275
  # Serializes a String object.
276
276
  #
277
- # See: PDF1.7 s7.3.4
277
+ # See: PDF2.0 s7.3.4
278
278
  def serialize_string(obj)
279
279
  obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
280
280
  encrypter.encrypt_string(obj, @object)
@@ -294,7 +294,7 @@ module HexaPDF
294
294
  # The ISO PDF specification differs in respect to the supported date format. When converting
295
295
  # to a date string, a format suitable for both is output.
296
296
  #
297
- # See: PDF1.7 s7.9.4, ADB1.7 3.8.3
297
+ # See: PDF2.0 s7.9.4, ADB1.7 3.8.3
298
298
  def serialize_time(obj)
299
299
  zone = obj.strftime("%z'")
300
300
  if zone == "+0000'"
@@ -330,14 +330,14 @@ module HexaPDF
330
330
  end
331
331
  end
332
332
 
333
- # See: PDF1.7 s7.3.10
333
+ # See: PDF2.0 s7.3.10
334
334
  def serialize_hexapdf_reference(obj)
335
335
  "#{obj.oid} #{obj.gen} R"
336
336
  end
337
337
 
338
338
  # Serializes the streams dictionary and its stream.
339
339
  #
340
- # See: PDF1.7 s7.3.8
340
+ # See: PDF2.0 s7.3.8
341
341
  def serialize_hexapdf_stream(obj)
342
342
  if !obj.indirect?
343
343
  raise HexaPDF::Error, "Can't serialize PDF stream without object identifier"
@@ -88,7 +88,9 @@ module HexaPDF
88
88
 
89
89
  # Returns a Fiber for getting at the data of the stream represented by this object.
90
90
  def fiber(chunk_size = 0)
91
- if @source.kind_of?(Proc)
91
+ if @source.kind_of?(FiberDoubleForString)
92
+ @source.dup
93
+ elsif @source.kind_of?(Proc)
92
94
  FiberWithLength.new(@length, &@source)
93
95
  elsif @source.kind_of?(String)
94
96
  HexaPDF::Filter.source_from_file(@source, pos: @offset || 0, length: @length || -1,
@@ -134,7 +136,7 @@ module HexaPDF
134
136
  #
135
137
  # Note that support for external streams (/F, /FFilter, /FDecodeParms) is not yet implemented!
136
138
  #
137
- # See: PDF1.7 s7.3.8, Dictionary
139
+ # See: PDF2.0 s7.3.8, Dictionary
138
140
  class Stream < Dictionary
139
141
 
140
142
  define_field :Length, type: Integer # not required, will be auto-filled when writing
@@ -276,9 +278,8 @@ module HexaPDF
276
278
  end
277
279
  end
278
280
 
279
- # :nodoc:
280
281
  # A mapping from short name to long name for filters.
281
- FILTER_MAP = {AHx: :ASCIIHexDecode, A85: :ASCII85Decode, LZW: :LZWDecode,
282
+ FILTER_MAP = {AHx: :ASCIIHexDecode, A85: :ASCII85Decode, LZW: :LZWDecode, # :nodoc:
282
283
  Fl: :FlateDecode, RL: :RunLengthDecode, CCF: :CCITTFaxDecode,
283
284
  DCT: :DCTDecode}.freeze
284
285