hexapdf 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +5 -5
  5. data/VERSION +1 -1
  6. data/examples/emoji-smile.png +0 -0
  7. data/examples/emoji-wink.png +0 -0
  8. data/examples/graphics.rb +9 -8
  9. data/examples/standard_pdf_fonts.rb +2 -1
  10. data/examples/text_box_alignment.rb +47 -0
  11. data/examples/text_box_inline_boxes.rb +56 -0
  12. data/examples/text_box_line_wrapping.rb +57 -0
  13. data/examples/text_box_shapes.rb +166 -0
  14. data/examples/text_box_styling.rb +72 -0
  15. data/examples/truetype.rb +3 -4
  16. data/lib/hexapdf/cli/optimize.rb +2 -2
  17. data/lib/hexapdf/configuration.rb +8 -6
  18. data/lib/hexapdf/content/canvas.rb +8 -5
  19. data/lib/hexapdf/content/parser.rb +3 -2
  20. data/lib/hexapdf/content/processor.rb +14 -3
  21. data/lib/hexapdf/document.rb +1 -0
  22. data/lib/hexapdf/document/fonts.rb +2 -1
  23. data/lib/hexapdf/document/pages.rb +23 -0
  24. data/lib/hexapdf/font/invalid_glyph.rb +78 -0
  25. data/lib/hexapdf/font/true_type/font.rb +14 -3
  26. data/lib/hexapdf/font/true_type/table.rb +1 -0
  27. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  28. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -0
  29. data/lib/hexapdf/font/true_type/table/glyf.rb +4 -0
  30. data/lib/hexapdf/font/true_type/table/kern.rb +170 -0
  31. data/lib/hexapdf/font/true_type/table/post.rb +5 -1
  32. data/lib/hexapdf/font/true_type_wrapper.rb +71 -24
  33. data/lib/hexapdf/font/type1/afm_parser.rb +3 -2
  34. data/lib/hexapdf/font/type1/character_metrics.rb +0 -9
  35. data/lib/hexapdf/font/type1/font.rb +11 -0
  36. data/lib/hexapdf/font/type1/font_metrics.rb +6 -1
  37. data/lib/hexapdf/font/type1_wrapper.rb +51 -7
  38. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  39. data/lib/hexapdf/layout.rb +51 -0
  40. data/lib/hexapdf/layout/inline_box.rb +95 -0
  41. data/lib/hexapdf/layout/line_fragment.rb +333 -0
  42. data/lib/hexapdf/layout/numeric_refinements.rb +56 -0
  43. data/lib/hexapdf/layout/style.rb +365 -0
  44. data/lib/hexapdf/layout/text_box.rb +727 -0
  45. data/lib/hexapdf/layout/text_fragment.rb +206 -0
  46. data/lib/hexapdf/layout/text_shaper.rb +155 -0
  47. data/lib/hexapdf/task.rb +0 -1
  48. data/lib/hexapdf/task/dereference.rb +1 -1
  49. data/lib/hexapdf/tokenizer.rb +3 -2
  50. data/lib/hexapdf/type/font_descriptor.rb +2 -1
  51. data/lib/hexapdf/type/font_type0.rb +3 -1
  52. data/lib/hexapdf/type/form.rb +12 -4
  53. data/lib/hexapdf/version.rb +1 -1
  54. data/test/hexapdf/common_tokenizer_tests.rb +7 -0
  55. data/test/hexapdf/content/common.rb +8 -0
  56. data/test/hexapdf/content/test_canvas.rb +10 -22
  57. data/test/hexapdf/content/test_processor.rb +4 -1
  58. data/test/hexapdf/document/test_pages.rb +16 -0
  59. data/test/hexapdf/font/test_invalid_glyph.rb +34 -0
  60. data/test/hexapdf/font/test_true_type_wrapper.rb +25 -11
  61. data/test/hexapdf/font/test_type1_wrapper.rb +26 -10
  62. data/test/hexapdf/font/true_type/table/common.rb +27 -0
  63. data/test/hexapdf/font/true_type/table/test_cmap.rb +14 -20
  64. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +7 -0
  65. data/test/hexapdf/font/true_type/table/test_glyf.rb +8 -6
  66. data/test/hexapdf/font/true_type/table/test_head.rb +9 -13
  67. data/test/hexapdf/font/true_type/table/test_hhea.rb +16 -23
  68. data/test/hexapdf/font/true_type/table/test_hmtx.rb +4 -7
  69. data/test/hexapdf/font/true_type/table/test_kern.rb +61 -0
  70. data/test/hexapdf/font/true_type/table/test_loca.rb +7 -13
  71. data/test/hexapdf/font/true_type/table/test_maxp.rb +4 -9
  72. data/test/hexapdf/font/true_type/table/test_name.rb +14 -17
  73. data/test/hexapdf/font/true_type/table/test_os2.rb +3 -5
  74. data/test/hexapdf/font/true_type/table/test_post.rb +21 -19
  75. data/test/hexapdf/font/true_type/test_font.rb +4 -0
  76. data/test/hexapdf/font/type1/common.rb +6 -0
  77. data/test/hexapdf/font/type1/test_afm_parser.rb +9 -0
  78. data/test/hexapdf/font/type1/test_font.rb +6 -0
  79. data/test/hexapdf/layout/test_inline_box.rb +40 -0
  80. data/test/hexapdf/layout/test_line_fragment.rb +206 -0
  81. data/test/hexapdf/layout/test_style.rb +143 -0
  82. data/test/hexapdf/layout/test_text_box.rb +640 -0
  83. data/test/hexapdf/layout/test_text_fragment.rb +208 -0
  84. data/test/hexapdf/layout/test_text_shaper.rb +64 -0
  85. data/test/hexapdf/task/test_dereference.rb +1 -0
  86. data/test/hexapdf/test_writer.rb +2 -2
  87. data/test/hexapdf/type/test_font_descriptor.rb +4 -2
  88. data/test/hexapdf/type/test_font_type0.rb +7 -0
  89. data/test/hexapdf/type/test_form.rb +12 -0
  90. metadata +29 -2
@@ -0,0 +1,206 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/layout/style'
35
+ require 'hexapdf/layout/text_shaper'
36
+ require 'hexapdf/layout/numeric_refinements'
37
+
38
+ module HexaPDF
39
+ module Layout
40
+
41
+ # A TextFragment describes an optionally kerned piece of text that shares the same font, font
42
+ # size and other properties.
43
+ #
44
+ # Its items are either glyph objects of the font or numeric values describing kerning
45
+ # information. All returned measurement values are in text space units. If the items or the
46
+ # style are changed, #clear_cache has to be called. Otherwise the measurements may not be
47
+ # correct!
48
+ #
49
+ # The items of a text fragment may be frozen to indicate that the fragment is potentially used
50
+ # multiple times.
51
+ #
52
+ # The rectangle with the lower-left corner (#x_min, #y_min) and the upper right corner (#x_max,
53
+ # #y_max) describes the minimum bounding box of the whole text fragment and is usually *not*
54
+ # equal to the box (0, 0)-(#width, #height).
55
+ class TextFragment
56
+
57
+ using NumericRefinements
58
+
59
+ # Creates a new TextFragment object for the given text, shapes it and returns it.
60
+ #
61
+ # The style of the text fragment can be specified using additional options, of which font is
62
+ # mandatory.
63
+ def self.create(text, font:, **options)
64
+ fragment = new(items: font.decode_utf8(text), style: Style.new(font: font, **options))
65
+ TextShaper.new.shape_text(fragment)
66
+ end
67
+
68
+ # The items (glyphs and kerning values) of the text fragment.
69
+ attr_accessor :items
70
+
71
+ # The style to be applied.
72
+ #
73
+ # Only the following properties are used: Style#font, Style#font_size,
74
+ # Style#horizontal_scaling, Style#character_spacing, Style#word_spacing and Style#text_rise.
75
+ attr_reader :style
76
+
77
+ # Creates a new TextFragment object with the given items and style.
78
+ def initialize(items:, style: Style.new)
79
+ @items = items || []
80
+ @style = style
81
+ end
82
+
83
+ # Draws the text onto the canvas at the given position.
84
+ #
85
+ # Before the text is drawn using HexaPDF::Content::Canvas#show_glyphs_only, the text properties
86
+ # mentioned in the description of #style are set.
87
+ def draw(canvas, x, y)
88
+ canvas.move_text_cursor(offset: [x, y])
89
+ canvas.font(style.font, size: style.font_size).
90
+ horizontal_scaling(style.horizontal_scaling).
91
+ character_spacing(style.character_spacing).
92
+ word_spacing(style.word_spacing).
93
+ text_rise(style.text_rise)
94
+ canvas.show_glyphs_only(items)
95
+ end
96
+
97
+ # The minimum x-coordinate of the first glyph.
98
+ def x_min
99
+ @x_min ||= calculate_x_min
100
+ end
101
+
102
+ # The maximum x-coordinate of the last glyph.
103
+ def x_max
104
+ @x_max ||= calculate_x_max
105
+ end
106
+
107
+ # The minimum y-coordinate, calculated using the scaled descender of the font.
108
+ def y_min
109
+ @y_min ||= style.scaled_font_descender + style.text_rise
110
+ end
111
+
112
+ # The maximum y-coordinate, calculated using the scaled ascender of the font.
113
+ def y_max
114
+ @y_max ||= style.scaled_font_ascender + style.text_rise
115
+ end
116
+
117
+ # The minimum y-coordinate of any item.
118
+ def exact_y_min
119
+ @exact_y_min ||= (@items.min_by(&:y_min)&.y_min || 0) * style.font_size / 1000.0 +
120
+ style.text_rise
121
+ end
122
+
123
+ # The maximum y-coordinate of any item.
124
+ def exact_y_max
125
+ @exact_y_max ||= (@items.max_by(&:y_max)&.y_max || 0) * style.font_size / 1000.0 +
126
+ style.text_rise
127
+ end
128
+
129
+ # The width of the text fragment.
130
+ #
131
+ # It is the sum of the widths of its items and is calculated by using the algorithm presented
132
+ # in PDF1.7 s9.4.4. By using kerning values as the first and/or last items, the text contained
133
+ # in the fragment may spill over the left and/or right boundary.
134
+ def width
135
+ @width ||= @items.sum {|item| style.scaled_item_width(item)}
136
+ end
137
+
138
+ # The height of the text fragment.
139
+ #
140
+ # It is calculated as the difference between #y_max and #y_min, i.e. from the scaled ascender
141
+ # and descender of the font. However, the text rise value is also taken into account so
142
+ # that the baseline is always *inside* the bounds. For example, if a large negative text rise
143
+ # value is used, the baseline will be equal to the top boundary; if a large positive value is
144
+ # used, it will be equal to the bottom boundary.
145
+ def height
146
+ @height ||= [y_max, 0].max - [y_min, 0].min
147
+ end
148
+
149
+ # Returns the vertical alignment inside a line which is always :text for text fragments.
150
+ #
151
+ # See LineFragment for details.
152
+ def valign
153
+ :text
154
+ end
155
+
156
+ # Clears all cached values.
157
+ #
158
+ # This method needs to be called if the fragment's items or attributes are changed!
159
+ def clear_cache
160
+ @x_min = @x_max = @y_min = @y_max = @exact_y_min = @exact_y_max = @width = @height = nil
161
+ self
162
+ end
163
+
164
+ # :nodoc:
165
+ def inspect
166
+ "#<#{self.class.name} #{items.inspect}>"
167
+ end
168
+
169
+ private
170
+
171
+ def calculate_x_min
172
+ if !@items.empty? && !@items[0].kind_of?(Numeric)
173
+ @items[0].x_min * style.scaled_font_size
174
+ else
175
+ @items.inject(0) do |sum, item|
176
+ sum += item.x_min * style.scaled_font_size
177
+ break sum unless item.kind_of?(Numeric)
178
+ sum
179
+ end
180
+ end
181
+ end
182
+
183
+ def calculate_x_max
184
+ if !@items.empty? && !@items[0].kind_of?(Numeric)
185
+ width - scaled_glyph_right_side_bearing(@items[-1])
186
+ else
187
+ @items.reverse_each.inject(width) do |sum, item|
188
+ if item.kind_of?(Numeric)
189
+ sum + item * style.scaled_font_size
190
+ else
191
+ break sum - scaled_glyph_right_side_bearing(item)
192
+ end
193
+ end
194
+ end
195
+ end
196
+
197
+ def scaled_glyph_right_side_bearing(glyph)
198
+ (glyph.x_max <= 0 ? 0 : glyph.width - glyph.x_max) * style.scaled_font_size +
199
+ style.scaled_character_spacing +
200
+ (glyph.apply_word_spacing? ? style.scaled_word_spacing : 0)
201
+ end
202
+
203
+ end
204
+
205
+ end
206
+ end
@@ -0,0 +1,155 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/layout/numeric_refinements'
35
+
36
+ module HexaPDF
37
+ module Layout
38
+
39
+ using NumericRefinements
40
+
41
+ # This class is used to perform text shaping, i.e. changing the position of glyphs (e.g. for
42
+ # kerning) or substituting one or more glyphs for other glyphs (e.g. for ligatures).
43
+ #
44
+ # Status of the implementation:
45
+ #
46
+ # * All text shaping functionality possible for Type1 fonts is implemented, i.e. kerning and
47
+ # ligature substitution.
48
+ #
49
+ # * For TrueType fonts only kerning via the 'kern' table is implemented.
50
+ class TextShaper
51
+
52
+ # Shapes the given text fragment in-place.
53
+ #
54
+ # The following shaping options, retrieved from the text fragment's Style#font_features, are
55
+ # supported:
56
+ #
57
+ # :kern:: Pair-wise kerning.
58
+ # :liga:: Ligature substitution.
59
+ def shape_text(text_fragment)
60
+ font = text_fragment.style.font
61
+ if text_fragment.style.font_features[:liga] && font.wrapped_font.features.include?(:liga)
62
+ if font.font_type == :Type1
63
+ process_type1_ligatures(text_fragment)
64
+ end
65
+ text_fragment.clear_cache
66
+ end
67
+ if text_fragment.style.font_features[:kern] && font.wrapped_font.features.include?(:kern)
68
+ if font.font_type == :TrueType
69
+ process_true_type_kerning(text_fragment)
70
+ elsif font.font_type == :Type1
71
+ process_type1_kerning(text_fragment)
72
+ end
73
+ text_fragment.clear_cache
74
+ end
75
+ text_fragment
76
+ end
77
+
78
+ private
79
+
80
+ # Processes the text fragment and substitutes ligatures.
81
+ def process_type1_ligatures(text_fragment)
82
+ items = text_fragment.items
83
+ font = text_fragment.style.font
84
+ pairs = font.wrapped_font.metrics.ligature_pairs
85
+ each_glyph_pair(items) do |left_item, right_item, left, right|
86
+ if (ligature = pairs.dig(left_item.id, right_item.id))
87
+ items[left..right] = font.glyph(ligature)
88
+ left
89
+ else
90
+ right
91
+ end
92
+ end
93
+ end
94
+
95
+ # Processes the text fragment and does pair-wise kerning.
96
+ def process_type1_kerning(text_fragment)
97
+ pairs = text_fragment.style.font.wrapped_font.metrics.kerning_pairs
98
+ items = text_fragment.items
99
+ each_glyph_pair(items) do |left_item, right_item, left, right|
100
+ if (left + 1 == right) && (kerning = pairs.dig(left_item.id, right_item.id))
101
+ items.insert(right, -kerning)
102
+ right + 1
103
+ else
104
+ right
105
+ end
106
+ end
107
+ end
108
+
109
+ # Processes the text fragment and does pair-wise kerning.
110
+ def process_true_type_kerning(text_fragment)
111
+ font = text_fragment.style.font
112
+ table = font.wrapped_font[:kern].horizontal_kerning_subtable
113
+ items = text_fragment.items
114
+ each_glyph_pair(items) do |left_item, right_item, left, right|
115
+ if (left + 1 == right) && (kerning = table.kern(left_item.id, right_item.id))
116
+ items.insert(right, -kerning * font.scaling_factor)
117
+ right + 1
118
+ else
119
+ right
120
+ end
121
+ end
122
+ end
123
+
124
+ # :call-seq:
125
+ # each_glyph_pair(items) {|left_item, right_item, left, right| block }
126
+ #
127
+ # Yields each pair of glyphs of the items array (so right need not be left + 1 if there are
128
+ # one or more kerning values between the two glyphs).
129
+ #
130
+ # The return value of the block is taken as the next *left* item position.
131
+ def each_glyph_pair(items)
132
+ left = 0
133
+ left_item = items[left]
134
+ right = 1
135
+ right_item = items[right]
136
+ while left_item && right_item
137
+ if left_item.kind_of?(Numeric)
138
+ left += 1
139
+ left_item = items[left]
140
+ right = left + 1
141
+ elsif right_item.kind_of?(Numeric)
142
+ right += 1
143
+ else
144
+ left = yield(left_item, right_item, left, right)
145
+ left_item = items[left]
146
+ right = left + 1
147
+ end
148
+ right_item = items[right]
149
+ end
150
+ end
151
+
152
+ end
153
+
154
+ end
155
+ end
@@ -59,7 +59,6 @@ module HexaPDF
59
59
  # end
60
60
  module Task
61
61
 
62
- autoload(:SetMinPDFVersion, 'hexapdf/task/set_min_pdf_version')
63
62
  autoload(:Optimize, 'hexapdf/task/optimize')
64
63
  autoload(:Dereference, 'hexapdf/task/dereference')
65
64
 
@@ -81,7 +81,7 @@ module HexaPDF
81
81
  end
82
82
 
83
83
  def dereference(object) #:nodoc:
84
- return object if @seen.key?(object.data)
84
+ return object if object.nil? || @seen.key?(object.data)
85
85
  @seen[object.data] = true
86
86
  recurse(object.value)
87
87
  object
@@ -251,10 +251,11 @@ module HexaPDF
251
251
  prepare_string_scanner(10)
252
252
  tmp = Reference.new(tmp, @ss[1].to_i) if @ss.scan(REFERENCE_RE)
253
253
  tmp
254
- else
255
- val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/)
254
+ elsif (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
256
255
  val << '0'.freeze if val.getbyte(-1) == 46 # dot '.'
257
256
  Float(val)
257
+ else
258
+ parse_keyword
258
259
  end
259
260
  end
260
261
 
@@ -99,7 +99,8 @@ module HexaPDF
99
99
 
100
100
  descent = self[:Descent]
101
101
  if descent && descent > 0
102
- yield("The /Descent value needs to be a negative number", false)
102
+ yield("The /Descent value needs to be a negative number", true)
103
+ self[:Descent] = -descent
103
104
  end
104
105
  end
105
106
 
@@ -54,7 +54,9 @@ module HexaPDF
54
54
 
55
55
  # Returns the CID font of this type 0 font.
56
56
  def descendant_font
57
- document.cache(@data, :descendant_font) { document.deref(self[:DescendantFonts][0]) }
57
+ document.cache(@data, :descendant_font) do
58
+ document.wrap(document.deref(self[:DescendantFonts][0]))
59
+ end
58
60
  end
59
61
 
60
62
  # Returns the writing mode which is either :horizontal or :vertical.
@@ -89,12 +89,20 @@ module HexaPDF
89
89
  self[:Resources] ||= document.wrap({}, type: :XXResources)
90
90
  end
91
91
 
92
- # Processes the content streams associated with the page with the given processor object.
92
+ # Processes the content stream of the form XObject with the given processor object.
93
+ #
94
+ # The +original_resources+ argument has to be set to a page's resources if this form XObject
95
+ # is processed as part of this page.
93
96
  #
94
97
  # See: HexaPDF::Content::Processor
95
- def process_contents(processor)
96
- self[:Resources] = {} if self[:Resources].nil?
97
- processor.resources = self[:Resources]
98
+ def process_contents(processor, original_resources: nil)
99
+ processor.resources = if self[:Resources]
100
+ self[:Resources]
101
+ elsif original_resources
102
+ original_resources
103
+ else
104
+ document.wrap({}, type: :XXResources)
105
+ end
98
106
  Content::Parser.parse(contents, processor)
99
107
  end
100
108
 
@@ -34,6 +34,6 @@
34
34
  module HexaPDF
35
35
 
36
36
  # The version of HexaPDF.
37
- VERSION = '0.4.0'.freeze
37
+ VERSION = '0.5.0'.freeze
38
38
 
39
39
  end