pdf-reader 1.1.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module PDF
4
5
  class Reader
@@ -20,17 +21,23 @@ module PDF
20
21
  # the raw PDF object that defines this page
21
22
  attr_reader :page_object
22
23
 
24
+ # a Hash-like object for storing cached data. Generally this is scoped to
25
+ # the current document and is used to avoid repeating expensive
26
+ # operations
27
+ attr_reader :cache
28
+
23
29
  # creates a new page wrapper.
24
30
  #
25
31
  # * objects - an ObjectHash instance that wraps a PDF file
26
32
  # * pagenum - an int specifying the page number to expose. 1 indexed.
27
33
  #
28
- def initialize(objects, pagenum)
34
+ def initialize(objects, pagenum, options = {})
29
35
  @objects, @pagenum = objects, pagenum
30
36
  @page_object = objects.deref(objects.page_references[pagenum - 1])
37
+ @cache = options[:cache] || {}
31
38
 
32
39
  unless @page_object.is_a?(::Hash)
33
- raise ArgumentError, "invalid page: #{pagenum}"
40
+ raise InvalidPageError, "Invalid page: #{pagenum}"
34
41
  end
35
42
  end
36
43
 
@@ -55,6 +62,16 @@ module PDF
55
62
  hash.merge!(@objects.deref(obj))
56
63
  end
57
64
  }
65
+ # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
66
+ # out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
67
+ @attributes[:MediaBox] ||= [0,0,612,792]
68
+ @attributes
69
+ end
70
+
71
+ # Convenience method to identify the page's orientation.
72
+ #
73
+ def orientation
74
+ OrientationDetector.new(attributes).orientation
58
75
  end
59
76
 
60
77
  # returns the plain text content of this page encoded as UTF-8. Any
@@ -107,6 +124,34 @@ module PDF
107
124
  }.join(" ")
108
125
  end
109
126
 
127
+ # returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
128
+ #
129
+ def rotate
130
+ value = attributes[:Rotate].to_i
131
+ case value
132
+ when 0, 90, 180, 270
133
+ value
134
+ else
135
+ 0
136
+ end
137
+ end
138
+
139
+ # returns the "boxes" that define the page object.
140
+ # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
141
+ #
142
+ def boxes
143
+ mediabox = attributes[:MediaBox]
144
+ cropbox = attributes[:Cropbox] || mediabox
145
+
146
+ {
147
+ MediaBox: objects.deref!(mediabox),
148
+ CropBox: objects.deref!(cropbox),
149
+ BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
150
+ TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
151
+ ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
152
+ }
153
+ end
154
+
110
155
  private
111
156
 
112
157
  def root
@@ -139,7 +184,7 @@ module PDF
139
184
 
140
185
  # calls the name callback method on each receiver object with params as the arguments
141
186
  #
142
- def callback (receivers, name, params=[])
187
+ def callback(receivers, name, params=[])
143
188
  receivers.each do |receiver|
144
189
  receiver.send(name, *params) if receiver.respond_to?(name)
145
190
  end
@@ -0,0 +1,125 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'pdf/reader/overlapping_runs_filter'
5
+
6
+ class PDF::Reader
7
+
8
+ # Takes a collection of TextRun objects and renders them into a single
9
+ # string that best approximates the way they'd appear on a rendered PDF page.
10
+ #
11
+ # media box should be a 4 number array that describes the dimensions of the
12
+ # page to be rendered as described by the page's MediaBox attribute
13
+ class PageLayout
14
+
15
+ DEFAULT_FONT_SIZE = 12
16
+
17
+ def initialize(runs, mediabox)
18
+ raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
19
+
20
+ @runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
21
+ @mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
22
+ @mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
23
+ @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
24
+ @page_width = (mediabox[2] - mediabox[0]).abs
25
+ @page_height = (mediabox[3] - mediabox[1]).abs
26
+ @x_offset = @runs.map(&:x).sort.first || 0
27
+ lowest_y = @runs.map(&:y).sort.first || 0
28
+ @y_offset = lowest_y > 0 ? 0 : lowest_y
29
+ end
30
+
31
+ def to_s
32
+ return "" if @runs.empty?
33
+ return "" if row_count == 0
34
+
35
+ page = row_count.times.map { |i| " " * col_count }
36
+ @runs.each do |run|
37
+ x_pos = ((run.x - @x_offset) / col_multiplier).round
38
+ y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
39
+ if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
40
+ local_string_insert(page[y_pos-1], run.text, x_pos)
41
+ end
42
+ end
43
+ interesting_rows(page).map(&:rstrip).join("\n")
44
+ end
45
+
46
+ private
47
+
48
+ # given an array of strings, return a new array with empty rows from the
49
+ # beginning and end removed.
50
+ #
51
+ # interesting_rows([ "", "one", "two", "" ])
52
+ # => [ "one", "two" ]
53
+ #
54
+ def interesting_rows(rows)
55
+ line_lengths = rows.map { |l| l.strip.length }
56
+
57
+ return [] if line_lengths.all?(&:zero?)
58
+
59
+ first_line_with_text = line_lengths.index { |l| l > 0 }
60
+ last_line_with_text = line_lengths.size - line_lengths.reverse.index { |l| l > 0 }
61
+ interesting_line_count = last_line_with_text - first_line_with_text
62
+ rows[first_line_with_text, interesting_line_count].map
63
+ end
64
+
65
+ def row_count
66
+ @row_count ||= (@page_height / @mean_font_size).floor
67
+ end
68
+
69
+ def col_count
70
+ @col_count ||= ((@page_width / @mean_glyph_width) * 1.05).floor
71
+ end
72
+
73
+ def row_multiplier
74
+ @row_multiplier ||= @page_height.to_f / row_count.to_f
75
+ end
76
+
77
+ def col_multiplier
78
+ @col_multiplier ||= @page_width.to_f / col_count.to_f
79
+ end
80
+
81
+ def mean(collection)
82
+ if collection.size == 0
83
+ 0
84
+ else
85
+ collection.inject(0) { |accum, v| accum + v} / collection.size.to_f
86
+ end
87
+ end
88
+
89
+ def each_line(&block)
90
+ @runs.sort.group_by { |run|
91
+ run.y.to_i
92
+ }.map { |y, collection|
93
+ yield y, collection
94
+ }
95
+ end
96
+
97
+ # take a collection of TextRun objects and merge any that are in close
98
+ # proximity
99
+ def merge_runs(runs)
100
+ runs.group_by { |char|
101
+ char.y.to_i
102
+ }.map { |y, chars|
103
+ group_chars_into_runs(chars.sort)
104
+ }.flatten.sort
105
+ end
106
+
107
+ def group_chars_into_runs(chars)
108
+ runs = []
109
+ while head = chars.shift
110
+ if runs.empty?
111
+ runs << head
112
+ elsif runs.last.mergable?(head)
113
+ runs[-1] = runs.last + head
114
+ else
115
+ runs << head
116
+ end
117
+ end
118
+ runs
119
+ end
120
+
121
+ def local_string_insert(haystack, needle, index)
122
+ haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
123
+ end
124
+ end
125
+ end
@@ -1,42 +1,52 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
- require 'matrix'
4
+ require 'pdf/reader/transformation_matrix'
4
5
 
5
- module PDF
6
- class Reader
6
+ class PDF::Reader
7
+ # encapsulates logic for tracking graphics state as the instructions for
8
+ # a single page are processed. Most of the public methods correspond
9
+ # directly to PDF operators.
7
10
  class PageState
8
11
 
9
12
  DEFAULT_GRAPHICS_STATE = {
10
- :ctm => Matrix.identity(3),
11
- :char_spacing => 0,
12
- :word_spacing => 0,
13
- :h_scaling => 100,
14
- :text_leading => 0,
15
- :text_font => nil,
13
+ :char_spacing => 0,
14
+ :word_spacing => 0,
15
+ :h_scaling => 1.0,
16
+ :text_leading => 0,
17
+ :text_font => nil,
16
18
  :text_font_size => nil,
17
- :text_mode => 0,
18
- :text_rise => 0,
19
- :text_knockout => 0
19
+ :text_mode => 0,
20
+ :text_rise => 0,
21
+ :text_knockout => 0
20
22
  }
21
23
 
22
24
  # starting a new page
23
25
  def initialize(page)
24
26
  @page = page
27
+ @cache = page.cache
25
28
  @objects = page.objects
26
29
  @font_stack = [build_fonts(page.fonts)]
27
30
  @xobject_stack = [page.xobjects]
28
31
  @cs_stack = [page.color_spaces]
29
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
+ state[:ctm] = identity_matrix
30
34
  end
31
35
 
32
36
  #####################################################
33
37
  # Graphics State Operators
34
38
  #####################################################
35
39
 
40
+ # Clones the current graphics state and pushes it onto the top of the stack.
41
+ # Any changes that are subsequently made to the state can then be reversed
42
+ # by calling restore_graphics_state.
43
+ #
36
44
  def save_graphics_state
37
45
  @stack.push clone_state
38
46
  end
39
47
 
48
+ # Restore the state to the previous value on the stack.
49
+ #
40
50
  def restore_graphics_state
41
51
  @stack.pop
42
52
  end
@@ -53,16 +63,17 @@ module PDF
53
63
  # with the new matrix to form the updated matrix.
54
64
  #
55
65
  def concatenate_matrix(a, b, c, d, e, f)
56
- transform = Matrix[
57
- [a, b, 0],
58
- [c, d, 0],
59
- [e, f, 1]
60
- ]
61
66
  if state[:ctm]
62
- state[:ctm] = transform * state[:ctm]
67
+ ctm = state[:ctm]
68
+ state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
69
+ ctm.a, ctm.b,
70
+ ctm.c, ctm.d,
71
+ ctm.e, ctm.f
72
+ )
63
73
  else
64
- state[:ctm] = transform
74
+ state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
65
75
  end
76
+ @text_rendering_matrix = nil # invalidate cached value
66
77
  end
67
78
 
68
79
  #####################################################
@@ -70,13 +81,13 @@ module PDF
70
81
  #####################################################
71
82
 
72
83
  def begin_text_object
73
- @text_matrix = Matrix.identity(3)
74
- @text_line_matrix = Matrix.identity(3)
84
+ @text_matrix = identity_matrix
85
+ @text_line_matrix = identity_matrix
86
+ @font_size = nil
75
87
  end
76
88
 
77
89
  def end_text_object
78
- @text_matrix = Matrix.identity(3)
79
- @text_line_matrix = Matrix.identity(3)
90
+ # don't need to do anything
80
91
  end
81
92
 
82
93
  #####################################################
@@ -88,7 +99,7 @@ module PDF
88
99
  end
89
100
 
90
101
  def set_horizontal_text_scaling(h_scaling)
91
- state[:h_scaling] = h_scaling
102
+ state[:h_scaling] = h_scaling / 100.0
92
103
  end
93
104
 
94
105
  def set_text_font_and_size(label, size)
@@ -97,7 +108,11 @@ module PDF
97
108
  end
98
109
 
99
110
  def font_size
100
- state[:text_font_size] * @text_matrix[0,0]
111
+ @font_size ||= begin
112
+ _, zero = trm_transform(0,0)
113
+ _, one = trm_transform(1,1)
114
+ (zero - one).abs
115
+ end
101
116
  end
102
117
 
103
118
  def set_text_leading(leading)
@@ -121,12 +136,16 @@ module PDF
121
136
  #####################################################
122
137
 
123
138
  def move_text_position(x, y) # Td
124
- temp_matrix = Matrix[
125
- [1, 0, 0],
126
- [0, 1, 0],
127
- [x, y, 1]
128
- ]
129
- @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
139
+ temp = TransformationMatrix.new(1, 0,
140
+ 0, 1,
141
+ x, y)
142
+ @text_line_matrix = temp.multiply!(
143
+ @text_line_matrix.a, @text_line_matrix.b,
144
+ @text_line_matrix.c, @text_line_matrix.d,
145
+ @text_line_matrix.e, @text_line_matrix.f
146
+ )
147
+ @text_matrix = @text_line_matrix.dup
148
+ @font_size = @text_rendering_matrix = nil # invalidate cached value
130
149
  end
131
150
 
132
151
  def move_text_position_and_set_leading(x, y) # TD
@@ -135,11 +154,13 @@ module PDF
135
154
  end
136
155
 
137
156
  def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
138
- @text_matrix = @text_line_matrix = Matrix[
139
- [a, b, 0],
140
- [c, d, 0],
141
- [e, f, 1]
142
- ]
157
+ @text_matrix = TransformationMatrix.new(
158
+ a, b,
159
+ c, d,
160
+ e, f
161
+ )
162
+ @text_line_matrix = @text_matrix.dup
163
+ @font_size = @text_rendering_matrix = nil # invalidate cached value
143
164
  end
144
165
 
145
166
  def move_to_start_of_next_line # T*
@@ -176,7 +197,7 @@ module PDF
176
197
  concatenate_matrix(*matrix) if matrix
177
198
 
178
199
  if xobject.hash[:Subtype] == :Form
179
- form = PDF::Reader::FormXObject.new(@page, xobject)
200
+ form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
180
201
  @font_stack.unshift(form.font_objects)
181
202
  @xobject_stack.unshift(form.xobjects)
182
203
  yield form if block_given?
@@ -196,22 +217,29 @@ module PDF
196
217
  # transform x and y co-ordinates from the current user space to the
197
218
  # underlying device space.
198
219
  #
199
- def ctm_transform(x, y, z = 1)
220
+ def ctm_transform(x, y)
200
221
  [
201
- (ctm[0,0] * x) + (ctm[1,0] * y) + (ctm[2,0] * z),
202
- (ctm[0,1] * x) + (ctm[1,1] * y) + (ctm[2,1] * z)
222
+ (ctm.a * x) + (ctm.c * y) + (ctm.e),
223
+ (ctm.b * x) + (ctm.d * y) + (ctm.f)
203
224
  ]
204
225
  end
205
226
 
206
227
  # transform x and y co-ordinates from the current text space to the
207
228
  # underlying device space.
208
229
  #
209
- def trm_transform(x, y, z = 1)
230
+ # transforming (0,0) is a really common case, so optimise for it to
231
+ # avoid unnecessary object allocations
232
+ #
233
+ def trm_transform(x, y)
210
234
  trm = text_rendering_matrix
211
- [
212
- (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
213
- (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
214
- ]
235
+ if x == 0 && y == 0
236
+ [trm.e, trm.f]
237
+ else
238
+ [
239
+ (trm.a * x) + (trm.c * y) + (trm.e),
240
+ (trm.b * x) + (trm.d * y) + (trm.f)
241
+ ]
242
+ end
215
243
  end
216
244
 
217
245
  def current_font
@@ -239,16 +267,106 @@ module PDF
239
267
  dict ? dict[label] : nil
240
268
  end
241
269
 
270
+ # when save_graphics_state is called, we need to push a new copy of the
271
+ # current state onto the stack. That way any modifications to the state
272
+ # will be undone once restore_graphics_state is called.
273
+ #
274
+ def stack_depth
275
+ @stack.size
276
+ end
277
+
278
+ # This returns a deep clone of the current state, ensuring changes are
279
+ # kept separate from earlier states.
280
+ #
281
+ # Marshal is used to round-trip the state through a string to easily
282
+ # perform the deep clone. Kinda hacky, but effective.
283
+ #
284
+ def clone_state
285
+ if @stack.empty?
286
+ {}
287
+ else
288
+ Marshal.load Marshal.dump(@stack.last)
289
+ end
290
+ end
291
+
292
+ # after each glyph is painted onto the page the text matrix must be
293
+ # modified. There's no defined operator for this, but depending on
294
+ # the use case some receivers may need to mutate the state with this
295
+ # while walking a page.
296
+ #
297
+ # NOTE: some of the variable names in this method are obscure because
298
+ # they mirror variable names from the PDF spec
299
+ #
300
+ # NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
301
+ #
302
+ # Arguments:
303
+ #
304
+ # w0 - the glyph width in *text space*. This generally means the width
305
+ # in glyph space should be divided by 1000 before being passed to
306
+ # this function
307
+ # tj - any kerning that should be applied to the text matrix before the
308
+ # following glyph is painted. This is usually the numeric arguments
309
+ # in the array passed to a TJ operator
310
+ # word_boundary - a boolean indicating if a word boundary was just
311
+ # reached. Depending on the current state extra space
312
+ # may need to be added
313
+ #
314
+ def process_glyph_displacement(w0, tj, word_boundary)
315
+ fs = font_size # font size
316
+ tc = state[:char_spacing]
317
+ if word_boundary
318
+ tw = state[:word_spacing]
319
+ else
320
+ tw = 0
321
+ end
322
+ th = state[:h_scaling]
323
+ # optimise the common path to reduce Float allocations
324
+ if th == 1 && tj == 0 && tc == 0 && tw == 0
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
329
+ else
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
332
+ end
333
+
334
+ # TODO: I'm pretty sure that tx shouldn't need to be divided by
335
+ # ctm[0] here, but this gets my tests green and I'm out of
336
+ # ideas for now
337
+ # TODO: support ty > 0
338
+ if ctm.a == 1 || ctm.a == 0
339
+ @text_matrix.horizontal_displacement_multiply!(tx)
340
+ else
341
+ @text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
342
+ end
343
+ @font_size = @text_rendering_matrix = nil # invalidate cached value
344
+ end
345
+
242
346
  private
243
347
 
348
+ # used for many and varied text positioning calculations. We potentially
349
+ # need to access the results of this method many times when working with
350
+ # text, so memoize it
351
+ #
244
352
  def text_rendering_matrix
245
- state_matrix = Matrix[
246
- [font_size * state[:h_scaling], 0, 0],
247
- [0, font_size, 0],
248
- [0, state[:text_rise], 1]
249
- ]
250
-
251
- state_matrix * @text_matrix * ctm
353
+ @text_rendering_matrix ||= begin
354
+ state_matrix = TransformationMatrix.new(
355
+ state[:text_font_size] * state[:h_scaling], 0,
356
+ 0, state[:text_font_size],
357
+ 0, state[:text_rise]
358
+ )
359
+ state_matrix.multiply!(
360
+ @text_matrix.a, @text_matrix.b,
361
+ @text_matrix.c, @text_matrix.d,
362
+ @text_matrix.e, @text_matrix.f
363
+ )
364
+ state_matrix.multiply!(
365
+ ctm.a, ctm.b,
366
+ ctm.c, ctm.d,
367
+ ctm.e, ctm.f
368
+ )
369
+ end
252
370
  end
253
371
 
254
372
  # return the current transformation matrix
@@ -271,25 +389,22 @@ module PDF
271
389
  ::Hash[wrapped_fonts]
272
390
  end
273
391
 
274
- # when save_graphics_state is called, we need to push a new copy of the
275
- # current state onto the stack. That way any modifications to the state
276
- # will be undone once restore_graphics_state is called.
277
- #
278
- # This returns a deep clone of the current state, ensuring changes are
279
- # keep separate from earlier states.
280
- #
281
- # Marshal is used to round-trip the state through a string to easily
282
- # perform the deep clone. Kinda hacky, but effective.
283
- #
284
- def clone_state
285
- if @stack.empty?
286
- {}
287
- else
288
- Marshal.load Marshal.dump(@stack.last)
289
- end
392
+ #####################################################
393
+ # Low-level Matrix Operations
394
+ #####################################################
395
+
396
+ # This class uses 3x3 matrices to represent geometric transformations
397
+ # These matrices are represented by arrays with 9 elements
398
+ # The array [a,b,c,d,e,f,g,h,i] would represent a matrix like:
399
+ # a b c
400
+ # d e f
401
+ # g h i
402
+
403
+ def identity_matrix
404
+ TransformationMatrix.new(1, 0,
405
+ 0, 1,
406
+ 0, 0)
290
407
  end
291
408
 
292
409
  end
293
- end
294
410
  end
295
-