pdf-reader 1.1.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module PDF
4
5
  class Reader
@@ -20,17 +21,23 @@ module PDF
20
21
  # the raw PDF object that defines this page
21
22
  attr_reader :page_object
22
23
 
24
+ # a Hash-like object for storing cached data. Generally this is scoped to
25
+ # the current document and is used to avoid repeating expensive
26
+ # operations
27
+ attr_reader :cache
28
+
23
29
  # creates a new page wrapper.
24
30
  #
25
31
  # * objects - an ObjectHash instance that wraps a PDF file
26
32
  # * pagenum - an int specifying the page number to expose. 1 indexed.
27
33
  #
28
- def initialize(objects, pagenum)
34
+ def initialize(objects, pagenum, options = {})
29
35
  @objects, @pagenum = objects, pagenum
30
36
  @page_object = objects.deref(objects.page_references[pagenum - 1])
37
+ @cache = options[:cache] || {}
31
38
 
32
39
  unless @page_object.is_a?(::Hash)
33
- raise ArgumentError, "invalid page: #{pagenum}"
40
+ raise InvalidPageError, "Invalid page: #{pagenum}"
34
41
  end
35
42
  end
36
43
 
@@ -55,6 +62,16 @@ module PDF
55
62
  hash.merge!(@objects.deref(obj))
56
63
  end
57
64
  }
65
+ # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
66
+ # out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
67
+ @attributes[:MediaBox] ||= [0,0,612,792]
68
+ @attributes
69
+ end
70
+
71
+ # Convenience method to identify the page's orientation.
72
+ #
73
def orientation
  # hand this page's attributes to an OrientationDetector and report
  # whatever orientation it returns
  detector = OrientationDetector.new(attributes)
  detector.orientation
end
59
76
 
60
77
  # returns the plain text content of this page encoded as UTF-8. Any
@@ -107,6 +124,34 @@ module PDF
107
124
  }.join(" ")
108
125
  end
109
126
 
127
+ # returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
128
+ #
129
def rotate
  # /Rotate must be a multiple of 90 but may legally be negative or larger
  # than 360 (e.g. -90 or 450), so wrap it into the 0...360 range first.
  # A missing entry is nil, which to_i turns into 0.
  value = attributes[:Rotate].to_i % 360
  case value
  when 0, 90, 180, 270
    value
  else
    # not a multiple of 90: treat the page as unrotated
    0
  end
end
138
+
139
+ # returns the "boxes" that define the page object.
140
+ # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
141
+ #
142
def boxes
  mediabox = attributes[:MediaBox]
  # PDF names are case sensitive and the dictionary key is /CropBox. Looking
  # up :Cropbox never matched, so a page's crop box was silently ignored.
  cropbox = attributes[:CropBox] || mediabox

  # BleedBox, TrimBox and ArtBox each default to the crop box, which in turn
  # defaults to the media box. Every value is passed through objects.deref!
  {
    MediaBox: objects.deref!(mediabox),
    CropBox: objects.deref!(cropbox),
    BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
    TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
    ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
  }
end
154
+
110
155
  private
111
156
 
112
157
  def root
@@ -139,7 +184,7 @@ module PDF
139
184
 
140
185
  # calls the name callback method on each receiver object with params as the arguments
141
186
  #
142
- def callback (receivers, name, params=[])
187
+ def callback(receivers, name, params=[])
143
188
  receivers.each do |receiver|
144
189
  receiver.send(name, *params) if receiver.respond_to?(name)
145
190
  end
@@ -0,0 +1,125 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'pdf/reader/overlapping_runs_filter'
5
+
6
class PDF::Reader

  # Takes a collection of TextRun objects and renders them into a single
  # string that best approximates the way they'd appear on a rendered PDF page.
  #
  # media box should be a 4 number array that describes the dimensions of the
  # page to be rendered as described by the page's MediaBox attribute
  class PageLayout

    DEFAULT_FONT_SIZE = 12

    # Fraction of the mean font size used as the column width when the runs
    # report no usable glyph width. This is only a rough fallback heuristic
    # so that such pages can still be laid out instead of raising.
    FALLBACK_GLYPH_WIDTH_RATIO = 0.5

    # runs     - a collection of TextRun objects
    # mediabox - a 4 number array, read as [x1, y1, x2, y2]
    #
    # Raises ArgumentError when mediabox is nil.
    def initialize(runs, mediabox)
      raise ArgumentError, "a mediabox must be provided" if mediabox.nil?

      @runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
      # mean() never returns nil, so only a zero mean needs a default
      @mean_font_size = mean(@runs.map(&:font_size))
      @mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
      @mean_glyph_width = mean(@runs.map(&:mean_character_width))
      @page_width = (mediabox[2] - mediabox[0]).abs
      @page_height = (mediabox[3] - mediabox[1]).abs
      @x_offset = @runs.map(&:x).min || 0
      lowest_y = @runs.map(&:y).min || 0
      # only shift rows when some run sits below y = 0
      @y_offset = lowest_y > 0 ? 0 : lowest_y
    end

    # Returns the runs laid out on a grid of characters, one row per line,
    # joined with "\n". Blank rows at the top and bottom are dropped and
    # trailing whitespace is stripped from each row.
    def to_s
      return "" if @runs.empty?
      # a page with no height or no width has no grid to draw on
      return "" if row_count == 0 || @page_width == 0

      page = Array.new(row_count) { " " * col_count }
      @runs.each do |run|
        x_pos = ((run.x - @x_offset) / col_multiplier).round
        y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
        next unless y_pos.between?(0, row_count) && x_pos.between?(0, col_count)

        # y_pos is 1-based. A run on the very top edge yields 0, which used
        # to index page[-1] and wrap onto the last row; clamp to the first.
        row_index = [y_pos - 1, 0].max
        local_string_insert(page[row_index], run.text, x_pos)
      end
      interesting_rows(page).map(&:rstrip).join("\n")
    end

    private

    # given an array of strings, return a new array with empty rows from the
    # beginning and end removed.
    #
    #   interesting_rows([ "", "one", "two", "" ])
    #   => [ "one", "two" ]
    #
    def interesting_rows(rows)
      first_line_with_text = rows.index { |row| !row.strip.empty? }
      return [] if first_line_with_text.nil?

      last_line_with_text = rows.rindex { |row| !row.strip.empty? }
      rows[first_line_with_text..last_line_with_text]
    end

    # number of text rows on the page: one per mean font size of height
    def row_count
      @row_count ||= (@page_height / @mean_font_size).floor
    end

    # number of text columns on the page, padded by 5%
    def col_count
      @col_count ||= ((@page_width / layout_glyph_width) * 1.05).floor
    end

    # The glyph width used to size columns. A zero, negative or NaN mean
    # would make col_count raise (FloatDomainError or a negative repeat
    # count), so fall back to an estimate based on the mean font size.
    def layout_glyph_width
      return @mean_glyph_width if @mean_glyph_width > 0

      @mean_font_size * FALLBACK_GLYPH_WIDTH_RATIO
    end

    # page units covered by one row
    def row_multiplier
      @row_multiplier ||= @page_height.to_f / row_count.to_f
    end

    # page units covered by one column
    def col_multiplier
      @col_multiplier ||= @page_width.to_f / col_count.to_f
    end

    # arithmetic mean of a collection of numbers, or 0 when it is empty
    def mean(collection)
      if collection.size == 0
        0
      else
        collection.inject(0) { |accum, v| accum + v } / collection.size.to_f
      end
    end

    # yields the integer y value and the runs sharing it, for each group of
    # sorted runs that have the same integer y
    def each_line(&block)
      @runs.sort.group_by { |run|
        run.y.to_i
      }.map { |y, collection|
        yield y, collection
      }
    end

    # take a collection of TextRun objects and merge any that are in close
    # proximity
    def merge_runs(runs)
      runs.group_by { |char|
        char.y.to_i
      }.flat_map { |_y, chars|
        group_chars_into_runs(chars.sort)
      }.sort
    end

    # walk the (already sorted) chars and fold each one into the previous run
    # when that run reports it as mergable?, otherwise start a new run
    def group_chars_into_runs(chars)
      chars.each_with_object([]) do |char, runs|
        if !runs.empty? && runs.last.mergable?(char)
          runs[-1] = runs.last + char
        else
          runs << char
        end
      end
    end

    # overwrite the characters of haystack starting at index with a copy of
    # needle
    def local_string_insert(haystack, needle, index)
      haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
    end
  end
end
@@ -1,42 +1,52 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
- require 'matrix'
4
+ require 'pdf/reader/transformation_matrix'
4
5
 
5
- module PDF
6
- class Reader
6
+ class PDF::Reader
7
+ # encapsulates logic for tracking graphics state as the instructions for
8
+ # a single page are processed. Most of the public methods correspond
9
+ # directly to PDF operators.
7
10
  class PageState
8
11
 
9
12
  DEFAULT_GRAPHICS_STATE = {
10
- :ctm => Matrix.identity(3),
11
- :char_spacing => 0,
12
- :word_spacing => 0,
13
- :h_scaling => 100,
14
- :text_leading => 0,
15
- :text_font => nil,
13
+ :char_spacing => 0,
14
+ :word_spacing => 0,
15
+ :h_scaling => 1.0,
16
+ :text_leading => 0,
17
+ :text_font => nil,
16
18
  :text_font_size => nil,
17
- :text_mode => 0,
18
- :text_rise => 0,
19
- :text_knockout => 0
19
+ :text_mode => 0,
20
+ :text_rise => 0,
21
+ :text_knockout => 0
20
22
  }
21
23
 
22
24
  # starting a new page
23
25
  def initialize(page)
24
26
  @page = page
27
+ @cache = page.cache
25
28
  @objects = page.objects
26
29
  @font_stack = [build_fonts(page.fonts)]
27
30
  @xobject_stack = [page.xobjects]
28
31
  @cs_stack = [page.color_spaces]
29
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
+ state[:ctm] = identity_matrix
30
34
  end
31
35
 
32
36
  #####################################################
33
37
  # Graphics State Operators
34
38
  #####################################################
35
39
 
40
+ # Clones the current graphics state and pushes it onto the top of the stack.
41
+ # Any changes that are subsequently made to the state can then be reversed
42
+ # by calling restore_graphics_state.
43
+ #
36
44
  def save_graphics_state
37
45
  @stack.push clone_state
38
46
  end
39
47
 
48
+ # Restore the state to the previous value on the stack.
49
+ #
40
50
  def restore_graphics_state
41
51
  @stack.pop
42
52
  end
@@ -53,16 +63,17 @@ module PDF
53
63
  # with the new matrix to form the updated matrix.
54
64
  #
55
65
  def concatenate_matrix(a, b, c, d, e, f)
56
- transform = Matrix[
57
- [a, b, 0],
58
- [c, d, 0],
59
- [e, f, 1]
60
- ]
61
66
  if state[:ctm]
62
- state[:ctm] = transform * state[:ctm]
67
+ ctm = state[:ctm]
68
+ state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
69
+ ctm.a, ctm.b,
70
+ ctm.c, ctm.d,
71
+ ctm.e, ctm.f
72
+ )
63
73
  else
64
- state[:ctm] = transform
74
+ state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
65
75
  end
76
+ @text_rendering_matrix = nil # invalidate cached value
66
77
  end
67
78
 
68
79
  #####################################################
@@ -70,13 +81,13 @@ module PDF
70
81
  #####################################################
71
82
 
72
83
  def begin_text_object
73
- @text_matrix = Matrix.identity(3)
74
- @text_line_matrix = Matrix.identity(3)
84
+ @text_matrix = identity_matrix
85
+ @text_line_matrix = identity_matrix
86
+ @font_size = nil
75
87
  end
76
88
 
77
89
  def end_text_object
78
- @text_matrix = Matrix.identity(3)
79
- @text_line_matrix = Matrix.identity(3)
90
+ # don't need to do anything
80
91
  end
81
92
 
82
93
  #####################################################
@@ -88,7 +99,7 @@ module PDF
88
99
  end
89
100
 
90
101
  def set_horizontal_text_scaling(h_scaling)
91
- state[:h_scaling] = h_scaling
102
+ state[:h_scaling] = h_scaling / 100.0
92
103
  end
93
104
 
94
105
  def set_text_font_and_size(label, size)
@@ -97,7 +108,11 @@ module PDF
97
108
  end
98
109
 
99
110
  def font_size
100
- state[:text_font_size] * @text_matrix[0,0]
111
+ @font_size ||= begin
112
+ _, zero = trm_transform(0,0)
113
+ _, one = trm_transform(1,1)
114
+ (zero - one).abs
115
+ end
101
116
  end
102
117
 
103
118
  def set_text_leading(leading)
@@ -121,12 +136,16 @@ module PDF
121
136
  #####################################################
122
137
 
123
138
  def move_text_position(x, y) # Td
124
- temp_matrix = Matrix[
125
- [1, 0, 0],
126
- [0, 1, 0],
127
- [x, y, 1]
128
- ]
129
- @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
139
+ temp = TransformationMatrix.new(1, 0,
140
+ 0, 1,
141
+ x, y)
142
+ @text_line_matrix = temp.multiply!(
143
+ @text_line_matrix.a, @text_line_matrix.b,
144
+ @text_line_matrix.c, @text_line_matrix.d,
145
+ @text_line_matrix.e, @text_line_matrix.f
146
+ )
147
+ @text_matrix = @text_line_matrix.dup
148
+ @font_size = @text_rendering_matrix = nil # invalidate cached value
130
149
  end
131
150
 
132
151
  def move_text_position_and_set_leading(x, y) # TD
@@ -135,11 +154,13 @@ module PDF
135
154
  end
136
155
 
137
156
  def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
138
- @text_matrix = @text_line_matrix = Matrix[
139
- [a, b, 0],
140
- [c, d, 0],
141
- [e, f, 1]
142
- ]
157
+ @text_matrix = TransformationMatrix.new(
158
+ a, b,
159
+ c, d,
160
+ e, f
161
+ )
162
+ @text_line_matrix = @text_matrix.dup
163
+ @font_size = @text_rendering_matrix = nil # invalidate cached value
143
164
  end
144
165
 
145
166
  def move_to_start_of_next_line # T*
@@ -176,7 +197,7 @@ module PDF
176
197
  concatenate_matrix(*matrix) if matrix
177
198
 
178
199
  if xobject.hash[:Subtype] == :Form
179
- form = PDF::Reader::FormXObject.new(@page, xobject)
200
+ form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
180
201
  @font_stack.unshift(form.font_objects)
181
202
  @xobject_stack.unshift(form.xobjects)
182
203
  yield form if block_given?
@@ -196,22 +217,29 @@ module PDF
196
217
  # transform x and y co-ordinates from the current user space to the
197
218
  # underlying device space.
198
219
  #
199
- def ctm_transform(x, y, z = 1)
220
+ def ctm_transform(x, y)
200
221
  [
201
- (ctm[0,0] * x) + (ctm[1,0] * y) + (ctm[2,0] * z),
202
- (ctm[0,1] * x) + (ctm[1,1] * y) + (ctm[2,1] * z)
222
+ (ctm.a * x) + (ctm.c * y) + (ctm.e),
223
+ (ctm.b * x) + (ctm.d * y) + (ctm.f)
203
224
  ]
204
225
  end
205
226
 
206
227
  # transform x and y co-ordinates from the current text space to the
207
228
  # underlying device space.
208
229
  #
209
- def trm_transform(x, y, z = 1)
230
+ # transforming (0,0) is a really common case, so optimise for it to
231
+ # avoid unnecessary object allocations
232
+ #
233
+ def trm_transform(x, y)
210
234
  trm = text_rendering_matrix
211
- [
212
- (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
213
- (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
214
- ]
235
+ if x == 0 && y == 0
236
+ [trm.e, trm.f]
237
+ else
238
+ [
239
+ (trm.a * x) + (trm.c * y) + (trm.e),
240
+ (trm.b * x) + (trm.d * y) + (trm.f)
241
+ ]
242
+ end
215
243
  end
216
244
 
217
245
  def current_font
@@ -239,16 +267,106 @@ module PDF
239
267
  dict ? dict[label] : nil
240
268
  end
241
269
 
270
+ # returns the number of graphics states currently on the stack. A copy of the
271
+ # current state is pushed by save_graphics_state and popped again by
272
+ # restore_graphics_state, which undoes any modifications made in between.
273
+ #
274
def stack_depth
  # one entry per graphics state currently held on @stack
  @stack.length
end
277
+
278
+ # This returns a deep clone of the current state, ensuring changes are
279
+ # kept separate from earlier states.
280
+ #
281
+ # Marshal is used to round-trip the state through a string to easily
282
+ # perform the deep clone. Kinda hacky, but effective.
283
+ #
284
def clone_state
  # with nothing on the stack there is nothing to copy
  return {} if @stack.empty?

  # dump the top-most state to a string and load it back to obtain a copy
  # that shares no objects with the original
  serialised = Marshal.dump(@stack.last)
  Marshal.load(serialised)
end
291
+
292
+ # after each glyph is painted onto the page the text matrix must be
293
+ # modified. There's no defined operator for this, but depending on
294
+ # the use case some receivers may need to mutate the state with this
295
+ # while walking a page.
296
+ #
297
+ # NOTE: some of the variable names in this method are obscure because
298
+ # they mirror variable names from the PDF spec
299
+ #
300
+ # NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
301
+ #
302
+ # Arguments:
303
+ #
304
+ # w0 - the glyph width in *text space*. This generally means the width
305
+ # in glyph space should be divided by 1000 before being passed to
306
+ # this function
307
+ # tj - any kerning that should be applied to the text matrix before the
308
+ # following glyph is painted. This is usually the numeric arguments
309
+ # in the array passed to a TJ operator
310
+ # word_boundary - a boolean indicating if a word boundary was just
311
+ # reached. Depending on the current state extra space
312
+ # may need to be added
313
+ #
314
def process_glyph_displacement(w0, tj, word_boundary)
  # the short names in the comments (fs, tc, tw, th, tx) are the ones used
  # by the PDF spec
  size = font_size                                        # fs
  char_gap = state[:char_spacing]                         # tc
  # word spacing only applies when a word boundary was just reached
  word_gap = word_boundary ? state[:word_spacing] : 0     # tw
  h_scale = state[:h_scaling]                             # th

  displacement = # tx
    if h_scale == 1 && tj == 0 && char_gap == 0 && word_gap == 0
      # optimise the common path to reduce Float allocations
      w0 * size
    elsif tj != 0
      # don't apply spacing to TJ displacement
      (w0 - (tj / 1000.0)) * size * h_scale
    else
      # apply horizontal scaling to spacing values but not font size
      ((w0 * size) + char_gap + word_gap) * h_scale
    end

  # TODO: I'm pretty sure that tx shouldn't need to be divided by
  #       ctm[0] here, but this gets my tests green and I'm out of
  #       ideas for now
  # TODO: support ty > 0
  x_scale = ctm.a
  displacement /= x_scale unless x_scale == 1 || x_scale == 0
  @text_matrix.horizontal_displacement_multiply!(displacement)

  # the text matrix just moved, so drop the memoized values derived from it
  @font_size = @text_rendering_matrix = nil
end
345
+
242
346
  private
243
347
 
348
+ # used for many and varied text positioning calculations. We potentially
349
+ # need to access the results of this method many times when working with
350
+ # text, so memoize it
351
+ #
244
352
  def text_rendering_matrix
245
- state_matrix = Matrix[
246
- [font_size * state[:h_scaling], 0, 0],
247
- [0, font_size, 0],
248
- [0, state[:text_rise], 1]
249
- ]
250
-
251
- state_matrix * @text_matrix * ctm
353
+ @text_rendering_matrix ||= begin
354
+ state_matrix = TransformationMatrix.new(
355
+ state[:text_font_size] * state[:h_scaling], 0,
356
+ 0, state[:text_font_size],
357
+ 0, state[:text_rise]
358
+ )
359
+ state_matrix.multiply!(
360
+ @text_matrix.a, @text_matrix.b,
361
+ @text_matrix.c, @text_matrix.d,
362
+ @text_matrix.e, @text_matrix.f
363
+ )
364
+ state_matrix.multiply!(
365
+ ctm.a, ctm.b,
366
+ ctm.c, ctm.d,
367
+ ctm.e, ctm.f
368
+ )
369
+ end
252
370
  end
253
371
 
254
372
  # return the current transformation matrix
@@ -271,25 +389,22 @@ module PDF
271
389
  ::Hash[wrapped_fonts]
272
390
  end
273
391
 
274
- # when save_graphics_state is called, we need to push a new copy of the
275
- # current state onto the stack. That way any modifications to the state
276
- # will be undone once restore_graphics_state is called.
277
- #
278
- # This returns a deep clone of the current state, ensuring changes are
279
- # keep separate from earlier states.
280
- #
281
- # Marshal is used to round-trip the state through a string to easily
282
- # perform the deep clone. Kinda hacky, but effective.
283
- #
284
- def clone_state
285
- if @stack.empty?
286
- {}
287
- else
288
- Marshal.load Marshal.dump(@stack.last)
289
- end
392
+ #####################################################
393
+ # Low-level Matrix Operations
394
+ #####################################################
395
+
396
+ # This class uses 3x3 matrices to represent geometric transformations
397
+ # These matrices are represented by arrays with 9 elements
398
+ # The array [a,b,c,d,e,f,g,h,i] would represent a matrix like:
399
+ # a b c
400
+ # d e f
401
+ # g h i
402
+
403
def identity_matrix
  # arguments are passed in a, b, c, d, e, f order
  TransformationMatrix.new(1, 0, 0, 1, 0, 0)
end
291
408
 
292
409
  end
293
- end
294
410
  end
295
-