pdf-reader 1.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf/reader/page.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
class Reader
|
@@ -20,17 +21,23 @@ module PDF
|
|
20
21
|
# the raw PDF object that defines this page
|
21
22
|
attr_reader :page_object
|
22
23
|
|
24
|
+
# a Hash-like object for storing cached data. Generally this is scoped to
|
25
|
+
# the current document and is used to avoid repeating expensive
|
26
|
+
# operations
|
27
|
+
attr_reader :cache
|
28
|
+
|
23
29
|
# creates a new page wrapper.
|
24
30
|
#
|
25
31
|
# * objects - an ObjectHash instance that wraps a PDF file
|
26
32
|
# * pagenum - an int specifying the page number to expose. 1 indexed.
|
27
33
|
#
|
28
|
-
def initialize(objects, pagenum)
|
34
|
+
def initialize(objects, pagenum, options = {})
|
29
35
|
@objects, @pagenum = objects, pagenum
|
30
36
|
@page_object = objects.deref(objects.page_references[pagenum - 1])
|
37
|
+
@cache = options[:cache] || {}
|
31
38
|
|
32
39
|
unless @page_object.is_a?(::Hash)
|
33
|
-
raise
|
40
|
+
raise InvalidPageError, "Invalid page: #{pagenum}"
|
34
41
|
end
|
35
42
|
end
|
36
43
|
|
@@ -55,6 +62,16 @@ module PDF
|
|
55
62
|
hash.merge!(@objects.deref(obj))
|
56
63
|
end
|
57
64
|
}
|
65
|
+
# This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
|
66
|
+
# out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
|
67
|
+
@attributes[:MediaBox] ||= [0,0,612,792]
|
68
|
+
@attributes
|
69
|
+
end
|
70
|
+
|
71
|
+
# Convenience method to identify the page's orientation.
|
72
|
+
#
|
73
|
+
def orientation
|
74
|
+
OrientationDetector.new(attributes).orientation
|
58
75
|
end
|
59
76
|
|
60
77
|
# returns the plain text content of this page encoded as UTF-8. Any
|
@@ -107,6 +124,34 @@ module PDF
|
|
107
124
|
}.join(" ")
|
108
125
|
end
|
109
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
110
155
|
private
|
111
156
|
|
112
157
|
def root
|
@@ -139,7 +184,7 @@ module PDF
|
|
139
184
|
|
140
185
|
# calls the name callback method on each receiver object with params as the arguments
|
141
186
|
#
|
142
|
-
def callback
|
187
|
+
def callback(receivers, name, params=[])
|
143
188
|
receivers.each do |receiver|
|
144
189
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
145
190
|
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'pdf/reader/overlapping_runs_filter'
|
5
|
+
|
6
|
+
class PDF::Reader
|
7
|
+
|
8
|
+
# Takes a collection of TextRun objects and renders them into a single
|
9
|
+
# string that best approximates the way they'd appear on a rendered PDF page.
|
10
|
+
#
|
11
|
+
# media box should be a 4 number array that describes the dimensions of the
|
12
|
+
# page to be rendered as described by the page's MediaBox attribute
|
13
|
+
class PageLayout
|
14
|
+
|
15
|
+
DEFAULT_FONT_SIZE = 12
|
16
|
+
|
17
|
+
def initialize(runs, mediabox)
|
18
|
+
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
19
|
+
|
20
|
+
@runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
|
21
|
+
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
|
+
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
|
+
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_s
|
32
|
+
return "" if @runs.empty?
|
33
|
+
return "" if row_count == 0
|
34
|
+
|
35
|
+
page = row_count.times.map { |i| " " * col_count }
|
36
|
+
@runs.each do |run|
|
37
|
+
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
38
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
39
|
+
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
40
|
+
local_string_insert(page[y_pos-1], run.text, x_pos)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
interesting_rows(page).map(&:rstrip).join("\n")
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
# given an array of strings, return a new array with empty rows from the
|
49
|
+
# beginning and end removed.
|
50
|
+
#
|
51
|
+
# interesting_rows([ "", "one", "two", "" ])
|
52
|
+
# => [ "one", "two" ]
|
53
|
+
#
|
54
|
+
def interesting_rows(rows)
|
55
|
+
line_lengths = rows.map { |l| l.strip.length }
|
56
|
+
|
57
|
+
return [] if line_lengths.all?(&:zero?)
|
58
|
+
|
59
|
+
first_line_with_text = line_lengths.index { |l| l > 0 }
|
60
|
+
last_line_with_text = line_lengths.size - line_lengths.reverse.index { |l| l > 0 }
|
61
|
+
interesting_line_count = last_line_with_text - first_line_with_text
|
62
|
+
rows[first_line_with_text, interesting_line_count].map
|
63
|
+
end
|
64
|
+
|
65
|
+
def row_count
|
66
|
+
@row_count ||= (@page_height / @mean_font_size).floor
|
67
|
+
end
|
68
|
+
|
69
|
+
def col_count
|
70
|
+
@col_count ||= ((@page_width / @mean_glyph_width) * 1.05).floor
|
71
|
+
end
|
72
|
+
|
73
|
+
def row_multiplier
|
74
|
+
@row_multiplier ||= @page_height.to_f / row_count.to_f
|
75
|
+
end
|
76
|
+
|
77
|
+
def col_multiplier
|
78
|
+
@col_multiplier ||= @page_width.to_f / col_count.to_f
|
79
|
+
end
|
80
|
+
|
81
|
+
def mean(collection)
|
82
|
+
if collection.size == 0
|
83
|
+
0
|
84
|
+
else
|
85
|
+
collection.inject(0) { |accum, v| accum + v} / collection.size.to_f
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def each_line(&block)
|
90
|
+
@runs.sort.group_by { |run|
|
91
|
+
run.y.to_i
|
92
|
+
}.map { |y, collection|
|
93
|
+
yield y, collection
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
# take a collection of TextRun objects and merge any that are in close
|
98
|
+
# proximity
|
99
|
+
def merge_runs(runs)
|
100
|
+
runs.group_by { |char|
|
101
|
+
char.y.to_i
|
102
|
+
}.map { |y, chars|
|
103
|
+
group_chars_into_runs(chars.sort)
|
104
|
+
}.flatten.sort
|
105
|
+
end
|
106
|
+
|
107
|
+
def group_chars_into_runs(chars)
|
108
|
+
runs = []
|
109
|
+
while head = chars.shift
|
110
|
+
if runs.empty?
|
111
|
+
runs << head
|
112
|
+
elsif runs.last.mergable?(head)
|
113
|
+
runs[-1] = runs.last + head
|
114
|
+
else
|
115
|
+
runs << head
|
116
|
+
end
|
117
|
+
end
|
118
|
+
runs
|
119
|
+
end
|
120
|
+
|
121
|
+
def local_string_insert(haystack, needle, index)
|
122
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -1,42 +1,52 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require '
|
4
|
+
require 'pdf/reader/transformation_matrix'
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
class PDF::Reader
|
7
|
+
# encapsulates logic for tracking graphics state as the instructions for
|
8
|
+
# a single page are processed. Most of the public methods correspond
|
9
|
+
# directly to PDF operators.
|
7
10
|
class PageState
|
8
11
|
|
9
12
|
DEFAULT_GRAPHICS_STATE = {
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:text_font => nil,
|
13
|
+
:char_spacing => 0,
|
14
|
+
:word_spacing => 0,
|
15
|
+
:h_scaling => 1.0,
|
16
|
+
:text_leading => 0,
|
17
|
+
:text_font => nil,
|
16
18
|
:text_font_size => nil,
|
17
|
-
:text_mode
|
18
|
-
:text_rise
|
19
|
-
:text_knockout
|
19
|
+
:text_mode => 0,
|
20
|
+
:text_rise => 0,
|
21
|
+
:text_knockout => 0
|
20
22
|
}
|
21
23
|
|
22
24
|
# starting a new page
|
23
25
|
def initialize(page)
|
24
26
|
@page = page
|
27
|
+
@cache = page.cache
|
25
28
|
@objects = page.objects
|
26
29
|
@font_stack = [build_fonts(page.fonts)]
|
27
30
|
@xobject_stack = [page.xobjects]
|
28
31
|
@cs_stack = [page.color_spaces]
|
29
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
+
state[:ctm] = identity_matrix
|
30
34
|
end
|
31
35
|
|
32
36
|
#####################################################
|
33
37
|
# Graphics State Operators
|
34
38
|
#####################################################
|
35
39
|
|
40
|
+
# Clones the current graphics state and pushes it onto the top of the stack.
|
41
|
+
# Any changes that are subsequently made to the state can then be reversed
|
42
|
+
# by calling restore_graphics_state.
|
43
|
+
#
|
36
44
|
def save_graphics_state
|
37
45
|
@stack.push clone_state
|
38
46
|
end
|
39
47
|
|
48
|
+
# Restore the state to the previous value on the stack.
|
49
|
+
#
|
40
50
|
def restore_graphics_state
|
41
51
|
@stack.pop
|
42
52
|
end
|
@@ -53,16 +63,17 @@ module PDF
|
|
53
63
|
# with the new matrix to form the updated matrix.
|
54
64
|
#
|
55
65
|
def concatenate_matrix(a, b, c, d, e, f)
|
56
|
-
transform = Matrix[
|
57
|
-
[a, b, 0],
|
58
|
-
[c, d, 0],
|
59
|
-
[e, f, 1]
|
60
|
-
]
|
61
66
|
if state[:ctm]
|
62
|
-
|
67
|
+
ctm = state[:ctm]
|
68
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
|
69
|
+
ctm.a, ctm.b,
|
70
|
+
ctm.c, ctm.d,
|
71
|
+
ctm.e, ctm.f
|
72
|
+
)
|
63
73
|
else
|
64
|
-
state[:ctm] =
|
74
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
|
65
75
|
end
|
76
|
+
@text_rendering_matrix = nil # invalidate cached value
|
66
77
|
end
|
67
78
|
|
68
79
|
#####################################################
|
@@ -70,13 +81,13 @@ module PDF
|
|
70
81
|
#####################################################
|
71
82
|
|
72
83
|
def begin_text_object
|
73
|
-
@text_matrix =
|
74
|
-
@text_line_matrix =
|
84
|
+
@text_matrix = identity_matrix
|
85
|
+
@text_line_matrix = identity_matrix
|
86
|
+
@font_size = nil
|
75
87
|
end
|
76
88
|
|
77
89
|
def end_text_object
|
78
|
-
|
79
|
-
@text_line_matrix = Matrix.identity(3)
|
90
|
+
# don't need to do anything
|
80
91
|
end
|
81
92
|
|
82
93
|
#####################################################
|
@@ -88,7 +99,7 @@ module PDF
|
|
88
99
|
end
|
89
100
|
|
90
101
|
def set_horizontal_text_scaling(h_scaling)
|
91
|
-
state[:h_scaling] = h_scaling
|
102
|
+
state[:h_scaling] = h_scaling / 100.0
|
92
103
|
end
|
93
104
|
|
94
105
|
def set_text_font_and_size(label, size)
|
@@ -97,7 +108,11 @@ module PDF
|
|
97
108
|
end
|
98
109
|
|
99
110
|
def font_size
|
100
|
-
|
111
|
+
@font_size ||= begin
|
112
|
+
_, zero = trm_transform(0,0)
|
113
|
+
_, one = trm_transform(1,1)
|
114
|
+
(zero - one).abs
|
115
|
+
end
|
101
116
|
end
|
102
117
|
|
103
118
|
def set_text_leading(leading)
|
@@ -121,12 +136,16 @@ module PDF
|
|
121
136
|
#####################################################
|
122
137
|
|
123
138
|
def move_text_position(x, y) # Td
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
139
|
+
temp = TransformationMatrix.new(1, 0,
|
140
|
+
0, 1,
|
141
|
+
x, y)
|
142
|
+
@text_line_matrix = temp.multiply!(
|
143
|
+
@text_line_matrix.a, @text_line_matrix.b,
|
144
|
+
@text_line_matrix.c, @text_line_matrix.d,
|
145
|
+
@text_line_matrix.e, @text_line_matrix.f
|
146
|
+
)
|
147
|
+
@text_matrix = @text_line_matrix.dup
|
148
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
130
149
|
end
|
131
150
|
|
132
151
|
def move_text_position_and_set_leading(x, y) # TD
|
@@ -135,11 +154,13 @@ module PDF
|
|
135
154
|
end
|
136
155
|
|
137
156
|
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
|
138
|
-
@text_matrix =
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
157
|
+
@text_matrix = TransformationMatrix.new(
|
158
|
+
a, b,
|
159
|
+
c, d,
|
160
|
+
e, f
|
161
|
+
)
|
162
|
+
@text_line_matrix = @text_matrix.dup
|
163
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
143
164
|
end
|
144
165
|
|
145
166
|
def move_to_start_of_next_line # T*
|
@@ -176,7 +197,7 @@ module PDF
|
|
176
197
|
concatenate_matrix(*matrix) if matrix
|
177
198
|
|
178
199
|
if xobject.hash[:Subtype] == :Form
|
179
|
-
form = PDF::Reader::FormXObject.new(@page, xobject)
|
200
|
+
form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
|
180
201
|
@font_stack.unshift(form.font_objects)
|
181
202
|
@xobject_stack.unshift(form.xobjects)
|
182
203
|
yield form if block_given?
|
@@ -196,22 +217,29 @@ module PDF
|
|
196
217
|
# transform x and y co-ordinates from the current user space to the
|
197
218
|
# underlying device space.
|
198
219
|
#
|
199
|
-
def ctm_transform(x, y
|
220
|
+
def ctm_transform(x, y)
|
200
221
|
[
|
201
|
-
(ctm
|
202
|
-
(ctm
|
222
|
+
(ctm.a * x) + (ctm.c * y) + (ctm.e),
|
223
|
+
(ctm.b * x) + (ctm.d * y) + (ctm.f)
|
203
224
|
]
|
204
225
|
end
|
205
226
|
|
206
227
|
# transform x and y co-ordinates from the current text space to the
|
207
228
|
# underlying device space.
|
208
229
|
#
|
209
|
-
|
230
|
+
# transforming (0,0) is a really common case, so optimise for it to
|
231
|
+
# avoid unnecessary object allocations
|
232
|
+
#
|
233
|
+
def trm_transform(x, y)
|
210
234
|
trm = text_rendering_matrix
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
235
|
+
if x == 0 && y == 0
|
236
|
+
[trm.e, trm.f]
|
237
|
+
else
|
238
|
+
[
|
239
|
+
(trm.a * x) + (trm.c * y) + (trm.e),
|
240
|
+
(trm.b * x) + (trm.d * y) + (trm.f)
|
241
|
+
]
|
242
|
+
end
|
215
243
|
end
|
216
244
|
|
217
245
|
def current_font
|
@@ -239,16 +267,106 @@ module PDF
|
|
239
267
|
dict ? dict[label] : nil
|
240
268
|
end
|
241
269
|
|
270
|
+
# when save_graphics_state is called, we need to push a new copy of the
|
271
|
+
# current state onto the stack. That way any modifications to the state
|
272
|
+
# will be undone once restore_graphics_state is called.
|
273
|
+
#
|
274
|
+
def stack_depth
|
275
|
+
@stack.size
|
276
|
+
end
|
277
|
+
|
278
|
+
# This returns a deep clone of the current state, ensuring changes are
|
279
|
+
# kept separate from earlier states.
|
280
|
+
#
|
281
|
+
# Marshal is used to round-trip the state through a string to easily
|
282
|
+
# perform the deep clone. Kinda hacky, but effective.
|
283
|
+
#
|
284
|
+
def clone_state
|
285
|
+
if @stack.empty?
|
286
|
+
{}
|
287
|
+
else
|
288
|
+
Marshal.load Marshal.dump(@stack.last)
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
# after each glyph is painted onto the page the text matrix must be
|
293
|
+
# modified. There's no defined operator for this, but depending on
|
294
|
+
# the use case some receivers may need to mutate the state with this
|
295
|
+
# while walking a page.
|
296
|
+
#
|
297
|
+
# NOTE: some of the variable names in this method are obscure because
|
298
|
+
# they mirror variable names from the PDF spec
|
299
|
+
#
|
300
|
+
# NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
|
301
|
+
#
|
302
|
+
# Arguments:
|
303
|
+
#
|
304
|
+
# w0 - the glyph width in *text space*. This generally means the width
|
305
|
+
# in glyph space should be divided by 1000 before being passed to
|
306
|
+
# this function
|
307
|
+
# tj - any kerning that should be applied to the text matrix before the
|
308
|
+
# following glyph is painted. This is usually the numeric arguments
|
309
|
+
# in the array passed to a TJ operator
|
310
|
+
# word_boundary - a boolean indicating if a word boundary was just
|
311
|
+
# reached. Depending on the current state extra space
|
312
|
+
# may need to be added
|
313
|
+
#
|
314
|
+
def process_glyph_displacement(w0, tj, word_boundary)
|
315
|
+
fs = font_size # font size
|
316
|
+
tc = state[:char_spacing]
|
317
|
+
if word_boundary
|
318
|
+
tw = state[:word_spacing]
|
319
|
+
else
|
320
|
+
tw = 0
|
321
|
+
end
|
322
|
+
th = state[:h_scaling]
|
323
|
+
# optimise the common path to reduce Float allocations
|
324
|
+
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
329
|
+
else
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
332
|
+
end
|
333
|
+
|
334
|
+
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
335
|
+
# ctm[0] here, but this gets my tests green and I'm out of
|
336
|
+
# ideas for now
|
337
|
+
# TODO: support ty > 0
|
338
|
+
if ctm.a == 1 || ctm.a == 0
|
339
|
+
@text_matrix.horizontal_displacement_multiply!(tx)
|
340
|
+
else
|
341
|
+
@text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
|
342
|
+
end
|
343
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
344
|
+
end
|
345
|
+
|
242
346
|
private
|
243
347
|
|
348
|
+
# used for many and varied text positioning calculations. We potentially
|
349
|
+
# need to access the results of this method many times when working with
|
350
|
+
# text, so memoize it
|
351
|
+
#
|
244
352
|
def text_rendering_matrix
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
353
|
+
@text_rendering_matrix ||= begin
|
354
|
+
state_matrix = TransformationMatrix.new(
|
355
|
+
state[:text_font_size] * state[:h_scaling], 0,
|
356
|
+
0, state[:text_font_size],
|
357
|
+
0, state[:text_rise]
|
358
|
+
)
|
359
|
+
state_matrix.multiply!(
|
360
|
+
@text_matrix.a, @text_matrix.b,
|
361
|
+
@text_matrix.c, @text_matrix.d,
|
362
|
+
@text_matrix.e, @text_matrix.f
|
363
|
+
)
|
364
|
+
state_matrix.multiply!(
|
365
|
+
ctm.a, ctm.b,
|
366
|
+
ctm.c, ctm.d,
|
367
|
+
ctm.e, ctm.f
|
368
|
+
)
|
369
|
+
end
|
252
370
|
end
|
253
371
|
|
254
372
|
# return the current transformation matrix
|
@@ -271,25 +389,22 @@ module PDF
|
|
271
389
|
::Hash[wrapped_fonts]
|
272
390
|
end
|
273
391
|
|
274
|
-
|
275
|
-
#
|
276
|
-
|
277
|
-
|
278
|
-
# This
|
279
|
-
#
|
280
|
-
#
|
281
|
-
#
|
282
|
-
#
|
283
|
-
#
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
end
|
392
|
+
#####################################################
|
393
|
+
# Low-level Matrix Operations
|
394
|
+
#####################################################
|
395
|
+
|
396
|
+
# This class uses 3x3 matrices to represent geometric transformations
|
397
|
+
# These matrices are represented by arrays with 9 elements
|
398
|
+
# The array [a,b,c,d,e,f,g,h,i] would represent a matrix like:
|
399
|
+
# a b c
|
400
|
+
# d e f
|
401
|
+
# g h i
|
402
|
+
|
403
|
+
def identity_matrix
|
404
|
+
TransformationMatrix.new(1, 0,
|
405
|
+
0, 1,
|
406
|
+
0, 0)
|
290
407
|
end
|
291
408
|
|
292
409
|
end
|
293
|
-
end
|
294
410
|
end
|
295
|
-
|