pdf-reader 1.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf/reader/page.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
class Reader
|
@@ -20,17 +21,23 @@ module PDF
|
|
20
21
|
# the raw PDF object that defines this page
|
21
22
|
attr_reader :page_object
|
22
23
|
|
24
|
+
# a Hash-like object for storing cached data. Generally this is scoped to
|
25
|
+
# the current document and is used to avoid repeating expensive
|
26
|
+
# operations
|
27
|
+
attr_reader :cache
|
28
|
+
|
23
29
|
# creates a new page wrapper.
|
24
30
|
#
|
25
31
|
# * objects - an ObjectHash instance that wraps a PDF file
|
26
32
|
# * pagenum - an int specifying the page number to expose. 1 indexed.
|
27
33
|
#
|
28
|
-
def initialize(objects, pagenum)
|
34
|
+
def initialize(objects, pagenum, options = {})
|
29
35
|
@objects, @pagenum = objects, pagenum
|
30
36
|
@page_object = objects.deref(objects.page_references[pagenum - 1])
|
37
|
+
@cache = options[:cache] || {}
|
31
38
|
|
32
39
|
unless @page_object.is_a?(::Hash)
|
33
|
-
raise
|
40
|
+
raise InvalidPageError, "Invalid page: #{pagenum}"
|
34
41
|
end
|
35
42
|
end
|
36
43
|
|
@@ -55,6 +62,16 @@ module PDF
|
|
55
62
|
hash.merge!(@objects.deref(obj))
|
56
63
|
end
|
57
64
|
}
|
65
|
+
# This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
|
66
|
+
# out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
|
67
|
+
@attributes[:MediaBox] ||= [0,0,612,792]
|
68
|
+
@attributes
|
69
|
+
end
|
70
|
+
|
71
|
+
# Convenience method to identify the page's orientation.
|
72
|
+
#
|
73
|
+
def orientation
|
74
|
+
OrientationDetector.new(attributes).orientation
|
58
75
|
end
|
59
76
|
|
60
77
|
# returns the plain text content of this page encoded as UTF-8. Any
|
@@ -107,6 +124,34 @@ module PDF
|
|
107
124
|
}.join(" ")
|
108
125
|
end
|
109
126
|
|
127
|
+
# returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
|
128
|
+
#
|
129
|
+
def rotate
|
130
|
+
value = attributes[:Rotate].to_i
|
131
|
+
case value
|
132
|
+
when 0, 90, 180, 270
|
133
|
+
value
|
134
|
+
else
|
135
|
+
0
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# returns the "boxes" that define the page object.
|
140
|
+
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
141
|
+
#
|
142
|
+
def boxes
|
143
|
+
mediabox = attributes[:MediaBox]
|
144
|
+
cropbox = attributes[:Cropbox] || mediabox
|
145
|
+
|
146
|
+
{
|
147
|
+
MediaBox: objects.deref!(mediabox),
|
148
|
+
CropBox: objects.deref!(cropbox),
|
149
|
+
BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
|
150
|
+
TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
|
151
|
+
ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
|
152
|
+
}
|
153
|
+
end
|
154
|
+
|
110
155
|
private
|
111
156
|
|
112
157
|
def root
|
@@ -139,7 +184,7 @@ module PDF
|
|
139
184
|
|
140
185
|
# calls the name callback method on each receiver object with params as the arguments
|
141
186
|
#
|
142
|
-
def callback
|
187
|
+
def callback(receivers, name, params=[])
|
143
188
|
receivers.each do |receiver|
|
144
189
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
145
190
|
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'pdf/reader/overlapping_runs_filter'
|
5
|
+
|
6
|
+
class PDF::Reader
|
7
|
+
|
8
|
+
# Takes a collection of TextRun objects and renders them into a single
|
9
|
+
# string that best approximates the way they'd appear on a rendered PDF page.
|
10
|
+
#
|
11
|
+
# media box should be a 4 number array that describes the dimensions of the
|
12
|
+
# page to be rendered as described by the page's MediaBox attribute
|
13
|
+
class PageLayout
|
14
|
+
|
15
|
+
DEFAULT_FONT_SIZE = 12
|
16
|
+
|
17
|
+
def initialize(runs, mediabox)
|
18
|
+
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
19
|
+
|
20
|
+
@runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
|
21
|
+
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
|
+
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
|
+
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
24
|
+
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
+
@page_height = (mediabox[3] - mediabox[1]).abs
|
26
|
+
@x_offset = @runs.map(&:x).sort.first || 0
|
27
|
+
lowest_y = @runs.map(&:y).sort.first || 0
|
28
|
+
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_s
|
32
|
+
return "" if @runs.empty?
|
33
|
+
return "" if row_count == 0
|
34
|
+
|
35
|
+
page = row_count.times.map { |i| " " * col_count }
|
36
|
+
@runs.each do |run|
|
37
|
+
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
38
|
+
y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
|
39
|
+
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
40
|
+
local_string_insert(page[y_pos-1], run.text, x_pos)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
interesting_rows(page).map(&:rstrip).join("\n")
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
# given an array of strings, return a new array with empty rows from the
|
49
|
+
# beginning and end removed.
|
50
|
+
#
|
51
|
+
# interesting_rows([ "", "one", "two", "" ])
|
52
|
+
# => [ "one", "two" ]
|
53
|
+
#
|
54
|
+
def interesting_rows(rows)
|
55
|
+
line_lengths = rows.map { |l| l.strip.length }
|
56
|
+
|
57
|
+
return [] if line_lengths.all?(&:zero?)
|
58
|
+
|
59
|
+
first_line_with_text = line_lengths.index { |l| l > 0 }
|
60
|
+
last_line_with_text = line_lengths.size - line_lengths.reverse.index { |l| l > 0 }
|
61
|
+
interesting_line_count = last_line_with_text - first_line_with_text
|
62
|
+
rows[first_line_with_text, interesting_line_count].map
|
63
|
+
end
|
64
|
+
|
65
|
+
def row_count
|
66
|
+
@row_count ||= (@page_height / @mean_font_size).floor
|
67
|
+
end
|
68
|
+
|
69
|
+
def col_count
|
70
|
+
@col_count ||= ((@page_width / @mean_glyph_width) * 1.05).floor
|
71
|
+
end
|
72
|
+
|
73
|
+
def row_multiplier
|
74
|
+
@row_multiplier ||= @page_height.to_f / row_count.to_f
|
75
|
+
end
|
76
|
+
|
77
|
+
def col_multiplier
|
78
|
+
@col_multiplier ||= @page_width.to_f / col_count.to_f
|
79
|
+
end
|
80
|
+
|
81
|
+
def mean(collection)
|
82
|
+
if collection.size == 0
|
83
|
+
0
|
84
|
+
else
|
85
|
+
collection.inject(0) { |accum, v| accum + v} / collection.size.to_f
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def each_line(&block)
|
90
|
+
@runs.sort.group_by { |run|
|
91
|
+
run.y.to_i
|
92
|
+
}.map { |y, collection|
|
93
|
+
yield y, collection
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
# take a collection of TextRun objects and merge any that are in close
|
98
|
+
# proximity
|
99
|
+
def merge_runs(runs)
|
100
|
+
runs.group_by { |char|
|
101
|
+
char.y.to_i
|
102
|
+
}.map { |y, chars|
|
103
|
+
group_chars_into_runs(chars.sort)
|
104
|
+
}.flatten.sort
|
105
|
+
end
|
106
|
+
|
107
|
+
def group_chars_into_runs(chars)
|
108
|
+
runs = []
|
109
|
+
while head = chars.shift
|
110
|
+
if runs.empty?
|
111
|
+
runs << head
|
112
|
+
elsif runs.last.mergable?(head)
|
113
|
+
runs[-1] = runs.last + head
|
114
|
+
else
|
115
|
+
runs << head
|
116
|
+
end
|
117
|
+
end
|
118
|
+
runs
|
119
|
+
end
|
120
|
+
|
121
|
+
def local_string_insert(haystack, needle, index)
|
122
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -1,42 +1,52 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require '
|
4
|
+
require 'pdf/reader/transformation_matrix'
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
class PDF::Reader
|
7
|
+
# encapsulates logic for tracking graphics state as the instructions for
|
8
|
+
# a single page are processed. Most of the public methods correspond
|
9
|
+
# directly to PDF operators.
|
7
10
|
class PageState
|
8
11
|
|
9
12
|
DEFAULT_GRAPHICS_STATE = {
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:text_font => nil,
|
13
|
+
:char_spacing => 0,
|
14
|
+
:word_spacing => 0,
|
15
|
+
:h_scaling => 1.0,
|
16
|
+
:text_leading => 0,
|
17
|
+
:text_font => nil,
|
16
18
|
:text_font_size => nil,
|
17
|
-
:text_mode
|
18
|
-
:text_rise
|
19
|
-
:text_knockout
|
19
|
+
:text_mode => 0,
|
20
|
+
:text_rise => 0,
|
21
|
+
:text_knockout => 0
|
20
22
|
}
|
21
23
|
|
22
24
|
# starting a new page
|
23
25
|
def initialize(page)
|
24
26
|
@page = page
|
27
|
+
@cache = page.cache
|
25
28
|
@objects = page.objects
|
26
29
|
@font_stack = [build_fonts(page.fonts)]
|
27
30
|
@xobject_stack = [page.xobjects]
|
28
31
|
@cs_stack = [page.color_spaces]
|
29
32
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
+
state[:ctm] = identity_matrix
|
30
34
|
end
|
31
35
|
|
32
36
|
#####################################################
|
33
37
|
# Graphics State Operators
|
34
38
|
#####################################################
|
35
39
|
|
40
|
+
# Clones the current graphics state and pushes it onto the top of the stack.
|
41
|
+
# Any changes that are subsequently made to the state can then be reversed
|
42
|
+
# by calling restore_graphics_state.
|
43
|
+
#
|
36
44
|
def save_graphics_state
|
37
45
|
@stack.push clone_state
|
38
46
|
end
|
39
47
|
|
48
|
+
# Restore the state to the previous value on the stack.
|
49
|
+
#
|
40
50
|
def restore_graphics_state
|
41
51
|
@stack.pop
|
42
52
|
end
|
@@ -53,16 +63,17 @@ module PDF
|
|
53
63
|
# with the new matrix to form the updated matrix.
|
54
64
|
#
|
55
65
|
def concatenate_matrix(a, b, c, d, e, f)
|
56
|
-
transform = Matrix[
|
57
|
-
[a, b, 0],
|
58
|
-
[c, d, 0],
|
59
|
-
[e, f, 1]
|
60
|
-
]
|
61
66
|
if state[:ctm]
|
62
|
-
|
67
|
+
ctm = state[:ctm]
|
68
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
|
69
|
+
ctm.a, ctm.b,
|
70
|
+
ctm.c, ctm.d,
|
71
|
+
ctm.e, ctm.f
|
72
|
+
)
|
63
73
|
else
|
64
|
-
state[:ctm] =
|
74
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
|
65
75
|
end
|
76
|
+
@text_rendering_matrix = nil # invalidate cached value
|
66
77
|
end
|
67
78
|
|
68
79
|
#####################################################
|
@@ -70,13 +81,13 @@ module PDF
|
|
70
81
|
#####################################################
|
71
82
|
|
72
83
|
def begin_text_object
|
73
|
-
@text_matrix =
|
74
|
-
@text_line_matrix =
|
84
|
+
@text_matrix = identity_matrix
|
85
|
+
@text_line_matrix = identity_matrix
|
86
|
+
@font_size = nil
|
75
87
|
end
|
76
88
|
|
77
89
|
def end_text_object
|
78
|
-
|
79
|
-
@text_line_matrix = Matrix.identity(3)
|
90
|
+
# don't need to do anything
|
80
91
|
end
|
81
92
|
|
82
93
|
#####################################################
|
@@ -88,7 +99,7 @@ module PDF
|
|
88
99
|
end
|
89
100
|
|
90
101
|
def set_horizontal_text_scaling(h_scaling)
|
91
|
-
state[:h_scaling] = h_scaling
|
102
|
+
state[:h_scaling] = h_scaling / 100.0
|
92
103
|
end
|
93
104
|
|
94
105
|
def set_text_font_and_size(label, size)
|
@@ -97,7 +108,11 @@ module PDF
|
|
97
108
|
end
|
98
109
|
|
99
110
|
def font_size
|
100
|
-
|
111
|
+
@font_size ||= begin
|
112
|
+
_, zero = trm_transform(0,0)
|
113
|
+
_, one = trm_transform(1,1)
|
114
|
+
(zero - one).abs
|
115
|
+
end
|
101
116
|
end
|
102
117
|
|
103
118
|
def set_text_leading(leading)
|
@@ -121,12 +136,16 @@ module PDF
|
|
121
136
|
#####################################################
|
122
137
|
|
123
138
|
def move_text_position(x, y) # Td
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
139
|
+
temp = TransformationMatrix.new(1, 0,
|
140
|
+
0, 1,
|
141
|
+
x, y)
|
142
|
+
@text_line_matrix = temp.multiply!(
|
143
|
+
@text_line_matrix.a, @text_line_matrix.b,
|
144
|
+
@text_line_matrix.c, @text_line_matrix.d,
|
145
|
+
@text_line_matrix.e, @text_line_matrix.f
|
146
|
+
)
|
147
|
+
@text_matrix = @text_line_matrix.dup
|
148
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
130
149
|
end
|
131
150
|
|
132
151
|
def move_text_position_and_set_leading(x, y) # TD
|
@@ -135,11 +154,13 @@ module PDF
|
|
135
154
|
end
|
136
155
|
|
137
156
|
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
|
138
|
-
@text_matrix =
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
157
|
+
@text_matrix = TransformationMatrix.new(
|
158
|
+
a, b,
|
159
|
+
c, d,
|
160
|
+
e, f
|
161
|
+
)
|
162
|
+
@text_line_matrix = @text_matrix.dup
|
163
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
143
164
|
end
|
144
165
|
|
145
166
|
def move_to_start_of_next_line # T*
|
@@ -176,7 +197,7 @@ module PDF
|
|
176
197
|
concatenate_matrix(*matrix) if matrix
|
177
198
|
|
178
199
|
if xobject.hash[:Subtype] == :Form
|
179
|
-
form = PDF::Reader::FormXObject.new(@page, xobject)
|
200
|
+
form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
|
180
201
|
@font_stack.unshift(form.font_objects)
|
181
202
|
@xobject_stack.unshift(form.xobjects)
|
182
203
|
yield form if block_given?
|
@@ -196,22 +217,29 @@ module PDF
|
|
196
217
|
# transform x and y co-ordinates from the current user space to the
|
197
218
|
# underlying device space.
|
198
219
|
#
|
199
|
-
def ctm_transform(x, y
|
220
|
+
def ctm_transform(x, y)
|
200
221
|
[
|
201
|
-
(ctm
|
202
|
-
(ctm
|
222
|
+
(ctm.a * x) + (ctm.c * y) + (ctm.e),
|
223
|
+
(ctm.b * x) + (ctm.d * y) + (ctm.f)
|
203
224
|
]
|
204
225
|
end
|
205
226
|
|
206
227
|
# transform x and y co-ordinates from the current text space to the
|
207
228
|
# underlying device space.
|
208
229
|
#
|
209
|
-
|
230
|
+
# transforming (0,0) is a really common case, so optimise for it to
|
231
|
+
# avoid unnecessary object allocations
|
232
|
+
#
|
233
|
+
def trm_transform(x, y)
|
210
234
|
trm = text_rendering_matrix
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
235
|
+
if x == 0 && y == 0
|
236
|
+
[trm.e, trm.f]
|
237
|
+
else
|
238
|
+
[
|
239
|
+
(trm.a * x) + (trm.c * y) + (trm.e),
|
240
|
+
(trm.b * x) + (trm.d * y) + (trm.f)
|
241
|
+
]
|
242
|
+
end
|
215
243
|
end
|
216
244
|
|
217
245
|
def current_font
|
@@ -239,16 +267,106 @@ module PDF
|
|
239
267
|
dict ? dict[label] : nil
|
240
268
|
end
|
241
269
|
|
270
|
+
# when save_graphics_state is called, we need to push a new copy of the
|
271
|
+
# current state onto the stack. That way any modifications to the state
|
272
|
+
# will be undone once restore_graphics_state is called.
|
273
|
+
#
|
274
|
+
def stack_depth
|
275
|
+
@stack.size
|
276
|
+
end
|
277
|
+
|
278
|
+
# This returns a deep clone of the current state, ensuring changes are
|
279
|
+
# kept separate from earlier states.
|
280
|
+
#
|
281
|
+
# Marshal is used to round-trip the state through a string to easily
|
282
|
+
# perform the deep clone. Kinda hacky, but effective.
|
283
|
+
#
|
284
|
+
def clone_state
|
285
|
+
if @stack.empty?
|
286
|
+
{}
|
287
|
+
else
|
288
|
+
Marshal.load Marshal.dump(@stack.last)
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
# after each glyph is painted onto the page the text matrix must be
|
293
|
+
# modified. There's no defined operator for this, but depending on
|
294
|
+
# the use case some receivers may need to mutate the state with this
|
295
|
+
# while walking a page.
|
296
|
+
#
|
297
|
+
# NOTE: some of the variable names in this method are obscure because
|
298
|
+
# they mirror variable names from the PDF spec
|
299
|
+
#
|
300
|
+
# NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
|
301
|
+
#
|
302
|
+
# Arguments:
|
303
|
+
#
|
304
|
+
# w0 - the glyph width in *text space*. This generally means the width
|
305
|
+
# in glyph space should be divided by 1000 before being passed to
|
306
|
+
# this function
|
307
|
+
# tj - any kerning that should be applied to the text matrix before the
|
308
|
+
# following glyph is painted. This is usually the numeric arguments
|
309
|
+
# in the array passed to a TJ operator
|
310
|
+
# word_boundary - a boolean indicating if a word boundary was just
|
311
|
+
# reached. Depending on the current state extra space
|
312
|
+
# may need to be added
|
313
|
+
#
|
314
|
+
def process_glyph_displacement(w0, tj, word_boundary)
|
315
|
+
fs = font_size # font size
|
316
|
+
tc = state[:char_spacing]
|
317
|
+
if word_boundary
|
318
|
+
tw = state[:word_spacing]
|
319
|
+
else
|
320
|
+
tw = 0
|
321
|
+
end
|
322
|
+
th = state[:h_scaling]
|
323
|
+
# optimise the common path to reduce Float allocations
|
324
|
+
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
325
|
+
tx = w0 * fs
|
326
|
+
elsif tj != 0
|
327
|
+
# don't apply spacing to TJ displacement
|
328
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
329
|
+
else
|
330
|
+
# apply horizontal scaling to spacing values but not font size
|
331
|
+
tx = ((w0 * fs) + tc + tw) * th
|
332
|
+
end
|
333
|
+
|
334
|
+
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
335
|
+
# ctm[0] here, but this gets my tests green and I'm out of
|
336
|
+
# ideas for now
|
337
|
+
# TODO: support ty > 0
|
338
|
+
if ctm.a == 1 || ctm.a == 0
|
339
|
+
@text_matrix.horizontal_displacement_multiply!(tx)
|
340
|
+
else
|
341
|
+
@text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
|
342
|
+
end
|
343
|
+
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
344
|
+
end
|
345
|
+
|
242
346
|
private
|
243
347
|
|
348
|
+
# used for many and varied text positioning calculations. We potentially
|
349
|
+
# need to access the results of this method many times when working with
|
350
|
+
# text, so memoize it
|
351
|
+
#
|
244
352
|
def text_rendering_matrix
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
353
|
+
@text_rendering_matrix ||= begin
|
354
|
+
state_matrix = TransformationMatrix.new(
|
355
|
+
state[:text_font_size] * state[:h_scaling], 0,
|
356
|
+
0, state[:text_font_size],
|
357
|
+
0, state[:text_rise]
|
358
|
+
)
|
359
|
+
state_matrix.multiply!(
|
360
|
+
@text_matrix.a, @text_matrix.b,
|
361
|
+
@text_matrix.c, @text_matrix.d,
|
362
|
+
@text_matrix.e, @text_matrix.f
|
363
|
+
)
|
364
|
+
state_matrix.multiply!(
|
365
|
+
ctm.a, ctm.b,
|
366
|
+
ctm.c, ctm.d,
|
367
|
+
ctm.e, ctm.f
|
368
|
+
)
|
369
|
+
end
|
252
370
|
end
|
253
371
|
|
254
372
|
# return the current transformation matrix
|
@@ -271,25 +389,22 @@ module PDF
|
|
271
389
|
::Hash[wrapped_fonts]
|
272
390
|
end
|
273
391
|
|
274
|
-
|
275
|
-
#
|
276
|
-
|
277
|
-
|
278
|
-
# This
|
279
|
-
#
|
280
|
-
#
|
281
|
-
#
|
282
|
-
#
|
283
|
-
#
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
end
|
392
|
+
#####################################################
|
393
|
+
# Low-level Matrix Operations
|
394
|
+
#####################################################
|
395
|
+
|
396
|
+
# This class uses 3x3 matrices to represent geometric transformations
|
397
|
+
# These matrices are represented by arrays with 9 elements
|
398
|
+
# The array [a,b,c,d,e,f,g,h,i] would represent a matrix like:
|
399
|
+
# a b c
|
400
|
+
# d e f
|
401
|
+
# g h i
|
402
|
+
|
403
|
+
def identity_matrix
|
404
|
+
TransformationMatrix.new(1, 0,
|
405
|
+
0, 1,
|
406
|
+
0, 0)
|
290
407
|
end
|
291
408
|
|
292
409
|
end
|
293
|
-
end
|
294
410
|
end
|
295
|
-
|