pdf-reader 2.2.0 → 2.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +90 -0
- data/README.md +18 -3
- data/Rakefile +1 -1
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +91 -47
- data/lib/pdf/reader/cid_widths.rb +7 -4
- data/lib/pdf/reader/cmap.rb +83 -59
- data/lib/pdf/reader/encoding.rb +17 -14
- data/lib/pdf/reader/error.rb +15 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +12 -10
- data/lib/pdf/reader/filter/flate.rb +30 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +11 -11
- data/lib/pdf/reader/font.rb +89 -26
- data/lib/pdf/reader/font_descriptor.rb +22 -18
- data/lib/pdf/reader/form_xobject.rb +18 -5
- data/lib/pdf/reader/glyph_hash.rb +28 -13
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +28 -11
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +292 -63
- data/lib/pdf/reader/object_stream.rb +3 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
- data/lib/pdf/reader/page.rb +143 -16
- data/lib/pdf/reader/page_layout.rb +43 -39
- data/lib/pdf/reader/page_state.rb +26 -17
- data/lib/pdf/reader/page_text_receiver.rb +74 -4
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +34 -14
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +3 -1
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +3 -2
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +40 -5
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +8 -7
- data/lib/pdf/reader/type_check.rb +98 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
- data/lib/pdf/reader/width_calculator/composite.rb +6 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +37 -11
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +49 -24
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +2048 -0
- metadata +39 -23
- data/lib/pdf/hash.rb +0 -20
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -6,15 +7,17 @@ class PDF::Reader
|
|
6
7
|
class TextRun
|
7
8
|
include Comparable
|
8
9
|
|
9
|
-
attr_reader :
|
10
|
+
attr_reader :origin
|
11
|
+
attr_reader :width
|
12
|
+
attr_reader :font_size
|
13
|
+
attr_reader :text
|
10
14
|
|
11
15
|
alias :to_s :text
|
12
16
|
|
13
17
|
def initialize(x, y, width, font_size, text)
|
14
|
-
@
|
15
|
-
@y = y
|
18
|
+
@origin = PDF::Reader::Point.new(x, y)
|
16
19
|
@width = width
|
17
|
-
@font_size = font_size
|
20
|
+
@font_size = font_size
|
18
21
|
@text = text
|
19
22
|
end
|
20
23
|
|
@@ -34,8 +37,20 @@ class PDF::Reader
|
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
40
|
+
def x
|
41
|
+
@origin.x
|
42
|
+
end
|
43
|
+
|
44
|
+
def y
|
45
|
+
@origin.y
|
46
|
+
end
|
47
|
+
|
37
48
|
def endx
|
38
|
-
@endx ||= x + width
|
49
|
+
@endx ||= @origin.x + width
|
50
|
+
end
|
51
|
+
|
52
|
+
def endy
|
53
|
+
@endy ||= @origin.y + font_size
|
39
54
|
end
|
40
55
|
|
41
56
|
def mean_character_width
|
@@ -60,8 +75,28 @@ class PDF::Reader
|
|
60
75
|
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
|
61
76
|
end
|
62
77
|
|
78
|
+
def intersect?(other_run)
|
79
|
+
x <= other_run.endx && endx >= other_run.x &&
|
80
|
+
endy >= other_run.y && y <= other_run.endy
|
81
|
+
end
|
82
|
+
|
83
|
+
# return the fraction of this text run's area that is overlapped by another run (a ratio, not a 0-100 value)
|
84
|
+
def intersection_area_percent(other_run)
|
85
|
+
return 0 unless intersect?(other_run)
|
86
|
+
|
87
|
+
dx = [endx, other_run.endx].min - [x, other_run.x].max
|
88
|
+
dy = [endy, other_run.endy].min - [y, other_run.y].max
|
89
|
+
intersection_area = dx*dy
|
90
|
+
|
91
|
+
intersection_area.to_f / area
|
92
|
+
end
|
93
|
+
|
63
94
|
private
|
64
95
|
|
96
|
+
def area
|
97
|
+
(endx - x) * (endy - y)
|
98
|
+
end
|
99
|
+
|
65
100
|
def mergable_range
|
66
101
|
@mergable_range ||= Range.new(endx - 3, endx + font_size)
|
67
102
|
end
|
data/lib/pdf/reader/token.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -50,7 +51,7 @@ class PDF::Reader
|
|
50
51
|
# displacement to speed up processing documents that use vertical
|
51
52
|
# writing systems
|
52
53
|
#
|
53
|
-
def multiply!(a,b
|
54
|
+
def multiply!(a,b,c, d,e,f)
|
54
55
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
55
56
|
# the identity matrix, no effect
|
56
57
|
self
|
@@ -163,12 +164,12 @@ class PDF::Reader
|
|
163
164
|
# [ e f 1 ] [ e f 1 ]
|
164
165
|
#
|
165
166
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
166
|
-
newa = (@a * a2) + (@b * c2) + (
|
167
|
-
newb = (@a * b2) + (@b * d2) + (
|
168
|
-
newc = (@c * a2) + (@d * c2) + (
|
169
|
-
newd = (@c * b2) + (@d * d2) + (
|
170
|
-
newe = (@e * a2) + (@f * c2) + (
|
171
|
-
newf = (@e * b2) + (@f * d2) + (
|
167
|
+
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
|
+
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
169
|
+
newc = (@c * a2) + (@d * c2) + (e2 * 0)
|
170
|
+
newd = (@c * b2) + (@d * d2) + (f2 * 0)
|
171
|
+
newe = (@e * a2) + (@f * c2) + (e2 * 1)
|
172
|
+
newf = (@e * b2) + (@f * d2) + (f2 * 1)
|
172
173
|
@a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
|
173
174
|
end
|
174
175
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Cast untrusted input (usually parsed out of a PDF file) to a known type
|
9
|
+
#
|
10
|
+
class TypeCheck
|
11
|
+
|
12
|
+
def self.cast_to_int!(obj)
|
13
|
+
if obj.is_a?(Integer)
|
14
|
+
obj
|
15
|
+
elsif obj.nil?
|
16
|
+
0
|
17
|
+
elsif obj.respond_to?(:to_i)
|
18
|
+
obj.to_i
|
19
|
+
else
|
20
|
+
raise MalformedPDFError, "Unable to cast to integer"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.cast_to_numeric!(obj)
|
25
|
+
if obj.is_a?(Numeric)
|
26
|
+
obj
|
27
|
+
elsif obj.nil?
|
28
|
+
0
|
29
|
+
elsif obj.respond_to?(:to_f)
|
30
|
+
obj.to_f
|
31
|
+
elsif obj.respond_to?(:to_i)
|
32
|
+
obj.to_i
|
33
|
+
else
|
34
|
+
raise MalformedPDFError, "Unable to cast to numeric"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.cast_to_string!(string)
|
39
|
+
if string.is_a?(String)
|
40
|
+
string
|
41
|
+
elsif string.nil?
|
42
|
+
""
|
43
|
+
elsif string.respond_to?(:to_s)
|
44
|
+
string.to_s
|
45
|
+
else
|
46
|
+
raise MalformedPDFError, "Unable to cast to string"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.cast_to_symbol(obj)
|
51
|
+
if obj.is_a?(Symbol)
|
52
|
+
obj
|
53
|
+
elsif obj.nil?
|
54
|
+
nil
|
55
|
+
elsif obj.respond_to?(:to_sym)
|
56
|
+
obj.to_sym
|
57
|
+
else
|
58
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.cast_to_symbol!(obj)
|
63
|
+
res = cast_to_symbol(obj)
|
64
|
+
if res
|
65
|
+
res
|
66
|
+
else
|
67
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.cast_to_pdf_dict!(obj)
|
72
|
+
if obj.is_a?(Hash)
|
73
|
+
obj
|
74
|
+
elsif obj.respond_to?(:to_h)
|
75
|
+
obj.to_h
|
76
|
+
else
|
77
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.cast_to_pdf_dict_with_stream_values!(obj)
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
result = Hash.new
|
84
|
+
obj.each do |k, v|
|
85
|
+
raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
|
86
|
+
result[cast_to_symbol!(k)] = v
|
87
|
+
end
|
88
|
+
result
|
89
|
+
elsif obj.respond_to?(:to_h)
|
90
|
+
cast_to_pdf_dict_with_stream_values!(obj.to_h)
|
91
|
+
else
|
92
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
@@ -0,0 +1,262 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Page#walk will execute the content stream of a page, calling methods on a receiver class
|
9
|
+
# provided by the user. Each operator has a specific set of parameters it expects, and we
|
10
|
+
# wrap the user's receiver class in this one to verify the PDF uses valid parameters.
|
11
|
+
#
|
12
|
+
# Without these checks, users can't be confident about the number of parameters they'll receive
|
13
|
+
# for an operator, or what the type of those parameters will be. Everyone ends up building their
|
14
|
+
# own type safety guard clauses and it's tedious.
|
15
|
+
#
|
16
|
+
# Not all operators have type safety implemented yet, but we can expand the number over time.
|
17
|
+
class ValidatingReceiver
|
18
|
+
|
19
|
+
def initialize(wrapped)
|
20
|
+
@wrapped = wrapped
|
21
|
+
end
|
22
|
+
|
23
|
+
def page=(page)
|
24
|
+
call_wrapped(:page=, page)
|
25
|
+
end
|
26
|
+
|
27
|
+
#####################################################
|
28
|
+
# Graphics State Operators
|
29
|
+
#####################################################
|
30
|
+
def save_graphics_state(*args)
|
31
|
+
call_wrapped(:save_graphics_state)
|
32
|
+
end
|
33
|
+
|
34
|
+
def restore_graphics_state(*args)
|
35
|
+
call_wrapped(:restore_graphics_state)
|
36
|
+
end
|
37
|
+
|
38
|
+
#####################################################
|
39
|
+
# Matrix Operators
|
40
|
+
#####################################################
|
41
|
+
|
42
|
+
def concatenate_matrix(*args)
|
43
|
+
a, b, c, d, e, f = *args
|
44
|
+
call_wrapped(
|
45
|
+
:concatenate_matrix,
|
46
|
+
TypeCheck.cast_to_numeric!(a),
|
47
|
+
TypeCheck.cast_to_numeric!(b),
|
48
|
+
TypeCheck.cast_to_numeric!(c),
|
49
|
+
TypeCheck.cast_to_numeric!(d),
|
50
|
+
TypeCheck.cast_to_numeric!(e),
|
51
|
+
TypeCheck.cast_to_numeric!(f),
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
#####################################################
|
56
|
+
# Text Object Operators
|
57
|
+
#####################################################
|
58
|
+
|
59
|
+
def begin_text_object(*args)
|
60
|
+
call_wrapped(:begin_text_object)
|
61
|
+
end
|
62
|
+
|
63
|
+
def end_text_object(*args)
|
64
|
+
call_wrapped(:end_text_object)
|
65
|
+
end
|
66
|
+
|
67
|
+
#####################################################
|
68
|
+
# Text State Operators
|
69
|
+
#####################################################
|
70
|
+
def set_character_spacing(*args)
|
71
|
+
char_spacing, _ = *args
|
72
|
+
call_wrapped(
|
73
|
+
:set_character_spacing,
|
74
|
+
TypeCheck.cast_to_numeric!(char_spacing)
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def set_horizontal_text_scaling(*args)
|
79
|
+
h_scaling, _ = *args
|
80
|
+
call_wrapped(
|
81
|
+
:set_horizontal_text_scaling,
|
82
|
+
TypeCheck.cast_to_numeric!(h_scaling)
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
def set_text_font_and_size(*args)
|
87
|
+
label, size, _ = *args
|
88
|
+
call_wrapped(
|
89
|
+
:set_text_font_and_size,
|
90
|
+
TypeCheck.cast_to_symbol(label),
|
91
|
+
TypeCheck.cast_to_numeric!(size)
|
92
|
+
)
|
93
|
+
end
|
94
|
+
|
95
|
+
def set_text_leading(*args)
|
96
|
+
leading, _ = *args
|
97
|
+
call_wrapped(
|
98
|
+
:set_text_leading,
|
99
|
+
TypeCheck.cast_to_numeric!(leading)
|
100
|
+
)
|
101
|
+
end
|
102
|
+
|
103
|
+
def set_text_rendering_mode(*args)
|
104
|
+
mode, _ = *args
|
105
|
+
call_wrapped(
|
106
|
+
:set_text_rendering_mode,
|
107
|
+
TypeCheck.cast_to_numeric!(mode)
|
108
|
+
)
|
109
|
+
end
|
110
|
+
|
111
|
+
def set_text_rise(*args)
|
112
|
+
rise, _ = *args
|
113
|
+
call_wrapped(
|
114
|
+
:set_text_rise,
|
115
|
+
TypeCheck.cast_to_numeric!(rise)
|
116
|
+
)
|
117
|
+
end
|
118
|
+
|
119
|
+
def set_word_spacing(*args)
|
120
|
+
word_spacing, _ = *args
|
121
|
+
call_wrapped(
|
122
|
+
:set_word_spacing,
|
123
|
+
TypeCheck.cast_to_numeric!(word_spacing)
|
124
|
+
)
|
125
|
+
end
|
126
|
+
|
127
|
+
#####################################################
|
128
|
+
# Text Positioning Operators
|
129
|
+
#####################################################
|
130
|
+
|
131
|
+
def move_text_position(*args) # Td
|
132
|
+
x, y, _ = *args
|
133
|
+
call_wrapped(
|
134
|
+
:move_text_position,
|
135
|
+
TypeCheck.cast_to_numeric!(x),
|
136
|
+
TypeCheck.cast_to_numeric!(y)
|
137
|
+
)
|
138
|
+
end
|
139
|
+
|
140
|
+
def move_text_position_and_set_leading(*args) # TD
|
141
|
+
x, y, _ = *args
|
142
|
+
call_wrapped(
|
143
|
+
:move_text_position_and_set_leading,
|
144
|
+
TypeCheck.cast_to_numeric!(x),
|
145
|
+
TypeCheck.cast_to_numeric!(y)
|
146
|
+
)
|
147
|
+
end
|
148
|
+
|
149
|
+
def set_text_matrix_and_text_line_matrix(*args) # Tm
|
150
|
+
a, b, c, d, e, f = *args
|
151
|
+
call_wrapped(
|
152
|
+
:set_text_matrix_and_text_line_matrix,
|
153
|
+
TypeCheck.cast_to_numeric!(a),
|
154
|
+
TypeCheck.cast_to_numeric!(b),
|
155
|
+
TypeCheck.cast_to_numeric!(c),
|
156
|
+
TypeCheck.cast_to_numeric!(d),
|
157
|
+
TypeCheck.cast_to_numeric!(e),
|
158
|
+
TypeCheck.cast_to_numeric!(f),
|
159
|
+
)
|
160
|
+
end
|
161
|
+
|
162
|
+
def move_to_start_of_next_line(*args) # T*
|
163
|
+
call_wrapped(:move_to_start_of_next_line)
|
164
|
+
end
|
165
|
+
|
166
|
+
#####################################################
|
167
|
+
# Text Showing Operators
|
168
|
+
#####################################################
|
169
|
+
def show_text(*args) # Tj (AWAY)
|
170
|
+
string, _ = *args
|
171
|
+
call_wrapped(
|
172
|
+
:show_text,
|
173
|
+
TypeCheck.cast_to_string!(string)
|
174
|
+
)
|
175
|
+
end
|
176
|
+
|
177
|
+
def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
|
178
|
+
params, _ = *args
|
179
|
+
unless params.is_a?(Array)
|
180
|
+
raise MalformedPDFError, "TJ operator expects a single Array argument"
|
181
|
+
end
|
182
|
+
|
183
|
+
call_wrapped(
|
184
|
+
:show_text_with_positioning,
|
185
|
+
params
|
186
|
+
)
|
187
|
+
end
|
188
|
+
|
189
|
+
def move_to_next_line_and_show_text(*args) # '
|
190
|
+
string, _ = *args
|
191
|
+
call_wrapped(
|
192
|
+
:move_to_next_line_and_show_text,
|
193
|
+
TypeCheck.cast_to_string!(string)
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
def set_spacing_next_line_show_text(*args) # "
|
198
|
+
aw, ac, string = *args
|
199
|
+
call_wrapped(
|
200
|
+
:set_spacing_next_line_show_text,
|
201
|
+
TypeCheck.cast_to_numeric!(aw),
|
202
|
+
TypeCheck.cast_to_numeric!(ac),
|
203
|
+
TypeCheck.cast_to_string!(string)
|
204
|
+
)
|
205
|
+
end
|
206
|
+
|
207
|
+
#####################################################
|
208
|
+
# Form XObject Operators
|
209
|
+
#####################################################
|
210
|
+
|
211
|
+
def invoke_xobject(*args)
|
212
|
+
label, _ = *args
|
213
|
+
|
214
|
+
call_wrapped(
|
215
|
+
:invoke_xobject,
|
216
|
+
TypeCheck.cast_to_symbol(label)
|
217
|
+
)
|
218
|
+
end
|
219
|
+
|
220
|
+
#####################################################
|
221
|
+
# Inline Image Operators
|
222
|
+
#####################################################
|
223
|
+
|
224
|
+
def begin_inline_image(*args)
|
225
|
+
call_wrapped(:begin_inline_image)
|
226
|
+
end
|
227
|
+
|
228
|
+
def begin_inline_image_data(*args)
|
229
|
+
# We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
|
230
|
+
# number of elements
|
231
|
+
@wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
|
232
|
+
end
|
233
|
+
|
234
|
+
def end_inline_image(*args)
|
235
|
+
data, _ = *args
|
236
|
+
|
237
|
+
call_wrapped(
|
238
|
+
:end_inline_image,
|
239
|
+
TypeCheck.cast_to_string!(data)
|
240
|
+
)
|
241
|
+
end
|
242
|
+
|
243
|
+
#####################################################
|
244
|
+
# Final safety net for any operators that don't have type checking enabled yet
|
245
|
+
#####################################################
|
246
|
+
|
247
|
+
def respond_to?(meth)
|
248
|
+
@wrapped.respond_to?(meth)
|
249
|
+
end
|
250
|
+
|
251
|
+
def method_missing(methodname, *args)
|
252
|
+
@wrapped.send(methodname, *args)
|
253
|
+
end
|
254
|
+
|
255
|
+
private
|
256
|
+
|
257
|
+
def call_wrapped(methodname, *args)
|
258
|
+
@wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'afm'
|
@@ -12,11 +13,20 @@ class PDF::Reader
|
|
12
13
|
# see Section 9.6.2.2, PDF 32000-1:2008, pp 256
|
13
14
|
class BuiltIn
|
14
15
|
|
16
|
+
BUILTINS = [
|
17
|
+
:Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
|
18
|
+
:Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
|
19
|
+
:Symbol,
|
20
|
+
:"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
|
21
|
+
:ZapfDingbats
|
22
|
+
]
|
23
|
+
|
15
24
|
def initialize(font)
|
16
25
|
@font = font
|
17
26
|
@@all_metrics ||= PDF::Reader::SynchronizedCache.new
|
18
27
|
|
19
|
-
|
28
|
+
basefont = extract_basefont(font.basefont)
|
29
|
+
metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
|
20
30
|
|
21
31
|
if File.file?(metrics_path)
|
22
32
|
@metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
|
@@ -28,32 +38,32 @@ class PDF::Reader
|
|
28
38
|
def glyph_width(code_point)
|
29
39
|
return 0 if code_point.nil? || code_point < 0
|
30
40
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
m = names.map { |name|
|
36
|
-
@metrics.char_metrics[name.to_s]
|
37
|
-
}.compact.first
|
38
|
-
end
|
41
|
+
names = @font.encoding.int_to_name(code_point)
|
42
|
+
metrics = names.map { |name|
|
43
|
+
@metrics.char_metrics[name.to_s]
|
44
|
+
}.compact.first
|
39
45
|
|
40
|
-
if
|
41
|
-
|
42
|
-
elsif @font.widths[code_point - 1]
|
43
|
-
@font.widths[code_point - 1]
|
44
|
-
elsif control_character?(code_point)
|
45
|
-
0
|
46
|
+
if metrics
|
47
|
+
metrics[:wx]
|
46
48
|
else
|
47
|
-
0
|
49
|
+
@font.widths[code_point - 1] || 0
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
51
53
|
private
|
52
54
|
|
53
55
|
def control_character?(code_point)
|
54
|
-
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
56
|
+
match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
57
|
+
match ? true : false
|
55
58
|
end
|
56
59
|
|
60
|
+
def extract_basefont(font_name)
|
61
|
+
if BUILTINS.include?(font_name)
|
62
|
+
font_name.to_s
|
63
|
+
else
|
64
|
+
"Times-Roman"
|
65
|
+
end
|
66
|
+
end
|
57
67
|
end
|
58
68
|
end
|
59
69
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -21,7 +22,11 @@ class PDF::Reader
|
|
21
22
|
|
22
23
|
w = @widths[code_point]
|
23
24
|
# 0 is a valid width
|
24
|
-
|
25
|
+
if w
|
26
|
+
w.to_f
|
27
|
+
else
|
28
|
+
0
|
29
|
+
end
|
25
30
|
end
|
26
31
|
end
|
27
32
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -9,8 +10,8 @@ class PDF::Reader
|
|
9
10
|
def initialize(font)
|
10
11
|
@font = font
|
11
12
|
|
12
|
-
if @font.font_descriptor
|
13
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
14
15
|
else
|
15
16
|
@missing_width = 0
|
16
17
|
end
|
@@ -29,25 +30,23 @@ class PDF::Reader
|
|
29
30
|
|
30
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
31
32
|
# which is undesirable in this case.
|
32
|
-
|
33
|
-
|
33
|
+
first_char = @font.first_char
|
34
|
+
if first_char && first_char <= code_point
|
35
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
34
36
|
else
|
35
37
|
@missing_width.to_f
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
41
|
def glyph_width_from_descriptor(code_point)
|
40
|
-
return unless @font.font_descriptor
|
41
|
-
|
42
42
|
# true type fonts will have most of their information contained
|
43
43
|
# within a program inside the font descriptor, however the widths
|
44
44
|
# may not be in standard PDF glyph widths (1000 units => 1 text space unit)
|
45
45
|
# so this width will need to be scaled
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
nil
|
46
|
+
if fd = @font.font_descriptor
|
47
|
+
if w = fd.glyph_width(code_point)
|
48
|
+
w.to_f * fd.glyph_to_pdf_scale_factor.to_f
|
49
|
+
end
|
51
50
|
end
|
52
51
|
end
|
53
52
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -9,8 +10,8 @@ class PDF::Reader
|
|
9
10
|
def initialize(font)
|
10
11
|
@font = font
|
11
12
|
|
12
|
-
if @font.font_descriptor
|
13
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
14
15
|
else
|
15
16
|
@missing_width = 0
|
16
17
|
end
|
@@ -22,8 +23,9 @@ class PDF::Reader
|
|
22
23
|
|
23
24
|
# in ruby a negative index is valid, and will go from the end of the array
|
24
25
|
# which is undesirable in this case.
|
25
|
-
|
26
|
-
|
26
|
+
first_char = @font.first_char
|
27
|
+
if first_char && first_char <= code_point
|
28
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
27
29
|
else
|
28
30
|
@missing_width.to_f
|
29
31
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -12,13 +13,16 @@ class PDF::Reader
|
|
12
13
|
|
13
14
|
def initialize(font)
|
14
15
|
@font = font
|
15
|
-
@descendant_font = @font.descendantfonts.first
|
16
16
|
end
|
17
17
|
|
18
18
|
def glyph_width(code_point)
|
19
19
|
return 0 if code_point.nil? || code_point < 0
|
20
20
|
|
21
|
-
@
|
21
|
+
if descendant_font = @font.descendantfonts.first
|
22
|
+
descendant_font.glyph_width(code_point).to_f
|
23
|
+
else
|
24
|
+
0
|
25
|
+
end
|
22
26
|
end
|
23
27
|
end
|
24
28
|
end
|