pdf-reader 2.2.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +90 -0
- data/README.md +18 -3
- data/Rakefile +1 -1
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +91 -47
- data/lib/pdf/reader/cid_widths.rb +7 -4
- data/lib/pdf/reader/cmap.rb +83 -59
- data/lib/pdf/reader/encoding.rb +17 -14
- data/lib/pdf/reader/error.rb +15 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +12 -10
- data/lib/pdf/reader/filter/flate.rb +30 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +11 -11
- data/lib/pdf/reader/font.rb +89 -26
- data/lib/pdf/reader/font_descriptor.rb +22 -18
- data/lib/pdf/reader/form_xobject.rb +18 -5
- data/lib/pdf/reader/glyph_hash.rb +28 -13
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +28 -11
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +292 -63
- data/lib/pdf/reader/object_stream.rb +3 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
- data/lib/pdf/reader/page.rb +143 -16
- data/lib/pdf/reader/page_layout.rb +43 -39
- data/lib/pdf/reader/page_state.rb +26 -17
- data/lib/pdf/reader/page_text_receiver.rb +74 -4
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +34 -14
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +3 -1
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +3 -2
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +40 -5
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +8 -7
- data/lib/pdf/reader/type_check.rb +98 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
- data/lib/pdf/reader/width_calculator/composite.rb +6 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +37 -11
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +49 -24
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +2048 -0
- metadata +39 -23
- data/lib/pdf/hash.rb +0 -20
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -6,15 +7,17 @@ class PDF::Reader
|
|
6
7
|
class TextRun
|
7
8
|
include Comparable
|
8
9
|
|
9
|
-
attr_reader :
|
10
|
+
attr_reader :origin
|
11
|
+
attr_reader :width
|
12
|
+
attr_reader :font_size
|
13
|
+
attr_reader :text
|
10
14
|
|
11
15
|
alias :to_s :text
|
12
16
|
|
13
17
|
def initialize(x, y, width, font_size, text)
|
14
|
-
@
|
15
|
-
@y = y
|
18
|
+
@origin = PDF::Reader::Point.new(x, y)
|
16
19
|
@width = width
|
17
|
-
@font_size = font_size
|
20
|
+
@font_size = font_size
|
18
21
|
@text = text
|
19
22
|
end
|
20
23
|
|
@@ -34,8 +37,20 @@ class PDF::Reader
|
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
40
|
+
def x
|
41
|
+
@origin.x
|
42
|
+
end
|
43
|
+
|
44
|
+
def y
|
45
|
+
@origin.y
|
46
|
+
end
|
47
|
+
|
37
48
|
def endx
|
38
|
-
@endx ||= x + width
|
49
|
+
@endx ||= @origin.x + width
|
50
|
+
end
|
51
|
+
|
52
|
+
def endy
|
53
|
+
@endy ||= @origin.y + font_size
|
39
54
|
end
|
40
55
|
|
41
56
|
def mean_character_width
|
@@ -60,8 +75,28 @@ class PDF::Reader
|
|
60
75
|
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
|
61
76
|
end
|
62
77
|
|
78
|
+
def intersect?(other_run)
|
79
|
+
x <= other_run.endx && endx >= other_run.x &&
|
80
|
+
endy >= other_run.y && y <= other_run.endy
|
81
|
+
end
|
82
|
+
|
83
|
+
# return what percentage of this text run is overlapped by another run
|
84
|
+
def intersection_area_percent(other_run)
|
85
|
+
return 0 unless intersect?(other_run)
|
86
|
+
|
87
|
+
dx = [endx, other_run.endx].min - [x, other_run.x].max
|
88
|
+
dy = [endy, other_run.endy].min - [y, other_run.y].max
|
89
|
+
intersection_area = dx*dy
|
90
|
+
|
91
|
+
intersection_area.to_f / area
|
92
|
+
end
|
93
|
+
|
63
94
|
private
|
64
95
|
|
96
|
+
def area
|
97
|
+
(endx - x) * (endy - y)
|
98
|
+
end
|
99
|
+
|
65
100
|
def mergable_range
|
66
101
|
@mergable_range ||= Range.new(endx - 3, endx + font_size)
|
67
102
|
end
|
data/lib/pdf/reader/token.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -50,7 +51,7 @@ class PDF::Reader
|
|
50
51
|
# displacement to speed up processing documents that use vertical
|
51
52
|
# writing systems
|
52
53
|
#
|
53
|
-
def multiply!(a,b
|
54
|
+
def multiply!(a,b,c, d,e,f)
|
54
55
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
55
56
|
# the identity matrix, no effect
|
56
57
|
self
|
@@ -163,12 +164,12 @@ class PDF::Reader
|
|
163
164
|
# [ e f 1 ] [ e f 1 ]
|
164
165
|
#
|
165
166
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
166
|
-
newa = (@a * a2) + (@b * c2) + (
|
167
|
-
newb = (@a * b2) + (@b * d2) + (
|
168
|
-
newc = (@c * a2) + (@d * c2) + (
|
169
|
-
newd = (@c * b2) + (@d * d2) + (
|
170
|
-
newe = (@e * a2) + (@f * c2) + (
|
171
|
-
newf = (@e * b2) + (@f * d2) + (
|
167
|
+
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
|
+
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
169
|
+
newc = (@c * a2) + (@d * c2) + (e2 * 0)
|
170
|
+
newd = (@c * b2) + (@d * d2) + (f2 * 0)
|
171
|
+
newe = (@e * a2) + (@f * c2) + (e2 * 1)
|
172
|
+
newf = (@e * b2) + (@f * d2) + (f2 * 1)
|
172
173
|
@a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
|
173
174
|
end
|
174
175
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Cast untrusted input (usually parsed out of a PDF file) to a known type
|
9
|
+
#
|
10
|
+
class TypeCheck
|
11
|
+
|
12
|
+
def self.cast_to_int!(obj)
|
13
|
+
if obj.is_a?(Integer)
|
14
|
+
obj
|
15
|
+
elsif obj.nil?
|
16
|
+
0
|
17
|
+
elsif obj.respond_to?(:to_i)
|
18
|
+
obj.to_i
|
19
|
+
else
|
20
|
+
raise MalformedPDFError, "Unable to cast to integer"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.cast_to_numeric!(obj)
|
25
|
+
if obj.is_a?(Numeric)
|
26
|
+
obj
|
27
|
+
elsif obj.nil?
|
28
|
+
0
|
29
|
+
elsif obj.respond_to?(:to_f)
|
30
|
+
obj.to_f
|
31
|
+
elsif obj.respond_to?(:to_i)
|
32
|
+
obj.to_i
|
33
|
+
else
|
34
|
+
raise MalformedPDFError, "Unable to cast to numeric"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.cast_to_string!(string)
|
39
|
+
if string.is_a?(String)
|
40
|
+
string
|
41
|
+
elsif string.nil?
|
42
|
+
""
|
43
|
+
elsif string.respond_to?(:to_s)
|
44
|
+
string.to_s
|
45
|
+
else
|
46
|
+
raise MalformedPDFError, "Unable to cast to string"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.cast_to_symbol(obj)
|
51
|
+
if obj.is_a?(Symbol)
|
52
|
+
obj
|
53
|
+
elsif obj.nil?
|
54
|
+
nil
|
55
|
+
elsif obj.respond_to?(:to_sym)
|
56
|
+
obj.to_sym
|
57
|
+
else
|
58
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.cast_to_symbol!(obj)
|
63
|
+
res = cast_to_symbol(obj)
|
64
|
+
if res
|
65
|
+
res
|
66
|
+
else
|
67
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.cast_to_pdf_dict!(obj)
|
72
|
+
if obj.is_a?(Hash)
|
73
|
+
obj
|
74
|
+
elsif obj.respond_to?(:to_h)
|
75
|
+
obj.to_h
|
76
|
+
else
|
77
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.cast_to_pdf_dict_with_stream_values!(obj)
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
result = Hash.new
|
84
|
+
obj.each do |k, v|
|
85
|
+
raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
|
86
|
+
result[cast_to_symbol!(k)] = v
|
87
|
+
end
|
88
|
+
result
|
89
|
+
elsif obj.respond_to?(:to_h)
|
90
|
+
cast_to_pdf_dict_with_stream_values!(obj.to_h)
|
91
|
+
else
|
92
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
@@ -0,0 +1,262 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Page#walk will execute the content stream of a page, calling methods on a receiver class
|
9
|
+
# provided by the user. Each operator has a specific set of parameters it expects, and we
|
10
|
+
# wrap the users receiver class in this one to verify the PDF uses valid parameters.
|
11
|
+
#
|
12
|
+
# Without these checks, users can't be confident about the number of parameters they'll receive
|
13
|
+
# for an operator, or what the type of those parameters will be. Everyone ends up building their
|
14
|
+
# own type safety guard clauses and it's tedious.
|
15
|
+
#
|
16
|
+
# Not all operators have type safety implemented yet, but we can expand the number over time.
|
17
|
+
class ValidatingReceiver
|
18
|
+
|
19
|
+
def initialize(wrapped)
|
20
|
+
@wrapped = wrapped
|
21
|
+
end
|
22
|
+
|
23
|
+
def page=(page)
|
24
|
+
call_wrapped(:page=, page)
|
25
|
+
end
|
26
|
+
|
27
|
+
#####################################################
|
28
|
+
# Graphics State Operators
|
29
|
+
#####################################################
|
30
|
+
def save_graphics_state(*args)
|
31
|
+
call_wrapped(:save_graphics_state)
|
32
|
+
end
|
33
|
+
|
34
|
+
def restore_graphics_state(*args)
|
35
|
+
call_wrapped(:restore_graphics_state)
|
36
|
+
end
|
37
|
+
|
38
|
+
#####################################################
|
39
|
+
# Matrix Operators
|
40
|
+
#####################################################
|
41
|
+
|
42
|
+
def concatenate_matrix(*args)
|
43
|
+
a, b, c, d, e, f = *args
|
44
|
+
call_wrapped(
|
45
|
+
:concatenate_matrix,
|
46
|
+
TypeCheck.cast_to_numeric!(a),
|
47
|
+
TypeCheck.cast_to_numeric!(b),
|
48
|
+
TypeCheck.cast_to_numeric!(c),
|
49
|
+
TypeCheck.cast_to_numeric!(d),
|
50
|
+
TypeCheck.cast_to_numeric!(e),
|
51
|
+
TypeCheck.cast_to_numeric!(f),
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
#####################################################
|
56
|
+
# Text Object Operators
|
57
|
+
#####################################################
|
58
|
+
|
59
|
+
def begin_text_object(*args)
|
60
|
+
call_wrapped(:begin_text_object)
|
61
|
+
end
|
62
|
+
|
63
|
+
def end_text_object(*args)
|
64
|
+
call_wrapped(:end_text_object)
|
65
|
+
end
|
66
|
+
|
67
|
+
#####################################################
|
68
|
+
# Text State Operators
|
69
|
+
#####################################################
|
70
|
+
def set_character_spacing(*args)
|
71
|
+
char_spacing, _ = *args
|
72
|
+
call_wrapped(
|
73
|
+
:set_character_spacing,
|
74
|
+
TypeCheck.cast_to_numeric!(char_spacing)
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def set_horizontal_text_scaling(*args)
|
79
|
+
h_scaling, _ = *args
|
80
|
+
call_wrapped(
|
81
|
+
:set_horizontal_text_scaling,
|
82
|
+
TypeCheck.cast_to_numeric!(h_scaling)
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
def set_text_font_and_size(*args)
|
87
|
+
label, size, _ = *args
|
88
|
+
call_wrapped(
|
89
|
+
:set_text_font_and_size,
|
90
|
+
TypeCheck.cast_to_symbol(label),
|
91
|
+
TypeCheck.cast_to_numeric!(size)
|
92
|
+
)
|
93
|
+
end
|
94
|
+
|
95
|
+
def set_text_leading(*args)
|
96
|
+
leading, _ = *args
|
97
|
+
call_wrapped(
|
98
|
+
:set_text_leading,
|
99
|
+
TypeCheck.cast_to_numeric!(leading)
|
100
|
+
)
|
101
|
+
end
|
102
|
+
|
103
|
+
def set_text_rendering_mode(*args)
|
104
|
+
mode, _ = *args
|
105
|
+
call_wrapped(
|
106
|
+
:set_text_rendering_mode,
|
107
|
+
TypeCheck.cast_to_numeric!(mode)
|
108
|
+
)
|
109
|
+
end
|
110
|
+
|
111
|
+
def set_text_rise(*args)
|
112
|
+
rise, _ = *args
|
113
|
+
call_wrapped(
|
114
|
+
:set_text_rise,
|
115
|
+
TypeCheck.cast_to_numeric!(rise)
|
116
|
+
)
|
117
|
+
end
|
118
|
+
|
119
|
+
def set_word_spacing(*args)
|
120
|
+
word_spacing, _ = *args
|
121
|
+
call_wrapped(
|
122
|
+
:set_word_spacing,
|
123
|
+
TypeCheck.cast_to_numeric!(word_spacing)
|
124
|
+
)
|
125
|
+
end
|
126
|
+
|
127
|
+
#####################################################
|
128
|
+
# Text Positioning Operators
|
129
|
+
#####################################################
|
130
|
+
|
131
|
+
def move_text_position(*args) # Td
|
132
|
+
x, y, _ = *args
|
133
|
+
call_wrapped(
|
134
|
+
:move_text_position,
|
135
|
+
TypeCheck.cast_to_numeric!(x),
|
136
|
+
TypeCheck.cast_to_numeric!(y)
|
137
|
+
)
|
138
|
+
end
|
139
|
+
|
140
|
+
def move_text_position_and_set_leading(*args) # TD
|
141
|
+
x, y, _ = *args
|
142
|
+
call_wrapped(
|
143
|
+
:move_text_position_and_set_leading,
|
144
|
+
TypeCheck.cast_to_numeric!(x),
|
145
|
+
TypeCheck.cast_to_numeric!(y)
|
146
|
+
)
|
147
|
+
end
|
148
|
+
|
149
|
+
def set_text_matrix_and_text_line_matrix(*args) # Tm
|
150
|
+
a, b, c, d, e, f = *args
|
151
|
+
call_wrapped(
|
152
|
+
:set_text_matrix_and_text_line_matrix,
|
153
|
+
TypeCheck.cast_to_numeric!(a),
|
154
|
+
TypeCheck.cast_to_numeric!(b),
|
155
|
+
TypeCheck.cast_to_numeric!(c),
|
156
|
+
TypeCheck.cast_to_numeric!(d),
|
157
|
+
TypeCheck.cast_to_numeric!(e),
|
158
|
+
TypeCheck.cast_to_numeric!(f),
|
159
|
+
)
|
160
|
+
end
|
161
|
+
|
162
|
+
def move_to_start_of_next_line(*args) # T*
|
163
|
+
call_wrapped(:move_to_start_of_next_line)
|
164
|
+
end
|
165
|
+
|
166
|
+
#####################################################
|
167
|
+
# Text Showing Operators
|
168
|
+
#####################################################
|
169
|
+
def show_text(*args) # Tj (AWAY)
|
170
|
+
string, _ = *args
|
171
|
+
call_wrapped(
|
172
|
+
:show_text,
|
173
|
+
TypeCheck.cast_to_string!(string)
|
174
|
+
)
|
175
|
+
end
|
176
|
+
|
177
|
+
def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
|
178
|
+
params, _ = *args
|
179
|
+
unless params.is_a?(Array)
|
180
|
+
raise MalformedPDFError, "TJ operator expects a single Array argument"
|
181
|
+
end
|
182
|
+
|
183
|
+
call_wrapped(
|
184
|
+
:show_text_with_positioning,
|
185
|
+
params
|
186
|
+
)
|
187
|
+
end
|
188
|
+
|
189
|
+
def move_to_next_line_and_show_text(*args) # '
|
190
|
+
string, _ = *args
|
191
|
+
call_wrapped(
|
192
|
+
:move_to_next_line_and_show_text,
|
193
|
+
TypeCheck.cast_to_string!(string)
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
def set_spacing_next_line_show_text(*args) # "
|
198
|
+
aw, ac, string = *args
|
199
|
+
call_wrapped(
|
200
|
+
:set_spacing_next_line_show_text,
|
201
|
+
TypeCheck.cast_to_numeric!(aw),
|
202
|
+
TypeCheck.cast_to_numeric!(ac),
|
203
|
+
TypeCheck.cast_to_string!(string)
|
204
|
+
)
|
205
|
+
end
|
206
|
+
|
207
|
+
#####################################################
|
208
|
+
# Form XObject Operators
|
209
|
+
#####################################################
|
210
|
+
|
211
|
+
def invoke_xobject(*args)
|
212
|
+
label, _ = *args
|
213
|
+
|
214
|
+
call_wrapped(
|
215
|
+
:invoke_xobject,
|
216
|
+
TypeCheck.cast_to_symbol(label)
|
217
|
+
)
|
218
|
+
end
|
219
|
+
|
220
|
+
#####################################################
|
221
|
+
# Inline Image Operators
|
222
|
+
#####################################################
|
223
|
+
|
224
|
+
def begin_inline_image(*args)
|
225
|
+
call_wrapped(:begin_inline_image)
|
226
|
+
end
|
227
|
+
|
228
|
+
def begin_inline_image_data(*args)
|
229
|
+
# We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
|
230
|
+
# number of elements
|
231
|
+
@wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
|
232
|
+
end
|
233
|
+
|
234
|
+
def end_inline_image(*args)
|
235
|
+
data, _ = *args
|
236
|
+
|
237
|
+
call_wrapped(
|
238
|
+
:end_inline_image,
|
239
|
+
TypeCheck.cast_to_string!(data)
|
240
|
+
)
|
241
|
+
end
|
242
|
+
|
243
|
+
#####################################################
|
244
|
+
# Final safety net for any operators that don't have type checking enabled yet
|
245
|
+
#####################################################
|
246
|
+
|
247
|
+
def respond_to?(meth)
|
248
|
+
@wrapped.respond_to?(meth)
|
249
|
+
end
|
250
|
+
|
251
|
+
def method_missing(methodname, *args)
|
252
|
+
@wrapped.send(methodname, *args)
|
253
|
+
end
|
254
|
+
|
255
|
+
private
|
256
|
+
|
257
|
+
def call_wrapped(methodname, *args)
|
258
|
+
@wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'afm'
|
@@ -12,11 +13,20 @@ class PDF::Reader
|
|
12
13
|
# see Section 9.6.2.2, PDF 32000-1:2008, pp 256
|
13
14
|
class BuiltIn
|
14
15
|
|
16
|
+
BUILTINS = [
|
17
|
+
:Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
|
18
|
+
:Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
|
19
|
+
:Symbol,
|
20
|
+
:"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
|
21
|
+
:ZapfDingbats
|
22
|
+
]
|
23
|
+
|
15
24
|
def initialize(font)
|
16
25
|
@font = font
|
17
26
|
@@all_metrics ||= PDF::Reader::SynchronizedCache.new
|
18
27
|
|
19
|
-
|
28
|
+
basefont = extract_basefont(font.basefont)
|
29
|
+
metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
|
20
30
|
|
21
31
|
if File.file?(metrics_path)
|
22
32
|
@metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
|
@@ -28,32 +38,32 @@ class PDF::Reader
|
|
28
38
|
def glyph_width(code_point)
|
29
39
|
return 0 if code_point.nil? || code_point < 0
|
30
40
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
m = names.map { |name|
|
36
|
-
@metrics.char_metrics[name.to_s]
|
37
|
-
}.compact.first
|
38
|
-
end
|
41
|
+
names = @font.encoding.int_to_name(code_point)
|
42
|
+
metrics = names.map { |name|
|
43
|
+
@metrics.char_metrics[name.to_s]
|
44
|
+
}.compact.first
|
39
45
|
|
40
|
-
if
|
41
|
-
|
42
|
-
elsif @font.widths[code_point - 1]
|
43
|
-
@font.widths[code_point - 1]
|
44
|
-
elsif control_character?(code_point)
|
45
|
-
0
|
46
|
+
if metrics
|
47
|
+
metrics[:wx]
|
46
48
|
else
|
47
|
-
0
|
49
|
+
@font.widths[code_point - 1] || 0
|
48
50
|
end
|
49
51
|
end
|
50
52
|
|
51
53
|
private
|
52
54
|
|
53
55
|
def control_character?(code_point)
|
54
|
-
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
56
|
+
match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
57
|
+
match ? true : false
|
55
58
|
end
|
56
59
|
|
60
|
+
def extract_basefont(font_name)
|
61
|
+
if BUILTINS.include?(font_name)
|
62
|
+
font_name.to_s
|
63
|
+
else
|
64
|
+
"Times-Roman"
|
65
|
+
end
|
66
|
+
end
|
57
67
|
end
|
58
68
|
end
|
59
69
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -21,7 +22,11 @@ class PDF::Reader
|
|
21
22
|
|
22
23
|
w = @widths[code_point]
|
23
24
|
# 0 is a valid width
|
24
|
-
|
25
|
+
if w
|
26
|
+
w.to_f
|
27
|
+
else
|
28
|
+
0
|
29
|
+
end
|
25
30
|
end
|
26
31
|
end
|
27
32
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -9,8 +10,8 @@ class PDF::Reader
|
|
9
10
|
def initialize(font)
|
10
11
|
@font = font
|
11
12
|
|
12
|
-
if @font.font_descriptor
|
13
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
14
15
|
else
|
15
16
|
@missing_width = 0
|
16
17
|
end
|
@@ -29,25 +30,23 @@ class PDF::Reader
|
|
29
30
|
|
30
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
31
32
|
# which is undesireable in this case.
|
32
|
-
|
33
|
-
|
33
|
+
first_char = @font.first_char
|
34
|
+
if first_char && first_char <= code_point
|
35
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
34
36
|
else
|
35
37
|
@missing_width.to_f
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
41
|
def glyph_width_from_descriptor(code_point)
|
40
|
-
return unless @font.font_descriptor
|
41
|
-
|
42
42
|
# true type fonts will have most of their information contained
|
43
43
|
# with-in a program inside the font descriptor, however the widths
|
44
44
|
# may not be in standard PDF glyph widths (1000 units => 1 text space unit)
|
45
45
|
# so this width will need to be scaled
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
nil
|
46
|
+
if fd = @font.font_descriptor
|
47
|
+
if w = fd.glyph_width(code_point)
|
48
|
+
w.to_f * fd.glyph_to_pdf_scale_factor.to_f
|
49
|
+
end
|
51
50
|
end
|
52
51
|
end
|
53
52
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -9,8 +10,8 @@ class PDF::Reader
|
|
9
10
|
def initialize(font)
|
10
11
|
@font = font
|
11
12
|
|
12
|
-
if @font.font_descriptor
|
13
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
14
15
|
else
|
15
16
|
@missing_width = 0
|
16
17
|
end
|
@@ -22,8 +23,9 @@ class PDF::Reader
|
|
22
23
|
|
23
24
|
# in ruby a negative index is valid, and will go from the end of the array
|
24
25
|
# which is undesireable in this case.
|
25
|
-
|
26
|
-
|
26
|
+
first_char = @font.first_char
|
27
|
+
if first_char && first_char <= code_point
|
28
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
27
29
|
else
|
28
30
|
@missing_width.to_f
|
29
31
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -12,13 +13,16 @@ class PDF::Reader
|
|
12
13
|
|
13
14
|
def initialize(font)
|
14
15
|
@font = font
|
15
|
-
@descendant_font = @font.descendantfonts.first
|
16
16
|
end
|
17
17
|
|
18
18
|
def glyph_width(code_point)
|
19
19
|
return 0 if code_point.nil? || code_point < 0
|
20
20
|
|
21
|
-
@
|
21
|
+
if descendant_font = @font.descendantfonts.first
|
22
|
+
descendant_font.glyph_width(code_point).to_f
|
23
|
+
else
|
24
|
+
0
|
25
|
+
end
|
22
26
|
end
|
23
27
|
end
|
24
28
|
end
|