pdf-reader 2.2.0 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,15 +7,17 @@ class PDF::Reader
6
7
  class TextRun
7
8
  include Comparable
8
9
 
9
- attr_reader :x, :y, :width, :font_size, :text
10
+ attr_reader :origin
11
+ attr_reader :width
12
+ attr_reader :font_size
13
+ attr_reader :text
10
14
 
11
15
  alias :to_s :text
12
16
 
13
17
  def initialize(x, y, width, font_size, text)
14
- @x = x
15
- @y = y
18
+ @origin = PDF::Reader::Point.new(x, y)
16
19
  @width = width
17
- @font_size = font_size.floor
20
+ @font_size = font_size
18
21
  @text = text
19
22
  end
20
23
 
@@ -34,8 +37,20 @@ class PDF::Reader
34
37
  end
35
38
  end
36
39
 
40
+ def x
41
+ @origin.x
42
+ end
43
+
44
+ def y
45
+ @origin.y
46
+ end
47
+
37
48
  def endx
38
- @endx ||= x + width
49
+ @endx ||= @origin.x + width
50
+ end
51
+
52
+ def endy
53
+ @endy ||= @origin.y + font_size
39
54
  end
40
55
 
41
56
  def mean_character_width
@@ -60,8 +75,28 @@ class PDF::Reader
60
75
  "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
61
76
  end
62
77
 
78
+ def intersect?(other_run)
79
+ x <= other_run.endx && endx >= other_run.x &&
80
+ endy >= other_run.y && y <= other_run.endy
81
+ end
82
+
83
+ # return what fraction of this text run's area is overlapped by another run (0 when they don't intersect)
84
+ def intersection_area_percent(other_run)
85
+ return 0 unless intersect?(other_run)
86
+
87
+ dx = [endx, other_run.endx].min - [x, other_run.x].max
88
+ dy = [endy, other_run.endy].min - [y, other_run.y].max
89
+ intersection_area = dx*dy
90
+
91
+ intersection_area.to_f / area
92
+ end
93
+
63
94
  private
64
95
 
96
+ def area
97
+ (endx - x) * (endy - y)
98
+ end
99
+
65
100
  def mergable_range
66
101
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
67
102
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -50,7 +51,7 @@ class PDF::Reader
50
51
  # displacement to speed up processing documents that use vertical
51
52
  # writing systems
52
53
  #
53
- def multiply!(a,b=nil,c=nil, d=nil,e=nil,f=nil)
54
+ def multiply!(a,b,c, d,e,f)
54
55
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
55
56
  # the identity matrix, no effect
56
57
  self
@@ -163,12 +164,12 @@ class PDF::Reader
163
164
  # [ e f 1 ] [ e f 1 ]
164
165
  #
165
166
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
166
- newa = (@a * a2) + (@b * c2) + (0 * e2)
167
- newb = (@a * b2) + (@b * d2) + (0 * f2)
168
- newc = (@c * a2) + (@d * c2) + (0 * e2)
169
- newd = (@c * b2) + (@d * d2) + (0 * f2)
170
- newe = (@e * a2) + (@f * c2) + (1 * e2)
171
- newf = (@e * b2) + (@f * d2) + (1 * f2)
167
+ newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
+ newb = (@a * b2) + (@b * d2) + (f2 * 0)
169
+ newc = (@c * a2) + (@d * c2) + (e2 * 0)
170
+ newd = (@c * b2) + (@d * d2) + (f2 * 0)
171
+ newe = (@e * a2) + (@f * c2) + (e2 * 1)
172
+ newf = (@e * b2) + (@f * d2) + (f2 * 1)
172
173
  @a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
173
174
  end
174
175
 
@@ -0,0 +1,98 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Cast untrusted input (usually parsed out of a PDF file) to a known type
9
+ #
10
+ class TypeCheck
11
+
12
+ def self.cast_to_int!(obj)
13
+ if obj.is_a?(Integer)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_i)
18
+ obj.to_i
19
+ else
20
+ raise MalformedPDFError, "Unable to cast to integer"
21
+ end
22
+ end
23
+
24
+ def self.cast_to_numeric!(obj)
25
+ if obj.is_a?(Numeric)
26
+ obj
27
+ elsif obj.nil?
28
+ 0
29
+ elsif obj.respond_to?(:to_f)
30
+ obj.to_f
31
+ elsif obj.respond_to?(:to_i)
32
+ obj.to_i
33
+ else
34
+ raise MalformedPDFError, "Unable to cast to numeric"
35
+ end
36
+ end
37
+
38
+ def self.cast_to_string!(string)
39
+ if string.is_a?(String)
40
+ string
41
+ elsif string.nil?
42
+ ""
43
+ elsif string.respond_to?(:to_s)
44
+ string.to_s
45
+ else
46
+ raise MalformedPDFError, "Unable to cast to string"
47
+ end
48
+ end
49
+
50
+ def self.cast_to_symbol(obj)
51
+ if obj.is_a?(Symbol)
52
+ obj
53
+ elsif obj.nil?
54
+ nil
55
+ elsif obj.respond_to?(:to_sym)
56
+ obj.to_sym
57
+ else
58
+ raise MalformedPDFError, "Unable to cast to symbol"
59
+ end
60
+ end
61
+
62
+ def self.cast_to_symbol!(obj)
63
+ res = cast_to_symbol(obj)
64
+ if res
65
+ res
66
+ else
67
+ raise MalformedPDFError, "Unable to cast to symbol"
68
+ end
69
+ end
70
+
71
+ def self.cast_to_pdf_dict!(obj)
72
+ if obj.is_a?(Hash)
73
+ obj
74
+ elsif obj.respond_to?(:to_h)
75
+ obj.to_h
76
+ else
77
+ raise MalformedPDFError, "Unable to cast to hash"
78
+ end
79
+ end
80
+
81
+ def self.cast_to_pdf_dict_with_stream_values!(obj)
82
+ if obj.is_a?(Hash)
83
+ result = Hash.new
84
+ obj.each do |k, v|
85
+ raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
86
+ result[cast_to_symbol!(k)] = v
87
+ end
88
+ result
89
+ elsif obj.respond_to?(:to_h)
90
+ cast_to_pdf_dict_with_stream_values!(obj.to_h)
91
+ else
92
+ raise MalformedPDFError, "Unable to cast to hash"
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -0,0 +1,262 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Page#walk will execute the content stream of a page, calling methods on a receiver class
9
+ # provided by the user. Each operator has a specific set of parameters it expects, and we
10
+ # wrap the user's receiver class in this one to verify the PDF uses valid parameters.
11
+ #
12
+ # Without these checks, users can't be confident about the number of parameters they'll receive
13
+ # for an operator, or what the type of those parameters will be. Everyone ends up building their
14
+ # own type safety guard clauses and it's tedious.
15
+ #
16
+ # Not all operators have type safety implemented yet, but we can expand the number over time.
17
+ class ValidatingReceiver
18
+
19
+ def initialize(wrapped)
20
+ @wrapped = wrapped
21
+ end
22
+
23
+ def page=(page)
24
+ call_wrapped(:page=, page)
25
+ end
26
+
27
+ #####################################################
28
+ # Graphics State Operators
29
+ #####################################################
30
+ def save_graphics_state(*args)
31
+ call_wrapped(:save_graphics_state)
32
+ end
33
+
34
+ def restore_graphics_state(*args)
35
+ call_wrapped(:restore_graphics_state)
36
+ end
37
+
38
+ #####################################################
39
+ # Matrix Operators
40
+ #####################################################
41
+
42
+ def concatenate_matrix(*args)
43
+ a, b, c, d, e, f = *args
44
+ call_wrapped(
45
+ :concatenate_matrix,
46
+ TypeCheck.cast_to_numeric!(a),
47
+ TypeCheck.cast_to_numeric!(b),
48
+ TypeCheck.cast_to_numeric!(c),
49
+ TypeCheck.cast_to_numeric!(d),
50
+ TypeCheck.cast_to_numeric!(e),
51
+ TypeCheck.cast_to_numeric!(f),
52
+ )
53
+ end
54
+
55
+ #####################################################
56
+ # Text Object Operators
57
+ #####################################################
58
+
59
+ def begin_text_object(*args)
60
+ call_wrapped(:begin_text_object)
61
+ end
62
+
63
+ def end_text_object(*args)
64
+ call_wrapped(:end_text_object)
65
+ end
66
+
67
+ #####################################################
68
+ # Text State Operators
69
+ #####################################################
70
+ def set_character_spacing(*args)
71
+ char_spacing, _ = *args
72
+ call_wrapped(
73
+ :set_character_spacing,
74
+ TypeCheck.cast_to_numeric!(char_spacing)
75
+ )
76
+ end
77
+
78
+ def set_horizontal_text_scaling(*args)
79
+ h_scaling, _ = *args
80
+ call_wrapped(
81
+ :set_horizontal_text_scaling,
82
+ TypeCheck.cast_to_numeric!(h_scaling)
83
+ )
84
+ end
85
+
86
+ def set_text_font_and_size(*args)
87
+ label, size, _ = *args
88
+ call_wrapped(
89
+ :set_text_font_and_size,
90
+ TypeCheck.cast_to_symbol(label),
91
+ TypeCheck.cast_to_numeric!(size)
92
+ )
93
+ end
94
+
95
+ def set_text_leading(*args)
96
+ leading, _ = *args
97
+ call_wrapped(
98
+ :set_text_leading,
99
+ TypeCheck.cast_to_numeric!(leading)
100
+ )
101
+ end
102
+
103
+ def set_text_rendering_mode(*args)
104
+ mode, _ = *args
105
+ call_wrapped(
106
+ :set_text_rendering_mode,
107
+ TypeCheck.cast_to_numeric!(mode)
108
+ )
109
+ end
110
+
111
+ def set_text_rise(*args)
112
+ rise, _ = *args
113
+ call_wrapped(
114
+ :set_text_rise,
115
+ TypeCheck.cast_to_numeric!(rise)
116
+ )
117
+ end
118
+
119
+ def set_word_spacing(*args)
120
+ word_spacing, _ = *args
121
+ call_wrapped(
122
+ :set_word_spacing,
123
+ TypeCheck.cast_to_numeric!(word_spacing)
124
+ )
125
+ end
126
+
127
+ #####################################################
128
+ # Text Positioning Operators
129
+ #####################################################
130
+
131
+ def move_text_position(*args) # Td
132
+ x, y, _ = *args
133
+ call_wrapped(
134
+ :move_text_position,
135
+ TypeCheck.cast_to_numeric!(x),
136
+ TypeCheck.cast_to_numeric!(y)
137
+ )
138
+ end
139
+
140
+ def move_text_position_and_set_leading(*args) # TD
141
+ x, y, _ = *args
142
+ call_wrapped(
143
+ :move_text_position_and_set_leading,
144
+ TypeCheck.cast_to_numeric!(x),
145
+ TypeCheck.cast_to_numeric!(y)
146
+ )
147
+ end
148
+
149
+ def set_text_matrix_and_text_line_matrix(*args) # Tm
150
+ a, b, c, d, e, f = *args
151
+ call_wrapped(
152
+ :set_text_matrix_and_text_line_matrix,
153
+ TypeCheck.cast_to_numeric!(a),
154
+ TypeCheck.cast_to_numeric!(b),
155
+ TypeCheck.cast_to_numeric!(c),
156
+ TypeCheck.cast_to_numeric!(d),
157
+ TypeCheck.cast_to_numeric!(e),
158
+ TypeCheck.cast_to_numeric!(f),
159
+ )
160
+ end
161
+
162
+ def move_to_start_of_next_line(*args) # T*
163
+ call_wrapped(:move_to_start_of_next_line)
164
+ end
165
+
166
+ #####################################################
167
+ # Text Showing Operators
168
+ #####################################################
169
+ def show_text(*args) # Tj (AWAY)
170
+ string, _ = *args
171
+ call_wrapped(
172
+ :show_text,
173
+ TypeCheck.cast_to_string!(string)
174
+ )
175
+ end
176
+
177
+ def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
178
+ params, _ = *args
179
+ unless params.is_a?(Array)
180
+ raise MalformedPDFError, "TJ operator expects a single Array argument"
181
+ end
182
+
183
+ call_wrapped(
184
+ :show_text_with_positioning,
185
+ params
186
+ )
187
+ end
188
+
189
+ def move_to_next_line_and_show_text(*args) # '
190
+ string, _ = *args
191
+ call_wrapped(
192
+ :move_to_next_line_and_show_text,
193
+ TypeCheck.cast_to_string!(string)
194
+ )
195
+ end
196
+
197
+ def set_spacing_next_line_show_text(*args) # "
198
+ aw, ac, string = *args
199
+ call_wrapped(
200
+ :set_spacing_next_line_show_text,
201
+ TypeCheck.cast_to_numeric!(aw),
202
+ TypeCheck.cast_to_numeric!(ac),
203
+ TypeCheck.cast_to_string!(string)
204
+ )
205
+ end
206
+
207
+ #####################################################
208
+ # Form XObject Operators
209
+ #####################################################
210
+
211
+ def invoke_xobject(*args)
212
+ label, _ = *args
213
+
214
+ call_wrapped(
215
+ :invoke_xobject,
216
+ TypeCheck.cast_to_symbol(label)
217
+ )
218
+ end
219
+
220
+ #####################################################
221
+ # Inline Image Operators
222
+ #####################################################
223
+
224
+ def begin_inline_image(*args)
225
+ call_wrapped(:begin_inline_image)
226
+ end
227
+
228
+ def begin_inline_image_data(*args)
229
+ # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
230
+ # number of elements
231
+ @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
232
+ end
233
+
234
+ def end_inline_image(*args)
235
+ data, _ = *args
236
+
237
+ call_wrapped(
238
+ :end_inline_image,
239
+ TypeCheck.cast_to_string!(data)
240
+ )
241
+ end
242
+
243
+ #####################################################
244
+ # Final safety net for any operators that don't have type checking enabled yet
245
+ #####################################################
246
+
247
+ def respond_to?(meth)
248
+ @wrapped.respond_to?(meth)
249
+ end
250
+
251
+ def method_missing(methodname, *args)
252
+ @wrapped.send(methodname, *args)
253
+ end
254
+
255
+ private
256
+
257
+ def call_wrapped(methodname, *args)
258
+ @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
259
+ end
260
+ end
261
+ end
262
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'afm'
@@ -12,11 +13,20 @@ class PDF::Reader
12
13
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
13
14
  class BuiltIn
14
15
 
16
+ BUILTINS = [
17
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
18
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
19
+ :Symbol,
20
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
21
+ :ZapfDingbats
22
+ ]
23
+
15
24
  def initialize(font)
16
25
  @font = font
17
26
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
18
27
 
19
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
28
+ basefont = extract_basefont(font.basefont)
29
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
20
30
 
21
31
  if File.file?(metrics_path)
22
32
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -28,32 +38,32 @@ class PDF::Reader
28
38
  def glyph_width(code_point)
29
39
  return 0 if code_point.nil? || code_point < 0
30
40
 
31
- m = @metrics.char_metrics_by_code[code_point]
32
- if m.nil?
33
- names = @font.encoding.int_to_name(code_point)
34
-
35
- m = names.map { |name|
36
- @metrics.char_metrics[name.to_s]
37
- }.compact.first
38
- end
41
+ names = @font.encoding.int_to_name(code_point)
42
+ metrics = names.map { |name|
43
+ @metrics.char_metrics[name.to_s]
44
+ }.compact.first
39
45
 
40
- if m
41
- m[:wx]
42
- elsif @font.widths[code_point - 1]
43
- @font.widths[code_point - 1]
44
- elsif control_character?(code_point)
45
- 0
46
+ if metrics
47
+ metrics[:wx]
46
48
  else
47
- 0
49
+ @font.widths[code_point - 1] || 0
48
50
  end
49
51
  end
50
52
 
51
53
  private
52
54
 
53
55
  def control_character?(code_point)
54
- @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
56
+ match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
57
+ match ? true : false
55
58
  end
56
59
 
60
+ def extract_basefont(font_name)
61
+ if BUILTINS.include?(font_name)
62
+ font_name.to_s
63
+ else
64
+ "Times-Roman"
65
+ end
66
+ end
57
67
  end
58
68
  end
59
69
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -21,7 +22,11 @@ class PDF::Reader
21
22
 
22
23
  w = @widths[code_point]
23
24
  # 0 is a valid width
24
- return w.to_f unless w.nil?
25
+ if w
26
+ w.to_f
27
+ else
28
+ 0
29
+ end
25
30
  end
26
31
  end
27
32
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -9,8 +10,8 @@ class PDF::Reader
9
10
  def initialize(font)
10
11
  @font = font
11
12
 
12
- if @font.font_descriptor
13
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
14
15
  else
15
16
  @missing_width = 0
16
17
  end
@@ -29,25 +30,23 @@ class PDF::Reader
29
30
 
30
31
  # in ruby a negative index is valid, and will go from the end of the array
31
32
  # which is undesirable in this case.
32
- if @font.first_char <= code_point
33
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
33
+ first_char = @font.first_char
34
+ if first_char && first_char <= code_point
35
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
34
36
  else
35
37
  @missing_width.to_f
36
38
  end
37
39
  end
38
40
 
39
41
  def glyph_width_from_descriptor(code_point)
40
- return unless @font.font_descriptor
41
-
42
42
  # true type fonts will have most of their information contained
43
43
  # within a program inside the font descriptor, however the widths
44
44
  # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
45
45
  # so this width will need to be scaled
46
- w = @font.font_descriptor.glyph_width(code_point)
47
- if w
48
- w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
49
- else
50
- nil
46
+ if fd = @font.font_descriptor
47
+ if w = fd.glyph_width(code_point)
48
+ w.to_f * fd.glyph_to_pdf_scale_factor.to_f
49
+ end
51
50
  end
52
51
  end
53
52
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -9,8 +10,8 @@ class PDF::Reader
9
10
  def initialize(font)
10
11
  @font = font
11
12
 
12
- if @font.font_descriptor
13
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
14
15
  else
15
16
  @missing_width = 0
16
17
  end
@@ -22,8 +23,9 @@ class PDF::Reader
22
23
 
23
24
  # in ruby a negative index is valid, and will go from the end of the array
24
25
  # which is undesirable in this case.
25
- if @font.first_char <= code_point
26
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ first_char = @font.first_char
27
+ if first_char && first_char <= code_point
28
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
27
29
  else
28
30
  @missing_width.to_f
29
31
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -12,13 +13,16 @@ class PDF::Reader
12
13
 
13
14
  def initialize(font)
14
15
  @font = font
15
- @descendant_font = @font.descendantfonts.first
16
16
  end
17
17
 
18
18
  def glyph_width(code_point)
19
19
  return 0 if code_point.nil? || code_point < 0
20
20
 
21
- @descendant_font.glyph_width(code_point).to_f
21
+ if descendant_font = @font.descendantfonts.first
22
+ descendant_font.glyph_width(code_point).to_f
23
+ else
24
+ 0
25
+ end
22
26
  end
23
27
  end
24
28
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # PDF files may define fonts in a number of ways. Each approach means we must