pdf-reader 2.2.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,15 +7,17 @@ class PDF::Reader
6
7
  class TextRun
7
8
  include Comparable
8
9
 
9
- attr_reader :x, :y, :width, :font_size, :text
10
+ attr_reader :origin
11
+ attr_reader :width
12
+ attr_reader :font_size
13
+ attr_reader :text
10
14
 
11
15
  alias :to_s :text
12
16
 
13
17
  def initialize(x, y, width, font_size, text)
14
- @x = x
15
- @y = y
18
+ @origin = PDF::Reader::Point.new(x, y)
16
19
  @width = width
17
- @font_size = font_size.floor
20
+ @font_size = font_size
18
21
  @text = text
19
22
  end
20
23
 
@@ -34,8 +37,20 @@ class PDF::Reader
34
37
  end
35
38
  end
36
39
 
40
+ def x
41
+ @origin.x
42
+ end
43
+
44
+ def y
45
+ @origin.y
46
+ end
47
+
37
48
  def endx
38
- @endx ||= x + width
49
+ @endx ||= @origin.x + width
50
+ end
51
+
52
+ def endy
53
+ @endy ||= @origin.y + font_size
39
54
  end
40
55
 
41
56
  def mean_character_width
@@ -60,8 +75,28 @@ class PDF::Reader
60
75
  "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
61
76
  end
62
77
 
78
+ def intersect?(other_run)
79
+ x <= other_run.endx && endx >= other_run.x &&
80
+ endy >= other_run.y && y <= other_run.endy
81
+ end
82
+
83
+ # return what percentage of this text run is overlapped by another run
84
+ def intersection_area_percent(other_run)
85
+ return 0 unless intersect?(other_run)
86
+
87
+ dx = [endx, other_run.endx].min - [x, other_run.x].max
88
+ dy = [endy, other_run.endy].min - [y, other_run.y].max
89
+ intersection_area = dx*dy
90
+
91
+ intersection_area.to_f / area
92
+ end
93
+
63
94
  private
64
95
 
96
+ def area
97
+ (endx - x) * (endy - y)
98
+ end
99
+
65
100
  def mergable_range
66
101
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
67
102
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -50,7 +51,7 @@ class PDF::Reader
50
51
  # displacement to speed up processing documents that use vertical
51
52
  # writing systems
52
53
  #
53
- def multiply!(a,b=nil,c=nil, d=nil,e=nil,f=nil)
54
+ def multiply!(a,b,c, d,e,f)
54
55
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
55
56
  # the identity matrix, no effect
56
57
  self
@@ -163,12 +164,12 @@ class PDF::Reader
163
164
  # [ e f 1 ] [ e f 1 ]
164
165
  #
165
166
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
166
- newa = (@a * a2) + (@b * c2) + (0 * e2)
167
- newb = (@a * b2) + (@b * d2) + (0 * f2)
168
- newc = (@c * a2) + (@d * c2) + (0 * e2)
169
- newd = (@c * b2) + (@d * d2) + (0 * f2)
170
- newe = (@e * a2) + (@f * c2) + (1 * e2)
171
- newf = (@e * b2) + (@f * d2) + (1 * f2)
167
+ newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
+ newb = (@a * b2) + (@b * d2) + (f2 * 0)
169
+ newc = (@c * a2) + (@d * c2) + (e2 * 0)
170
+ newd = (@c * b2) + (@d * d2) + (f2 * 0)
171
+ newe = (@e * a2) + (@f * c2) + (e2 * 1)
172
+ newf = (@e * b2) + (@f * d2) + (f2 * 1)
172
173
  @a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
173
174
  end
174
175
 
@@ -0,0 +1,98 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Cast untrusted input (usually parsed out of a PDF file) to a known type
9
+ #
10
+ class TypeCheck
11
+
12
+ def self.cast_to_int!(obj)
13
+ if obj.is_a?(Integer)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_i)
18
+ obj.to_i
19
+ else
20
+ raise MalformedPDFError, "Unable to cast to integer"
21
+ end
22
+ end
23
+
24
+ def self.cast_to_numeric!(obj)
25
+ if obj.is_a?(Numeric)
26
+ obj
27
+ elsif obj.nil?
28
+ 0
29
+ elsif obj.respond_to?(:to_f)
30
+ obj.to_f
31
+ elsif obj.respond_to?(:to_i)
32
+ obj.to_i
33
+ else
34
+ raise MalformedPDFError, "Unable to cast to numeric"
35
+ end
36
+ end
37
+
38
+ def self.cast_to_string!(string)
39
+ if string.is_a?(String)
40
+ string
41
+ elsif string.nil?
42
+ ""
43
+ elsif string.respond_to?(:to_s)
44
+ string.to_s
45
+ else
46
+ raise MalformedPDFError, "Unable to cast to string"
47
+ end
48
+ end
49
+
50
+ def self.cast_to_symbol(obj)
51
+ if obj.is_a?(Symbol)
52
+ obj
53
+ elsif obj.nil?
54
+ nil
55
+ elsif obj.respond_to?(:to_sym)
56
+ obj.to_sym
57
+ else
58
+ raise MalformedPDFError, "Unable to cast to symbol"
59
+ end
60
+ end
61
+
62
+ def self.cast_to_symbol!(obj)
63
+ res = cast_to_symbol(obj)
64
+ if res
65
+ res
66
+ else
67
+ raise MalformedPDFError, "Unable to cast to symbol"
68
+ end
69
+ end
70
+
71
+ def self.cast_to_pdf_dict!(obj)
72
+ if obj.is_a?(Hash)
73
+ obj
74
+ elsif obj.respond_to?(:to_h)
75
+ obj.to_h
76
+ else
77
+ raise MalformedPDFError, "Unable to cast to hash"
78
+ end
79
+ end
80
+
81
+ def self.cast_to_pdf_dict_with_stream_values!(obj)
82
+ if obj.is_a?(Hash)
83
+ result = Hash.new
84
+ obj.each do |k, v|
85
+ raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
86
+ result[cast_to_symbol!(k)] = v
87
+ end
88
+ result
89
+ elsif obj.respond_to?(:to_h)
90
+ cast_to_pdf_dict_with_stream_values!(obj.to_h)
91
+ else
92
+ raise MalformedPDFError, "Unable to cast to hash"
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -0,0 +1,262 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Page#walk will execute the content stream of a page, calling methods on a receiver class
9
+ # provided by the user. Each operator has a specific set of parameters it expects, and we
10
+ # wrap the user's receiver class in this one to verify the PDF uses valid parameters.
11
+ #
12
+ # Without these checks, users can't be confident about the number of parameters they'll receive
13
+ # for an operator, or what the type of those parameters will be. Everyone ends up building their
14
+ # own type safety guard clauses and it's tedious.
15
+ #
16
+ # Not all operators have type safety implemented yet, but we can expand the number over time.
17
+ class ValidatingReceiver
18
+
19
+ def initialize(wrapped)
20
+ @wrapped = wrapped
21
+ end
22
+
23
+ def page=(page)
24
+ call_wrapped(:page=, page)
25
+ end
26
+
27
+ #####################################################
28
+ # Graphics State Operators
29
+ #####################################################
30
+ def save_graphics_state(*args)
31
+ call_wrapped(:save_graphics_state)
32
+ end
33
+
34
+ def restore_graphics_state(*args)
35
+ call_wrapped(:restore_graphics_state)
36
+ end
37
+
38
+ #####################################################
39
+ # Matrix Operators
40
+ #####################################################
41
+
42
+ def concatenate_matrix(*args)
43
+ a, b, c, d, e, f = *args
44
+ call_wrapped(
45
+ :concatenate_matrix,
46
+ TypeCheck.cast_to_numeric!(a),
47
+ TypeCheck.cast_to_numeric!(b),
48
+ TypeCheck.cast_to_numeric!(c),
49
+ TypeCheck.cast_to_numeric!(d),
50
+ TypeCheck.cast_to_numeric!(e),
51
+ TypeCheck.cast_to_numeric!(f),
52
+ )
53
+ end
54
+
55
+ #####################################################
56
+ # Text Object Operators
57
+ #####################################################
58
+
59
+ def begin_text_object(*args)
60
+ call_wrapped(:begin_text_object)
61
+ end
62
+
63
+ def end_text_object(*args)
64
+ call_wrapped(:end_text_object)
65
+ end
66
+
67
+ #####################################################
68
+ # Text State Operators
69
+ #####################################################
70
+ def set_character_spacing(*args)
71
+ char_spacing, _ = *args
72
+ call_wrapped(
73
+ :set_character_spacing,
74
+ TypeCheck.cast_to_numeric!(char_spacing)
75
+ )
76
+ end
77
+
78
+ def set_horizontal_text_scaling(*args)
79
+ h_scaling, _ = *args
80
+ call_wrapped(
81
+ :set_horizontal_text_scaling,
82
+ TypeCheck.cast_to_numeric!(h_scaling)
83
+ )
84
+ end
85
+
86
+ def set_text_font_and_size(*args)
87
+ label, size, _ = *args
88
+ call_wrapped(
89
+ :set_text_font_and_size,
90
+ TypeCheck.cast_to_symbol(label),
91
+ TypeCheck.cast_to_numeric!(size)
92
+ )
93
+ end
94
+
95
+ def set_text_leading(*args)
96
+ leading, _ = *args
97
+ call_wrapped(
98
+ :set_text_leading,
99
+ TypeCheck.cast_to_numeric!(leading)
100
+ )
101
+ end
102
+
103
+ def set_text_rendering_mode(*args)
104
+ mode, _ = *args
105
+ call_wrapped(
106
+ :set_text_rendering_mode,
107
+ TypeCheck.cast_to_numeric!(mode)
108
+ )
109
+ end
110
+
111
+ def set_text_rise(*args)
112
+ rise, _ = *args
113
+ call_wrapped(
114
+ :set_text_rise,
115
+ TypeCheck.cast_to_numeric!(rise)
116
+ )
117
+ end
118
+
119
+ def set_word_spacing(*args)
120
+ word_spacing, _ = *args
121
+ call_wrapped(
122
+ :set_word_spacing,
123
+ TypeCheck.cast_to_numeric!(word_spacing)
124
+ )
125
+ end
126
+
127
+ #####################################################
128
+ # Text Positioning Operators
129
+ #####################################################
130
+
131
+ def move_text_position(*args) # Td
132
+ x, y, _ = *args
133
+ call_wrapped(
134
+ :move_text_position,
135
+ TypeCheck.cast_to_numeric!(x),
136
+ TypeCheck.cast_to_numeric!(y)
137
+ )
138
+ end
139
+
140
+ def move_text_position_and_set_leading(*args) # TD
141
+ x, y, _ = *args
142
+ call_wrapped(
143
+ :move_text_position_and_set_leading,
144
+ TypeCheck.cast_to_numeric!(x),
145
+ TypeCheck.cast_to_numeric!(y)
146
+ )
147
+ end
148
+
149
+ def set_text_matrix_and_text_line_matrix(*args) # Tm
150
+ a, b, c, d, e, f = *args
151
+ call_wrapped(
152
+ :set_text_matrix_and_text_line_matrix,
153
+ TypeCheck.cast_to_numeric!(a),
154
+ TypeCheck.cast_to_numeric!(b),
155
+ TypeCheck.cast_to_numeric!(c),
156
+ TypeCheck.cast_to_numeric!(d),
157
+ TypeCheck.cast_to_numeric!(e),
158
+ TypeCheck.cast_to_numeric!(f),
159
+ )
160
+ end
161
+
162
+ def move_to_start_of_next_line(*args) # T*
163
+ call_wrapped(:move_to_start_of_next_line)
164
+ end
165
+
166
+ #####################################################
167
+ # Text Showing Operators
168
+ #####################################################
169
+ def show_text(*args) # Tj (AWAY)
170
+ string, _ = *args
171
+ call_wrapped(
172
+ :show_text,
173
+ TypeCheck.cast_to_string!(string)
174
+ )
175
+ end
176
+
177
+ def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
178
+ params, _ = *args
179
+ unless params.is_a?(Array)
180
+ raise MalformedPDFError, "TJ operator expects a single Array argument"
181
+ end
182
+
183
+ call_wrapped(
184
+ :show_text_with_positioning,
185
+ params
186
+ )
187
+ end
188
+
189
+ def move_to_next_line_and_show_text(*args) # '
190
+ string, _ = *args
191
+ call_wrapped(
192
+ :move_to_next_line_and_show_text,
193
+ TypeCheck.cast_to_string!(string)
194
+ )
195
+ end
196
+
197
+ def set_spacing_next_line_show_text(*args) # "
198
+ aw, ac, string = *args
199
+ call_wrapped(
200
+ :set_spacing_next_line_show_text,
201
+ TypeCheck.cast_to_numeric!(aw),
202
+ TypeCheck.cast_to_numeric!(ac),
203
+ TypeCheck.cast_to_string!(string)
204
+ )
205
+ end
206
+
207
+ #####################################################
208
+ # Form XObject Operators
209
+ #####################################################
210
+
211
+ def invoke_xobject(*args)
212
+ label, _ = *args
213
+
214
+ call_wrapped(
215
+ :invoke_xobject,
216
+ TypeCheck.cast_to_symbol(label)
217
+ )
218
+ end
219
+
220
+ #####################################################
221
+ # Inline Image Operators
222
+ #####################################################
223
+
224
+ def begin_inline_image(*args)
225
+ call_wrapped(:begin_inline_image)
226
+ end
227
+
228
+ def begin_inline_image_data(*args)
229
+ # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
230
+ # number of elements
231
+ @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
232
+ end
233
+
234
+ def end_inline_image(*args)
235
+ data, _ = *args
236
+
237
+ call_wrapped(
238
+ :end_inline_image,
239
+ TypeCheck.cast_to_string!(data)
240
+ )
241
+ end
242
+
243
+ #####################################################
244
+ # Final safety net for any operators that don't have type checking enabled yet
245
+ #####################################################
246
+
247
+ def respond_to?(meth)
248
+ @wrapped.respond_to?(meth)
249
+ end
250
+
251
+ def method_missing(methodname, *args)
252
+ @wrapped.send(methodname, *args)
253
+ end
254
+
255
+ private
256
+
257
+ def call_wrapped(methodname, *args)
258
+ @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
259
+ end
260
+ end
261
+ end
262
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'afm'
@@ -12,11 +13,20 @@ class PDF::Reader
12
13
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
13
14
  class BuiltIn
14
15
 
16
+ BUILTINS = [
17
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
18
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
19
+ :Symbol,
20
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
21
+ :ZapfDingbats
22
+ ]
23
+
15
24
  def initialize(font)
16
25
  @font = font
17
26
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
18
27
 
19
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
28
+ basefont = extract_basefont(font.basefont)
29
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
20
30
 
21
31
  if File.file?(metrics_path)
22
32
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -28,32 +38,32 @@ class PDF::Reader
28
38
  def glyph_width(code_point)
29
39
  return 0 if code_point.nil? || code_point < 0
30
40
 
31
- m = @metrics.char_metrics_by_code[code_point]
32
- if m.nil?
33
- names = @font.encoding.int_to_name(code_point)
34
-
35
- m = names.map { |name|
36
- @metrics.char_metrics[name.to_s]
37
- }.compact.first
38
- end
41
+ names = @font.encoding.int_to_name(code_point)
42
+ metrics = names.map { |name|
43
+ @metrics.char_metrics[name.to_s]
44
+ }.compact.first
39
45
 
40
- if m
41
- m[:wx]
42
- elsif @font.widths[code_point - 1]
43
- @font.widths[code_point - 1]
44
- elsif control_character?(code_point)
45
- 0
46
+ if metrics
47
+ metrics[:wx]
46
48
  else
47
- 0
49
+ @font.widths[code_point - 1] || 0
48
50
  end
49
51
  end
50
52
 
51
53
  private
52
54
 
53
55
  def control_character?(code_point)
54
- @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
56
+ match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
57
+ match ? true : false
55
58
  end
56
59
 
60
+ def extract_basefont(font_name)
61
+ if BUILTINS.include?(font_name)
62
+ font_name.to_s
63
+ else
64
+ "Times-Roman"
65
+ end
66
+ end
57
67
  end
58
68
  end
59
69
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -21,7 +22,11 @@ class PDF::Reader
21
22
 
22
23
  w = @widths[code_point]
23
24
  # 0 is a valid width
24
- return w.to_f unless w.nil?
25
+ if w
26
+ w.to_f
27
+ else
28
+ 0
29
+ end
25
30
  end
26
31
  end
27
32
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -9,8 +10,8 @@ class PDF::Reader
9
10
  def initialize(font)
10
11
  @font = font
11
12
 
12
- if @font.font_descriptor
13
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
14
15
  else
15
16
  @missing_width = 0
16
17
  end
@@ -29,25 +30,23 @@ class PDF::Reader
29
30
 
30
31
  # in ruby a negative index is valid, and will go from the end of the array
31
32
  # which is undesirable in this case.
32
- if @font.first_char <= code_point
33
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
33
+ first_char = @font.first_char
34
+ if first_char && first_char <= code_point
35
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
34
36
  else
35
37
  @missing_width.to_f
36
38
  end
37
39
  end
38
40
 
39
41
  def glyph_width_from_descriptor(code_point)
40
- return unless @font.font_descriptor
41
-
42
42
  # true type fonts will have most of their information contained
43
43
  # within a program inside the font descriptor, however the widths
44
44
  # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
45
45
  # so this width will need to be scaled
46
- w = @font.font_descriptor.glyph_width(code_point)
47
- if w
48
- w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
49
- else
50
- nil
46
+ if fd = @font.font_descriptor
47
+ if w = fd.glyph_width(code_point)
48
+ w.to_f * fd.glyph_to_pdf_scale_factor.to_f
49
+ end
51
50
  end
52
51
  end
53
52
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -9,8 +10,8 @@ class PDF::Reader
9
10
  def initialize(font)
10
11
  @font = font
11
12
 
12
- if @font.font_descriptor
13
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
14
15
  else
15
16
  @missing_width = 0
16
17
  end
@@ -22,8 +23,9 @@ class PDF::Reader
22
23
 
23
24
  # in ruby a negative index is valid, and will go from the end of the array
24
25
  # which is undesirable in this case.
25
- if @font.first_char <= code_point
26
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ first_char = @font.first_char
27
+ if first_char && first_char <= code_point
28
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
27
29
  else
28
30
  @missing_width.to_f
29
31
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -12,13 +13,16 @@ class PDF::Reader
12
13
 
13
14
  def initialize(font)
14
15
  @font = font
15
- @descendant_font = @font.descendantfonts.first
16
16
  end
17
17
 
18
18
  def glyph_width(code_point)
19
19
  return 0 if code_point.nil? || code_point < 0
20
20
 
21
- @descendant_font.glyph_width(code_point).to_f
21
+ if descendant_font = @font.descendantfonts.first
22
+ descendant_font.glyph_width(code_point).to_f
23
+ else
24
+ 0
25
+ end
22
26
  end
23
27
  end
24
28
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # PDF files may define fonts in a number of ways. Each approach means we must