pdf-reader 2.9.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +39 -0
  3. data/README.md +33 -33
  4. data/Rakefile +2 -2
  5. data/lib/pdf/reader/advanced_text_run_filter.rb +152 -0
  6. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  7. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  8. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  9. data/lib/pdf/reader/buffer.rb +39 -22
  10. data/lib/pdf/reader/cid_widths.rb +14 -6
  11. data/lib/pdf/reader/cmap.rb +16 -5
  12. data/lib/pdf/reader/encoding.rb +42 -18
  13. data/lib/pdf/reader/error.rb +6 -4
  14. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  15. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  16. data/lib/pdf/reader/filter/depredict.rb +6 -2
  17. data/lib/pdf/reader/filter/flate.rb +5 -2
  18. data/lib/pdf/reader/filter/lzw.rb +2 -0
  19. data/lib/pdf/reader/filter/null.rb +2 -0
  20. data/lib/pdf/reader/filter/run_length.rb +2 -0
  21. data/lib/pdf/reader/filter.rb +1 -0
  22. data/lib/pdf/reader/font.rb +99 -32
  23. data/lib/pdf/reader/font_descriptor.rb +79 -24
  24. data/lib/pdf/reader/form_xobject.rb +15 -1
  25. data/lib/pdf/reader/glyph_hash.rb +41 -8
  26. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  27. data/lib/pdf/reader/lzw.rb +42 -16
  28. data/lib/pdf/reader/no_text_filter.rb +15 -0
  29. data/lib/pdf/reader/null_security_handler.rb +1 -0
  30. data/lib/pdf/reader/object_cache.rb +7 -2
  31. data/lib/pdf/reader/object_hash.rb +129 -16
  32. data/lib/pdf/reader/object_stream.rb +22 -5
  33. data/lib/pdf/reader/overlapping_runs_filter.rb +8 -2
  34. data/lib/pdf/reader/page.rb +66 -13
  35. data/lib/pdf/reader/page_layout.rb +26 -9
  36. data/lib/pdf/reader/page_state.rb +12 -3
  37. data/lib/pdf/reader/page_text_receiver.rb +16 -2
  38. data/lib/pdf/reader/pages_strategy.rb +1 -1
  39. data/lib/pdf/reader/parser.rb +52 -13
  40. data/lib/pdf/reader/point.rb +9 -2
  41. data/lib/pdf/reader/print_receiver.rb +2 -6
  42. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  43. data/lib/pdf/reader/rectangle.rb +24 -1
  44. data/lib/pdf/reader/reference.rb +13 -3
  45. data/lib/pdf/reader/register_receiver.rb +15 -2
  46. data/lib/pdf/reader/resources.rb +12 -2
  47. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  48. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  49. data/lib/pdf/reader/stream.rb +9 -3
  50. data/lib/pdf/reader/synchronized_cache.rb +6 -3
  51. data/lib/pdf/reader/text_run.rb +33 -3
  52. data/lib/pdf/reader/token.rb +1 -0
  53. data/lib/pdf/reader/transformation_matrix.rb +41 -10
  54. data/lib/pdf/reader/type_check.rb +53 -0
  55. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  56. data/lib/pdf/reader/validating_receiver.rb +29 -0
  57. data/lib/pdf/reader/width_calculator/built_in.rb +13 -5
  58. data/lib/pdf/reader/width_calculator/composite.rb +11 -3
  59. data/lib/pdf/reader/width_calculator/true_type.rb +14 -12
  60. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +8 -5
  61. data/lib/pdf/reader/width_calculator/type_zero.rb +8 -3
  62. data/lib/pdf/reader/xref.rb +31 -10
  63. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  64. data/lib/pdf/reader.rb +24 -12
  65. data/rbi/pdf-reader.rbi +1504 -1480
  66. metadata +34 -17
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -29,48 +29,99 @@
29
29
 
30
30
  require 'pdf/reader/width_calculator'
31
31
 
32
+
32
33
  class PDF::Reader
33
34
  # Represents a single font PDF object and provides some useful methods
34
35
  # for extracting info. Mainly used for converting text to UTF-8.
35
36
  #
36
37
  class Font
37
- attr_accessor :subtype, :encoding, :descendantfonts, :tounicode
38
- attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
39
- :cid_widths, :cid_default_width
38
+ #: type widthCalculator = (
39
+ #| PDF::Reader::WidthCalculator::TypeZero |
40
+ #| PDF::Reader::WidthCalculator::BuiltIn |
41
+ #| PDF::Reader::WidthCalculator::TypeOneOrThree |
42
+ #| PDF::Reader::WidthCalculator::TrueType |
43
+ #| PDF::Reader::WidthCalculator::Composite
44
+ #| )
45
+
46
+ #: Symbol?
47
+ attr_accessor :subtype
48
+
49
+ #: PDF::Reader::Encoding
50
+ attr_accessor :encoding
51
+
52
+ #: Array[PDF::Reader::Font]
53
+ attr_accessor :descendantfonts
54
+
55
+ #: PDF::Reader::CMap | nil
56
+ attr_accessor :tounicode
57
+
58
+ #: Array[Numeric]
59
+ attr_reader :widths
60
+
61
+ #: Integer?
62
+ attr_reader :first_char
63
+
64
+ #: Integer?
65
+ attr_reader :last_char
66
+
67
+ #: Symbol?
68
+ attr_reader :basefont
69
+
70
+ #: PDF::Reader::FontDescriptor?
71
+ attr_reader :font_descriptor
40
72
 
73
+ #: Array[Numeric]
74
+ attr_reader :cid_widths
75
+
76
+ #: Numeric
77
+ attr_reader :cid_default_width
78
+
79
+ #: (PDF::Reader::ObjectHash, Hash[Symbol, untyped]) -> void
41
80
  def initialize(ohash, obj)
42
81
  @ohash = ohash
43
- @tounicode = nil
82
+ @tounicode = nil #: PDF::Reader::CMap | nil
83
+ @descendantfonts = [] #: Array[PDF::Reader::Font]
84
+ @widths = [] #: Array[Numeric]
85
+ @first_char = nil #: Integer?
86
+ @last_char = nil #: Integer?
87
+ @basefont = nil #: Symbol?
88
+ @font_descriptor = nil #: PDF::Reader::FontDescriptor?
89
+ @cid_widths = [] #: Array[Numeric]
90
+ @cid_default_width = 0 #: Numeric
91
+ @encoding = PDF::Reader::Encoding.new(:StandardEncoding) #: PDF::Reader::Encoding
92
+ @cached_widths = {} #: Hash[Integer, Numeric]
93
+ @font_matrix = nil #: Array[Numeric] | nil
44
94
 
45
95
  extract_base_info(obj)
46
96
  extract_type3_info(obj)
47
97
  extract_descriptor(obj)
48
98
  extract_descendants(obj)
49
- @width_calc = build_width_calculator
50
-
51
- @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
99
+ @width_calc = build_width_calculator #: widthCalculator
52
100
  end
53
101
 
102
+ #: (Integer | String | Array[Integer | String]) -> String
54
103
  def to_utf8(params)
55
104
  if @tounicode
56
- to_utf8_via_cmap(params)
105
+ to_utf8_via_cmap(params, @tounicode)
57
106
  else
58
107
  to_utf8_via_encoding(params)
59
108
  end
60
109
  end
61
110
 
111
+ #: (String) -> (Array[Integer | Float | String | nil] | nil)
62
112
  def unpack(data)
63
113
  data.unpack(encoding.unpack)
64
114
  end
65
115
 
66
116
  # looks up the specified codepoint and returns a value that is in (pdf)
67
117
  # glyph space, which is 1000 glyph units = 1 text space unit
118
+ #: (Integer | String) -> Numeric
68
119
  def glyph_width(code_point)
69
120
  if code_point.is_a?(String)
70
- code_point = code_point.unpack(encoding.unpack).first
121
+ code_point = unpack_string_to_array_of_ints(code_point, encoding.unpack).first
122
+ raise MalformedPDFError, "code point missing" if code_point.nil?
71
123
  end
72
124
 
73
- @cached_widths ||= {}
74
125
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
75
126
  end
76
127
 
@@ -78,12 +129,13 @@ class PDF::Reader
78
129
  #
79
130
  # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
131
  #
132
+ #: (Integer | String) -> Numeric
81
133
  def glyph_width_in_text_space(code_point)
82
134
  glyph_width_in_glyph_space = glyph_width(code_point)
83
135
 
84
136
  if @subtype == :Type3
85
- x1, y1 = font_matrix_transform(0,0)
86
- x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
137
+ x1, _y1 = font_matrix_transform(0,0)
138
+ x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
139
  (x2 - x1).abs.round(2)
88
140
  else
89
141
  glyph_width_in_glyph_space / 1000.0
@@ -93,13 +145,14 @@ class PDF::Reader
93
145
  private
94
146
 
95
147
  # Only valid for Type3 fonts
148
+ #: (Numeric, Numeric) -> [Numeric, Numeric]
96
149
  def font_matrix_transform(x, y)
97
150
  return x, y if @font_matrix.nil?
98
151
 
99
152
  matrix = TransformationMatrix.new(
100
- @font_matrix[0], @font_matrix[1],
101
- @font_matrix[2], @font_matrix[3],
102
- @font_matrix[4], @font_matrix[5],
153
+ @font_matrix[0] || 0, @font_matrix[1] || 0,
154
+ @font_matrix[2] || 0, @font_matrix[3] || 0,
155
+ @font_matrix[4] || 0, @font_matrix[5] || 0,
103
156
  )
104
157
 
105
158
  if x == 0 && y == 0
@@ -112,6 +165,7 @@ class PDF::Reader
112
165
  end
113
166
  end
114
167
 
168
+ #: (Symbol | String | nil) -> PDF::Reader::Encoding
115
169
  def default_encoding(font_name)
116
170
  case font_name.to_s
117
171
  when "Symbol" then
@@ -123,6 +177,7 @@ class PDF::Reader
123
177
  end
124
178
  end
125
179
 
180
+ #: () -> widthCalculator
126
181
  def build_width_calculator
127
182
  if @subtype == :Type0
128
183
  PDF::Reader::WidthCalculator::TypeZero.new(self)
@@ -149,6 +204,7 @@ class PDF::Reader
149
204
  end
150
205
  end
151
206
 
207
+ #: (Hash[Symbol, untyped]) -> PDF::Reader::Encoding
152
208
  def build_encoding(obj)
153
209
  if obj[:Encoding].is_a?(Symbol)
154
210
  # one of the standard encodings, referenced by name
@@ -163,6 +219,7 @@ class PDF::Reader
163
219
  end
164
220
  end
165
221
 
222
+ #: (Hash[Symbol, untyped]) -> void
166
223
  def extract_base_info(obj)
167
224
  @subtype = @ohash.deref_name(obj[:Subtype])
168
225
  @basefont = @ohash.deref_name(obj[:BaseFont])
@@ -185,6 +242,7 @@ class PDF::Reader
185
242
  end
186
243
  end
187
244
 
245
+ #: (Hash[Symbol, untyped]) -> void
188
246
  def extract_type3_info(obj)
189
247
  if @subtype == :Type3
190
248
  @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
@@ -193,45 +251,50 @@ class PDF::Reader
193
251
  end
194
252
  end
195
253
 
254
+ #: (Hash[Symbol, untyped]) -> void
196
255
  def extract_descriptor(obj)
197
256
  if obj[:FontDescriptor]
198
257
  # create a font descriptor object if we can, in other words, unless this is
199
258
  # a CID Font
200
- fd = @ohash.deref_hash(obj[:FontDescriptor])
259
+ fd = @ohash.deref_hash(obj[:FontDescriptor]) || {}
201
260
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
202
261
  else
203
262
  @font_descriptor = nil
204
263
  end
205
264
  end
206
265
 
266
+ #: (Hash[Symbol, untyped]) -> void
207
267
  def extract_descendants(obj)
208
- return unless obj[:DescendantFonts]
209
268
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
210
269
  # A one-element array specifying the CIDFont dictionary that is the
211
270
  # descendant of this Type 0 font.
212
- descendants = @ohash.deref_array(obj[:DescendantFonts])
213
- @descendantfonts = descendants.map { |desc|
214
- PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
- }
271
+ if obj[:DescendantFonts]
272
+ descendants = @ohash.deref_array(obj[:DescendantFonts]) || []
273
+ @descendantfonts = descendants.map { |desc|
274
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc) || {})
275
+ }
276
+ else
277
+ @descendantfonts = []
278
+ end
216
279
  end
217
280
 
218
- def to_utf8_via_cmap(params)
281
+ #: (Integer | String | Array[Integer | String], PDF::Reader::CMap) -> String
282
+ def to_utf8_via_cmap(params, cmap)
219
283
  case params
220
284
  when Integer
221
285
  [
222
- @tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
286
+ cmap.decode(params)
223
287
  ].flatten.pack("U*")
224
288
  when String
225
- params.unpack(encoding.unpack).map { |c|
226
- @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
289
+ unpack_string_to_array_of_ints(params, encoding.unpack).map { |code_point|
290
+ cmap.decode(code_point)
227
291
  }.flatten.pack("U*")
228
292
  when Array
229
- params.collect { |param| to_utf8_via_cmap(param) }
230
- else
231
- params
293
+ params.collect { |param| to_utf8_via_cmap(param, cmap) }.join("")
232
294
  end
233
295
  end
234
296
 
297
+ #: (Integer | String | Array[Integer | String]) -> String
235
298
  def to_utf8_via_encoding(params)
236
299
  if encoding.kind_of?(String)
237
300
  raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
@@ -243,11 +306,15 @@ class PDF::Reader
243
306
  when String
244
307
  encoding.to_utf8(params)
245
308
  when Array
246
- params.collect { |param| to_utf8_via_encoding(param) }
247
- else
248
- params
309
+ params.collect { |param| to_utf8_via_encoding(param) }.join("")
249
310
  end
250
311
  end
251
312
 
313
+ #: (String, String) -> Array[Integer]
314
+ def unpack_string_to_array_of_ints(unpack_me, unpack_arg)
315
+ unpack_me.unpack(unpack_arg).map { |code_point|
316
+ code_point = TypeCheck.cast_to_int!(code_point)
317
+ }
318
+ end
252
319
  end
253
320
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'ttfunk'
@@ -9,29 +9,75 @@ class PDF::Reader
9
9
  # Font descriptors are outlined in Section 9.8, PDF 32000-1:2008, pp 281-288
10
10
  class FontDescriptor
11
11
 
12
- attr_reader :font_name, :font_family, :font_stretch, :font_weight,
13
- :font_bounding_box, :cap_height, :ascent, :descent, :leading,
14
- :avg_width, :max_width, :missing_width, :italic_angle, :stem_v,
15
- :x_height, :font_flags
12
+ #: String
13
+ attr_reader :font_name
16
14
 
15
+ #: String?
16
+ attr_reader :font_family
17
+
18
+ #: Symbol
19
+ attr_reader :font_stretch
20
+
21
+ #: Numeric
22
+ attr_reader :font_weight
23
+
24
+ #: Array[Numeric]
25
+ attr_reader :font_bounding_box
26
+
27
+ #: Numeric
28
+ attr_reader :cap_height
29
+
30
+ #: Numeric
31
+ attr_reader :ascent
32
+
33
+ #: Numeric
34
+ attr_reader :descent
35
+
36
+ #: Numeric
37
+ attr_reader :leading
38
+
39
+ #: Numeric
40
+ attr_reader :avg_width
41
+
42
+ #: Numeric
43
+ attr_reader :max_width
44
+
45
+ #: Numeric
46
+ attr_reader :missing_width
47
+
48
+ #: Numeric?
49
+ attr_reader :italic_angle
50
+
51
+ #: Numeric?
52
+ attr_reader :stem_v
53
+
54
+ #: Numeric?
55
+ attr_reader :x_height
56
+
57
+ #: Integer
58
+ attr_reader :font_flags
59
+
60
+ #: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
17
61
  def initialize(ohash, fd_hash)
18
62
  # TODO change these to typed derefs
19
- @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
- @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
- @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
- @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
- @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
- @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
- @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
- @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
- @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
- @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
- @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
- @stem_v = ohash.deref_number(fd_hash[:StemV])
31
- @x_height = ohash.deref_number(fd_hash[:XHeight])
32
- @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
- @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
- @font_family = ohash.deref_string(fd_hash[:FontFamily])
63
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0 #: Numeric
64
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0 #: Numeric
65
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0 #: Numeric
66
+ @font_bounding_box = ohash.deref_array_of_numbers(
67
+ fd_hash[:FontBBox]
68
+ ) || [0,0,0,0] #: Array[Numeric]
69
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0 #: Numeric
70
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0 #: Numeric
71
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0 #: Integer
72
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle]) #: Numeric?
73
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s #: String
74
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0 #: Numeric
75
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0 #: Numeric
76
+ @stem_v = ohash.deref_number(fd_hash[:StemV]) #: Numeric?
77
+ @x_height = ohash.deref_number(fd_hash[:XHeight]) #: Numeric?
78
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal #: Symbol
79
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400 #: Numeric
80
+ @font_family = ohash.deref_string(fd_hash[:FontFamily]) #: String?
35
81
 
36
82
  # A FontDescriptor may have an embedded font program in FontFile
37
83
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -41,12 +87,15 @@ class PDF::Reader
41
87
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
42
88
  # 3) OpenType: OpenType Font Program
43
89
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
44
- @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
90
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2]) #: PDF::Reader::Stream?
45
91
  #TODO handle FontFile and FontFile3
92
+ @ttf_program_stream = nil #: TTFunk::File?
46
93
 
47
- @is_ttf = true if @font_program_stream
94
+ @is_ttf = @font_program_stream ? true : false #: bool
95
+ @glyph_to_pdf_sf = nil #: Numeric?
48
96
  end
49
97
 
98
+ #: (Integer) -> Numeric
50
99
  def glyph_width(char_code)
51
100
  if @is_ttf
52
101
  if ttf_program_stream.cmap.unicode.length > 0
@@ -56,13 +105,16 @@ class PDF::Reader
56
105
  end
57
106
  char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
58
107
  if char_metric
59
- return char_metric.advance_width
108
+ char_metric.advance_width
109
+ else
110
+ 0
60
111
  end
61
112
  end
62
113
  end
63
114
 
64
115
  # PDF states that a glyph is 1000 units wide, true type doesn't enforce
65
116
  # any behavior, but uses units/em to define how wide the 'M' is (the widest letter)
117
+ #: () -> Numeric
66
118
  def glyph_to_pdf_scale_factor
67
119
  if @is_ttf
68
120
  @glyph_to_pdf_sf ||= (1.0 / ttf_program_stream.header.units_per_em) * 1000.0
@@ -74,7 +126,10 @@ class PDF::Reader
74
126
 
75
127
  private
76
128
 
129
+ #: () -> TTFunk::File
77
130
  def ttf_program_stream
131
+ raise MalformedPDFError, "No font_program_stream" unless @font_program_stream
132
+
78
133
  @ttf_program_stream ||= TTFunk::File.new(@font_program_stream.unfiltered_data)
79
134
  end
80
135
  end
@@ -17,6 +17,7 @@ module PDF
17
17
  class FormXObject
18
18
  extend Forwardable
19
19
 
20
+ #: untyped
20
21
  attr_reader :xobject
21
22
 
22
23
  def_delegators :resources, :color_spaces
@@ -28,6 +29,7 @@ module PDF
28
29
  def_delegators :resources, :shadings
29
30
  def_delegators :resources, :xobjects
30
31
 
32
+ #: (untyped, untyped, ?Hash[untyped, untyped]) -> void
31
33
  def initialize(page, xobject, options = {})
32
34
  @page = page
33
35
  @objects = page.objects
@@ -42,10 +44,11 @@ module PDF
42
44
  # The values are a PDF::Reader::Font instances that provide access
43
45
  # to most available metrics for each font.
44
46
  #
47
+ #: () -> untyped
45
48
  def font_objects
46
49
  raw_fonts = @objects.deref_hash(fonts)
47
50
  ::Hash[raw_fonts.map { |label, font|
48
- [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
51
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
49
52
  }]
50
53
  end
51
54
 
@@ -54,13 +57,18 @@ module PDF
54
57
  #
55
58
  # See the comments on PDF::Reader::Page#walk for more detail.
56
59
  #
60
+ #: (*untyped) -> untyped
57
61
  def walk(*receivers)
62
+ receivers = receivers.map { |receiver|
63
+ ValidatingReceiver.new(receiver)
64
+ }
58
65
  content_stream(receivers, raw_content)
59
66
  end
60
67
 
61
68
  # returns the raw content stream for this page. This is plumbing, nothing to
62
69
  # see here unless you're a PDF nerd like me.
63
70
  #
71
+ #: () -> untyped
64
72
  def raw_content
65
73
  @xobject.unfiltered_data
66
74
  end
@@ -69,24 +77,29 @@ module PDF
69
77
 
70
78
  # Returns the resources that accompany this form.
71
79
  #
80
+ #: () -> untyped
72
81
  def resources
73
82
  @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
74
83
  end
75
84
 
85
+ #: (untyped, untyped, ?Array[untyped]) -> untyped
76
86
  def callback(receivers, name, params=[])
77
87
  receivers.each do |receiver|
78
88
  receiver.send(name, *params) if receiver.respond_to?(name)
79
89
  end
80
90
  end
81
91
 
92
+ #: () -> untyped
82
93
  def content_stream_md5
83
94
  @content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
84
95
  end
85
96
 
97
+ #: () -> untyped
86
98
  def cached_tokens_key
87
99
  @cached_tokens_key ||= "tokens-#{content_stream_md5}"
88
100
  end
89
101
 
102
+ #: () -> untyped
90
103
  def tokens
91
104
  @cache[cached_tokens_key] ||= begin
92
105
  buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
@@ -99,6 +112,7 @@ module PDF
99
112
  end
100
113
  end
101
114
 
115
+ #: (untyped, untyped) -> untyped
102
116
  def content_stream(receivers, instructions)
103
117
  params = []
104
118
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -32,11 +32,38 @@ class PDF::Reader
32
32
  # The mapping is read from a data file on disk the first time it's needed.
33
33
  #
34
34
  class GlyphHash # :nodoc:
35
+ @@by_codepoint_cache = nil #: Hash[Integer, Array[Symbol]] | nil
36
+ @@by_name_cache = nil #: Hash[Symbol, Integer] | nil
37
+
38
+ # An internal class for returning multiple pieces of data and keep sorbet happy
39
+ class ReturnData
40
+ #: Hash[Symbol, Integer]
41
+ attr_reader :by_name
42
+
43
+ #: Hash[Integer, Array[Symbol]]
44
+ attr_reader :by_codepoint
45
+
46
+ #:(Hash[Symbol, Integer], Hash[Integer, Array[Symbol]]) -> void
47
+ def initialize(by_name, by_codepoint)
48
+ @by_name = by_name
49
+ @by_codepoint = by_codepoint
50
+ end
51
+ end
52
+
53
+ #: () -> void
35
54
  def initialize
55
+ @@by_codepoint_cache ||= nil
56
+ @@by_name_cache ||= nil
57
+
36
58
  # only parse the glyph list once, and cache the results (for performance)
37
- adobe = @@cache ||= load_adobe_glyph_mapping
38
- @by_name = adobe.first
39
- @by_codepoint = adobe.last
59
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
60
+ @by_name = @@by_name_cache #: Hash[Symbol, Integer]
61
+ @by_codepoint = @@by_codepoint_cache #: Hash[Integer, Array[Symbol]]
62
+ else
63
+ res = load_adobe_glyph_mapping
64
+ @by_name = @@by_name_cache ||= res.by_name
65
+ @by_codepoint = @@by_codepoint_cache ||= res.by_codepoint
66
+ end
40
67
  end
41
68
 
42
69
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -59,6 +86,7 @@ class PDF::Reader
59
86
  # h.name_to_unicode(:34)
60
87
  # => 34
61
88
  #
89
+ #: (Symbol | nil) -> (Integer | nil)
62
90
  def name_to_unicode(name)
63
91
  return nil unless name.is_a?(Symbol)
64
92
 
@@ -96,6 +124,7 @@ class PDF::Reader
96
124
  # h.unicode_to_name(34)
97
125
  # => [:34]
98
126
  #
127
+ #: (Integer) -> Array[Symbol]
99
128
  def unicode_to_name(codepoint)
100
129
  @by_codepoint[codepoint.to_i] || []
101
130
  end
@@ -105,9 +134,10 @@ class PDF::Reader
105
134
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
106
135
  # a text file supplied by Adobe at:
107
136
  # https://github.com/adobe-type-tools/agl-aglfn
137
+ #: () -> ReturnData
108
138
  def load_adobe_glyph_mapping
109
- keyed_by_name = {}
110
- keyed_by_codepoint = {}
139
+ keyed_by_name = {} #: Hash[Symbol, Integer]
140
+ keyed_by_codepoint = {} #: Hash[Integer, Array[Symbol]]
111
141
 
112
142
  paths = [
113
143
  File.dirname(__FILE__) + "/glyphlist.txt",
@@ -121,13 +151,16 @@ class PDF::Reader
121
151
  cp = "0x#{code}".hex
122
152
  keyed_by_name[name.to_sym] = cp
123
153
  keyed_by_codepoint[cp] ||= []
124
- keyed_by_codepoint[cp] << name.to_sym
154
+ arr = keyed_by_codepoint[cp]
155
+ if arr
156
+ arr.push(name.to_sym)
157
+ end
125
158
  end
126
159
  end
127
160
  end
128
161
  end
129
162
 
130
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
163
+ ReturnData.new(keyed_by_name.freeze, keyed_by_codepoint.freeze)
131
164
  end
132
165
 
133
166
  end
@@ -16,20 +16,21 @@ class PDF::Reader
16
16
  #
17
17
  class KeyBuilderV5
18
18
 
19
+ #: (?Hash[Symbol, String]) -> void
19
20
  def initialize(opts = {})
20
- @key_length = 256
21
+ @key_length = 256 #: Integer
21
22
 
22
23
  # hash(32B) + validation salt(8B) + key salt(8B)
23
- @owner_key = opts[:owner_key] || ""
24
+ @owner_key = opts[:owner_key] || "" #: String
24
25
 
25
26
  # hash(32B) + validation salt(8B) + key salt(8B)
26
- @user_key = opts[:user_key] || ""
27
+ @user_key = opts[:user_key] || "" #: String
27
28
 
28
29
  # decryption key, encrypted w/ owner password
29
- @owner_encryption_key = opts[:owner_encryption_key] || ""
30
+ @owner_encryption_key = opts[:owner_encryption_key] || "" #: String
30
31
 
31
32
  # decryption key, encrypted w/ user password
32
- @user_encryption_key = opts[:user_encryption_key] || ""
33
+ @user_encryption_key = opts[:user_encryption_key] || "" #: String
33
34
  end
34
35
 
35
36
  # Takes a string containing a user provided password.
@@ -38,6 +39,7 @@ class PDF::Reader
38
39
  # decrypting the file will be returned. If the password doesn't match the file,
39
40
  # and exception will be raised.
40
41
  #
42
+ #: (String) -> String
41
43
  def key(pass)
42
44
  pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes
43
45
 
@@ -58,28 +60,31 @@ class PDF::Reader
58
60
  #
59
61
  # if the string is a valid user/owner password, this will return the decryption key
60
62
  #
63
+ #: (String) -> (String | nil)
61
64
  def auth_owner_pass(password)
62
- if Digest::SHA256.digest(password + @owner_key[32..39] + @user_key) == @owner_key[0..31]
65
+ if Digest::SHA256.digest(password + @owner_key[32..39].to_s + @user_key) == @owner_key[0..31]
63
66
  cipher = OpenSSL::Cipher.new('AES-256-CBC')
64
67
  cipher.decrypt
65
- cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1] + @user_key)
68
+ cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1].to_s + @user_key)
66
69
  cipher.iv = "\x00" * 16
67
70
  cipher.padding = 0
68
71
  cipher.update(@owner_encryption_key) + cipher.final
69
72
  end
70
73
  end
71
74
 
75
+ #: (String) -> (String | nil)
72
76
  def auth_user_pass(password)
73
- if Digest::SHA256.digest(password + @user_key[32..39]) == @user_key[0..31]
77
+ if Digest::SHA256.digest(password + @user_key[32..39].to_s) == @user_key[0..31]
74
78
  cipher = OpenSSL::Cipher.new('AES-256-CBC')
75
79
  cipher.decrypt
76
- cipher.key = Digest::SHA256.digest(password + @user_key[40..-1])
80
+ cipher.key = Digest::SHA256.digest(password + @user_key[40..-1].to_s)
77
81
  cipher.iv = "\x00" * 16
78
82
  cipher.padding = 0
79
83
  cipher.update(@user_encryption_key) + cipher.final
80
84
  end
81
85
  end
82
86
 
87
+ #: (String) -> (String | nil)
83
88
  def auth_owner_pass_r6(password)
84
89
  if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
85
90
  cipher = OpenSSL::Cipher.new('AES-256-CBC')
@@ -91,6 +96,7 @@ class PDF::Reader
91
96
  end
92
97
  end
93
98
 
99
+ #: (String) -> (String | nil)
94
100
  def auth_user_pass_r6(password)
95
101
  if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
96
102
  cipher = OpenSSL::Cipher.new('AES-256-CBC')
@@ -104,6 +110,7 @@ class PDF::Reader
104
110
 
105
111
  # PDF 2.0 spec, 7.6.4.3.4
106
112
  # Algorithm 2.B: Computing a hash (revision 6 and later)
113
+ #: (String, String, ?String) -> String
107
114
  def r6_digest(password, salt, user_key = '')
108
115
  k = Digest::SHA256.digest(password + salt + user_key)
109
116
  e = ''
@@ -128,6 +135,7 @@ class PDF::Reader
128
135
  k[0, 32].to_s
129
136
  end
130
137
 
138
+ #: (String) -> Integer
131
139
  def unpack_128bit_bigendian_int(str)
132
140
  ints = str[0,16].to_s.unpack("N*")
133
141
  (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i