pdf-reader 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +22 -0
  3. data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  7. data/lib/pdf/reader/buffer.rb +35 -17
  8. data/lib/pdf/reader/cid_widths.rb +7 -1
  9. data/lib/pdf/reader/cmap.rb +14 -3
  10. data/lib/pdf/reader/encoding.rb +37 -12
  11. data/lib/pdf/reader/error.rb +6 -0
  12. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  13. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  14. data/lib/pdf/reader/filter/depredict.rb +4 -0
  15. data/lib/pdf/reader/filter/flate.rb +5 -2
  16. data/lib/pdf/reader/filter/lzw.rb +2 -0
  17. data/lib/pdf/reader/filter/null.rb +2 -0
  18. data/lib/pdf/reader/filter/run_length.rb +2 -0
  19. data/lib/pdf/reader/filter.rb +1 -0
  20. data/lib/pdf/reader/font.rb +90 -22
  21. data/lib/pdf/reader/font_descriptor.rb +76 -23
  22. data/lib/pdf/reader/form_xobject.rb +11 -0
  23. data/lib/pdf/reader/glyph_hash.rb +34 -9
  24. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  25. data/lib/pdf/reader/lzw.rb +17 -6
  26. data/lib/pdf/reader/no_text_filter.rb +1 -0
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +7 -2
  29. data/lib/pdf/reader/object_hash.rb +116 -9
  30. data/lib/pdf/reader/object_stream.rb +19 -2
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
  32. data/lib/pdf/reader/page.rb +41 -7
  33. data/lib/pdf/reader/page_layout.rb +25 -8
  34. data/lib/pdf/reader/page_state.rb +5 -2
  35. data/lib/pdf/reader/page_text_receiver.rb +6 -2
  36. data/lib/pdf/reader/pages_strategy.rb +1 -1
  37. data/lib/pdf/reader/parser.rb +51 -10
  38. data/lib/pdf/reader/point.rb +9 -2
  39. data/lib/pdf/reader/print_receiver.rb +2 -6
  40. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  41. data/lib/pdf/reader/rectangle.rb +24 -1
  42. data/lib/pdf/reader/reference.rb +10 -1
  43. data/lib/pdf/reader/register_receiver.rb +15 -2
  44. data/lib/pdf/reader/resources.rb +9 -0
  45. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  46. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  47. data/lib/pdf/reader/stream.rb +9 -3
  48. data/lib/pdf/reader/synchronized_cache.rb +5 -2
  49. data/lib/pdf/reader/text_run.rb +28 -1
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +33 -2
  52. data/lib/pdf/reader/type_check.rb +10 -3
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  54. data/lib/pdf/reader/validating_receiver.rb +29 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
  56. data/lib/pdf/reader/width_calculator/composite.rb +5 -1
  57. data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
  60. data/lib/pdf/reader/xref.rb +28 -7
  61. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  62. data/lib/pdf/reader.rb +18 -2
  63. data/rbi/pdf-reader.rbi +1502 -1594
  64. metadata +17 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7174f6e8c3c655cc9a1c120e5f0b99d06b0c2355803480d1cf8347c1825ddb01
4
- data.tar.gz: ade7c031fe3c3d6e022125ccd3ef65a9482e21a7d90e6ea9d9d65aeeee3b30e8
3
+ metadata.gz: 1be615eb6abc5557e61ba53958c7211fac0f1528e75dc54eff27ffb5554d7c80
4
+ data.tar.gz: 875221f31dc119cd0f7ae3cc0246b3bbb70f6127c0047ec924c8030e9186b55b
5
5
  SHA512:
6
- metadata.gz: '08d343015a23dd678264053ade37a3449c91bdfda9764c65bc6ae196529062c33272ffe191beaea9f1e20d31cb1de9c117535e27fee763968e822288324931c6'
7
- data.tar.gz: 8b0df463cc6292048f0ad68dd682131d53a4bc9630f84841103f8c73a6c837e91cdf5af08f98c255bcefc7d2a67146387d26ab0c3cbece42853f2581981a613b
6
+ metadata.gz: 4b4501ca72d06b5a569fdcc77f384131fbd85342f8da7a084a02210ec7a3821e8b9f1cad88685262d0cc4e993f7b0031bed5d510c353c7d8fb5fe28f97a2ea83
7
+ data.tar.gz: a4fe329f2d8ae7cc295cb17d573963ddab6c0cde52d6524ad182f4651dab8ba90215bcb1ecf60c7fcf248135aed152b50a1d34afa03b270b93c5a172ac4048b3
data/CHANGELOG CHANGED
@@ -1,3 +1,25 @@
1
+ v2.15.0 (13th August 2025)
2
+
3
+ - Overhaul sorbet types, moving from an external RBI file to inline comments in RBS syntax
4
+ - multiple PRs, but mainly https://github.com/yob/pdf-reader/pull/562
5
+ - See https://railsatscale.com/2025-04-23-rbs-support-for-sorbet/
6
+ - No impact expected for most users, but projects that use sorbet may find subtle changes in
7
+ the RBI file that is shipped with the gem
8
+ - Relax version requirements for dependency `afm`, allow 1.x (https://github.com/yob/pdf-reader/pull/557)
9
+ - Improve text positioning logic in some PDFs (https://github.com/yob/pdf-reader/pull/554)
10
+ - Multiple fixes for encrypted files
11
+ - Some files with passwords > 32 bytes long (https://github.com/yob/pdf-reader/pull/555)
12
+ - Some files that contain cipher text with a 16 byte IV and no further blocks (https://github.com/yob/pdf-reader/pull/561)
13
+ - Some files that contain data encrypted with no padding (https://github.com/yob/pdf-reader/pull/564)
14
+ - Add jruby 10 to CI matrix (https://github.com/yob/pdf-reader/pull/552)
15
+
16
+ v2.14.1 (4th February 2025)
17
+ - Fix issue in RBI signatures, introduced in v2.14.0 (https://github.com/yob/pdf-reader/pull/550)
18
+
19
+ v2.14.0 (29th January 2025)
20
+ - Raise minimum supported ruby to 2.1 (https://github.com/yob/pdf-reader/pull/543)
21
+ - Add support for filtering to Page#text (https://github.com/yob/pdf-reader/pull/545)
22
+
1
23
  v2.13.0 (2nd November 2024)
2
24
  - Permit Ascii85 v1.0 and v2.0 (https://github.com/yob/pdf-reader/pull/539)
3
25
  - Allow StringIO type for PDF::Reader input (https://github.com/yob/pdf-reader/pull/535)
@@ -46,28 +46,37 @@ class PDF::Reader
46
46
  less_than_or_equal
47
47
  include
48
48
  exclude
49
- ]
49
+ ] #: Array[Symbol]
50
50
 
51
+ #: (Array[PDF::Reader::TextRun], Hash[Symbol, untyped]) -> Array[PDF::Reader::TextRun]
51
52
  def self.only(text_runs, filter_hash)
52
53
  new(text_runs, filter_hash).only
53
54
  end
54
55
 
56
+ #: (Array[PDF::Reader::TextRun], Hash[Symbol, untyped]) -> Array[PDF::Reader::TextRun]
55
57
  def self.exclude(text_runs, filter_hash)
56
58
  new(text_runs, filter_hash).exclude
57
59
  end
58
60
 
59
- attr_reader :text_runs, :filter_hash
61
+ #: Array[PDF::Reader::TextRun]
62
+ attr_reader :text_runs
60
63
 
64
+ #: Hash[Symbol, untyped]
65
+ attr_reader :filter_hash
66
+
67
+ #: (Array[PDF::Reader::TextRun], Hash[Symbol, untyped]) -> void
61
68
  def initialize(text_runs, filter_hash)
62
69
  @text_runs = text_runs
63
70
  @filter_hash = filter_hash
64
71
  end
65
72
 
73
+ #: () -> Array[PDF::Reader::TextRun]
66
74
  def only
67
75
  return text_runs if filter_hash.empty?
68
76
  text_runs.select { |text_run| evaluate_filter(text_run) }
69
77
  end
70
78
 
79
+ #: () -> Array[PDF::Reader::TextRun]
71
80
  def exclude
72
81
  return text_runs if filter_hash.empty?
73
82
  text_runs.reject { |text_run| evaluate_filter(text_run) }
@@ -75,6 +84,7 @@ class PDF::Reader
75
84
 
76
85
  private
77
86
 
87
+ #: (PDF::Reader::TextRun) -> bool
78
88
  def evaluate_filter(text_run)
79
89
  if filter_hash[:or]
80
90
  evaluate_or_filters(text_run, filter_hash[:or])
@@ -85,24 +95,28 @@ class PDF::Reader
85
95
  end
86
96
  end
87
97
 
98
+ #: (PDF::Reader::TextRun, Array[Hash[Symbol, untyped]]) -> bool
88
99
  def evaluate_or_filters(text_run, conditions)
89
100
  conditions.any? do |condition|
90
101
  evaluate_filters(text_run, condition)
91
102
  end
92
103
  end
93
104
 
105
+ #: (PDF::Reader::TextRun, Array[Hash[Symbol, untyped]]) -> bool
94
106
  def evaluate_and_filters(text_run, conditions)
95
107
  conditions.all? do |condition|
96
108
  evaluate_filters(text_run, condition)
97
109
  end
98
110
  end
99
111
 
112
+ #: (PDF::Reader::TextRun, Hash[Symbol, untyped]) -> bool
100
113
  def evaluate_filters(text_run, filter_hash)
101
114
  filter_hash.all? do |attribute, conditions|
102
115
  evaluate_attribute_conditions(text_run, attribute, conditions)
103
116
  end
104
117
  end
105
118
 
119
+ #: (PDF::Reader::TextRun, Symbol, Hash[Symbol, untyped]) -> bool
106
120
  def evaluate_attribute_conditions(text_run, attribute, conditions)
107
121
  conditions.all? do |operator, value|
108
122
  unless VALID_OPERATORS.include?(operator)
@@ -113,6 +127,7 @@ class PDF::Reader
113
127
  end
114
128
  end
115
129
 
130
+ #: (untyped, Symbol, untyped) -> bool
116
131
  def apply_operator(attribute_value, operator, filter_value)
117
132
  case operator
118
133
  when :equal
@@ -11,6 +11,7 @@ class PDF::Reader
11
11
  #
12
12
  class AesV2SecurityHandler
13
13
 
14
+ #: (String) -> void
14
15
  def initialize(key)
15
16
  @encrypt_key = key
16
17
  end
@@ -21,10 +22,38 @@ class PDF::Reader
21
22
  #
22
23
  # version == 4 and CFM == AESV2
23
24
  #
25
+ # used to decrypt PDF streams (buf). Input data should be in bytesizes of
26
+ # a multiple of 16, anything else is an error. The first 16 bytes are the initialization
27
+ # vector, so any input of exactly 16 bytes decrypts to an empty string
28
+ #
24
29
  # buf - a string to decrypt
25
30
  # ref - a PDF::Reader::Reference for the object to decrypt
26
31
  #
32
+ #: (String, PDF::Reader::Reference) -> String
27
33
  def decrypt( buf, ref )
34
+ if buf.bytesize % 16 > 0
35
+ raise PDF::Reader::MalformedPDFError.new("Ciphertext not a multiple of 16")
36
+ elsif buf.bytesize == 16
37
+ return ""
38
+ else
39
+ begin
40
+ internal_decrypt(buf, ref)
41
+ rescue OpenSSL::Cipher::CipherError
42
+ # If we failed to decrypt it might be a padding error, so try again
43
+ # and assume no padding in the ciphertext. This will "suceed" but might
44
+ # return garbage if the key is incorrect but that's OK - well before this
45
+ # class is used we have confirmed the user provided key is correct so if
46
+ # this works without error we can be confident the returned plaintext is
47
+ # correct
48
+ internal_decrypt(buf, ref, false)
49
+ end
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ #: (String, PDF::Reader::Reference, ?bool) -> String
56
+ def internal_decrypt(buf, ref, padding = true)
28
57
  objKey = @encrypt_key.dup
29
58
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
30
59
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
@@ -32,6 +61,7 @@ class PDF::Reader
32
61
  length = objKey.length < 16 ? objKey.length : 16
33
62
  cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
34
63
  cipher.decrypt
64
+ cipher.padding = 0 unless padding
35
65
  cipher.key = Digest::MD5.digest(objKey)[0,length]
36
66
  cipher.iv = buf[0..15]
37
67
  cipher.update(buf[16..-1]) + cipher.final
@@ -12,27 +12,59 @@ class PDF::Reader
12
12
  #
13
13
  class AesV3SecurityHandler
14
14
 
15
+ #: (String) -> void
15
16
  def initialize(key)
17
+ if key.bytesize != 32
18
+ raise PDF::Reader::MalformedPDFError.new(
19
+ "AES-256 key must be exactly 32 bytes, got #{key.bytesize}"
20
+ )
21
+ end
16
22
  @encrypt_key = key
17
- @cipher = "AES-256-CBC"
23
+ @cipher = "AES-256-CBC" #: String
18
24
  end
19
25
 
20
26
  ##7.6.2 General Encryption Algorithm
21
27
  #
22
28
  # Algorithm 1: Encryption of data using the RC4 or AES algorithms
23
29
  #
24
- # used to decrypt RC4/AES encrypted PDF streams (buf)
30
+ # used to decrypt RC4/AES encrypted PDF streams (buf). Input data should be in bytesizes of
31
+ # a multiple of 16, anything else is an error. The first 16 bytes are the initialization
32
+ # vector, so any input of exactly 16 bytes decrypts to an empty string
25
33
  #
26
34
  # buf - a string to decrypt
27
35
  # ref - a PDF::Reader::Reference for the object to decrypt
28
36
  #
37
+ #: (String, PDF::Reader::Reference) -> String
29
38
  def decrypt( buf, ref )
39
+ if buf.bytesize % 16 > 0
40
+ raise PDF::Reader::MalformedPDFError.new("Ciphertext not a multiple of 16")
41
+ elsif buf.bytesize == 16
42
+ return ""
43
+ else
44
+ begin
45
+ internal_decrypt(buf, ref)
46
+ rescue OpenSSL::Cipher::CipherError
47
+ # If we failed to decrypt it might be a padding error, so try again
48
+ # and assume no padding in the ciphertext. This will "succeed" but might
49
+ # return garbage if the key is incorrect but that's OK - well before this
50
+ # class is used we have confirmed the user provided key is correct so if
51
+ # this works without error we can be confident the returned plaintext is
52
+ # correct
53
+ internal_decrypt(buf, ref, false)
54
+ end
55
+ end
56
+ end
57
+
58
+ private
59
+
60
+ #: (String, PDF::Reader::Reference, ?bool) -> String
61
+ def internal_decrypt(buf, ref, padding = true)
30
62
  cipher = OpenSSL::Cipher.new(@cipher)
31
63
  cipher.decrypt
64
+ cipher.padding = 0 unless padding
32
65
  cipher.key = @encrypt_key.dup
33
66
  cipher.iv = buf[0..15]
34
67
  cipher.update(buf[16..-1]) + cipher.final
35
68
  end
36
-
37
69
  end
38
70
  end
@@ -8,6 +8,7 @@ class PDF::Reader
8
8
  # MediaBox or CropBox, but could be a user specified rectangle too
9
9
  class BoundingRectangleRunsFilter
10
10
 
11
+ #: (Array[PDF::Reader::TextRun], PDF::Reader::Rectangle) -> Array[PDF::Reader::TextRun]
11
12
  def self.runs_within_rect(runs, rect)
12
13
  runs.select { |run| rect.contains?(run.origin) }
13
14
  end
@@ -38,30 +38,31 @@ class PDF::Reader
38
38
  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
39
39
  #
40
40
  class Buffer
41
- TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
42
- TOKEN_DELIMITER=[0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F]
41
+ TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20] #: Array[Integer]
42
+ TOKEN_DELIMITER=[0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F] #: Array[Integer]
43
43
 
44
44
  # some strings for comparisons. Declaring them here avoids creating new
45
45
  # strings that need GC over and over
46
- LEFT_PAREN = "("
47
- LESS_THAN = "<"
48
- STREAM = "stream"
49
- ID = "ID"
50
- FWD_SLASH = "/"
51
- NULL_BYTE = "\x00"
52
- CR = "\r"
53
- LF = "\n"
54
- CRLF = "\r\n"
55
- WHITE_SPACE = ["\n", "\r", ' ']
46
+ LEFT_PAREN = "(" #: String
47
+ LESS_THAN = "<" #: String
48
+ STREAM = "stream" #: String
49
+ ID = "ID" #: String
50
+ FWD_SLASH = "/" #: String
51
+ NULL_BYTE = "\x00" #: String
52
+ CR = "\r" #: String
53
+ LF = "\n" #: String
54
+ CRLF = "\r\n" #: String
55
+ WHITE_SPACE = ["\n", "\r", ' '] #: Array[String]
56
56
 
57
57
  # Quite a few PDFs have trailing junk.
58
58
  # This can be several k of nuls in some cases
59
59
  # Allow for this here
60
- TRAILING_BYTECOUNT = 5000
60
+ TRAILING_BYTECOUNT = 5000 #: Integer
61
61
 
62
62
  # must match whole tokens
63
- DIGITS_ONLY = %r{\A\d+\z}
63
+ DIGITS_ONLY = %r{\A\d+\z} #: Regexp
64
64
 
65
+ #: Integer
65
66
  attr_reader :pos
66
67
 
67
68
  # Creates a new buffer.
@@ -76,17 +77,19 @@ class PDF::Reader
76
77
  # :content_stream - set to true if buffer will be tokenising a
77
78
  # content stream. Defaults to false
78
79
  #
80
+ #: ((StringIO | Tempfile | IO), ?Hash[Symbol, untyped]) -> void
79
81
  def initialize(io, opts = {})
80
82
  @io = io
81
- @tokens = []
82
- @in_content_stream = opts[:content_stream]
83
+ @tokens = [] #: Array[String | PDF::Reader::Reference]
84
+ @in_content_stream = opts[:content_stream] #: bool
83
85
 
84
86
  @io.seek(opts[:seek]) if opts[:seek]
85
- @pos = @io.pos
87
+ @pos = @io.pos #: Integer
86
88
  end
87
89
 
88
90
  # return true if there are no more tokens left
89
91
  #
92
+ #: () -> bool
90
93
  def empty?
91
94
  prepare_tokens if @tokens.size < 3
92
95
 
@@ -105,6 +108,7 @@ class PDF::Reader
105
108
  # Skipping a bare CR is not spec-compliant.
106
109
  # This is because the data may start with LF.
107
110
  # However we check for CRLF first, so the ambiguity is avoided.
111
+ #: (Integer, ?Hash[Symbol, untyped]) -> String?
108
112
  def read(bytes, opts = {})
109
113
  reset_pos
110
114
 
@@ -130,6 +134,7 @@ class PDF::Reader
130
134
  # return the next token from the source. Returns a string if a token
131
135
  # is found, nil if there are no tokens left.
132
136
  #
137
+ #: () -> (nil | String | PDF::Reader::Reference)
133
138
  def token
134
139
  reset_pos
135
140
  prepare_tokens if @tokens.size < 3
@@ -141,6 +146,7 @@ class PDF::Reader
141
146
 
142
147
  # return the byte offset where the first XRef table in the source can be found.
143
148
  #
149
+ #: () -> Integer
144
150
  def find_first_xref_offset
145
151
  check_size_is_non_zero
146
152
  @io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END) rescue @io.seek(0)
@@ -164,6 +170,7 @@ class PDF::Reader
164
170
 
165
171
  private
166
172
 
173
+ #: () -> void
167
174
  def check_size_is_non_zero
168
175
  @io.seek(-1, IO::SEEK_END)
169
176
  @io.seek(0)
@@ -173,12 +180,14 @@ class PDF::Reader
173
180
 
174
181
  # Returns true if this buffer is parsing a content stream
175
182
  #
183
+ #: () -> bool
176
184
  def in_content_stream?
177
185
  @in_content_stream ? true : false
178
186
  end
179
187
 
180
188
  # Some bastard moved our IO stream cursor. Restore it.
181
189
  #
190
+ #: () -> void
182
191
  def reset_pos
183
192
  @io.seek(@pos) if @io.pos != @pos
184
193
  end
@@ -186,12 +195,14 @@ class PDF::Reader
186
195
  # save the current position of the source IO stream. If someone else (like another buffer)
187
196
  # moves the cursor, we can then restore it.
188
197
  #
198
+ #: () -> void
189
199
  def save_pos
190
200
  @pos = @io.pos
191
201
  end
192
202
 
193
203
  # attempt to prime the buffer with the next few tokens.
194
204
  #
205
+ #: () -> void
195
206
  def prepare_tokens
196
207
  10.times do
197
208
  case state
@@ -208,6 +219,7 @@ class PDF::Reader
208
219
  # tokenising behaves slightly differently based on the current context.
209
220
  # Determine the current context/state by examining the last token we found
210
221
  #
222
+ #: () -> Symbol
211
223
  def state
212
224
  case @tokens.last
213
225
  when LEFT_PAREN then :literal_string
@@ -236,6 +248,7 @@ class PDF::Reader
236
248
  # indirect reference, so test for that case first and avoid the relatively
237
249
  # expensive regexp checks if possible.
238
250
  #
251
+ #: () -> void
239
252
  def merge_indirect_reference
240
253
  return if @tokens.size < 3
241
254
  return if @tokens[2] != "R"
@@ -253,6 +266,7 @@ class PDF::Reader
253
266
  # If the EI follows white-space the space is dropped from the data
254
267
  # The EI must be followed by white-space or end of buffer
255
268
  # This is to reduce the chance of accidentally matching an embedded EI
269
+ #: () -> void
256
270
  def prepare_inline_token
257
271
  idstart = @io.pos
258
272
  prevchr = ''
@@ -299,6 +313,7 @@ class PDF::Reader
299
313
  # if we're currently inside a hex string, read hex nibbles until
300
314
  # we find a closing >
301
315
  #
316
+ #: () -> void
302
317
  def prepare_hex_token
303
318
  str = "".dup
304
319
 
@@ -328,6 +343,7 @@ class PDF::Reader
328
343
  # processing to fix things like escaped new lines, but that's someone else's
329
344
  # problem.
330
345
  #
346
+ #: () -> void
331
347
  def prepare_literal_token
332
348
  str = "".dup
333
349
  count = 1
@@ -358,6 +374,7 @@ class PDF::Reader
358
374
  # What each byte means is complex, check out section "3.1.1 Character Set" of the 1.7 spec
359
375
  # to read up on it.
360
376
  #
377
+ #: () -> void
361
378
  def prepare_regular_token
362
379
  tok = "".dup
363
380
 
@@ -435,6 +452,7 @@ class PDF::Reader
435
452
  # peek at the next character in the io stream, leaving the stream position
436
453
  # untouched
437
454
  #
455
+ #: () -> (Integer | nil)
438
456
  def peek_byte
439
457
  byte = @io.getbyte
440
458
  @io.seek(-1, IO::SEEK_CUR) if byte
@@ -18,12 +18,14 @@ class PDF::Reader
18
18
  # Graphics State Operators
19
19
  def_delegators :@widths, :[], :fetch
20
20
 
21
+ #: (Numeric, Array[Numeric]) -> void
21
22
  def initialize(default, array)
22
- @widths = parse_array(default, array.dup)
23
+ @widths = parse_array(default, array.dup) #: Hash[Numeric, Numeric]
23
24
  end
24
25
 
25
26
  private
26
27
 
28
+ #: (Numeric, Array[Numeric]) -> Hash[Numeric, Numeric]
27
29
  def parse_array(default, array)
28
30
  widths = Hash.new(default)
29
31
  params = []
@@ -43,6 +45,8 @@ class PDF::Reader
43
45
 
44
46
  # this is the form 10 [234 63 234 346 47 234] where width of index 10 is
45
47
  # 234, index 11 is 63, etc
48
+ #
49
+ #: (Integer, Array[Numeric]) -> Hash[Numeric, Numeric]
46
50
  def parse_first_form(first, widths)
47
51
  widths.inject({}) { |accum, glyph_width|
48
52
  accum[first + accum.size] = glyph_width
@@ -51,6 +55,8 @@ class PDF::Reader
51
55
  end
52
56
 
53
57
  # this is the form 10 20 123 where all index between 10 and 20 have width 123
58
+ #
59
+ #: (Integer, Integer, Numeric) -> Hash[Numeric, Numeric]
54
60
  def parse_second_form(first, final, width)
55
61
  if first > final
56
62
  raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -44,15 +44,18 @@ class PDF::Reader
44
44
  "begin" => :noop,
45
45
  "begincmap" => :noop,
46
46
  "def" => :noop
47
- }
47
+ } #: Hash[String, Symbol]
48
48
 
49
+ #: Hash[Integer, Array[Integer]]
49
50
  attr_reader :map
50
51
 
52
+ #: (String) -> void
51
53
  def initialize(data)
52
- @map = {}
54
+ @map = {} #: Hash[Integer, Array[Integer]]
53
55
  process_data(data)
54
56
  end
55
57
 
58
+ #: () -> Integer
56
59
  def size
57
60
  @map.size
58
61
  end
@@ -61,12 +64,14 @@ class PDF::Reader
61
64
  #
62
65
  # Returns an array of Integers.
63
66
  #
67
+ #: (Integer) -> Array[Integer]
64
68
  def decode(c)
65
69
  @map.fetch(c, [])
66
70
  end
67
71
 
68
72
  private
69
73
 
74
+ #: (String, ?Symbol) -> void
70
75
  def process_data(data, initial_mode = :none)
71
76
  parser = build_parser(data)
72
77
  mode = initial_mode
@@ -96,6 +101,7 @@ class PDF::Reader
96
101
  end
97
102
 
98
103
 
104
+ #: (String) -> PDF::Reader::Parser
99
105
  def build_parser(instructions)
100
106
  buffer = Buffer.new(StringIO.new(instructions))
101
107
  Parser.new(buffer)
@@ -109,6 +115,7 @@ class PDF::Reader
109
115
  # However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
110
116
  # exception when we try converting broken UTF-16 to UTF-8
111
117
  #
118
+ #: (String) -> Array[Integer]
112
119
  def str_to_int(str)
113
120
  unpacked_string = if str.bytesize == 1 # UTF-8
114
121
  str.unpack("C*")
@@ -133,6 +140,7 @@ class PDF::Reader
133
140
  result
134
141
  end
135
142
 
143
+ #: (Array[String]) -> void
136
144
  def process_bfchar_instructions(instructions)
137
145
  instructions.each_slice(2) do |one, two|
138
146
  find = str_to_int(one.to_s)
@@ -143,6 +151,7 @@ class PDF::Reader
143
151
  end
144
152
  end
145
153
 
154
+ #: (Array[Array[String] | String]) -> void
146
155
  def process_bfrange_instructions(instructions)
147
156
  instructions.each_slice(3) do |start, finish, to|
148
157
  if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
@@ -155,6 +164,7 @@ class PDF::Reader
155
164
  end
156
165
  end
157
166
 
167
+ #: (String, String, String) -> void
158
168
  def bfrange_type_one(start_code, end_code, dst)
159
169
  start_code = str_to_int(start_code).first
160
170
  end_code = str_to_int(end_code).first
@@ -168,6 +178,7 @@ class PDF::Reader
168
178
  end
169
179
  end
170
180
 
181
+ #: (String, String, Array[String]) -> void
171
182
  def bfrange_type_two(start_code, end_code, dst)
172
183
  start_code = str_to_int(start_code).first
173
184
  end_code = str_to_int(end_code).first