pdf-reader 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +22 -0
  3. data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  7. data/lib/pdf/reader/buffer.rb +35 -17
  8. data/lib/pdf/reader/cid_widths.rb +7 -1
  9. data/lib/pdf/reader/cmap.rb +14 -3
  10. data/lib/pdf/reader/encoding.rb +37 -12
  11. data/lib/pdf/reader/error.rb +6 -0
  12. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  13. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  14. data/lib/pdf/reader/filter/depredict.rb +4 -0
  15. data/lib/pdf/reader/filter/flate.rb +5 -2
  16. data/lib/pdf/reader/filter/lzw.rb +2 -0
  17. data/lib/pdf/reader/filter/null.rb +2 -0
  18. data/lib/pdf/reader/filter/run_length.rb +2 -0
  19. data/lib/pdf/reader/filter.rb +1 -0
  20. data/lib/pdf/reader/font.rb +90 -22
  21. data/lib/pdf/reader/font_descriptor.rb +76 -23
  22. data/lib/pdf/reader/form_xobject.rb +11 -0
  23. data/lib/pdf/reader/glyph_hash.rb +34 -9
  24. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  25. data/lib/pdf/reader/lzw.rb +17 -6
  26. data/lib/pdf/reader/no_text_filter.rb +1 -0
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +7 -2
  29. data/lib/pdf/reader/object_hash.rb +116 -9
  30. data/lib/pdf/reader/object_stream.rb +19 -2
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
  32. data/lib/pdf/reader/page.rb +41 -7
  33. data/lib/pdf/reader/page_layout.rb +25 -8
  34. data/lib/pdf/reader/page_state.rb +5 -2
  35. data/lib/pdf/reader/page_text_receiver.rb +6 -2
  36. data/lib/pdf/reader/pages_strategy.rb +1 -1
  37. data/lib/pdf/reader/parser.rb +51 -10
  38. data/lib/pdf/reader/point.rb +9 -2
  39. data/lib/pdf/reader/print_receiver.rb +2 -6
  40. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  41. data/lib/pdf/reader/rectangle.rb +24 -1
  42. data/lib/pdf/reader/reference.rb +10 -1
  43. data/lib/pdf/reader/register_receiver.rb +15 -2
  44. data/lib/pdf/reader/resources.rb +9 -0
  45. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  46. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  47. data/lib/pdf/reader/stream.rb +9 -3
  48. data/lib/pdf/reader/synchronized_cache.rb +5 -2
  49. data/lib/pdf/reader/text_run.rb +28 -1
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +33 -2
  52. data/lib/pdf/reader/type_check.rb +10 -3
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  54. data/lib/pdf/reader/validating_receiver.rb +29 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
  56. data/lib/pdf/reader/width_calculator/composite.rb +5 -1
  57. data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
  60. data/lib/pdf/reader/xref.rb +28 -7
  61. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  62. data/lib/pdf/reader.rb +18 -2
  63. data/rbi/pdf-reader.rbi +1502 -1594
  64. metadata +17 -11
@@ -7,10 +7,12 @@ class PDF::Reader
7
7
  # Security handler for when we don't support the flavour of encryption
8
8
  # used in a PDF.
9
9
  class UnimplementedSecurityHandler
10
+ #: (Hash[Symbol, untyped]) -> bool
10
11
  def self.supports?(encrypt)
11
12
  true
12
13
  end
13
14
 
15
+ #: (String, PDF::Reader::Reference) -> String
14
16
  def decrypt(buf, ref)
15
17
  raise PDF::Reader::EncryptedPDFError, "Unsupported encryption style"
16
18
  end
@@ -16,10 +16,12 @@ module PDF
16
16
  # Not all operators have type safety implemented yet, but we can expand the number over time.
17
17
  class ValidatingReceiver
18
18
 
19
+ #: (untyped) -> void
19
20
  def initialize(wrapped)
20
21
  @wrapped = wrapped
21
22
  end
22
23
 
24
+ #: (PDF::Reader::Page) -> void
23
25
  def page=(page)
24
26
  call_wrapped(:page=, page)
25
27
  end
@@ -27,10 +29,12 @@ module PDF
27
29
  #####################################################
28
30
  # Graphics State Operators
29
31
  #####################################################
32
+ #: (*untyped) -> void
30
33
  def save_graphics_state(*args)
31
34
  call_wrapped(:save_graphics_state)
32
35
  end
33
36
 
37
+ #: (*untyped) -> void
34
38
  def restore_graphics_state(*args)
35
39
  call_wrapped(:restore_graphics_state)
36
40
  end
@@ -39,6 +43,7 @@ module PDF
39
43
  # Matrix Operators
40
44
  #####################################################
41
45
 
46
+ #: (*untyped) -> void
42
47
  def concatenate_matrix(*args)
43
48
  a, b, c, d, e, f = *args
44
49
  call_wrapped(
@@ -56,10 +61,12 @@ module PDF
56
61
  # Text Object Operators
57
62
  #####################################################
58
63
 
64
+ #: (*untyped) -> void
59
65
  def begin_text_object(*args)
60
66
  call_wrapped(:begin_text_object)
61
67
  end
62
68
 
69
+ #: (*untyped) -> void
63
70
  def end_text_object(*args)
64
71
  call_wrapped(:end_text_object)
65
72
  end
@@ -67,6 +74,7 @@ module PDF
67
74
  #####################################################
68
75
  # Text State Operators
69
76
  #####################################################
77
+ #: (*untyped) -> void
70
78
  def set_character_spacing(*args)
71
79
  char_spacing, _ = *args
72
80
  call_wrapped(
@@ -75,6 +83,7 @@ module PDF
75
83
  )
76
84
  end
77
85
 
86
+ #: (*untyped) -> void
78
87
  def set_horizontal_text_scaling(*args)
79
88
  h_scaling, _ = *args
80
89
  call_wrapped(
@@ -83,6 +92,7 @@ module PDF
83
92
  )
84
93
  end
85
94
 
95
+ #: (*untyped) -> void
86
96
  def set_text_font_and_size(*args)
87
97
  label, size, _ = *args
88
98
  call_wrapped(
@@ -92,6 +102,7 @@ module PDF
92
102
  )
93
103
  end
94
104
 
105
+ #: (*untyped) -> void
95
106
  def set_text_leading(*args)
96
107
  leading, _ = *args
97
108
  call_wrapped(
@@ -100,6 +111,7 @@ module PDF
100
111
  )
101
112
  end
102
113
 
114
+ #: (*untyped) -> void
103
115
  def set_text_rendering_mode(*args)
104
116
  mode, _ = *args
105
117
  call_wrapped(
@@ -108,6 +120,7 @@ module PDF
108
120
  )
109
121
  end
110
122
 
123
+ #: (*untyped) -> void
111
124
  def set_text_rise(*args)
112
125
  rise, _ = *args
113
126
  call_wrapped(
@@ -116,6 +129,7 @@ module PDF
116
129
  )
117
130
  end
118
131
 
132
+ #: (*untyped) -> void
119
133
  def set_word_spacing(*args)
120
134
  word_spacing, _ = *args
121
135
  call_wrapped(
@@ -128,6 +142,7 @@ module PDF
128
142
  # Text Positioning Operators
129
143
  #####################################################
130
144
 
145
+ #: (*untyped) -> void
131
146
  def move_text_position(*args) # Td
132
147
  x, y, _ = *args
133
148
  call_wrapped(
@@ -137,6 +152,7 @@ module PDF
137
152
  )
138
153
  end
139
154
 
155
+ #: (*untyped) -> void
140
156
  def move_text_position_and_set_leading(*args) # TD
141
157
  x, y, _ = *args
142
158
  call_wrapped(
@@ -146,6 +162,7 @@ module PDF
146
162
  )
147
163
  end
148
164
 
165
+ #: (*untyped) -> void
149
166
  def set_text_matrix_and_text_line_matrix(*args) # Tm
150
167
  a, b, c, d, e, f = *args
151
168
  call_wrapped(
@@ -159,6 +176,7 @@ module PDF
159
176
  )
160
177
  end
161
178
 
179
+ #: (*untyped) -> void
162
180
  def move_to_start_of_next_line(*args) # T*
163
181
  call_wrapped(:move_to_start_of_next_line)
164
182
  end
@@ -166,6 +184,7 @@ module PDF
166
184
  #####################################################
167
185
  # Text Showing Operators
168
186
  #####################################################
187
+ #: (*untyped) -> void
169
188
  def show_text(*args) # Tj (AWAY)
170
189
  string, _ = *args
171
190
  call_wrapped(
@@ -174,6 +193,7 @@ module PDF
174
193
  )
175
194
  end
176
195
 
196
+ #: (*untyped) -> void
177
197
  def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
178
198
  params, _ = *args
179
199
  unless params.is_a?(Array)
@@ -186,6 +206,7 @@ module PDF
186
206
  )
187
207
  end
188
208
 
209
+ #: (*untyped) -> void
189
210
  def move_to_next_line_and_show_text(*args) # '
190
211
  string, _ = *args
191
212
  call_wrapped(
@@ -194,6 +215,7 @@ module PDF
194
215
  )
195
216
  end
196
217
 
218
+ #: (*untyped) -> void
197
219
  def set_spacing_next_line_show_text(*args) # "
198
220
  aw, ac, string = *args
199
221
  call_wrapped(
@@ -208,6 +230,7 @@ module PDF
208
230
  # Form XObject Operators
209
231
  #####################################################
210
232
 
233
+ #: (*untyped) -> void
211
234
  def invoke_xobject(*args)
212
235
  label, _ = *args
213
236
 
@@ -221,16 +244,19 @@ module PDF
221
244
  # Inline Image Operators
222
245
  #####################################################
223
246
 
247
+ #: (*untyped) -> void
224
248
  def begin_inline_image(*args)
225
249
  call_wrapped(:begin_inline_image)
226
250
  end
227
251
 
252
+ #: (*untyped) -> void
228
253
  def begin_inline_image_data(*args)
229
254
  # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
230
255
  # number of elements
231
256
  @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
232
257
  end
233
258
 
259
+ #: (*untyped) -> void
234
260
  def end_inline_image(*args)
235
261
  data, _ = *args
236
262
 
@@ -244,16 +270,19 @@ module PDF
244
270
  # Final safety net for any operators that don't have type checking enabled yet
245
271
  #####################################################
246
272
 
273
+ #: (untyped) -> bool
247
274
  def respond_to?(meth)
248
275
  @wrapped.respond_to?(meth)
249
276
  end
250
277
 
278
+ #: (Symbol, *untyped) -> void
251
279
  def method_missing(methodname, *args)
252
280
  @wrapped.send(methodname, *args)
253
281
  end
254
282
 
255
283
  private
256
284
 
285
+ #: (untyped, *untyped) -> void
257
286
  def call_wrapped(methodname, *args)
258
287
  @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
259
288
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
@@ -12,6 +12,7 @@ class PDF::Reader
12
12
  # the reader is expected to have it's own copy of the font metrics.
13
13
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
14
14
  class BuiltIn
15
+ @@all_metrics = nil #: PDF::Reader::SynchronizedCache | nil
15
16
 
16
17
  BUILTINS = [
17
18
  :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
@@ -19,11 +20,13 @@ class PDF::Reader
19
20
  :Symbol,
20
21
  :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
21
22
  :ZapfDingbats
22
- ]
23
+ ] #: Array[Symbol]
23
24
 
25
+ #: (PDF::Reader::Font) -> void
24
26
  def initialize(font)
25
27
  @font = font
26
28
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
29
+ @metrics = nil #: AFM::Font?
27
30
 
28
31
  basefont = extract_basefont(font.basefont)
29
32
  metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
@@ -35,8 +38,10 @@ class PDF::Reader
35
38
  end
36
39
  end
37
40
 
41
+ #: (Integer?) -> Numeric
38
42
  def glyph_width(code_point)
39
- return 0 if code_point.nil? || code_point < 0
43
+ return 0 if code_point.nil? || code_point < 0 || @metrics.nil?
44
+
40
45
 
41
46
  names = @font.encoding.int_to_name(code_point)
42
47
  metrics = names.map { |name|
@@ -52,11 +57,13 @@ class PDF::Reader
52
57
 
53
58
  private
54
59
 
60
+ #: (Integer) -> bool
55
61
  def control_character?(code_point)
56
62
  match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
57
63
  match ? true : false
58
64
  end
59
65
 
66
+ #: (Symbol?) -> String
60
67
  def extract_basefont(font_name)
61
68
  if BUILTINS.include?(font_name)
62
69
  font_name.to_s
@@ -12,11 +12,15 @@ class PDF::Reader
12
12
  # see Section 9.7.4.1, PDF 32000-1:2008, pp 269-270
13
13
  class Composite
14
14
 
15
+ #: (PDF::Reader::Font) -> void
15
16
  def initialize(font)
16
17
  @font = font
17
- @widths = PDF::Reader::CidWidths.new(@font.cid_default_width, @font.cid_widths)
18
+ @widths = PDF::Reader::CidWidths.new(
19
+ @font.cid_default_width, @font.cid_widths
20
+ ) #: PDF::Reader::CidWidths
18
21
  end
19
22
 
23
+ #: (Integer?) -> Numeric
20
24
  def glyph_width(code_point)
21
25
  return 0 if code_point.nil? || code_point < 0
22
26
 
@@ -7,16 +7,18 @@ class PDF::Reader
7
7
  # Calculates the width of a glyph in a TrueType font
8
8
  class TrueType
9
9
 
10
+ #: (PDF::Reader::Font) -> void
10
11
  def initialize(font)
11
12
  @font = font
12
13
 
13
14
  if fd = @font.font_descriptor
14
- @missing_width = fd.missing_width
15
+ @missing_width = fd.missing_width #: Numeric
15
16
  else
16
17
  @missing_width = 0
17
18
  end
18
19
  end
19
20
 
21
+ #: (Integer?) -> Numeric
20
22
  def glyph_width(code_point)
21
23
  return 0 if code_point.nil? || code_point < 0
22
24
  glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point) || 0
@@ -25,6 +27,7 @@ class PDF::Reader
25
27
  private
26
28
 
27
29
  #TODO convert Type3 units 1000 units => 1 text space unit
30
+ #: (Integer) -> Numeric?
28
31
  def glyph_width_from_font(code_point)
29
32
  return if @font.widths.nil? || @font.widths.count == 0
30
33
 
@@ -38,6 +41,7 @@ class PDF::Reader
38
41
  end
39
42
  end
40
43
 
44
+ #: (Integer) -> Numeric?
41
45
  def glyph_width_from_descriptor(code_point)
42
46
  # true type fonts will have most of their information contained
43
47
  # with-in a program inside the font descriptor, however the widths
@@ -7,16 +7,18 @@ class PDF::Reader
7
7
  # Calculates the width of a glyph in a Type One or Type Three
8
8
  class TypeOneOrThree
9
9
 
10
+ #: (PDF::Reader::Font) -> void
10
11
  def initialize(font)
11
12
  @font = font
12
13
 
13
14
  if fd = @font.font_descriptor
14
- @missing_width = fd.missing_width
15
+ @missing_width = fd.missing_width #: Numeric
15
16
  else
16
17
  @missing_width = 0
17
18
  end
18
19
  end
19
20
 
21
+ #: (Integer?) -> Numeric
20
22
  def glyph_width(code_point)
21
23
  return 0 if code_point.nil? || code_point < 0
22
24
  return 0 if @font.widths.nil? || @font.widths.count == 0
@@ -11,10 +11,12 @@ class PDF::Reader
11
11
  # the descendant font
12
12
  class TypeZero
13
13
 
14
+ #: (PDF::Reader::Font) -> void
14
15
  def initialize(font)
15
16
  @font = font
16
17
  end
17
18
 
19
+ #: (Integer?) -> Numeric
18
20
  def glyph_width(code_point)
19
21
  return 0 if code_point.nil? || code_point < 0
20
22
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -46,8 +46,11 @@ class PDF::Reader
46
46
  # the Enumerable mixin. The key difference is no []= method - the hash
47
47
  # is read only.
48
48
  #
49
+ #: [Elem]
49
50
  class XRef
50
51
  include Enumerable
52
+
53
+ #: Hash[Symbol, untyped]
51
54
  attr_reader :trailer
52
55
 
53
56
  ################################################################################
@@ -55,16 +58,19 @@ class PDF::Reader
55
58
  #
56
59
  # io - must be an IO object, generally either a file or a StringIO
57
60
  #
61
+ #: (IO | Tempfile | StringIO) -> void
58
62
  def initialize(io)
59
63
  @io = io
60
- @junk_offset = calc_junk_offset(io) || 0
61
- @xref = {}
62
- @trailer = load_offsets
64
+ @junk_offset = calc_junk_offset(io) || 0 #: Integer
65
+ @xref = {} #: Hash[Integer, Hash[Integer, Integer | PDF::Reader::Reference]]
66
+ @trailer = load_offsets #: Hash[Symbol, untyped]
63
67
  end
64
68
 
65
69
  ################################################################################
66
70
  # return the number of objects in this file. Objects with multiple generations are
67
71
  # only counter once.
72
+ #
73
+ #: () -> untyped
68
74
  def size
69
75
  @xref.size
70
76
  end
@@ -72,6 +78,7 @@ class PDF::Reader
72
78
  # returns the byte offset for the specified PDF object.
73
79
  #
74
80
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
81
+ #: (untyped) -> untyped
75
82
  def [](ref)
76
83
  @xref.fetch(ref.id, {}).fetch(ref.gen)
77
84
  rescue
@@ -79,6 +86,9 @@ class PDF::Reader
79
86
  end
80
87
  ################################################################################
81
88
  # iterate over each object in the xref table
89
+ #
90
+ # @override(allow_incompatible: true)
91
+ #: () { (PDF::Reader::Reference) -> untyped } -> void
82
92
  def each(&block)
83
93
  ids = @xref.keys.sort
84
94
  ids.each do |id|
@@ -97,6 +107,7 @@ class PDF::Reader
97
107
  # After seeking to the offset, processing is handed of to either load_xref_table()
98
108
  # or load_xref_stream() based on what we find there.
99
109
  #
110
+ #: (?Integer?) -> Hash[Symbol, untyped]
100
111
  def load_offsets(offset = nil)
101
112
  offset ||= new_buffer.find_first_xref_offset
102
113
  offset += @junk_offset
@@ -117,7 +128,9 @@ class PDF::Reader
117
128
  # to handle the case where an XRef Stream has the Length specified via an
118
129
  # indirect object
119
130
  stream = PDF::Reader::Parser.new(buf).object(tok_one.to_i, tok_two.to_i)
120
- return load_xref_stream(stream)
131
+ if stream.is_a?(PDF::Reader::Stream)
132
+ return load_xref_stream(stream)
133
+ end
121
134
  end
122
135
 
123
136
  raise PDF::Reader::MalformedPDFError,
@@ -126,6 +139,8 @@ class PDF::Reader
126
139
  ################################################################################
127
140
  # Assumes the underlying buffer is positioned at the start of a traditional
128
141
  # Xref table and processes it into memory.
142
+ #
143
+ #: (PDF::Reader::Buffer) -> Hash[Symbol, untyped]
129
144
  def load_xref_table(buf)
130
145
  params = []
131
146
 
@@ -169,8 +184,9 @@ class PDF::Reader
169
184
  ################################################################################
170
185
  # Read an XRef stream from the underlying buffer instead of a traditional xref table.
171
186
  #
187
+ #: (PDF::Reader::Stream) -> Hash[Symbol, untyped]
172
188
  def load_xref_stream(stream)
173
- unless stream.is_a?(PDF::Reader::Stream) && stream.hash[:Type] == :XRef
189
+ unless stream.hash[:Type] == :XRef
174
190
  raise PDF::Reader::MalformedPDFError, "xref stream not found when expected"
175
191
  end
176
192
  trailer = Hash[stream.hash.select { |key, value|
@@ -216,8 +232,9 @@ class PDF::Reader
216
232
  # XRef streams pack info into integers 1-N bytes wide. Depending on the number of
217
233
  # bytes they need to be converted to an int in different ways.
218
234
  #
235
+ #: (String?) -> Integer
219
236
  def unpack_bytes(bytes)
220
- if bytes.to_s.size == 0
237
+ res = if bytes.nil? || bytes == ""
221
238
  0
222
239
  elsif bytes.size == 1
223
240
  bytes.unpack("C")[0]
@@ -232,6 +249,7 @@ class PDF::Reader
232
249
  else
233
250
  raise UnsupportedFeatureError, "Unable to unpack xref stream entries of #{bytes.size} bytes"
234
251
  end
252
+ TypeCheck.cast_to_int!(res)
235
253
  end
236
254
  ################################################################################
237
255
  # Wrap the io stream we're working with in a buffer that can tokenise it for us.
@@ -239,12 +257,14 @@ class PDF::Reader
239
257
  # We create multiple buffers so we can be tokenising multiple sections of the file
240
258
  # at the same time without worrying about clearing the buffers contents.
241
259
  #
260
+ #: (?Integer) -> PDF::Reader::Buffer
242
261
  def new_buffer(offset = 0)
243
262
  PDF::Reader::Buffer.new(@io, :seek => offset)
244
263
  end
245
264
  ################################################################################
246
265
  # Stores an offset value for a particular PDF object ID and revision number
247
266
  #
267
+ #: (Integer, Integer, Integer | PDF::Reader::Reference) -> (Integer | PDF::Reader::Reference)
248
268
  def store(id, gen, offset)
249
269
  (@xref[id] ||= {})[gen] ||= offset
250
270
  end
@@ -258,6 +278,7 @@ class PDF::Reader
258
278
  # Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
259
279
  # header appear somewhere within the first 1024 bytes of the file
260
280
  #
281
+ #: (IO | Tempfile | StringIO) -> Integer?
261
282
  def calc_junk_offset(io)
262
283
  io.rewind
263
284
  offset = io.pos
@@ -6,6 +6,7 @@ class PDF::Reader
6
6
  # There's no point rendering zero-width characters
7
7
  class ZeroWidthRunsFilter
8
8
 
9
+ #: (Array[PDF::Reader::TextRun]) -> Array[PDF::Reader::TextRun]
9
10
  def self.exclude_zero_width_runs(runs)
10
11
  runs.reject { |run| run.width == 0 }
11
12
  end
data/lib/pdf/reader.rb CHANGED
@@ -95,6 +95,7 @@ module PDF
95
95
  class Reader
96
96
 
97
97
  # lowlevel hash-like access to all objects in the underlying PDF
98
+ #: PDF::Reader::ObjectHash
98
99
  attr_reader :objects
99
100
 
100
101
  # creates a new document reader for the provided PDF.
@@ -115,14 +116,18 @@ module PDF
115
116
  # Using this method directly is supported, but it's more common to use
116
117
  # `PDF::Reader.open`
117
118
  #
119
+ #: (String | Tempfile | IO | StringIO, ?Hash[untyped, untyped]) -> void
118
120
  def initialize(input, opts = {})
119
- @cache = PDF::Reader::ObjectCache.new
121
+ @cache = PDF::Reader::ObjectCache.new #: PDF::Reader::ObjectCache
120
122
  opts.merge!(:cache => @cache)
121
- @objects = PDF::Reader::ObjectHash.new(input, opts)
123
+ @objects = PDF::Reader::ObjectHash.new(input, opts) #: PDF::Reader::ObjectHash
124
+ @page_count = nil #: Integer | nil
125
+ @root = nil #: Hash[Symbol, untyped] | nil
122
126
  end
123
127
 
124
128
  # Return a Hash with some basic information about the PDF file
125
129
  #
130
+ #: () -> Hash[untyped, untyped]?
126
131
  def info
127
132
  dict = @objects.deref_hash(@objects.trailer[:Info]) || {}
128
133
  doc_strings_to_utf8(dict)
@@ -131,6 +136,7 @@ module PDF
131
136
  # Return a String with extra XML metadata provided by the author of the PDF file. Not
132
137
  # always present.
133
138
  #
139
+ #: () -> String?
134
140
  def metadata
135
141
  stream = @objects.deref_stream(root[:Metadata])
136
142
  if stream.nil?
@@ -144,6 +150,7 @@ module PDF
144
150
 
145
151
  # To number of pages in this PDF
146
152
  #
153
+ #: () -> Integer
147
154
  def page_count
148
155
  pages = @objects.deref_hash(root[:Pages])
149
156
  unless pages.kind_of?(::Hash)
@@ -154,6 +161,7 @@ module PDF
154
161
 
155
162
  # The PDF version this file uses
156
163
  #
164
+ #: () -> Float
157
165
  def pdf_version
158
166
  @objects.pdf_version
159
167
  end
@@ -171,6 +179,7 @@ module PDF
171
179
  # puts reader.pdf_version
172
180
  # end
173
181
  #
182
+ #: (String | Tempfile | IO, ?Hash[untyped, untyped]) { (PDF::Reader) -> void } -> untyped
174
183
  def self.open(input, opts = {}, &block)
175
184
  yield PDF::Reader.new(input, opts)
176
185
  end
@@ -189,6 +198,7 @@ module PDF
189
198
  # See the docs for PDF::Reader::Page to read more about the
190
199
  # methods available on each page
191
200
  #
201
+ #: () -> Array[PDF::Reader::Page]
192
202
  def pages
193
203
  return [] if page_count <= 0
194
204
 
@@ -213,6 +223,7 @@ module PDF
213
223
  # See the docs for PDF::Reader::Page to read more about the
214
224
  # methods available on each page
215
225
  #
226
+ #: (Integer) -> PDF::Reader::Page
216
227
  def page(num)
217
228
  num = num.to_i
218
229
  if num < 1 || num > self.page_count
@@ -225,6 +236,7 @@ module PDF
225
236
 
226
237
  # recursively convert strings from outside a content stream into UTF-8
227
238
  #
239
+ #: (untyped) -> untyped
228
240
  def doc_strings_to_utf8(obj)
229
241
  case obj
230
242
  when ::Hash then
@@ -246,6 +258,7 @@ module PDF
246
258
  end
247
259
  end
248
260
 
261
+ #: (String) -> bool
249
262
  def has_utf16_bom?(str)
250
263
  first_bytes = str[0,2]
251
264
 
@@ -256,6 +269,7 @@ module PDF
256
269
 
257
270
  # TODO find a PDF I can use to spec this behaviour
258
271
  #
272
+ #: (String) -> String
259
273
  def pdfdoc_to_utf8(obj)
260
274
  obj.force_encoding("utf-8")
261
275
  obj
@@ -264,6 +278,7 @@ module PDF
264
278
  # one day we'll all run on a 1.9 compatible VM and I can just do this with
265
279
  # String#encode
266
280
  #
281
+ #: (String) -> String
267
282
  def utf16_to_utf8(obj)
268
283
  str = obj[2, obj.size].to_s
269
284
  str = str.unpack("n*").pack("U*")
@@ -271,6 +286,7 @@ module PDF
271
286
  str
272
287
  end
273
288
 
289
+ #: () -> Hash[Symbol, untyped]
274
290
  def root
275
291
  @root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
276
292
  end