pdf-reader 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +22 -0
  3. data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  7. data/lib/pdf/reader/buffer.rb +35 -17
  8. data/lib/pdf/reader/cid_widths.rb +7 -1
  9. data/lib/pdf/reader/cmap.rb +14 -3
  10. data/lib/pdf/reader/encoding.rb +37 -12
  11. data/lib/pdf/reader/error.rb +6 -0
  12. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  13. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  14. data/lib/pdf/reader/filter/depredict.rb +4 -0
  15. data/lib/pdf/reader/filter/flate.rb +5 -2
  16. data/lib/pdf/reader/filter/lzw.rb +2 -0
  17. data/lib/pdf/reader/filter/null.rb +2 -0
  18. data/lib/pdf/reader/filter/run_length.rb +2 -0
  19. data/lib/pdf/reader/filter.rb +1 -0
  20. data/lib/pdf/reader/font.rb +90 -22
  21. data/lib/pdf/reader/font_descriptor.rb +76 -23
  22. data/lib/pdf/reader/form_xobject.rb +11 -0
  23. data/lib/pdf/reader/glyph_hash.rb +34 -9
  24. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  25. data/lib/pdf/reader/lzw.rb +17 -6
  26. data/lib/pdf/reader/no_text_filter.rb +1 -0
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +7 -2
  29. data/lib/pdf/reader/object_hash.rb +116 -9
  30. data/lib/pdf/reader/object_stream.rb +19 -2
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
  32. data/lib/pdf/reader/page.rb +41 -7
  33. data/lib/pdf/reader/page_layout.rb +25 -8
  34. data/lib/pdf/reader/page_state.rb +5 -2
  35. data/lib/pdf/reader/page_text_receiver.rb +6 -2
  36. data/lib/pdf/reader/pages_strategy.rb +1 -1
  37. data/lib/pdf/reader/parser.rb +51 -10
  38. data/lib/pdf/reader/point.rb +9 -2
  39. data/lib/pdf/reader/print_receiver.rb +2 -6
  40. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  41. data/lib/pdf/reader/rectangle.rb +24 -1
  42. data/lib/pdf/reader/reference.rb +10 -1
  43. data/lib/pdf/reader/register_receiver.rb +15 -2
  44. data/lib/pdf/reader/resources.rb +9 -0
  45. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  46. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  47. data/lib/pdf/reader/stream.rb +9 -3
  48. data/lib/pdf/reader/synchronized_cache.rb +5 -2
  49. data/lib/pdf/reader/text_run.rb +28 -1
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +33 -2
  52. data/lib/pdf/reader/type_check.rb +10 -3
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  54. data/lib/pdf/reader/validating_receiver.rb +29 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
  56. data/lib/pdf/reader/width_calculator/composite.rb +5 -1
  57. data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
  60. data/lib/pdf/reader/xref.rb +28 -7
  61. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  62. data/lib/pdf/reader.rb +18 -2
  63. data/rbi/pdf-reader.rbi +1502 -1594
  64. metadata +17 -11
@@ -31,26 +31,34 @@ class PDF::Reader
31
31
  ################################################################################
32
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
33
33
  class Reference
34
+ #: Integer
34
35
  attr_reader :id
36
+
37
+ #: Integer
35
38
  attr_reader :gen
36
39
  ################################################################################
37
40
  # Create a new Reference to an object with the specified id and revision number
41
+ #: (Integer, Integer) -> void
38
42
  def initialize(id, gen)
39
- @id, @gen = id, gen
43
+ @id = id
44
+ @gen = gen
40
45
  end
41
46
  ################################################################################
42
47
  # returns the current Reference object in an array with a single element
48
+ #: () -> Array[PDF::Reader::Reference]
43
49
  def to_a
44
50
  [self]
45
51
  end
46
52
  ################################################################################
47
53
  # returns the ID of this reference. Use with caution, ignores the generation id
54
+ #: () -> Integer
48
55
  def to_i
49
56
  self.id
50
57
  end
51
58
  ################################################################################
52
59
  # returns true if the provided object points to the same PDF Object as the
53
60
  # current object
61
+ #: (Object) -> bool
54
62
  def ==(obj)
55
63
  return false unless obj.kind_of?(PDF::Reader::Reference)
56
64
 
@@ -61,6 +69,7 @@ class PDF::Reader
61
69
  # returns a hash based on the PDF::Reference this object points to. Two
62
70
  # different Reference objects that point to the same PDF Object will
63
71
  # return an identical hash
72
+ #: () -> Integer
64
73
  def hash
65
74
  "#{self.id}:#{self.gen}".hash
66
75
  end
@@ -22,45 +22,55 @@ class PDF::Reader
22
22
  #
23
23
  class RegisterReceiver
24
24
 
25
+ #: Array[Hash[Symbol, untyped]]
25
26
  attr_accessor :callbacks
26
27
 
28
+ #: () -> void
27
29
  def initialize
28
- @callbacks = []
30
+ @callbacks = [] #: Array[Hash[Symbol, untyped]]
29
31
  end
30
32
 
33
+ #: (untyped) -> bool
31
34
  def respond_to?(meth)
32
35
  true
33
36
  end
34
37
 
38
+ #: (Symbol, *untyped) -> void
35
39
  def method_missing(methodname, *args)
36
40
  callbacks << {:name => methodname.to_sym, :args => args}
37
41
  end
38
42
 
39
43
  # count the number of times a callback fired
44
+ #: (Symbol) -> Integer
40
45
  def count(methodname)
41
46
  callbacks.count { |cb| cb[:name] == methodname}
42
47
  end
43
48
 
44
49
  # return the details for every time the specified callback was fired
50
+ #: (Symbol) -> Array[Hash[Symbol, untyped]]
45
51
  def all(methodname)
46
52
  callbacks.select { |cb| cb[:name] == methodname }
47
53
  end
48
54
 
55
+ #: (Symbol) -> Array[Array[untyped]]
49
56
  def all_args(methodname)
50
57
  all(methodname).map { |cb| cb[:args] }
51
58
  end
52
59
 
53
60
  # return the details for the first time the specified callback was fired
61
+ #: (Symbol) -> Hash[Symbol, untyped]?
54
62
  def first_occurance_of(methodname)
55
63
  callbacks.find { |cb| cb[:name] == methodname }
56
64
  end
57
65
 
58
66
  # return the details for the final time the specified callback was fired
67
+ #: (Symbol) -> Hash[Symbol, untyped]?
59
68
  def final_occurance_of(methodname)
60
69
  all(methodname).last
61
70
  end
62
71
 
63
72
  # return the first occurrence of a particular series of callbacks
73
+ #: (*Symbol) -> Array[Hash[Symbol, untyped]]?
64
74
  def series(*methods)
65
75
  return nil if methods.empty?
66
76
 
@@ -70,7 +80,10 @@ class PDF::Reader
70
80
  indexes.each do |idx|
71
81
  count = methods.size
72
82
  method_indexes.each do |midx|
73
- count -= 1 if callbacks[idx+midx] && callbacks[idx+midx][:name] == methods[midx]
83
+ res = callbacks[idx+midx]
84
+ if res && res[:name] == methods[midx]
85
+ count -= 1
86
+ end
74
87
  end
75
88
  if count == 0
76
89
  return callbacks[idx, methods.size]
@@ -9,6 +9,7 @@ module PDF
9
9
  #
10
10
  class Resources
11
11
 
12
+ #: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
12
13
  def initialize(objects, resources)
13
14
  @objects = objects
14
15
  @resources = resources
@@ -20,6 +21,7 @@ module PDF
20
21
  # with no caching. You will want to cache the results instead
21
22
  # of calling it over and over.
22
23
  #
24
+ #: () -> Hash[Symbol, untyped]
23
25
  def color_spaces
24
26
  @objects.deref_hash!(@resources[:ColorSpace]) || {}
25
27
  end
@@ -30,6 +32,7 @@ module PDF
30
32
  # with no caching. You will want to cache the results instead
31
33
  # of calling it over and over.
32
34
  #
35
+ #: () -> Hash[Symbol, untyped]
33
36
  def fonts
34
37
  @objects.deref_hash!(@resources[:Font]) || {}
35
38
  end
@@ -41,6 +44,7 @@ module PDF
41
44
  # with no caching. You will want to cache the results instead
42
45
  # of calling it over and over.
43
46
  #
47
+ #: () -> Hash[Symbol, untyped]
44
48
  def graphic_states
45
49
  @objects.deref_hash!(@resources[:ExtGState]) || {}
46
50
  end
@@ -51,6 +55,7 @@ module PDF
51
55
  # with no caching. You will want to cache the results instead
52
56
  # of calling it over and over.
53
57
  #
58
+ #: () -> Hash[Symbol, untyped]
54
59
  def patterns
55
60
  @objects.deref_hash!(@resources[:Pattern]) || {}
56
61
  end
@@ -61,6 +66,7 @@ module PDF
61
66
  # with no caching. You will want to cache the results instead
62
67
  # of calling it over and over.
63
68
  #
69
+ #: () -> Array[Symbol]
64
70
  def procedure_sets
65
71
  @objects.deref_array!(@resources[:ProcSet]) || []
66
72
  end
@@ -71,6 +77,7 @@ module PDF
71
77
  # with no caching. You will want to cache the results instead
72
78
  # of calling it over and over.
73
79
  #
80
+ #: () -> Hash[Symbol, untyped]
74
81
  def properties
75
82
  @objects.deref_hash!(@resources[:Properties]) || {}
76
83
  end
@@ -81,6 +88,7 @@ module PDF
81
88
  # with no caching. You will want to cache the results instead
82
89
  # of calling it over and over.
83
90
  #
91
+ #: () -> Hash[Symbol, untyped]
84
92
  def shadings
85
93
  @objects.deref_hash!(@resources[:Shading]) || {}
86
94
  end
@@ -91,6 +99,7 @@ module PDF
91
99
  # with no caching. You will want to cache the results instead
92
100
  # of calling it over and over.
93
101
  #
102
+ #: () -> Hash[Symbol, PDF::Reader::Stream]
94
103
  def xobjects
95
104
  dict = @objects.deref_hash!(@resources[:XObject]) || {}
96
105
  TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
@@ -7,6 +7,13 @@ class PDF::Reader
7
7
  # able to decrypt the file.
8
8
  class SecurityHandlerFactory
9
9
 
10
+ #: (Hash[Symbol, untyped], Array[untyped] | nil, String | nil) -> (
11
+ #| NullSecurityHandler |
12
+ #| AesV2SecurityHandler |
13
+ #| Rc4SecurityHandler |
14
+ #| AesV3SecurityHandler |
15
+ #| UnimplementedSecurityHandler
16
+ #| )
10
17
  def self.build(encrypt, doc_id, password)
11
18
  doc_id ||= []
12
19
  password ||= ""
@@ -22,6 +29,9 @@ class PDF::Reader
22
29
  end
23
30
  end
24
31
 
32
+ #: (Hash[Symbol, untyped], Array[untyped], String) -> (
33
+ #| AesV2SecurityHandler | Rc4SecurityHandler
34
+ #| )
25
35
  def self.build_standard_handler(encrypt, doc_id, password)
26
36
  encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
27
37
  key_builder = StandardKeyBuilder.new(
@@ -41,6 +51,7 @@ class PDF::Reader
41
51
  end
42
52
  end
43
53
 
54
+ #: (Hash[Symbol, untyped], Array[untyped], String) -> (AesV3SecurityHandler)
44
55
  def self.build_v5_handler(encrypt, doc_id, password)
45
56
  key_builder = KeyBuilderV5.new(
46
57
  owner_key: encrypt[:O],
@@ -52,6 +63,7 @@ class PDF::Reader
52
63
  end
53
64
 
54
65
  # This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
66
+ #: (Hash[Symbol, untyped]) -> bool
55
67
  def self.standard?(encrypt)
56
68
  return false if encrypt.nil?
57
69
 
@@ -65,6 +77,7 @@ class PDF::Reader
65
77
  # This handler supports both
66
78
  # - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
67
79
  # - AES-256 encryption defined in PDF 2.0 ('revision 6')
80
+ #: (Hash[Symbol, untyped]) -> untyped
68
81
  def self.standard_v5?(encrypt)
69
82
  return false if encrypt.nil?
70
83
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
 
3
4
  require 'digest/md5'
4
5
  require 'rc4'
@@ -23,16 +24,17 @@ class PDF::Reader
23
24
  PassPadBytes = [ 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
24
25
  0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
25
26
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
26
- 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
27
+ 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ] #: Array[Integer]
27
28
 
29
+ #: (?Hash[Symbol, untyped]) -> void
28
30
  def initialize(opts = {})
29
- @key_length = opts[:key_length].to_i/8
30
- @revision = opts[:revision].to_i
31
- @owner_key = opts[:owner_key]
32
- @user_key = opts[:user_key]
33
- @permissions = opts[:permissions].to_i
34
- @encryptMeta = opts.fetch(:encrypted_metadata, true)
35
- @file_id = opts[:file_id] || ""
31
+ @key_length = opts[:key_length].to_i/8 #: Integer
32
+ @revision = opts[:revision].to_i #: Integer
33
+ @owner_key = opts[:owner_key] #: String?
34
+ @user_key = opts[:user_key] #: String?
35
+ @permissions = opts[:permissions].to_i #: Integer
36
+ @encryptMeta = opts.fetch(:encrypted_metadata, true) #: bool
37
+ @file_id = opts[:file_id] || "" #: String
36
38
 
37
39
  if @key_length != 5 && @key_length != 16
38
40
  msg = "StandardKeyBuilder only supports 40 and 128 bit\
@@ -47,8 +49,8 @@ class PDF::Reader
47
49
  # decrypting the file will be returned. If the password doesn't match the file,
48
50
  # an exception will be raised.
49
51
  #
50
- def key(pass)
51
- pass ||= ""
52
+ #: (?String) -> String
53
+ def key(pass = "")
52
54
  encrypt_key = auth_owner_pass(pass)
53
55
  encrypt_key ||= auth_user_pass(pass)
54
56
 
@@ -60,14 +62,17 @@ class PDF::Reader
60
62
 
61
63
  # Pads supplied password to 32bytes using PassPadBytes as specified on
62
64
  # pp61 of spec
65
+ #
66
+ #: (?String?) -> String
63
67
  def pad_pass(p="")
64
- if p.nil? || p.empty?
65
- PassPadBytes.pack('C*')
66
- else
67
- p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
68
- end
68
+ return PassPadBytes.pack('C*') if p.nil? || p.empty?
69
+
70
+ pTrimmedLength = [p.bytesize, 32].min
71
+ pad = PassPadBytes[0, 32 - pTrimmedLength] || []
72
+ p[0, 32].to_s + pad.pack('C*')
69
73
  end
70
74
 
75
+ #: (String, Integer) -> String
71
76
  def xor_each_byte(buf, int)
72
77
  buf.each_byte.map{ |b| b^int}.pack("C*")
73
78
  end
@@ -84,6 +89,7 @@ class PDF::Reader
84
89
  # if the supplied password is not a valid owner password for this document
85
90
  # then it returns nil
86
91
  #
92
+ #: (String) -> String?
87
93
  def auth_owner_pass(pass)
88
94
  md5 = Digest::MD5.digest(pad_pass(pass))
89
95
  if @revision > 2 then
@@ -92,7 +98,13 @@ class PDF::Reader
92
98
  #first iteration decrypt owner_key
93
99
  out = @owner_key
94
100
  #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
95
- 19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
101
+ 19.downto(0).each { |i|
102
+ # The RC4 gem doesn't have type annotations, so the type checker doesn't
103
+ # know decrypt() returns a string
104
+ out = TypeCheck.cast_to_string!(
105
+ RC4.new(xor_each_byte(keyBegins,i)).decrypt(out)
106
+ )
107
+ }
96
108
  else
97
109
  out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
98
110
  end
@@ -110,6 +122,7 @@ class PDF::Reader
110
122
  # if the supplied password is not a valid user password for this document
111
123
  # then it returns nil
112
124
  #
125
+ #: (String) -> String?
113
126
  def auth_user_pass(pass)
114
127
  keyBegins = make_file_key(pass)
115
128
  if @revision >= 3
@@ -117,28 +130,29 @@ class PDF::Reader
117
130
  out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
118
131
  #zero doesn't matter -> so from 0-19
119
132
  20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
120
- pass = @user_key[0, 16] == out
133
+ pass = @user_key.to_s[0, 16] == out
121
134
  else
122
135
  pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
123
136
  end
124
137
  pass ? keyBegins : nil
125
138
  end
126
139
 
140
+ #: (String) -> String
127
141
  def make_file_key( user_pass )
128
142
  # a) if there's a password, pad it to 32 bytes, else, just use the padding.
129
- @buf = pad_pass(user_pass)
143
+ buf = pad_pass(user_pass)
130
144
  # c) add owner key
131
- @buf << @owner_key
145
+ buf << @owner_key
132
146
  # d) add permissions 1 byte at a time, in little-endian order
133
- (0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
147
+ (0..24).step(8){|e| buf << (@permissions >> e & 0xFF)}
134
148
  # e) add the file ID
135
- @buf << @file_id
149
+ buf << @file_id
136
150
  # f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
137
151
  if @revision >= 4 && !@encryptMeta
138
- @buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
152
+ buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
139
153
  end
140
154
  # b) init MD5 digest + g) finish the hash
141
- md5 = Digest::MD5.digest(@buf)
155
+ md5 = Digest::MD5.digest(buf)
142
156
  # h) spin hash 50 times
143
157
  if @revision >= 3
144
158
  50.times {
@@ -34,18 +34,24 @@ class PDF::Reader
34
34
  # compression, etc) and a stream of bytes.
35
35
  #
36
36
  class Stream
37
- attr_accessor :hash, :data
37
+ #: Hash[Symbol, untyped]
38
+ attr_accessor :hash
39
+
40
+ #: String
41
+ attr_accessor :data
38
42
 
39
43
  ################################################################################
40
44
  # Creates a new stream with the specified dictionary and data. The dictionary
41
45
  # should be a standard ruby hash, the data should be a standard ruby string.
46
+ #: (Hash[Symbol, untyped], String) -> void
42
47
  def initialize(hash, data)
43
- @hash = TypeCheck.cast_to_pdf_dict!(hash)
48
+ @hash = TypeCheck.cast_to_pdf_dict!(hash) #: Hash[Symbol, untyped]
44
49
  @data = data
45
- @udata = nil
50
+ @udata = nil #: String | nil
46
51
  end
47
52
  ################################################################################
48
53
  # apply this streams filters to its data and return the result.
54
+ #: () -> String
49
55
  def unfiltered_data
50
56
  return @udata if @udata
51
57
  @udata = data.dup
@@ -20,13 +20,16 @@ class PDF::Reader
20
20
  # same time, we don't want to throw away thread safety. We have two
21
21
  # interchangeable thread-safe cache implementations:
22
22
  class SynchronizedCache
23
+ #: () -> void
23
24
  def initialize
24
- @cache = {}
25
- @mutex = Mutex.new
25
+ @cache = {} #: Hash[Object, untyped]
26
+ @mutex = Mutex.new #: Mutex
26
27
  end
28
+ #: (Object) -> untyped
27
29
  def [](key)
28
30
  @mutex.synchronize { @cache[key] }
29
31
  end
32
+ #: (Object, (Object | nil)) -> untyped
30
33
  def []=(key,value)
31
34
  @mutex.synchronize { @cache[key] = value }
32
35
  end
@@ -7,22 +7,34 @@ class PDF::Reader
7
7
  class TextRun
8
8
  include Comparable
9
9
 
10
+ #: PDF::Reader::Point
10
11
  attr_reader :origin
12
+
13
+ #: Numeric
11
14
  attr_reader :width
15
+
16
+ #: Numeric
12
17
  attr_reader :font_size
18
+
19
+ #: String
13
20
  attr_reader :text
14
21
 
15
22
  alias :to_s :text
16
23
 
24
+ #: (Numeric, Numeric, Numeric, Numeric, String) -> void
17
25
  def initialize(x, y, width, font_size, text)
18
- @origin = PDF::Reader::Point.new(x, y)
26
+ @origin = PDF::Reader::Point.new(x, y) #: PDF::Reader::Point
19
27
  @width = width
20
28
  @font_size = font_size
21
29
  @text = text
30
+ @endx = nil #: Numeric | nil
31
+ @endy = nil #: Numeric | nil
32
+ @mergable_range = nil #: Range[Numeric] | nil
22
33
  end
23
34
 
24
35
  # Allows collections of TextRun objects to be sorted. They will be sorted
25
36
  # in order of their position on a Cartesian plane - Top Left to Bottom Right
37
+ #: (PDF::Reader::Point) -> Numeric
26
38
  def <=>(other)
27
39
  if x == other.x && y == other.y
28
40
  0
@@ -34,33 +46,42 @@ class PDF::Reader
34
46
  -1
35
47
  elsif x > other.x
36
48
  1
49
+ else
50
+ 0 # Unreachable?
37
51
  end
38
52
  end
39
53
 
54
+ #: () -> Numeric
40
55
  def x
41
56
  @origin.x
42
57
  end
43
58
 
59
+ #: () -> Numeric
44
60
  def y
45
61
  @origin.y
46
62
  end
47
63
 
64
+ #: () -> Numeric
48
65
  def endx
49
66
  @endx ||= @origin.x + width
50
67
  end
51
68
 
69
+ #: () -> Numeric
52
70
  def endy
53
71
  @endy ||= @origin.y + font_size
54
72
  end
55
73
 
74
+ #: () -> Numeric
56
75
  def mean_character_width
57
76
  @width / character_count
58
77
  end
59
78
 
79
+ #: (PDF::Reader::TextRun) -> bool
60
80
  def mergable?(other)
61
81
  y.to_i == other.y.to_i && font_size == other.font_size && mergable_range.include?(other.x)
62
82
  end
63
83
 
84
+ #: (PDF::Reader::TextRun) -> PDF::Reader::TextRun
64
85
  def +(other)
65
86
  raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)
66
87
 
@@ -71,16 +92,19 @@ class PDF::Reader
71
92
  end
72
93
  end
73
94
 
95
+ #: () -> String
74
96
  def inspect
75
97
  "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
76
98
  end
77
99
 
100
+ #: (PDF::Reader::TextRun) -> bool
78
101
  def intersect?(other_run)
79
102
  x <= other_run.endx && endx >= other_run.x &&
80
103
  endy >= other_run.y && y <= other_run.endy
81
104
  end
82
105
 
83
106
  # return what percentage of this text run is overlapped by another run
107
+ #: (PDF::Reader::TextRun) -> Numeric
84
108
  def intersection_area_percent(other_run)
85
109
  return 0 unless intersect?(other_run)
86
110
 
@@ -93,16 +117,19 @@ class PDF::Reader
93
117
 
94
118
  private
95
119
 
120
+ #: () -> Numeric
96
121
  def area
97
122
  (endx - x) * (endy - y)
98
123
  end
99
124
 
125
+ #: () -> Range[Numeric]
100
126
  def mergable_range
101
127
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
102
128
  end
103
129
 
104
130
  # Assume string encoding is marked correctly and we can trust String#size to return a
105
131
  # character count
132
+ #: () -> Float
106
133
  def character_count
107
134
  @text.size.to_f
108
135
  end
@@ -35,6 +35,7 @@ class PDF::Reader
35
35
  class Token < String # :nodoc:
36
36
  ################################################################################
37
37
  # Creates a new token with the specified value
38
+ #: (untyped) -> void
38
39
  def initialize(val)
39
40
  super
40
41
  end
@@ -14,16 +14,40 @@ class PDF::Reader
14
14
  # only 6 numbers. This is important to save CPU time, memory and GC pressure
15
15
  # caused by allocating too many unnecessary objects.
16
16
  class TransformationMatrix
17
- attr_reader :a, :b, :c, :d, :e, :f
17
+ #: Numeric
18
+ attr_reader :a
18
19
 
20
+ #: Numeric
21
+ attr_reader :b
22
+
23
+ #: Numeric
24
+ attr_reader :c
25
+
26
+ #: Numeric
27
+ attr_reader :d
28
+
29
+ #: Numeric
30
+ attr_reader :e
31
+
32
+ #: Numeric
33
+ attr_reader :f
34
+
35
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
19
36
  def initialize(a, b, c, d, e, f)
20
- @a, @b, @c, @d, @e, @f = a, b, c, d, e, f
37
+ @a = a
38
+ @b = b
39
+ @c = c
40
+ @d = d
41
+ @e = e
42
+ @f = f
21
43
  end
22
44
 
45
+ #: () -> String
23
46
  def inspect
24
47
  "#{a}, #{b}, 0,\n#{c}, #{d}, #{0},\n#{e}, #{f}, 1"
25
48
  end
26
49
 
50
+ #: () -> [Numeric]
27
51
  def to_a
28
52
  [@a,@b,0,
29
53
  @c,@d,0,
@@ -51,6 +75,7 @@ class PDF::Reader
51
75
  # displacement to speed up processing documents that use vertical
52
76
  # writing systems
53
77
  #
78
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> PDF::Reader::TransformationMatrix
54
79
  def multiply!(a,b,c, d,e,f)
55
80
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
56
81
  # the identity matrix, no effect
@@ -90,6 +115,7 @@ class PDF::Reader
90
115
  # [ 3 4 0 ] x [ 0 1 0 ]
91
116
  # [ 5 6 1 ] [ e2 0 1 ]
92
117
  #
118
+ #: (Numeric) -> void
93
119
  def horizontal_displacement_multiply!(e2)
94
120
  @e = @e + e2
95
121
  end
@@ -105,6 +131,7 @@ class PDF::Reader
105
131
  # [ 0 1 0 ] x [ 3 4 0 ]
106
132
  # [ 5 0 1 ] [ 5 6 1 ]
107
133
  #
134
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
108
135
  def horizontal_displacement_multiply_reversed!(a2,b2,c2,d2,e2,f2)
109
136
  newa = a2
110
137
  newb = b2
@@ -124,6 +151,7 @@ class PDF::Reader
124
151
  # [ 3 4 0 ] x [ 0 5 0 ]
125
152
  # [ 5 6 1 ] [ 0 0 1 ]
126
153
  #
154
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
127
155
  def xy_scaling_multiply!(a2,b2,c2,d2,e2,f2)
128
156
  newa = @a * a2
129
157
  newb = @b * d2
@@ -143,6 +171,7 @@ class PDF::Reader
143
171
  # [ 0 5 0 ] x [ 3 4 0 ]
144
172
  # [ 0 0 1 ] [ 5 6 1 ]
145
173
  #
174
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
146
175
  def xy_scaling_multiply_reversed!(a2,b2,c2,d2,e2,f2)
147
176
  newa = @a * a2
148
177
  newb = @a * b2
@@ -163,6 +192,7 @@ class PDF::Reader
163
192
  # [ c d 0 ] x [ c d 0 ]
164
193
  # [ e f 1 ] [ e f 1 ]
165
194
  #
195
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
166
196
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
167
197
  newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
198
  newb = (@a * b2) + (@b * d2) + (f2 * 0)
@@ -183,6 +213,7 @@ class PDF::Reader
183
213
  # [ c d 0 ] x [ c d 0 ]
184
214
  # [ e f 1 ] [ e f 1 ]
185
215
  #
216
+ #: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
186
217
  def faster_multiply!(a2,b2,c2, d2,e2,f2)
187
218
  newa = (@a * a2) + (@b * c2)
188
219
  newb = (@a * b2) + (@b * d2)
@@ -9,6 +9,7 @@ module PDF
9
9
  #
10
10
  class TypeCheck
11
11
 
12
+ #: (untyped) -> Integer
12
13
  def self.cast_to_int!(obj)
13
14
  if obj.is_a?(Integer)
14
15
  obj
@@ -21,6 +22,7 @@ module PDF
21
22
  end
22
23
  end
23
24
 
25
+ #: (untyped) -> Numeric
24
26
  def self.cast_to_numeric!(obj)
25
27
  if obj.is_a?(Numeric)
26
28
  obj
@@ -35,6 +37,7 @@ module PDF
35
37
  end
36
38
  end
37
39
 
40
+ #: (untyped) -> String
38
41
  def self.cast_to_string!(string)
39
42
  if string.is_a?(String)
40
43
  string
@@ -47,6 +50,7 @@ module PDF
47
50
  end
48
51
  end
49
52
 
53
+ #: (untyped) -> Symbol | nil
50
54
  def self.cast_to_symbol(obj)
51
55
  if obj.is_a?(Symbol)
52
56
  obj
@@ -59,15 +63,17 @@ module PDF
59
63
  end
60
64
  end
61
65
 
66
+ #: (untyped) -> Symbol
62
67
  def self.cast_to_symbol!(obj)
63
68
  res = cast_to_symbol(obj)
64
- if res
65
- res
66
- else
69
+ if res.nil?
67
70
  raise MalformedPDFError, "Unable to cast to symbol"
71
+ else
72
+ res
68
73
  end
69
74
  end
70
75
 
76
+ #: (untyped) -> Hash[Symbol, untyped]
71
77
  def self.cast_to_pdf_dict!(obj)
72
78
  if obj.is_a?(Hash)
73
79
  obj
@@ -78,6 +84,7 @@ module PDF
78
84
  end
79
85
  end
80
86
 
87
+ #: (untyped) -> Hash[Symbol, PDF::Reader::Stream]
81
88
  def self.cast_to_pdf_dict_with_stream_values!(obj)
82
89
  if obj.is_a?(Hash)
83
90
  result = Hash.new