pdf-reader 2.14.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +22 -0
- data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +35 -17
- data/lib/pdf/reader/cid_widths.rb +7 -1
- data/lib/pdf/reader/cmap.rb +14 -3
- data/lib/pdf/reader/encoding.rb +37 -12
- data/lib/pdf/reader/error.rb +6 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +4 -0
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +90 -22
- data/lib/pdf/reader/font_descriptor.rb +76 -23
- data/lib/pdf/reader/form_xobject.rb +11 -0
- data/lib/pdf/reader/glyph_hash.rb +34 -9
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +17 -6
- data/lib/pdf/reader/no_text_filter.rb +1 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +116 -9
- data/lib/pdf/reader/object_stream.rb +19 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
- data/lib/pdf/reader/page.rb +41 -7
- data/lib/pdf/reader/page_layout.rb +25 -8
- data/lib/pdf/reader/page_state.rb +5 -2
- data/lib/pdf/reader/page_text_receiver.rb +6 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +51 -10
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +10 -1
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +9 -0
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +5 -2
- data/lib/pdf/reader/text_run.rb +28 -1
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +33 -2
- data/lib/pdf/reader/type_check.rb +10 -3
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
- data/lib/pdf/reader/width_calculator/composite.rb +5 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
- data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
- data/lib/pdf/reader/xref.rb +28 -7
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +18 -2
- data/rbi/pdf-reader.rbi +1502 -1594
- metadata +17 -11
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
require 'ttfunk'
|
@@ -9,29 +9,75 @@ class PDF::Reader
|
|
9
9
|
# Font descriptors are outlined in Section 9.8, PDF 32000-1:2008, pp 281-288
|
10
10
|
class FontDescriptor
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
:avg_width, :max_width, :missing_width, :italic_angle, :stem_v,
|
15
|
-
:x_height, :font_flags
|
12
|
+
#: String
|
13
|
+
attr_reader :font_name
|
16
14
|
|
15
|
+
#: String?
|
16
|
+
attr_reader :font_family
|
17
|
+
|
18
|
+
#: Symbol
|
19
|
+
attr_reader :font_stretch
|
20
|
+
|
21
|
+
#: Numeric
|
22
|
+
attr_reader :font_weight
|
23
|
+
|
24
|
+
#: Array[Numeric]
|
25
|
+
attr_reader :font_bounding_box
|
26
|
+
|
27
|
+
#: Numeric
|
28
|
+
attr_reader :cap_height
|
29
|
+
|
30
|
+
#: Numeric
|
31
|
+
attr_reader :ascent
|
32
|
+
|
33
|
+
#: Numeric
|
34
|
+
attr_reader :descent
|
35
|
+
|
36
|
+
#: Numeric
|
37
|
+
attr_reader :leading
|
38
|
+
|
39
|
+
#: Numeric
|
40
|
+
attr_reader :avg_width
|
41
|
+
|
42
|
+
#: Numeric
|
43
|
+
attr_reader :max_width
|
44
|
+
|
45
|
+
#: Numeric
|
46
|
+
attr_reader :missing_width
|
47
|
+
|
48
|
+
#: Numeric?
|
49
|
+
attr_reader :italic_angle
|
50
|
+
|
51
|
+
#: Numeric?
|
52
|
+
attr_reader :stem_v
|
53
|
+
|
54
|
+
#: Numeric?
|
55
|
+
attr_reader :x_height
|
56
|
+
|
57
|
+
#: Integer
|
58
|
+
attr_reader :font_flags
|
59
|
+
|
60
|
+
#: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
|
17
61
|
def initialize(ohash, fd_hash)
|
18
62
|
# TODO change these to typed derefs
|
19
|
-
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
|
20
|
-
@descent = ohash.deref_number(fd_hash[:Descent]) || 0
|
21
|
-
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
|
22
|
-
@font_bounding_box = ohash.deref_array_of_numbers(
|
23
|
-
|
24
|
-
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
33
|
-
@
|
34
|
-
@
|
63
|
+
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0 #: Numeric
|
64
|
+
@descent = ohash.deref_number(fd_hash[:Descent]) || 0 #: Numeric
|
65
|
+
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0 #: Numeric
|
66
|
+
@font_bounding_box = ohash.deref_array_of_numbers(
|
67
|
+
fd_hash[:FontBBox]
|
68
|
+
) || [0,0,0,0] #: Array[Numeric]
|
69
|
+
@avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0 #: Numeric
|
70
|
+
@cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0 #: Numeric
|
71
|
+
@font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0 #: Integer
|
72
|
+
@italic_angle = ohash.deref_number(fd_hash[:ItalicAngle]) #: Numeric?
|
73
|
+
@font_name = ohash.deref_name(fd_hash[:FontName]).to_s #: String
|
74
|
+
@leading = ohash.deref_number(fd_hash[:Leading]) || 0 #: Numeric
|
75
|
+
@max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0 #: Numeric
|
76
|
+
@stem_v = ohash.deref_number(fd_hash[:StemV]) #: Numeric?
|
77
|
+
@x_height = ohash.deref_number(fd_hash[:XHeight]) #: Numeric?
|
78
|
+
@font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal #: Symbol
|
79
|
+
@font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400 #: Numeric
|
80
|
+
@font_family = ohash.deref_string(fd_hash[:FontFamily]) #: String?
|
35
81
|
|
36
82
|
# A FontDescriptor may have an embedded font program in FontFile
|
37
83
|
# (Type 1 Font Program), FontFile2 (TrueType font program), or
|
@@ -41,12 +87,15 @@ class PDF::Reader
|
|
41
87
|
# 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
|
42
88
|
# 3) OpenType: OpenType Font Program
|
43
89
|
# see Section 9.9, PDF 32000-1:2008, pp 288-292
|
44
|
-
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
|
90
|
+
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2]) #: PDF::Reader::Stream?
|
45
91
|
#TODO handle FontFile and FontFile3
|
92
|
+
@ttf_program_stream = nil #: TTFunk::File?
|
46
93
|
|
47
|
-
@is_ttf = true
|
94
|
+
@is_ttf = @font_program_stream ? true : false #: bool
|
95
|
+
@glyph_to_pdf_sf = nil #: Numeric?
|
48
96
|
end
|
49
97
|
|
98
|
+
#: (Integer) -> Numeric
|
50
99
|
def glyph_width(char_code)
|
51
100
|
if @is_ttf
|
52
101
|
if ttf_program_stream.cmap.unicode.length > 0
|
@@ -65,6 +114,7 @@ class PDF::Reader
|
|
65
114
|
|
66
115
|
# PDF states that a glyph is 1000 units wide, true type doesn't enforce
|
67
116
|
# any behavior, but uses units/em to define how wide the 'M' is (the widest letter)
|
117
|
+
#: () -> Numeric
|
68
118
|
def glyph_to_pdf_scale_factor
|
69
119
|
if @is_ttf
|
70
120
|
@glyph_to_pdf_sf ||= (1.0 / ttf_program_stream.header.units_per_em) * 1000.0
|
@@ -76,7 +126,10 @@ class PDF::Reader
|
|
76
126
|
|
77
127
|
private
|
78
128
|
|
129
|
+
#: () -> TTFunk::File
|
79
130
|
def ttf_program_stream
|
131
|
+
raise MalformedPDFError, "No font_program_stream" unless @font_program_stream
|
132
|
+
|
80
133
|
@ttf_program_stream ||= TTFunk::File.new(@font_program_stream.unfiltered_data)
|
81
134
|
end
|
82
135
|
end
|
@@ -17,6 +17,7 @@ module PDF
|
|
17
17
|
class FormXObject
|
18
18
|
extend Forwardable
|
19
19
|
|
20
|
+
#: untyped
|
20
21
|
attr_reader :xobject
|
21
22
|
|
22
23
|
def_delegators :resources, :color_spaces
|
@@ -28,6 +29,7 @@ module PDF
|
|
28
29
|
def_delegators :resources, :shadings
|
29
30
|
def_delegators :resources, :xobjects
|
30
31
|
|
32
|
+
#: (untyped, untyped, ?Hash[untyped, untyped]) -> void
|
31
33
|
def initialize(page, xobject, options = {})
|
32
34
|
@page = page
|
33
35
|
@objects = page.objects
|
@@ -42,6 +44,7 @@ module PDF
|
|
42
44
|
# The values are a PDF::Reader::Font instances that provide access
|
43
45
|
# to most available metrics for each font.
|
44
46
|
#
|
47
|
+
#: () -> untyped
|
45
48
|
def font_objects
|
46
49
|
raw_fonts = @objects.deref_hash(fonts)
|
47
50
|
::Hash[raw_fonts.map { |label, font|
|
@@ -54,6 +57,7 @@ module PDF
|
|
54
57
|
#
|
55
58
|
# See the comments on PDF::Reader::Page#walk for more detail.
|
56
59
|
#
|
60
|
+
#: (*untyped) -> untyped
|
57
61
|
def walk(*receivers)
|
58
62
|
receivers = receivers.map { |receiver|
|
59
63
|
ValidatingReceiver.new(receiver)
|
@@ -64,6 +68,7 @@ module PDF
|
|
64
68
|
# returns the raw content stream for this page. This is plumbing, nothing to
|
65
69
|
# see here unless you're a PDF nerd like me.
|
66
70
|
#
|
71
|
+
#: () -> untyped
|
67
72
|
def raw_content
|
68
73
|
@xobject.unfiltered_data
|
69
74
|
end
|
@@ -72,24 +77,29 @@ module PDF
|
|
72
77
|
|
73
78
|
# Returns the resources that accompany this form.
|
74
79
|
#
|
80
|
+
#: () -> untyped
|
75
81
|
def resources
|
76
82
|
@resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
|
77
83
|
end
|
78
84
|
|
85
|
+
#: (untyped, untyped, ?Array[untyped]) -> untyped
|
79
86
|
def callback(receivers, name, params=[])
|
80
87
|
receivers.each do |receiver|
|
81
88
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
82
89
|
end
|
83
90
|
end
|
84
91
|
|
92
|
+
#: () -> untyped
|
85
93
|
def content_stream_md5
|
86
94
|
@content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
|
87
95
|
end
|
88
96
|
|
97
|
+
#: () -> untyped
|
89
98
|
def cached_tokens_key
|
90
99
|
@cached_tokens_key ||= "tokens-#{content_stream_md5}"
|
91
100
|
end
|
92
101
|
|
102
|
+
#: () -> untyped
|
93
103
|
def tokens
|
94
104
|
@cache[cached_tokens_key] ||= begin
|
95
105
|
buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
|
@@ -102,6 +112,7 @@ module PDF
|
|
102
112
|
end
|
103
113
|
end
|
104
114
|
|
115
|
+
#: (untyped, untyped) -> untyped
|
105
116
|
def content_stream(receivers, instructions)
|
106
117
|
params = []
|
107
118
|
|
@@ -32,18 +32,37 @@ class PDF::Reader
|
|
32
32
|
# The mapping is read from a data file on disk the first time it's needed.
|
33
33
|
#
|
34
34
|
class GlyphHash # :nodoc:
|
35
|
+
@@by_codepoint_cache = nil #: Hash[Integer, Array[Symbol]] | nil
|
36
|
+
@@by_name_cache = nil #: Hash[Symbol, Integer] | nil
|
37
|
+
|
38
|
+
# An internal class for returning multiple pieces of data and keep sorbet happy
|
39
|
+
class ReturnData
|
40
|
+
#: Hash[Symbol, Integer]
|
41
|
+
attr_reader :by_name
|
42
|
+
|
43
|
+
#: Hash[Integer, Array[Symbol]]
|
44
|
+
attr_reader :by_codepoint
|
45
|
+
|
46
|
+
#:(Hash[Symbol, Integer], Hash[Integer, Array[Symbol]]) -> void
|
47
|
+
def initialize(by_name, by_codepoint)
|
48
|
+
@by_name = by_name
|
49
|
+
@by_codepoint = by_codepoint
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
#: () -> void
|
35
54
|
def initialize
|
36
55
|
@@by_codepoint_cache ||= nil
|
37
56
|
@@by_name_cache ||= nil
|
38
57
|
|
39
58
|
# only parse the glyph list once, and cache the results (for performance)
|
40
59
|
if @@by_codepoint_cache != nil && @@by_name_cache != nil
|
41
|
-
@by_name = @@by_name_cache
|
42
|
-
@by_codepoint = @@by_codepoint_cache
|
60
|
+
@by_name = @@by_name_cache #: Hash[Symbol, Integer]
|
61
|
+
@by_codepoint = @@by_codepoint_cache #: Hash[Integer, Array[Symbol]]
|
43
62
|
else
|
44
|
-
|
45
|
-
@by_name = @@by_name_cache ||= by_name
|
46
|
-
@by_codepoint = @@by_codepoint_cache ||= by_codepoint
|
63
|
+
res = load_adobe_glyph_mapping
|
64
|
+
@by_name = @@by_name_cache ||= res.by_name
|
65
|
+
@by_codepoint = @@by_codepoint_cache ||= res.by_codepoint
|
47
66
|
end
|
48
67
|
end
|
49
68
|
|
@@ -67,6 +86,7 @@ class PDF::Reader
|
|
67
86
|
# h.name_to_unicode(:34)
|
68
87
|
# => 34
|
69
88
|
#
|
89
|
+
#: (Symbol | nil) -> (Integer | nil)
|
70
90
|
def name_to_unicode(name)
|
71
91
|
return nil unless name.is_a?(Symbol)
|
72
92
|
|
@@ -104,6 +124,7 @@ class PDF::Reader
|
|
104
124
|
# h.unicode_to_name(34)
|
105
125
|
# => [:34]
|
106
126
|
#
|
127
|
+
#: (Integer) -> Array[Symbol]
|
107
128
|
def unicode_to_name(codepoint)
|
108
129
|
@by_codepoint[codepoint.to_i] || []
|
109
130
|
end
|
@@ -113,9 +134,10 @@ class PDF::Reader
|
|
113
134
|
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
114
135
|
# a text file supplied by Adobe at:
|
115
136
|
# https://github.com/adobe-type-tools/agl-aglfn
|
137
|
+
#: () -> ReturnData
|
116
138
|
def load_adobe_glyph_mapping
|
117
|
-
keyed_by_name = {}
|
118
|
-
keyed_by_codepoint = {}
|
139
|
+
keyed_by_name = {} #: Hash[Symbol, Integer]
|
140
|
+
keyed_by_codepoint = {} #: Hash[Integer, Array[Symbol]]
|
119
141
|
|
120
142
|
paths = [
|
121
143
|
File.dirname(__FILE__) + "/glyphlist.txt",
|
@@ -129,13 +151,16 @@ class PDF::Reader
|
|
129
151
|
cp = "0x#{code}".hex
|
130
152
|
keyed_by_name[name.to_sym] = cp
|
131
153
|
keyed_by_codepoint[cp] ||= []
|
132
|
-
keyed_by_codepoint[cp]
|
154
|
+
arr = keyed_by_codepoint[cp]
|
155
|
+
if arr
|
156
|
+
arr.push(name.to_sym)
|
157
|
+
end
|
133
158
|
end
|
134
159
|
end
|
135
160
|
end
|
136
161
|
end
|
137
162
|
|
138
|
-
|
163
|
+
ReturnData.new(keyed_by_name.freeze, keyed_by_codepoint.freeze)
|
139
164
|
end
|
140
165
|
|
141
166
|
end
|
@@ -16,20 +16,21 @@ class PDF::Reader
|
|
16
16
|
#
|
17
17
|
class KeyBuilderV5
|
18
18
|
|
19
|
+
#: (?Hash[Symbol, String]) -> void
|
19
20
|
def initialize(opts = {})
|
20
|
-
@key_length = 256
|
21
|
+
@key_length = 256 #: Integer
|
21
22
|
|
22
23
|
# hash(32B) + validation salt(8B) + key salt(8B)
|
23
|
-
@owner_key = opts[:owner_key] || ""
|
24
|
+
@owner_key = opts[:owner_key] || "" #: String
|
24
25
|
|
25
26
|
# hash(32B) + validation salt(8B) + key salt(8B)
|
26
|
-
@user_key = opts[:user_key] || ""
|
27
|
+
@user_key = opts[:user_key] || "" #: String
|
27
28
|
|
28
29
|
# decryption key, encrypted w/ owner password
|
29
|
-
@owner_encryption_key = opts[:owner_encryption_key] || ""
|
30
|
+
@owner_encryption_key = opts[:owner_encryption_key] || "" #: String
|
30
31
|
|
31
32
|
# decryption key, encrypted w/ user password
|
32
|
-
@user_encryption_key = opts[:user_encryption_key] || ""
|
33
|
+
@user_encryption_key = opts[:user_encryption_key] || "" #: String
|
33
34
|
end
|
34
35
|
|
35
36
|
# Takes a string containing a user provided password.
|
@@ -38,6 +39,7 @@ class PDF::Reader
|
|
38
39
|
# decrypting the file will be returned. If the password doesn't match the file,
|
39
40
|
# and exception will be raised.
|
40
41
|
#
|
42
|
+
#: (String) -> String
|
41
43
|
def key(pass)
|
42
44
|
pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes
|
43
45
|
|
@@ -58,28 +60,31 @@ class PDF::Reader
|
|
58
60
|
#
|
59
61
|
# if the string is a valid user/owner password, this will return the decryption key
|
60
62
|
#
|
63
|
+
#: (String) -> (String | nil)
|
61
64
|
def auth_owner_pass(password)
|
62
|
-
if Digest::SHA256.digest(password + @owner_key[32..39] + @user_key) == @owner_key[0..31]
|
65
|
+
if Digest::SHA256.digest(password + @owner_key[32..39].to_s + @user_key) == @owner_key[0..31]
|
63
66
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
64
67
|
cipher.decrypt
|
65
|
-
cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1] + @user_key)
|
68
|
+
cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1].to_s + @user_key)
|
66
69
|
cipher.iv = "\x00" * 16
|
67
70
|
cipher.padding = 0
|
68
71
|
cipher.update(@owner_encryption_key) + cipher.final
|
69
72
|
end
|
70
73
|
end
|
71
74
|
|
75
|
+
#: (String) -> (String | nil)
|
72
76
|
def auth_user_pass(password)
|
73
|
-
if Digest::SHA256.digest(password + @user_key[32..39]) == @user_key[0..31]
|
77
|
+
if Digest::SHA256.digest(password + @user_key[32..39].to_s) == @user_key[0..31]
|
74
78
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
75
79
|
cipher.decrypt
|
76
|
-
cipher.key = Digest::SHA256.digest(password + @user_key[40..-1])
|
80
|
+
cipher.key = Digest::SHA256.digest(password + @user_key[40..-1].to_s)
|
77
81
|
cipher.iv = "\x00" * 16
|
78
82
|
cipher.padding = 0
|
79
83
|
cipher.update(@user_encryption_key) + cipher.final
|
80
84
|
end
|
81
85
|
end
|
82
86
|
|
87
|
+
#: (String) -> (String | nil)
|
83
88
|
def auth_owner_pass_r6(password)
|
84
89
|
if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
|
85
90
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
@@ -91,6 +96,7 @@ class PDF::Reader
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
#: (String) -> (String | nil)
|
94
100
|
def auth_user_pass_r6(password)
|
95
101
|
if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
|
96
102
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
@@ -104,6 +110,7 @@ class PDF::Reader
|
|
104
110
|
|
105
111
|
# PDF 2.0 spec, 7.6.4.3.4
|
106
112
|
# Algorithm 2.B: Computing a hash (revision 6 and later)
|
113
|
+
#: (String, String, ?String) -> String
|
107
114
|
def r6_digest(password, salt, user_key = '')
|
108
115
|
k = Digest::SHA256.digest(password + salt + user_key)
|
109
116
|
e = ''
|
@@ -128,6 +135,7 @@ class PDF::Reader
|
|
128
135
|
k[0, 32].to_s
|
129
136
|
end
|
130
137
|
|
138
|
+
#: (String) -> Integer
|
131
139
|
def unpack_128bit_bigendian_int(str)
|
132
140
|
ints = str[0,16].to_s.unpack("N*")
|
133
141
|
(ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -22,20 +22,24 @@ module PDF
|
|
22
22
|
# Wraps an LZW encoded string
|
23
23
|
class BitStream # :nodoc:
|
24
24
|
|
25
|
+
#: (String, Integer) -> void
|
25
26
|
def initialize(data, bits_in_chunk)
|
26
27
|
@data = data
|
27
28
|
@data.force_encoding("BINARY")
|
29
|
+
@current_pos = 0 #: Integer
|
30
|
+
@bits_left_in_byte = 8 #: Integer
|
31
|
+
@bits_in_chunk = 0 #: Integer
|
28
32
|
set_bits_in_chunk(bits_in_chunk)
|
29
|
-
@current_pos = 0
|
30
|
-
@bits_left_in_byte = 8
|
31
33
|
end
|
32
34
|
|
35
|
+
#: (Integer) -> void
|
33
36
|
def set_bits_in_chunk(bits_in_chunk)
|
34
37
|
raise MalformedPDFError, "invalid LZW bits" if bits_in_chunk < 9 || bits_in_chunk > 12
|
35
38
|
|
36
39
|
@bits_in_chunk = bits_in_chunk
|
37
40
|
end
|
38
41
|
|
42
|
+
#: () -> Integer
|
39
43
|
def read
|
40
44
|
bits_left_in_chunk = @bits_in_chunk
|
41
45
|
chunk = -1
|
@@ -59,19 +63,23 @@ module PDF
|
|
59
63
|
end
|
60
64
|
end
|
61
65
|
|
62
|
-
CODE_EOD = 257 #end of data
|
63
|
-
CODE_CLEAR_TABLE = 256 #clear table
|
66
|
+
CODE_EOD = 257 #: Integer #end of data
|
67
|
+
CODE_CLEAR_TABLE = 256 #: Integer #clear table
|
64
68
|
|
65
69
|
# stores de pairs code => string
|
66
70
|
class StringTable
|
71
|
+
#: Integer
|
67
72
|
attr_reader :string_table_pos
|
68
73
|
|
74
|
+
#: () -> void
|
69
75
|
def initialize
|
70
|
-
@data = Hash.new
|
71
|
-
|
76
|
+
@data = Hash.new #: Hash[Integer, String]
|
77
|
+
# The initial code
|
78
|
+
@string_table_pos = 258 #: Integer
|
72
79
|
end
|
73
80
|
|
74
81
|
#if code less than 258 return fixed string
|
82
|
+
#: (Integer) -> String?
|
75
83
|
def [](key)
|
76
84
|
if key > 257
|
77
85
|
@data[key]
|
@@ -80,6 +88,7 @@ module PDF
|
|
80
88
|
end
|
81
89
|
end
|
82
90
|
|
91
|
+
#: (String) -> void
|
83
92
|
def add(string)
|
84
93
|
@data.store(@string_table_pos, string)
|
85
94
|
@string_table_pos += 1
|
@@ -88,6 +97,7 @@ module PDF
|
|
88
97
|
|
89
98
|
# Decompresses a LZW compressed string.
|
90
99
|
#
|
100
|
+
#: (String) -> String
|
91
101
|
def self.decode(data)
|
92
102
|
stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
|
93
103
|
string_table = StringTable.new
|
@@ -125,6 +135,7 @@ module PDF
|
|
125
135
|
result
|
126
136
|
end
|
127
137
|
|
138
|
+
#: (PDF::Reader::LZW::StringTable, Integer?, Integer?) -> String
|
128
139
|
def self.create_new_string(string_table, some_code, other_code)
|
129
140
|
raise MalformedPDFError, "invalid LZW data" if some_code.nil? || other_code.nil?
|
130
141
|
|
@@ -15,10 +15,15 @@ class PDF::Reader
|
|
15
15
|
# These object types use little memory and are accessed a heap of times as
|
16
16
|
# part of random page access, so we'll cache the unmarshalled objects and
|
17
17
|
# avoid lots of repetitive (and expensive) tokenising
|
18
|
-
CACHEABLE_TYPES = [:Catalog, :Page, :Pages]
|
18
|
+
CACHEABLE_TYPES = [:Catalog, :Page, :Pages] #: Array[Symbol]
|
19
19
|
|
20
|
-
|
20
|
+
#: untyped
|
21
|
+
attr_reader :hits
|
21
22
|
|
23
|
+
#: untyped
|
24
|
+
attr_reader :misses
|
25
|
+
|
26
|
+
#: (?untyped) -> void
|
22
27
|
def initialize(lru_size = 1000)
|
23
28
|
@objects = {}
|
24
29
|
@lru_cache = Hashery::LRUHash.new(lru_size.to_i)
|