pdf-reader 2.9.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +39 -0
- data/README.md +33 -33
- data/Rakefile +2 -2
- data/lib/pdf/reader/advanced_text_run_filter.rb +152 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +39 -22
- data/lib/pdf/reader/cid_widths.rb +14 -6
- data/lib/pdf/reader/cmap.rb +16 -5
- data/lib/pdf/reader/encoding.rb +42 -18
- data/lib/pdf/reader/error.rb +6 -4
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +6 -2
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +99 -32
- data/lib/pdf/reader/font_descriptor.rb +79 -24
- data/lib/pdf/reader/form_xobject.rb +15 -1
- data/lib/pdf/reader/glyph_hash.rb +41 -8
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +42 -16
- data/lib/pdf/reader/no_text_filter.rb +15 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +129 -16
- data/lib/pdf/reader/object_stream.rb +22 -5
- data/lib/pdf/reader/overlapping_runs_filter.rb +8 -2
- data/lib/pdf/reader/page.rb +66 -13
- data/lib/pdf/reader/page_layout.rb +26 -9
- data/lib/pdf/reader/page_state.rb +12 -3
- data/lib/pdf/reader/page_text_receiver.rb +16 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +52 -13
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +13 -3
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +12 -2
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +6 -3
- data/lib/pdf/reader/text_run.rb +33 -3
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +41 -10
- data/lib/pdf/reader/type_check.rb +53 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +13 -5
- data/lib/pdf/reader/width_calculator/composite.rb +11 -3
- data/lib/pdf/reader/width_calculator/true_type.rb +14 -12
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +8 -5
- data/lib/pdf/reader/width_calculator/type_zero.rb +8 -3
- data/lib/pdf/reader/xref.rb +31 -10
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +24 -12
- data/rbi/pdf-reader.rbi +1504 -1480
- metadata +34 -17
@@ -1,9 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
|
-
#
|
6
|
-
|
7
5
|
require 'forwardable'
|
8
6
|
|
9
7
|
class PDF::Reader
|
@@ -20,12 +18,14 @@ class PDF::Reader
|
|
20
18
|
# Graphics State Operators
|
21
19
|
def_delegators :@widths, :[], :fetch
|
22
20
|
|
21
|
+
#: (Numeric, Array[Numeric]) -> void
|
23
22
|
def initialize(default, array)
|
24
|
-
@widths = parse_array(default, array.dup)
|
23
|
+
@widths = parse_array(default, array.dup) #: Hash[Numeric, Numeric]
|
25
24
|
end
|
26
25
|
|
27
26
|
private
|
28
27
|
|
28
|
+
#: (Numeric, Array[Numeric]) -> Hash[Numeric, Numeric]
|
29
29
|
def parse_array(default, array)
|
30
30
|
widths = Hash.new(default)
|
31
31
|
params = []
|
@@ -33,10 +33,10 @@ class PDF::Reader
|
|
33
33
|
params << array.shift
|
34
34
|
|
35
35
|
if params.size == 2 && params.last.is_a?(Array)
|
36
|
-
widths.merge! parse_first_form(params.first, params.last)
|
36
|
+
widths.merge! parse_first_form(params.first.to_i, Array(params.last))
|
37
37
|
params = []
|
38
38
|
elsif params.size == 3
|
39
|
-
widths.merge! parse_second_form(params[0], params[1], params[2])
|
39
|
+
widths.merge! parse_second_form(params[0].to_i, params[1].to_i, params[2].to_i)
|
40
40
|
params = []
|
41
41
|
end
|
42
42
|
end
|
@@ -45,6 +45,8 @@ class PDF::Reader
|
|
45
45
|
|
46
46
|
# this is the form 10 [234 63 234 346 47 234] where width of index 10 is
|
47
47
|
# 234, index 11 is 63, etc
|
48
|
+
#
|
49
|
+
#: (Integer, Array[Numeric]) -> Hash[Numeric, Numeric]
|
48
50
|
def parse_first_form(first, widths)
|
49
51
|
widths.inject({}) { |accum, glyph_width|
|
50
52
|
accum[first + accum.size] = glyph_width
|
@@ -53,7 +55,13 @@ class PDF::Reader
|
|
53
55
|
end
|
54
56
|
|
55
57
|
# this is the form 10 20 123 where all index between 10 and 20 have width 123
|
58
|
+
#
|
59
|
+
#: (Integer, Integer, Numeric) -> Hash[Numeric, Numeric]
|
56
60
|
def parse_second_form(first, final, width)
|
61
|
+
if first > final
|
62
|
+
raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
|
63
|
+
end
|
64
|
+
|
57
65
|
(first..final).inject({}) { |accum, index|
|
58
66
|
accum[index] = width
|
59
67
|
accum
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -44,15 +44,18 @@ class PDF::Reader
|
|
44
44
|
"begin" => :noop,
|
45
45
|
"begincmap" => :noop,
|
46
46
|
"def" => :noop
|
47
|
-
}
|
47
|
+
} #: Hash[String, Symbol]
|
48
48
|
|
49
|
+
#: Hash[Integer, Array[Integer]]
|
49
50
|
attr_reader :map
|
50
51
|
|
52
|
+
#: (String) -> void
|
51
53
|
def initialize(data)
|
52
|
-
@map = {}
|
54
|
+
@map = {} #: Hash[Integer, Array[Integer]]
|
53
55
|
process_data(data)
|
54
56
|
end
|
55
57
|
|
58
|
+
#: () -> Integer
|
56
59
|
def size
|
57
60
|
@map.size
|
58
61
|
end
|
@@ -61,12 +64,14 @@ class PDF::Reader
|
|
61
64
|
#
|
62
65
|
# Returns an array of Integers.
|
63
66
|
#
|
67
|
+
#: (Integer) -> Array[Integer]
|
64
68
|
def decode(c)
|
65
69
|
@map.fetch(c, [])
|
66
70
|
end
|
67
71
|
|
68
72
|
private
|
69
73
|
|
74
|
+
#: (String, ?Symbol) -> void
|
70
75
|
def process_data(data, initial_mode = :none)
|
71
76
|
parser = build_parser(data)
|
72
77
|
mode = initial_mode
|
@@ -96,6 +101,7 @@ class PDF::Reader
|
|
96
101
|
end
|
97
102
|
|
98
103
|
|
104
|
+
#: (String) -> PDF::Reader::Parser
|
99
105
|
def build_parser(instructions)
|
100
106
|
buffer = Buffer.new(StringIO.new(instructions))
|
101
107
|
Parser.new(buffer)
|
@@ -109,6 +115,7 @@ class PDF::Reader
|
|
109
115
|
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
110
116
|
# exception when we try converting broken UTF-16 to UTF-8
|
111
117
|
#
|
118
|
+
#: (String) -> Array[Integer]
|
112
119
|
def str_to_int(str)
|
113
120
|
unpacked_string = if str.bytesize == 1 # UTF-8
|
114
121
|
str.unpack("C*")
|
@@ -118,8 +125,8 @@ class PDF::Reader
|
|
118
125
|
result = []
|
119
126
|
while unpacked_string.any? do
|
120
127
|
if unpacked_string.size >= 2 &&
|
121
|
-
unpacked_string.first.to_i
|
122
|
-
unpacked_string.first.to_i
|
128
|
+
unpacked_string.first.to_i >= 0xD800 &&
|
129
|
+
unpacked_string.first.to_i <= 0xDBFF
|
123
130
|
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
124
131
|
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
125
132
|
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
@@ -133,6 +140,7 @@ class PDF::Reader
|
|
133
140
|
result
|
134
141
|
end
|
135
142
|
|
143
|
+
#: (Array[String]) -> void
|
136
144
|
def process_bfchar_instructions(instructions)
|
137
145
|
instructions.each_slice(2) do |one, two|
|
138
146
|
find = str_to_int(one.to_s)
|
@@ -143,6 +151,7 @@ class PDF::Reader
|
|
143
151
|
end
|
144
152
|
end
|
145
153
|
|
154
|
+
#: (Array[Array[String] | String]) -> void
|
146
155
|
def process_bfrange_instructions(instructions)
|
147
156
|
instructions.each_slice(3) do |start, finish, to|
|
148
157
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
@@ -155,6 +164,7 @@ class PDF::Reader
|
|
155
164
|
end
|
156
165
|
end
|
157
166
|
|
167
|
+
#: (String, String, String) -> void
|
158
168
|
def bfrange_type_one(start_code, end_code, dst)
|
159
169
|
start_code = str_to_int(start_code).first
|
160
170
|
end_code = str_to_int(end_code).first
|
@@ -168,6 +178,7 @@ class PDF::Reader
|
|
168
178
|
end
|
169
179
|
end
|
170
180
|
|
181
|
+
#: (String, String, Array[String]) -> void
|
171
182
|
def bfrange_type_two(start_code, end_code, dst)
|
172
183
|
start_code = str_to_int(start_code).first
|
173
184
|
end_code = str_to_int(end_code).first
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -32,25 +32,31 @@ class PDF::Reader
|
|
32
32
|
# convert strings of various PDF-dialect encodings into UTF-8.
|
33
33
|
class Encoding # :nodoc:
|
34
34
|
CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
|
35
|
-
24,25,26,27,28,29,30,31]
|
36
|
-
UNKNOWN_CHAR = 0x25AF # ▯
|
35
|
+
24,25,26,27,28,29,30,31] #: Array[Integer]
|
36
|
+
UNKNOWN_CHAR = 0x25AF #: Integer # ▯
|
37
37
|
|
38
|
+
#: String
|
38
39
|
attr_reader :unpack
|
39
40
|
|
41
|
+
#: (Hash[Symbol, untyped] | Symbol | nil) -> void
|
40
42
|
def initialize(enc)
|
41
|
-
|
42
|
-
@
|
43
|
+
# maps from character codes to Unicode codepoints
|
44
|
+
@mapping = default_mapping #: Hash[Integer, Integer]
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
+
# maps from character codes to UTF-8 strings.
|
47
|
+
@string_cache = {} #: Hash[Integer, String]
|
48
|
+
|
49
|
+
@enc_name = :StandardEncoding #: Symbol
|
50
|
+
if enc.kind_of?(Hash)
|
51
|
+
@enc_name = enc[:Encoding] || enc[:BaseEncoding]
|
46
52
|
elsif enc && enc.respond_to?(:to_sym)
|
47
|
-
enc.to_sym
|
48
|
-
else
|
49
|
-
:StandardEncoding
|
53
|
+
@enc_name = enc.to_sym
|
50
54
|
end
|
51
55
|
|
52
|
-
@unpack = get_unpack(@enc_name)
|
53
|
-
@map_file = get_mapping_file(@enc_name)
|
56
|
+
@unpack = get_unpack(@enc_name) #: String
|
57
|
+
@map_file = get_mapping_file(@enc_name) #: String | nil
|
58
|
+
@differences = nil #: Hash[Integer, Integer] | nil
|
59
|
+
@glyphlist = nil #: PDF::Reader::GlyphHash | nil
|
54
60
|
|
55
61
|
load_mapping(@map_file) if @map_file
|
56
62
|
|
@@ -68,6 +74,7 @@ class PDF::Reader
|
|
68
74
|
# To save space the following array is also valid and equivalent to the previous one
|
69
75
|
#
|
70
76
|
# [25, :A, :B]
|
77
|
+
#: (Array[Integer | Symbol]) -> Hash[Integer, Integer]
|
71
78
|
def differences=(diff)
|
72
79
|
PDF::Reader::Error.validate_type(diff, "diff", Array)
|
73
80
|
|
@@ -76,15 +83,16 @@ class PDF::Reader
|
|
76
83
|
diff.each do |val|
|
77
84
|
if val.kind_of?(Numeric)
|
78
85
|
byte = val.to_i
|
79
|
-
|
86
|
+
elsif codepoint = glyphlist.name_to_unicode(val)
|
80
87
|
@differences[byte] = val
|
81
|
-
@mapping[byte] =
|
88
|
+
@mapping[byte] = codepoint
|
82
89
|
byte += 1
|
83
90
|
end
|
84
91
|
end
|
85
92
|
@differences
|
86
93
|
end
|
87
94
|
|
95
|
+
#: () -> Hash[Integer, Integer]
|
88
96
|
def differences
|
89
97
|
# this method is only used by the spec tests
|
90
98
|
@differences ||= {}
|
@@ -101,6 +109,7 @@ class PDF::Reader
|
|
101
109
|
# * pack the final array of Unicode codepoints into a utf-8 string
|
102
110
|
# * mark the string as utf-8 if we're running on a M17N aware VM
|
103
111
|
#
|
112
|
+
#: (String) -> String
|
104
113
|
def to_utf8(str)
|
105
114
|
if utf8_conversion_impossible?
|
106
115
|
little_boxes(str.unpack(unpack).size)
|
@@ -109,6 +118,7 @@ class PDF::Reader
|
|
109
118
|
end
|
110
119
|
end
|
111
120
|
|
121
|
+
#: (Integer) -> String
|
112
122
|
def int_to_utf8_string(glyph_code)
|
113
123
|
@string_cache[glyph_code] ||= internal_int_to_utf8_string(glyph_code)
|
114
124
|
end
|
@@ -118,13 +128,19 @@ class PDF::Reader
|
|
118
128
|
# int_to_name(65)
|
119
129
|
# => [:A]
|
120
130
|
#
|
131
|
+
#: (Integer) -> Array[Symbol]
|
121
132
|
def int_to_name(glyph_code)
|
122
|
-
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
133
|
+
if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
123
134
|
[]
|
124
135
|
elsif differences[glyph_code]
|
125
136
|
[differences[glyph_code]]
|
126
137
|
elsif @mapping[glyph_code]
|
127
|
-
|
138
|
+
val = @mapping[glyph_code]
|
139
|
+
if val
|
140
|
+
glyphlist.unicode_to_name(val)
|
141
|
+
else
|
142
|
+
[]
|
143
|
+
end
|
128
144
|
else
|
129
145
|
[]
|
130
146
|
end
|
@@ -137,16 +153,17 @@ class PDF::Reader
|
|
137
153
|
# - leaves all other bytes <= 255 unchaged
|
138
154
|
#
|
139
155
|
# Each specific encoding will change this default as required for their glyphs
|
156
|
+
#: () -> Hash[Integer, Integer]
|
140
157
|
def default_mapping
|
141
158
|
all_bytes = (0..255).to_a
|
142
159
|
tuples = all_bytes.map {|i|
|
143
160
|
CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
|
144
161
|
}
|
145
162
|
mapping = Hash[tuples]
|
146
|
-
mapping[nil] = UNKNOWN_CHAR
|
147
163
|
mapping
|
148
164
|
end
|
149
165
|
|
166
|
+
#: (Integer) -> String
|
150
167
|
def internal_int_to_utf8_string(glyph_code)
|
151
168
|
ret = [
|
152
169
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
@@ -155,10 +172,12 @@ class PDF::Reader
|
|
155
172
|
ret
|
156
173
|
end
|
157
174
|
|
175
|
+
#: () -> bool
|
158
176
|
def utf8_conversion_impossible?
|
159
177
|
@enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
160
178
|
end
|
161
179
|
|
180
|
+
#: (Integer) -> String
|
162
181
|
def little_boxes(times)
|
163
182
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
164
183
|
ret = codepoints.pack("U*")
|
@@ -166,12 +185,14 @@ class PDF::Reader
|
|
166
185
|
ret
|
167
186
|
end
|
168
187
|
|
188
|
+
#: (String) -> String
|
169
189
|
def convert_to_utf8(str)
|
170
|
-
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
190
|
+
ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
|
171
191
|
ret.force_encoding("UTF-8")
|
172
192
|
ret
|
173
193
|
end
|
174
194
|
|
195
|
+
#: (Symbol) -> String
|
175
196
|
def get_unpack(enc)
|
176
197
|
case enc
|
177
198
|
when :"Identity-H", :"Identity-V", :UTF16Encoding
|
@@ -181,6 +202,7 @@ class PDF::Reader
|
|
181
202
|
end
|
182
203
|
end
|
183
204
|
|
205
|
+
#: (Symbol) -> String?
|
184
206
|
def get_mapping_file(enc)
|
185
207
|
case enc
|
186
208
|
when :"Identity-H", :"Identity-V", :UTF16Encoding then
|
@@ -202,10 +224,12 @@ class PDF::Reader
|
|
202
224
|
end
|
203
225
|
end
|
204
226
|
|
227
|
+
#: () -> PDF::Reader::GlyphHash
|
205
228
|
def glyphlist
|
206
229
|
@glyphlist ||= PDF::Reader::GlyphHash.new
|
207
230
|
end
|
208
231
|
|
232
|
+
#: (String) -> void
|
209
233
|
def load_mapping(file)
|
210
234
|
File.open(file, "r:BINARY") do |f|
|
211
235
|
f.each do |l|
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -31,37 +31,39 @@ class PDF::Reader
|
|
31
31
|
# are valid
|
32
32
|
class Error # :nodoc:
|
33
33
|
################################################################################
|
34
|
+
#: (untyped, untyped, ?untyped) -> untyped
|
34
35
|
def self.str_assert(lvalue, rvalue, chars=nil)
|
35
36
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
36
37
|
lvalue = lvalue[0,chars] if chars
|
37
38
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
38
39
|
end
|
39
40
|
################################################################################
|
41
|
+
#: (untyped, untyped, ?untyped) -> untyped
|
40
42
|
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
41
43
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
42
44
|
lvalue = lvalue[0,chars] if chars
|
43
45
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
|
44
46
|
end
|
45
47
|
################################################################################
|
48
|
+
#: (untyped, untyped) -> untyped
|
46
49
|
def self.assert_equal(lvalue, rvalue)
|
47
50
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
48
51
|
end
|
49
52
|
################################################################################
|
53
|
+
#: (Object, String, Module) -> void
|
50
54
|
def self.validate_type(object, name, klass)
|
51
55
|
raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
52
56
|
end
|
53
57
|
################################################################################
|
58
|
+
#: (Object, String, Module) -> void
|
54
59
|
def self.validate_type_as_malformed(object, name, klass)
|
55
60
|
raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
56
61
|
end
|
57
62
|
################################################################################
|
63
|
+
#: (Object, String) -> void
|
58
64
|
def self.validate_not_nil(object, name)
|
59
65
|
raise ArgumentError, "#{object} must not be nil" if object.nil?
|
60
66
|
end
|
61
|
-
################################################################################
|
62
|
-
def self.validate_not_nil_as_malformed(object, name)
|
63
|
-
raise MalformedPDFError, "#{object} must not be nil" if object.nil?
|
64
|
-
end
|
65
67
|
end
|
66
68
|
|
67
69
|
################################################################################
|
@@ -9,6 +9,7 @@ class PDF::Reader
|
|
9
9
|
# implementation of the Ascii85 filter
|
10
10
|
class Ascii85
|
11
11
|
|
12
|
+
#: (?Hash[untyped, untyped]) -> void
|
12
13
|
def initialize(options = {})
|
13
14
|
@options = options
|
14
15
|
end
|
@@ -17,6 +18,7 @@ class PDF::Reader
|
|
17
18
|
# Decode the specified data using the Ascii85 algorithm. Relies on the AScii85
|
18
19
|
# rubygem.
|
19
20
|
#
|
21
|
+
#: (String) -> String
|
20
22
|
def filter(data)
|
21
23
|
data = "<~#{data}" unless data.to_s[0,2] == "<~"
|
22
24
|
if defined?(::Ascii85Native)
|
@@ -8,6 +8,7 @@ class PDF::Reader
|
|
8
8
|
# implementation of the AsciiHex stream filter
|
9
9
|
class AsciiHex
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
@@ -15,6 +16,7 @@ class PDF::Reader
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data using the AsciiHex algorithm.
|
17
18
|
#
|
19
|
+
#: (String) -> String
|
18
20
|
def filter(data)
|
19
21
|
data.chop! if data[-1,1] == ">"
|
20
22
|
data = data[1,data.size] if data[0,1] == "<"
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -8,6 +8,7 @@ class PDF::Reader
|
|
8
8
|
# improve compression
|
9
9
|
class Depredict
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
@@ -16,6 +17,7 @@ class PDF::Reader
|
|
16
17
|
# Streams can be preprocessed to improve compression. This reverses the
|
17
18
|
# preprocessing
|
18
19
|
#
|
20
|
+
#: (String) -> String
|
19
21
|
def filter(data)
|
20
22
|
predictor = @options[:Predictor].to_i
|
21
23
|
|
@@ -34,6 +36,7 @@ class PDF::Reader
|
|
34
36
|
private
|
35
37
|
|
36
38
|
################################################################################
|
39
|
+
#: (untyped) -> String
|
37
40
|
def tiff_depredict(data)
|
38
41
|
data = data.unpack("C*")
|
39
42
|
unfiltered = ''
|
@@ -60,6 +63,7 @@ class PDF::Reader
|
|
60
63
|
unfiltered
|
61
64
|
end
|
62
65
|
################################################################################
|
66
|
+
#: (untyped) -> String
|
63
67
|
def png_depredict(data)
|
64
68
|
return data if @options[:Predictor].to_i < 10
|
65
69
|
|
@@ -125,7 +129,7 @@ class PDF::Reader
|
|
125
129
|
row_data[index] = (byte + paeth) % 256
|
126
130
|
end
|
127
131
|
else
|
128
|
-
raise
|
132
|
+
raise MalformedPDFError, "Invalid filter algorithm #{filter}"
|
129
133
|
end
|
130
134
|
|
131
135
|
s = []
|
@@ -10,15 +10,17 @@ class PDF::Reader
|
|
10
10
|
# implementation of the Flate (zlib) stream filter
|
11
11
|
class Flate
|
12
12
|
|
13
|
-
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
14
|
-
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 #: Integer # Zlib::MAX_WBITS + 32
|
14
|
+
ZLIB_RAW_DEFLATE = -15 #: Integer # Zlib::MAX_WBITS * -1
|
15
15
|
|
16
|
+
#: (?Hash[untyped, untyped]) -> void
|
16
17
|
def initialize(options = {})
|
17
18
|
@options = options
|
18
19
|
end
|
19
20
|
|
20
21
|
################################################################################
|
21
22
|
# Decode the specified data with the Zlib compression algorithm
|
23
|
+
#: (String) -> String
|
22
24
|
def filter(data)
|
23
25
|
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
24
26
|
|
@@ -31,6 +33,7 @@ class PDF::Reader
|
|
31
33
|
|
32
34
|
private
|
33
35
|
|
36
|
+
#: (untyped) -> untyped
|
34
37
|
def zlib_inflate(data)
|
35
38
|
begin
|
36
39
|
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
@@ -8,12 +8,14 @@ class PDF::Reader
|
|
8
8
|
# implementation of the LZW stream filter
|
9
9
|
class Lzw
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
14
15
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data with the LZW compression algorithm
|
18
|
+
#: (String) -> String
|
17
19
|
def filter(data)
|
18
20
|
data = PDF::Reader::LZW.decode(data)
|
19
21
|
Depredict.new(@options).filter(data)
|
@@ -6,10 +6,12 @@ class PDF::Reader
|
|
6
6
|
module Filter # :nodoc:
|
7
7
|
# implementation of the null stream filter
|
8
8
|
class Null
|
9
|
+
#: (?Hash[untyped, untyped]) -> void
|
9
10
|
def initialize(options = {})
|
10
11
|
@options = options
|
11
12
|
end
|
12
13
|
|
14
|
+
#: (String) -> String
|
13
15
|
def filter(data)
|
14
16
|
data
|
15
17
|
end
|
@@ -8,12 +8,14 @@ class PDF::Reader # :nodoc:
|
|
8
8
|
# implementation of the run length stream filter
|
9
9
|
class RunLength
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
14
15
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data with the RunLengthDecode compression algorithm
|
18
|
+
#: (String) -> String
|
17
19
|
def filter(data)
|
18
20
|
pos = 0
|
19
21
|
out = "".dup
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -41,6 +41,7 @@ class PDF::Reader
|
|
41
41
|
# Filters that are only used to encode image data are accepted, but the data is
|
42
42
|
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
43
43
|
#
|
44
|
+
#: (Symbol, ?Hash[untyped, untyped]) -> untyped
|
44
45
|
def self.with(name, options = {})
|
45
46
|
case name
|
46
47
|
when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
|