pdf-reader 1.4.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG +53 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -1
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/buffer.rb +14 -12
- data/lib/pdf/reader/cid_widths.rb +2 -0
- data/lib/pdf/reader/cmap.rb +48 -36
- data/lib/pdf/reader/encoding.rb +16 -18
- data/lib/pdf/reader/error.rb +5 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +1 -0
- data/lib/pdf/reader/filter/flate.rb +29 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +4 -6
- data/lib/pdf/reader/filter.rb +2 -0
- data/lib/pdf/reader/font.rb +12 -13
- data/lib/pdf/reader/font_descriptor.rb +1 -0
- data/lib/pdf/reader/form_xobject.rb +1 -0
- data/lib/pdf/reader/glyph_hash.rb +7 -2
- data/lib/pdf/reader/lzw.rb +4 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +91 -37
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +5 -4
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +30 -1
- data/lib/pdf/reader/page_layout.rb +19 -24
- data/lib/pdf/reader/page_state.rb +8 -5
- data/lib/pdf/reader/page_text_receiver.rb +23 -1
- data/lib/pdf/reader/pages_strategy.rb +2 -304
- data/lib/pdf/reader/parser.rb +10 -7
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +80 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +1 -0
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +28 -9
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +11 -5
- data/lib/pdf/reader.rb +30 -119
- data/lib/pdf-reader.rb +1 -0
- metadata +35 -61
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -19
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -31,6 +32,17 @@ class PDF::Reader
|
|
31
32
|
# extracting various useful information.
|
32
33
|
#
|
33
34
|
class CMap # :nodoc:
|
35
|
+
CMAP_KEYWORDS = {
|
36
|
+
"begincodespacerange" => 1,
|
37
|
+
"endcodespacerange" => 1,
|
38
|
+
"beginbfchar" => 1,
|
39
|
+
"endbfchar" => 1,
|
40
|
+
"beginbfrange" => 1,
|
41
|
+
"endbfrange" => 1,
|
42
|
+
"begin" => 1,
|
43
|
+
"begincmap" => 1,
|
44
|
+
"def" => 1
|
45
|
+
}
|
34
46
|
|
35
47
|
attr_reader :map
|
36
48
|
|
@@ -40,24 +52,25 @@ class PDF::Reader
|
|
40
52
|
end
|
41
53
|
|
42
54
|
def process_data(data)
|
55
|
+
parser = build_parser(data)
|
43
56
|
mode = nil
|
44
|
-
instructions =
|
57
|
+
instructions = []
|
45
58
|
|
46
|
-
|
47
|
-
if
|
59
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
60
|
+
if token == "beginbfchar"
|
48
61
|
mode = :char
|
49
|
-
elsif
|
62
|
+
elsif token == "endbfchar"
|
50
63
|
process_bfchar_instructions(instructions)
|
51
|
-
instructions =
|
64
|
+
instructions = []
|
52
65
|
mode = nil
|
53
|
-
elsif
|
66
|
+
elsif token == "beginbfrange"
|
54
67
|
mode = :range
|
55
|
-
elsif
|
68
|
+
elsif token == "endbfrange"
|
56
69
|
process_bfrange_instructions(instructions)
|
57
|
-
instructions =
|
70
|
+
instructions = []
|
58
71
|
mode = nil
|
59
72
|
elsif mode == :char || mode == :range
|
60
|
-
instructions <<
|
73
|
+
instructions << token
|
61
74
|
end
|
62
75
|
end
|
63
76
|
end
|
@@ -83,44 +96,46 @@ class PDF::Reader
|
|
83
96
|
Parser.new(buffer)
|
84
97
|
end
|
85
98
|
|
99
|
+
# The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
|
100
|
+
# theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
|
101
|
+
#
|
102
|
+
# str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
|
103
|
+
#
|
104
|
+
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
105
|
+
# exception when we try converting broken UTF-16 to UTF-8
|
106
|
+
#
|
86
107
|
def str_to_int(str)
|
87
108
|
return nil if str.nil? || str.size == 0
|
88
|
-
unpacked_string = if str.
|
109
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
89
110
|
str.unpack("C*")
|
90
111
|
else # UTF-16
|
91
112
|
str.unpack("n*")
|
92
113
|
end
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
114
|
+
result = []
|
115
|
+
while unpacked_string.any? do
|
116
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
117
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
118
|
+
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
119
|
+
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
120
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
121
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
122
|
+
else
|
123
|
+
result << unpacked_string.shift
|
124
|
+
end
|
104
125
|
end
|
126
|
+
result
|
105
127
|
end
|
106
128
|
|
107
129
|
def process_bfchar_instructions(instructions)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@map[find[0]] = replace
|
113
|
-
find = str_to_int(parser.parse_token)
|
114
|
-
replace = str_to_int(parser.parse_token)
|
130
|
+
instructions.each_slice(2) do |one, two|
|
131
|
+
find = str_to_int(one)
|
132
|
+
replace = str_to_int(two)
|
133
|
+
@map[find.first] = replace
|
115
134
|
end
|
116
135
|
end
|
117
136
|
|
118
137
|
def process_bfrange_instructions(instructions)
|
119
|
-
|
120
|
-
start = parser.parse_token
|
121
|
-
finish = parser.parse_token
|
122
|
-
to = parser.parse_token
|
123
|
-
while start && finish && to
|
138
|
+
instructions.each_slice(3) do |start, finish, to|
|
124
139
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
125
140
|
bfrange_type_one(start, finish, to)
|
126
141
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -128,9 +143,6 @@ class PDF::Reader
|
|
128
143
|
else
|
129
144
|
raise "invalid bfrange section"
|
130
145
|
end
|
131
|
-
start = parser.parse_token
|
132
|
-
finish = parser.parse_token
|
133
|
-
to = parser.parse_token
|
134
146
|
end
|
135
147
|
end
|
136
148
|
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -39,20 +40,22 @@ class PDF::Reader
|
|
39
40
|
@mapping = default_mapping # maps from character codes to Unicode codepoints
|
40
41
|
@string_cache = {} # maps from character codes to UTF-8 strings.
|
41
42
|
|
42
|
-
if enc.kind_of?(Hash)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
enc = enc.to_sym
|
43
|
+
@enc_name = if enc.kind_of?(Hash)
|
44
|
+
enc[:Encoding] || enc[:BaseEncoding]
|
45
|
+
elsif enc && enc.respond_to?(:to_sym)
|
46
|
+
enc.to_sym
|
47
47
|
else
|
48
|
-
|
48
|
+
:StandardEncoding
|
49
49
|
end
|
50
50
|
|
51
|
-
@
|
52
|
-
@
|
53
|
-
@map_file = get_mapping_file(enc)
|
51
|
+
@unpack = get_unpack(@enc_name)
|
52
|
+
@map_file = get_mapping_file(@enc_name)
|
54
53
|
|
55
54
|
load_mapping(@map_file) if @map_file
|
55
|
+
|
56
|
+
if enc.is_a?(Hash) && enc[:Differences]
|
57
|
+
self.differences = enc[:Differences]
|
58
|
+
end
|
56
59
|
end
|
57
60
|
|
58
61
|
# set the differences table for this encoding. should be an array in the following format:
|
@@ -147,7 +150,7 @@ class PDF::Reader
|
|
147
150
|
ret = [
|
148
151
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
149
152
|
].pack("U*")
|
150
|
-
ret.force_encoding("UTF-8")
|
153
|
+
ret.force_encoding("UTF-8")
|
151
154
|
ret
|
152
155
|
end
|
153
156
|
|
@@ -158,13 +161,13 @@ class PDF::Reader
|
|
158
161
|
def little_boxes(times)
|
159
162
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
160
163
|
ret = codepoints.pack("U*")
|
161
|
-
ret.force_encoding("UTF-8")
|
164
|
+
ret.force_encoding("UTF-8")
|
162
165
|
ret
|
163
166
|
end
|
164
167
|
|
165
168
|
def convert_to_utf8(str)
|
166
169
|
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
167
|
-
ret.force_encoding("UTF-8")
|
170
|
+
ret.force_encoding("UTF-8")
|
168
171
|
ret
|
169
172
|
end
|
170
173
|
|
@@ -198,17 +201,12 @@ class PDF::Reader
|
|
198
201
|
end
|
199
202
|
end
|
200
203
|
|
201
|
-
def has_mapping?
|
202
|
-
@mapping.size > 0
|
203
|
-
end
|
204
|
-
|
205
204
|
def glyphlist
|
206
205
|
@glyphlist ||= PDF::Reader::GlyphHash.new
|
207
206
|
end
|
208
207
|
|
209
208
|
def load_mapping(file)
|
210
|
-
|
211
|
-
File.open(file, mode) do |f|
|
209
|
+
File.open(file, "r:BINARY") do |f|
|
212
210
|
f.each do |l|
|
213
211
|
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
212
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -52,6 +53,10 @@ class PDF::Reader
|
|
52
53
|
# the PDF spec and cannot be recovered
|
53
54
|
class MalformedPDFError < RuntimeError; end
|
54
55
|
|
56
|
+
################################################################################
|
57
|
+
# an exception that is raised when an invalid page number is used
|
58
|
+
class InvalidPageError < ArgumentError; end
|
59
|
+
|
55
60
|
################################################################################
|
56
61
|
# an exception that is raised when a PDF object appears to be invalid
|
57
62
|
class InvalidObjectError < MalformedPDFError; end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
|
4
5
|
require 'zlib'
|
@@ -7,6 +8,9 @@ class PDF::Reader
|
|
7
8
|
module Filter # :nodoc:
|
8
9
|
# implementation of the Flate (zlib) stream filter
|
9
10
|
class Flate
|
11
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
|
+
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
|
10
14
|
def initialize(options = {})
|
11
15
|
@options = options
|
12
16
|
end
|
@@ -14,25 +18,34 @@ class PDF::Reader
|
|
14
18
|
################################################################################
|
15
19
|
# Decode the specified data with the Zlib compression algorithm
|
16
20
|
def filter(data)
|
17
|
-
deflated =
|
21
|
+
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
22
|
+
|
23
|
+
if deflated.nil?
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error while inflating a compressed stream (no suitable inflation algorithm found)"
|
26
|
+
end
|
27
|
+
Depredict.new(@options).filter(deflated)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def zlib_inflate(data)
|
18
33
|
begin
|
19
|
-
|
20
|
-
rescue Zlib::DataError
|
34
|
+
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
+
rescue Zlib::DataError
|
21
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
22
|
-
# is RFC1951 deflated data, wrapped in a
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
# See
|
27
|
-
# - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
|
28
|
-
# - http://www.gzip.org/zlib/zlib_faq.html#faq38
|
29
|
-
deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
|
37
|
+
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
|
+
# fails, swallow the exception and attempt to inflate the data as a raw
|
39
|
+
# RFC1951 stream.
|
30
40
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
41
|
+
|
42
|
+
begin
|
43
|
+
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
+
rescue StandardError
|
45
|
+
# swallow this one too, so we can try some other fallback options
|
46
|
+
end
|
47
|
+
|
48
|
+
nil
|
36
49
|
end
|
37
50
|
end
|
38
51
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
class PDF::Reader # :nodoc:
|
4
6
|
module Filter # :nodoc:
|
@@ -12,14 +14,10 @@ class PDF::Reader # :nodoc:
|
|
12
14
|
# Decode the specified data with the RunLengthDecode compression algorithm
|
13
15
|
def filter(data)
|
14
16
|
pos = 0
|
15
|
-
out = ""
|
17
|
+
out = "".dup
|
16
18
|
|
17
19
|
while pos < data.length
|
18
|
-
|
19
|
-
length = data.getbyte(pos)
|
20
|
-
else
|
21
|
-
length = data[pos]
|
22
|
-
end
|
20
|
+
length = data.getbyte(pos)
|
23
21
|
pos += 1
|
24
22
|
|
25
23
|
case
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -46,6 +47,7 @@ class PDF::Reader
|
|
46
47
|
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
47
48
|
when :DCTDecode then PDF::Reader::Filter::Null.new(options)
|
48
49
|
when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
|
50
|
+
when :Fl then PDF::Reader::Filter::Flate.new(options)
|
49
51
|
when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
|
50
52
|
when :JPXDecode then PDF::Reader::Filter::Null.new(options)
|
51
53
|
when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -36,11 +37,7 @@ class PDF::Reader
|
|
36
37
|
attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
|
37
38
|
:cid_widths, :cid_default_width
|
38
39
|
|
39
|
-
def initialize(ohash
|
40
|
-
if ohash.nil? || obj.nil?
|
41
|
-
$stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
|
42
|
-
return
|
43
|
-
end
|
40
|
+
def initialize(ohash, obj)
|
44
41
|
@ohash = ohash
|
45
42
|
@tounicode = nil
|
46
43
|
|
@@ -52,12 +49,6 @@ class PDF::Reader
|
|
52
49
|
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
53
50
|
end
|
54
51
|
|
55
|
-
def basefont=(font)
|
56
|
-
$stderr.puts "Font#basefont= is deprecated and will be removed in the 2.0 release"
|
57
|
-
@encoding ||= default_encoding(font)
|
58
|
-
@basefont = font
|
59
|
-
end
|
60
|
-
|
61
52
|
def to_utf8(params)
|
62
53
|
if @tounicode
|
63
54
|
to_utf8_via_cmap(params)
|
@@ -106,7 +97,13 @@ class PDF::Reader
|
|
106
97
|
elsif @subtype == :Type3
|
107
98
|
PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
|
108
99
|
elsif @subtype == :TrueType
|
109
|
-
|
100
|
+
if @font_descriptor
|
101
|
+
PDF::Reader::WidthCalculator::TrueType.new(self)
|
102
|
+
else
|
103
|
+
# A TrueType font that isn't embedded. Most readers look for a version on the
|
104
|
+
# local system and fallback to a substitute. For now, we go straight to a substitute
|
105
|
+
PDF::Reader::WidthCalculator::BuiltIn.new(self)
|
106
|
+
end
|
110
107
|
elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
|
111
108
|
PDF::Reader::WidthCalculator::Composite.new(self)
|
112
109
|
else
|
@@ -134,7 +131,9 @@ class PDF::Reader
|
|
134
131
|
if obj[:ToUnicode]
|
135
132
|
# ToUnicode is optional for Type1 and Type3
|
136
133
|
stream = @ohash.object(obj[:ToUnicode])
|
137
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
138
137
|
end
|
139
138
|
end
|
140
139
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -48,6 +49,9 @@ class PDF::Reader
|
|
48
49
|
# h.name_to_unicode(:Euro)
|
49
50
|
# => 8364
|
50
51
|
#
|
52
|
+
# h.name_to_unicode(:X4A)
|
53
|
+
# => 74
|
54
|
+
#
|
51
55
|
# h.name_to_unicode(:G30)
|
52
56
|
# => 48
|
53
57
|
#
|
@@ -62,6 +66,8 @@ class PDF::Reader
|
|
62
66
|
|
63
67
|
if @by_name.has_key?(name)
|
64
68
|
@by_name[name]
|
69
|
+
elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
|
70
|
+
"0x#{str[1,4]}".hex
|
65
71
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
66
72
|
"0x#{str[3,4]}".hex
|
67
73
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -102,8 +108,7 @@ class PDF::Reader
|
|
102
108
|
keyed_by_name = {}
|
103
109
|
keyed_by_codepoint = {}
|
104
110
|
|
105
|
-
|
106
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
111
|
+
File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
|
107
112
|
f.each do |l|
|
108
113
|
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
114
|
if name && code
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
|
@@ -22,7 +23,7 @@ module PDF
|
|
22
23
|
|
23
24
|
def initialize(data, bits_in_chunk)
|
24
25
|
@data = data
|
25
|
-
@data.force_encoding("BINARY")
|
26
|
+
@data.force_encoding("BINARY")
|
26
27
|
@bits_in_chunk = bits_in_chunk
|
27
28
|
@current_pos = 0
|
28
29
|
@bits_left_in_byte = 8
|
@@ -82,7 +83,7 @@ module PDF
|
|
82
83
|
#
|
83
84
|
def self.decode(data)
|
84
85
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
85
|
-
result =
|
86
|
+
result = "".dup
|
86
87
|
until (code = stream.read) == CODE_EOD
|
87
88
|
if code == CODE_CLEAR_TABLE
|
88
89
|
stream.set_bits_in_chunk(9)
|
@@ -116,11 +117,10 @@ module PDF
|
|
116
117
|
result
|
117
118
|
end
|
118
119
|
|
119
|
-
private
|
120
|
-
|
121
120
|
def self.create_new_string(string_table,some_code, other_code)
|
122
121
|
string_table[some_code] + string_table[other_code][0].chr
|
123
122
|
end
|
123
|
+
private_class_method :create_new_string
|
124
124
|
|
125
125
|
end
|
126
126
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
|
6
|
+
# A null object security handler. Used when a PDF is unencrypted.
|
7
|
+
class NullSecurityHandler
|
8
|
+
|
9
|
+
def self.supports?(encrypt)
|
10
|
+
encrypt.nil?
|
11
|
+
end
|
12
|
+
|
13
|
+
def decrypt(buf, _ref)
|
14
|
+
buf
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|