pdf-reader 1.4.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG +53 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -1
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/buffer.rb +14 -12
- data/lib/pdf/reader/cid_widths.rb +2 -0
- data/lib/pdf/reader/cmap.rb +48 -36
- data/lib/pdf/reader/encoding.rb +16 -18
- data/lib/pdf/reader/error.rb +5 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +1 -0
- data/lib/pdf/reader/filter/flate.rb +29 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +4 -6
- data/lib/pdf/reader/filter.rb +2 -0
- data/lib/pdf/reader/font.rb +12 -13
- data/lib/pdf/reader/font_descriptor.rb +1 -0
- data/lib/pdf/reader/form_xobject.rb +1 -0
- data/lib/pdf/reader/glyph_hash.rb +7 -2
- data/lib/pdf/reader/lzw.rb +4 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +91 -37
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +5 -4
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +30 -1
- data/lib/pdf/reader/page_layout.rb +19 -24
- data/lib/pdf/reader/page_state.rb +8 -5
- data/lib/pdf/reader/page_text_receiver.rb +23 -1
- data/lib/pdf/reader/pages_strategy.rb +2 -304
- data/lib/pdf/reader/parser.rb +10 -7
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +80 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +1 -0
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +28 -9
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +11 -5
- data/lib/pdf/reader.rb +30 -119
- data/lib/pdf-reader.rb +1 -0
- metadata +35 -61
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -19
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -31,6 +32,17 @@ class PDF::Reader
|
|
31
32
|
# extracting various useful information.
|
32
33
|
#
|
33
34
|
class CMap # :nodoc:
|
35
|
+
CMAP_KEYWORDS = {
|
36
|
+
"begincodespacerange" => 1,
|
37
|
+
"endcodespacerange" => 1,
|
38
|
+
"beginbfchar" => 1,
|
39
|
+
"endbfchar" => 1,
|
40
|
+
"beginbfrange" => 1,
|
41
|
+
"endbfrange" => 1,
|
42
|
+
"begin" => 1,
|
43
|
+
"begincmap" => 1,
|
44
|
+
"def" => 1
|
45
|
+
}
|
34
46
|
|
35
47
|
attr_reader :map
|
36
48
|
|
@@ -40,24 +52,25 @@ class PDF::Reader
|
|
40
52
|
end
|
41
53
|
|
42
54
|
def process_data(data)
|
55
|
+
parser = build_parser(data)
|
43
56
|
mode = nil
|
44
|
-
instructions =
|
57
|
+
instructions = []
|
45
58
|
|
46
|
-
|
47
|
-
if
|
59
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
60
|
+
if token == "beginbfchar"
|
48
61
|
mode = :char
|
49
|
-
elsif
|
62
|
+
elsif token == "endbfchar"
|
50
63
|
process_bfchar_instructions(instructions)
|
51
|
-
instructions =
|
64
|
+
instructions = []
|
52
65
|
mode = nil
|
53
|
-
elsif
|
66
|
+
elsif token == "beginbfrange"
|
54
67
|
mode = :range
|
55
|
-
elsif
|
68
|
+
elsif token == "endbfrange"
|
56
69
|
process_bfrange_instructions(instructions)
|
57
|
-
instructions =
|
70
|
+
instructions = []
|
58
71
|
mode = nil
|
59
72
|
elsif mode == :char || mode == :range
|
60
|
-
instructions <<
|
73
|
+
instructions << token
|
61
74
|
end
|
62
75
|
end
|
63
76
|
end
|
@@ -83,44 +96,46 @@ class PDF::Reader
|
|
83
96
|
Parser.new(buffer)
|
84
97
|
end
|
85
98
|
|
99
|
+
# The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
|
100
|
+
# theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
|
101
|
+
#
|
102
|
+
# str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
|
103
|
+
#
|
104
|
+
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
105
|
+
# exception when we try converting broken UTF-16 to UTF-8
|
106
|
+
#
|
86
107
|
def str_to_int(str)
|
87
108
|
return nil if str.nil? || str.size == 0
|
88
|
-
unpacked_string = if str.
|
109
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
89
110
|
str.unpack("C*")
|
90
111
|
else # UTF-16
|
91
112
|
str.unpack("n*")
|
92
113
|
end
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
114
|
+
result = []
|
115
|
+
while unpacked_string.any? do
|
116
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
117
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
118
|
+
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
119
|
+
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
120
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
121
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
122
|
+
else
|
123
|
+
result << unpacked_string.shift
|
124
|
+
end
|
104
125
|
end
|
126
|
+
result
|
105
127
|
end
|
106
128
|
|
107
129
|
def process_bfchar_instructions(instructions)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@map[find[0]] = replace
|
113
|
-
find = str_to_int(parser.parse_token)
|
114
|
-
replace = str_to_int(parser.parse_token)
|
130
|
+
instructions.each_slice(2) do |one, two|
|
131
|
+
find = str_to_int(one)
|
132
|
+
replace = str_to_int(two)
|
133
|
+
@map[find.first] = replace
|
115
134
|
end
|
116
135
|
end
|
117
136
|
|
118
137
|
def process_bfrange_instructions(instructions)
|
119
|
-
|
120
|
-
start = parser.parse_token
|
121
|
-
finish = parser.parse_token
|
122
|
-
to = parser.parse_token
|
123
|
-
while start && finish && to
|
138
|
+
instructions.each_slice(3) do |start, finish, to|
|
124
139
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
125
140
|
bfrange_type_one(start, finish, to)
|
126
141
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -128,9 +143,6 @@ class PDF::Reader
|
|
128
143
|
else
|
129
144
|
raise "invalid bfrange section"
|
130
145
|
end
|
131
|
-
start = parser.parse_token
|
132
|
-
finish = parser.parse_token
|
133
|
-
to = parser.parse_token
|
134
146
|
end
|
135
147
|
end
|
136
148
|
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -39,20 +40,22 @@ class PDF::Reader
|
|
39
40
|
@mapping = default_mapping # maps from character codes to Unicode codepoints
|
40
41
|
@string_cache = {} # maps from character codes to UTF-8 strings.
|
41
42
|
|
42
|
-
if enc.kind_of?(Hash)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
enc = enc.to_sym
|
43
|
+
@enc_name = if enc.kind_of?(Hash)
|
44
|
+
enc[:Encoding] || enc[:BaseEncoding]
|
45
|
+
elsif enc && enc.respond_to?(:to_sym)
|
46
|
+
enc.to_sym
|
47
47
|
else
|
48
|
-
|
48
|
+
:StandardEncoding
|
49
49
|
end
|
50
50
|
|
51
|
-
@
|
52
|
-
@
|
53
|
-
@map_file = get_mapping_file(enc)
|
51
|
+
@unpack = get_unpack(@enc_name)
|
52
|
+
@map_file = get_mapping_file(@enc_name)
|
54
53
|
|
55
54
|
load_mapping(@map_file) if @map_file
|
55
|
+
|
56
|
+
if enc.is_a?(Hash) && enc[:Differences]
|
57
|
+
self.differences = enc[:Differences]
|
58
|
+
end
|
56
59
|
end
|
57
60
|
|
58
61
|
# set the differences table for this encoding. should be an array in the following format:
|
@@ -147,7 +150,7 @@ class PDF::Reader
|
|
147
150
|
ret = [
|
148
151
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
149
152
|
].pack("U*")
|
150
|
-
ret.force_encoding("UTF-8")
|
153
|
+
ret.force_encoding("UTF-8")
|
151
154
|
ret
|
152
155
|
end
|
153
156
|
|
@@ -158,13 +161,13 @@ class PDF::Reader
|
|
158
161
|
def little_boxes(times)
|
159
162
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
160
163
|
ret = codepoints.pack("U*")
|
161
|
-
ret.force_encoding("UTF-8")
|
164
|
+
ret.force_encoding("UTF-8")
|
162
165
|
ret
|
163
166
|
end
|
164
167
|
|
165
168
|
def convert_to_utf8(str)
|
166
169
|
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
167
|
-
ret.force_encoding("UTF-8")
|
170
|
+
ret.force_encoding("UTF-8")
|
168
171
|
ret
|
169
172
|
end
|
170
173
|
|
@@ -198,17 +201,12 @@ class PDF::Reader
|
|
198
201
|
end
|
199
202
|
end
|
200
203
|
|
201
|
-
def has_mapping?
|
202
|
-
@mapping.size > 0
|
203
|
-
end
|
204
|
-
|
205
204
|
def glyphlist
|
206
205
|
@glyphlist ||= PDF::Reader::GlyphHash.new
|
207
206
|
end
|
208
207
|
|
209
208
|
def load_mapping(file)
|
210
|
-
|
211
|
-
File.open(file, mode) do |f|
|
209
|
+
File.open(file, "r:BINARY") do |f|
|
212
210
|
f.each do |l|
|
213
211
|
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
212
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -52,6 +53,10 @@ class PDF::Reader
|
|
52
53
|
# the PDF spec and cannot be recovered
|
53
54
|
class MalformedPDFError < RuntimeError; end
|
54
55
|
|
56
|
+
################################################################################
|
57
|
+
# an exception that is raised when an invalid page number is used
|
58
|
+
class InvalidPageError < ArgumentError; end
|
59
|
+
|
55
60
|
################################################################################
|
56
61
|
# an exception that is raised when a PDF object appears to be invalid
|
57
62
|
class InvalidObjectError < MalformedPDFError; end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
|
4
5
|
require 'zlib'
|
@@ -7,6 +8,9 @@ class PDF::Reader
|
|
7
8
|
module Filter # :nodoc:
|
8
9
|
# implementation of the Flate (zlib) stream filter
|
9
10
|
class Flate
|
11
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
|
+
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
|
10
14
|
def initialize(options = {})
|
11
15
|
@options = options
|
12
16
|
end
|
@@ -14,25 +18,34 @@ class PDF::Reader
|
|
14
18
|
################################################################################
|
15
19
|
# Decode the specified data with the Zlib compression algorithm
|
16
20
|
def filter(data)
|
17
|
-
deflated =
|
21
|
+
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
22
|
+
|
23
|
+
if deflated.nil?
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error while inflating a compressed stream (no suitable inflation algorithm found)"
|
26
|
+
end
|
27
|
+
Depredict.new(@options).filter(deflated)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def zlib_inflate(data)
|
18
33
|
begin
|
19
|
-
|
20
|
-
rescue Zlib::DataError
|
34
|
+
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
+
rescue Zlib::DataError
|
21
36
|
# by default, Ruby's Zlib assumes the data it's inflating
|
22
|
-
# is RFC1951 deflated data, wrapped in a
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
# See
|
27
|
-
# - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
|
28
|
-
# - http://www.gzip.org/zlib/zlib_faq.html#faq38
|
29
|
-
deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
|
37
|
+
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
|
+
# fails, swallow the exception and attempt to inflate the data as a raw
|
39
|
+
# RFC1951 stream.
|
30
40
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
41
|
+
|
42
|
+
begin
|
43
|
+
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
+
rescue StandardError
|
45
|
+
# swallow this one too, so we can try some other fallback options
|
46
|
+
end
|
47
|
+
|
48
|
+
nil
|
36
49
|
end
|
37
50
|
end
|
38
51
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
class PDF::Reader # :nodoc:
|
4
6
|
module Filter # :nodoc:
|
@@ -12,14 +14,10 @@ class PDF::Reader # :nodoc:
|
|
12
14
|
# Decode the specified data with the RunLengthDecode compression algorithm
|
13
15
|
def filter(data)
|
14
16
|
pos = 0
|
15
|
-
out = ""
|
17
|
+
out = "".dup
|
16
18
|
|
17
19
|
while pos < data.length
|
18
|
-
|
19
|
-
length = data.getbyte(pos)
|
20
|
-
else
|
21
|
-
length = data[pos]
|
22
|
-
end
|
20
|
+
length = data.getbyte(pos)
|
23
21
|
pos += 1
|
24
22
|
|
25
23
|
case
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -46,6 +47,7 @@ class PDF::Reader
|
|
46
47
|
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
47
48
|
when :DCTDecode then PDF::Reader::Filter::Null.new(options)
|
48
49
|
when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
|
50
|
+
when :Fl then PDF::Reader::Filter::Flate.new(options)
|
49
51
|
when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
|
50
52
|
when :JPXDecode then PDF::Reader::Filter::Null.new(options)
|
51
53
|
when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -36,11 +37,7 @@ class PDF::Reader
|
|
36
37
|
attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
|
37
38
|
:cid_widths, :cid_default_width
|
38
39
|
|
39
|
-
def initialize(ohash
|
40
|
-
if ohash.nil? || obj.nil?
|
41
|
-
$stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
|
42
|
-
return
|
43
|
-
end
|
40
|
+
def initialize(ohash, obj)
|
44
41
|
@ohash = ohash
|
45
42
|
@tounicode = nil
|
46
43
|
|
@@ -52,12 +49,6 @@ class PDF::Reader
|
|
52
49
|
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
53
50
|
end
|
54
51
|
|
55
|
-
def basefont=(font)
|
56
|
-
$stderr.puts "Font#basefont= is deprecated and will be removed in the 2.0 release"
|
57
|
-
@encoding ||= default_encoding(font)
|
58
|
-
@basefont = font
|
59
|
-
end
|
60
|
-
|
61
52
|
def to_utf8(params)
|
62
53
|
if @tounicode
|
63
54
|
to_utf8_via_cmap(params)
|
@@ -106,7 +97,13 @@ class PDF::Reader
|
|
106
97
|
elsif @subtype == :Type3
|
107
98
|
PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
|
108
99
|
elsif @subtype == :TrueType
|
109
|
-
|
100
|
+
if @font_descriptor
|
101
|
+
PDF::Reader::WidthCalculator::TrueType.new(self)
|
102
|
+
else
|
103
|
+
# A TrueType font that isn't embedded. Most readers look for a version on the
|
104
|
+
# local system and fallback to a substitute. For now, we go straight to a substitute
|
105
|
+
PDF::Reader::WidthCalculator::BuiltIn.new(self)
|
106
|
+
end
|
110
107
|
elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
|
111
108
|
PDF::Reader::WidthCalculator::Composite.new(self)
|
112
109
|
else
|
@@ -134,7 +131,9 @@ class PDF::Reader
|
|
134
131
|
if obj[:ToUnicode]
|
135
132
|
# ToUnicode is optional for Type1 and Type3
|
136
133
|
stream = @ohash.object(obj[:ToUnicode])
|
137
|
-
|
134
|
+
if stream.is_a?(PDF::Reader::Stream)
|
135
|
+
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
|
+
end
|
138
137
|
end
|
139
138
|
end
|
140
139
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
################################################################################
|
4
5
|
#
|
@@ -48,6 +49,9 @@ class PDF::Reader
|
|
48
49
|
# h.name_to_unicode(:Euro)
|
49
50
|
# => 8364
|
50
51
|
#
|
52
|
+
# h.name_to_unicode(:X4A)
|
53
|
+
# => 74
|
54
|
+
#
|
51
55
|
# h.name_to_unicode(:G30)
|
52
56
|
# => 48
|
53
57
|
#
|
@@ -62,6 +66,8 @@ class PDF::Reader
|
|
62
66
|
|
63
67
|
if @by_name.has_key?(name)
|
64
68
|
@by_name[name]
|
69
|
+
elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
|
70
|
+
"0x#{str[1,4]}".hex
|
65
71
|
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
66
72
|
"0x#{str[3,4]}".hex
|
67
73
|
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
@@ -102,8 +108,7 @@ class PDF::Reader
|
|
102
108
|
keyed_by_name = {}
|
103
109
|
keyed_by_codepoint = {}
|
104
110
|
|
105
|
-
|
106
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
111
|
+
File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
|
107
112
|
f.each do |l|
|
108
113
|
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
114
|
if name && code
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
|
@@ -22,7 +23,7 @@ module PDF
|
|
22
23
|
|
23
24
|
def initialize(data, bits_in_chunk)
|
24
25
|
@data = data
|
25
|
-
@data.force_encoding("BINARY")
|
26
|
+
@data.force_encoding("BINARY")
|
26
27
|
@bits_in_chunk = bits_in_chunk
|
27
28
|
@current_pos = 0
|
28
29
|
@bits_left_in_byte = 8
|
@@ -82,7 +83,7 @@ module PDF
|
|
82
83
|
#
|
83
84
|
def self.decode(data)
|
84
85
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
85
|
-
result =
|
86
|
+
result = "".dup
|
86
87
|
until (code = stream.read) == CODE_EOD
|
87
88
|
if code == CODE_CLEAR_TABLE
|
88
89
|
stream.set_bits_in_chunk(9)
|
@@ -116,11 +117,10 @@ module PDF
|
|
116
117
|
result
|
117
118
|
end
|
118
119
|
|
119
|
-
private
|
120
|
-
|
121
120
|
def self.create_new_string(string_table,some_code, other_code)
|
122
121
|
string_table[some_code] + string_table[other_code][0].chr
|
123
122
|
end
|
123
|
+
private_class_method :create_new_string
|
124
124
|
|
125
125
|
end
|
126
126
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
|
6
|
+
# A null object security handler. Used when a PDF is unencrypted.
|
7
|
+
class NullSecurityHandler
|
8
|
+
|
9
|
+
def self.supports?(encrypt)
|
10
|
+
encrypt.nil?
|
11
|
+
end
|
12
|
+
|
13
|
+
def decrypt(buf, _ref)
|
14
|
+
buf
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|