pdf-reader 0.11.0.alpha → 0.12.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +17 -1
- data/README.rdoc +31 -1
- data/bin/pdf_list_callbacks +2 -0
- data/examples/callbacks.rb +2 -1
- data/examples/extract_bates.rb +3 -2
- data/examples/extract_images.rb +146 -23
- data/examples/hash.rb +5 -5
- data/examples/metadata.rb +5 -16
- data/examples/page_count.rb +13 -0
- data/examples/rspec.rb +17 -41
- data/examples/text.rb +4 -29
- data/examples/version.rb +3 -15
- data/lib/pdf/reader.rb +45 -27
- data/lib/pdf/reader/encoding.rb +3 -3
- data/lib/pdf/reader/error.rb +1 -0
- data/lib/pdf/reader/filter.rb +64 -9
- data/lib/pdf/reader/font.rb +0 -17
- data/lib/pdf/reader/form_xobject.rb +83 -0
- data/lib/pdf/reader/glyph_hash.rb +88 -0
- data/lib/pdf/reader/glyphlist.txt +1 -1
- data/lib/pdf/reader/object_hash.rb +42 -12
- data/lib/pdf/reader/page.rb +63 -17
- data/lib/pdf/reader/page_text_receiver.rb +38 -4
- data/lib/pdf/reader/standard_security_handler.rb +186 -0
- data/lib/pdf/reader/stream.rb +2 -2
- metadata +39 -9
- data/examples/page_counter_improved.rb +0 -23
- data/examples/page_counter_naive.rb +0 -24
data/examples/text.rb
CHANGED
@@ -6,35 +6,10 @@
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'pdf/reader'
|
8
8
|
|
9
|
-
|
10
|
-
attr_accessor :content
|
9
|
+
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-unicode.pdf"
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# Called when page parsing starts
|
17
|
-
def begin_page(arg = nil)
|
18
|
-
@content << ""
|
19
|
-
end
|
20
|
-
|
21
|
-
# record text that is drawn on the page
|
22
|
-
def show_text(string, *params)
|
23
|
-
@content.last << string.strip
|
24
|
-
end
|
25
|
-
|
26
|
-
# there's a few text callbacks, so make sure we process them all
|
27
|
-
alias :super_show_text :show_text
|
28
|
-
alias :move_to_next_line_and_show_text :show_text
|
29
|
-
alias :set_spacing_next_line_show_text :show_text
|
30
|
-
|
31
|
-
# this final text callback takes slightly different arguments
|
32
|
-
def show_text_with_positioning(*params)
|
33
|
-
params = params.first
|
34
|
-
params.each { |str| show_text(str) if str.kind_of?(String)}
|
11
|
+
PDF::Reader.open(filename) do |reader|
|
12
|
+
reader.pages.each do |page|
|
13
|
+
puts page.text
|
35
14
|
end
|
36
15
|
end
|
37
|
-
|
38
|
-
receiver = PageTextReceiver.new
|
39
|
-
pdf = PDF::Reader.file("somefile.pdf", receiver)
|
40
|
-
puts receiver.content.inspect
|
data/examples/version.rb
CHANGED
@@ -6,20 +6,8 @@
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'pdf/reader'
|
8
8
|
|
9
|
-
|
10
|
-
attr_accessor :version
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@version = nil
|
14
|
-
end
|
15
|
-
|
16
|
-
# Called when document parsing starts
|
17
|
-
def pdf_version(arg = nil)
|
18
|
-
@version = arg
|
19
|
-
end
|
9
|
+
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-basic.pdf"
|
20
10
|
|
11
|
+
PDF::Reader.open(filename) do |reader|
|
12
|
+
puts reader.pdf_version
|
21
13
|
end
|
22
|
-
|
23
|
-
receiver = VersionReceiver.new
|
24
|
-
pdf = PDF::Reader.file(ARGV.shift, receiver)
|
25
|
-
puts receiver.version
|
data/lib/pdf/reader.rb
CHANGED
@@ -84,13 +84,18 @@ module PDF
|
|
84
84
|
# page = reader.page(1)
|
85
85
|
# page.walk(receiver)
|
86
86
|
#
|
87
|
+
# == Encrypted Files
|
88
|
+
#
|
89
|
+
# Depending on the algorithm it may be possible to parse an encrypted file.
|
90
|
+
# For standard PDF encryption you'll need the :password option
|
91
|
+
#
|
92
|
+
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
93
|
+
#
|
87
94
|
class Reader
|
88
95
|
|
89
96
|
# lowlevel hash-like access to all objects in the underlying PDF
|
90
97
|
attr_reader :objects
|
91
98
|
|
92
|
-
attr_reader :page_count, :pdf_version, :info, :metadata
|
93
|
-
|
94
99
|
# creates a new document reader for the provided PDF.
|
95
100
|
#
|
96
101
|
# input can be an IO-ish object (StringIO, File, etc) containing a PDF
|
@@ -102,16 +107,34 @@ module PDF
|
|
102
107
|
# reader = PDF::Reader.new(file)
|
103
108
|
# end
|
104
109
|
#
|
105
|
-
|
110
|
+
# If the source file is encrypted you can provide a password for decrypting
|
111
|
+
#
|
112
|
+
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
113
|
+
#
|
114
|
+
def initialize(input = nil, opts = {})
|
106
115
|
if input # support the deprecated Reader API
|
107
|
-
@objects = PDF::Reader::ObjectHash.new(input)
|
108
|
-
@page_count = get_page_count
|
109
|
-
@pdf_version = @objects.pdf_version
|
110
|
-
@info = @objects.deref(@objects.trailer[:Info])
|
111
|
-
@metadata = get_metadata
|
116
|
+
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
112
117
|
end
|
113
118
|
end
|
114
119
|
|
120
|
+
def info
|
121
|
+
@objects.deref(@objects.trailer[:Info])
|
122
|
+
end
|
123
|
+
|
124
|
+
def metadata
|
125
|
+
stream = @objects.deref(root[:Metadata])
|
126
|
+
stream ? stream.unfiltered_data : nil
|
127
|
+
end
|
128
|
+
|
129
|
+
def page_count
|
130
|
+
pages = @objects.deref(root[:Pages])
|
131
|
+
@page_count ||= pages[:Count]
|
132
|
+
end
|
133
|
+
|
134
|
+
def pdf_version
|
135
|
+
@objects.pdf_version
|
136
|
+
end
|
137
|
+
|
115
138
|
# syntactic sugar for opening a PDF file. Accepts the same arguments
|
116
139
|
# as new().
|
117
140
|
#
|
@@ -119,8 +142,14 @@ module PDF
|
|
119
142
|
# puts reader.pdf_version
|
120
143
|
# end
|
121
144
|
#
|
122
|
-
|
123
|
-
|
145
|
+
# or
|
146
|
+
#
|
147
|
+
# PDF::Reader.open("somefile.pdf", :password => "apples") do |reader|
|
148
|
+
# puts reader.pdf_version
|
149
|
+
# end
|
150
|
+
#
|
151
|
+
def self.open(input, opts = {}, &block)
|
152
|
+
yield PDF::Reader.new(input, opts)
|
124
153
|
end
|
125
154
|
|
126
155
|
# DEPRECATED: this method was deprecated in version 0.11.0 and will
|
@@ -185,7 +214,7 @@ module PDF
|
|
185
214
|
# methods available on each page
|
186
215
|
#
|
187
216
|
def pages
|
188
|
-
(1
|
217
|
+
(1..self.page_count).map { |num|
|
189
218
|
PDF::Reader::Page.new(@objects, num)
|
190
219
|
}
|
191
220
|
end
|
@@ -204,7 +233,7 @@ module PDF
|
|
204
233
|
#
|
205
234
|
def page(num)
|
206
235
|
num = num.to_i
|
207
|
-
raise ArgumentError, "valid pages are 1 .. #{
|
236
|
+
raise ArgumentError, "valid pages are 1 .. #{self.page_count}" if num < 1 || num > self.page_count
|
208
237
|
PDF::Reader::Page.new(@objects, num)
|
209
238
|
end
|
210
239
|
|
@@ -217,10 +246,6 @@ module PDF
|
|
217
246
|
def parse(io, receivers, opts = {})
|
218
247
|
ohash = ObjectHash.new(io)
|
219
248
|
|
220
|
-
if ohash.trailer[:Encrypt]
|
221
|
-
raise ::PDF::Reader::UnsupportedFeatureError, 'PDF::Reader cannot read encrypted PDF files'
|
222
|
-
end
|
223
|
-
|
224
249
|
options = {:pages => true, :raw_text => false, :metadata => true}
|
225
250
|
options.merge!(opts)
|
226
251
|
|
@@ -252,17 +277,7 @@ module PDF
|
|
252
277
|
end
|
253
278
|
|
254
279
|
def root
|
255
|
-
root ||= @objects.deref(@objects.trailer[:Root])
|
256
|
-
end
|
257
|
-
|
258
|
-
def get_metadata
|
259
|
-
stream = @objects.deref(root[:Metadata])
|
260
|
-
stream ? stream.unfiltered_data : nil
|
261
|
-
end
|
262
|
-
|
263
|
-
def get_page_count
|
264
|
-
pages = @objects.deref(root[:Pages])
|
265
|
-
pages[:Count]
|
280
|
+
@root ||= @objects.deref(@objects.trailer[:Root])
|
266
281
|
end
|
267
282
|
|
268
283
|
end
|
@@ -276,6 +291,8 @@ require 'pdf/reader/encoding'
|
|
276
291
|
require 'pdf/reader/error'
|
277
292
|
require 'pdf/reader/filter'
|
278
293
|
require 'pdf/reader/font'
|
294
|
+
require 'pdf/reader/form_xobject'
|
295
|
+
require 'pdf/reader/glyph_hash'
|
279
296
|
require 'pdf/reader/lzw'
|
280
297
|
require 'pdf/reader/metadata_strategy'
|
281
298
|
require 'pdf/reader/object_cache'
|
@@ -286,6 +303,7 @@ require 'pdf/reader/parser'
|
|
286
303
|
require 'pdf/reader/print_receiver'
|
287
304
|
require 'pdf/reader/reference'
|
288
305
|
require 'pdf/reader/register_receiver'
|
306
|
+
require 'pdf/reader/standard_security_handler'
|
289
307
|
require 'pdf/reader/stream'
|
290
308
|
require 'pdf/reader/text_receiver'
|
291
309
|
require 'pdf/reader/page_text_receiver'
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -97,7 +97,7 @@ class PDF::Reader
|
|
97
97
|
}.map { |num|
|
98
98
|
original_codepoint_to_unicode(num, tounicode)
|
99
99
|
}.map { |c|
|
100
|
-
|
100
|
+
names_to_unicode[c] || c
|
101
101
|
}.map { |c|
|
102
102
|
if c.nil? || !c.is_a?(Fixnum)
|
103
103
|
PDF::Reader::Encoding::UNKNOWN_CHAR
|
@@ -170,8 +170,8 @@ class PDF::Reader
|
|
170
170
|
mapping.size > 0
|
171
171
|
end
|
172
172
|
|
173
|
-
def
|
174
|
-
@
|
173
|
+
def names_to_unicode
|
174
|
+
@names_to_unicode ||= PDF::Reader::GlyphHash.new
|
175
175
|
end
|
176
176
|
|
177
177
|
def load_mapping(file)
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -49,5 +49,6 @@ class PDF::Reader
|
|
49
49
|
class MalformedPDFError < RuntimeError; end
|
50
50
|
class InvalidObjectError < MalformedPDFError; end
|
51
51
|
class UnsupportedFeatureError < RuntimeError; end
|
52
|
+
class EncryptedPDFError < UnsupportedFeatureError; end
|
52
53
|
end
|
53
54
|
################################################################################
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -31,6 +31,7 @@ class PDF::Reader
|
|
31
31
|
# content.
|
32
32
|
#
|
33
33
|
class Filter # :nodoc:
|
34
|
+
|
34
35
|
################################################################################
|
35
36
|
# creates a new filter for decoding content.
|
36
37
|
#
|
@@ -41,14 +42,16 @@ class PDF::Reader
|
|
41
42
|
@options = options
|
42
43
|
|
43
44
|
case name.to_sym
|
44
|
-
when :ASCII85Decode
|
45
|
-
when :ASCIIHexDecode
|
46
|
-
when :CCITTFaxDecode
|
47
|
-
when :DCTDecode
|
48
|
-
when :FlateDecode
|
49
|
-
when :JBIG2Decode
|
50
|
-
when :LZWDecode
|
51
|
-
|
45
|
+
when :ASCII85Decode then @filter = :ascii85
|
46
|
+
when :ASCIIHexDecode then @filter = :asciihex
|
47
|
+
when :CCITTFaxDecode then @filter = nil
|
48
|
+
when :DCTDecode then @filter = nil
|
49
|
+
when :FlateDecode then @filter = :flate
|
50
|
+
when :JBIG2Decode then @filter = nil
|
51
|
+
when :LZWDecode then @filter = :lzw
|
52
|
+
when :RunLengthDecode then @filter = :runlength
|
53
|
+
else
|
54
|
+
raise UnsupportedFeatureError, "Unknown filter: #{name}"
|
52
55
|
end
|
53
56
|
end
|
54
57
|
################################################################################
|
@@ -117,6 +120,36 @@ class PDF::Reader
|
|
117
120
|
depredict(data, @options)
|
118
121
|
end
|
119
122
|
################################################################################
|
123
|
+
# Decode the specified data with the RunLengthDecode compression algorithm
|
124
|
+
def runlength(data)
|
125
|
+
pos = 0
|
126
|
+
out = ""
|
127
|
+
|
128
|
+
while pos < data.length
|
129
|
+
length = data.getbyte(pos)
|
130
|
+
pos += 1
|
131
|
+
|
132
|
+
case
|
133
|
+
when length == 128
|
134
|
+
break
|
135
|
+
when length < 128
|
136
|
+
# When the length is < 128, we copy the following length+1 bytes
|
137
|
+
# literally.
|
138
|
+
out << data[pos, length + 1]
|
139
|
+
pos += length
|
140
|
+
else
|
141
|
+
# When the length is > 128, we copy the next byte (257 - length)
|
142
|
+
# times; i.e., "\xFA\x00" ([250, 0]) will expand to
|
143
|
+
# "\x00\x00\x00\x00\x00\x00\x00".
|
144
|
+
out << data[pos, 1] * (257 - length)
|
145
|
+
end
|
146
|
+
|
147
|
+
pos += 1
|
148
|
+
end
|
149
|
+
|
150
|
+
out
|
151
|
+
end
|
152
|
+
################################################################################
|
120
153
|
def depredict(data, opts = {})
|
121
154
|
predictor = (opts || {})[:Predictor].to_i
|
122
155
|
|
@@ -133,7 +166,29 @@ class PDF::Reader
|
|
133
166
|
end
|
134
167
|
################################################################################
|
135
168
|
def tiff_depredict(data, opts = {})
|
136
|
-
|
169
|
+
data = data.unpack("C*")
|
170
|
+
unfiltered = []
|
171
|
+
bpc = opts[:BitsPerComponent] || 8
|
172
|
+
pixel_bits = bpc * opts[:Colors]
|
173
|
+
pixel_bytes = pixel_bits / 8
|
174
|
+
line_len = (pixel_bytes * opts[:Columns])
|
175
|
+
pos = 0
|
176
|
+
|
177
|
+
if bpc != 8
|
178
|
+
raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
|
179
|
+
end
|
180
|
+
|
181
|
+
until pos > data.size
|
182
|
+
row_data = data[pos, line_len]
|
183
|
+
row_data.each_with_index do |byte, index|
|
184
|
+
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
185
|
+
row_data[index] = (byte + left) % 256
|
186
|
+
end
|
187
|
+
unfiltered += row_data
|
188
|
+
pos += line_len
|
189
|
+
end
|
190
|
+
|
191
|
+
unfiltered.pack("C*")
|
137
192
|
end
|
138
193
|
################################################################################
|
139
194
|
def png_depredict(data, opts = {})
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -41,23 +41,6 @@ class PDF::Reader
|
|
41
41
|
extract_descendants(obj)
|
42
42
|
end
|
43
43
|
|
44
|
-
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
45
|
-
# a text file supplied by Adobe at:
|
46
|
-
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
47
|
-
def self.glyphnames
|
48
|
-
glyphs = {}
|
49
|
-
|
50
|
-
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
51
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt",mode) do |f|
|
52
|
-
f.each do |l|
|
53
|
-
m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
54
|
-
glyphs[name.to_sym] = "0x#{code}".hex if name
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
glyphs
|
59
|
-
end
|
60
|
-
|
61
44
|
def basefont=(font)
|
62
45
|
# setup a default encoding for the selected font. It can always be overridden
|
63
46
|
# with encoding= if required
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module PDF
|
4
|
+
class Reader
|
5
|
+
|
6
|
+
# High level representation of a single PDF form xobject. Form xobjects
|
7
|
+
# are contained pieces of content that can be inserted onto multiple
|
8
|
+
# pages. They're generally used as a space efficient way to store
|
9
|
+
# repetitive content (like logos, headers, footers, etc).
|
10
|
+
#
|
11
|
+
# This behaves and looks much like a limited PDF::Reader::Page class.
|
12
|
+
#
|
13
|
+
class FormXObject
|
14
|
+
|
15
|
+
def initialize(page, xobject)
|
16
|
+
@page = page
|
17
|
+
@objects = page.objects
|
18
|
+
@xobject = @objects.deref(xobject)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns the resources that accompany this form.
|
22
|
+
#
|
23
|
+
def resources
|
24
|
+
@resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
|
25
|
+
end
|
26
|
+
|
27
|
+
# return a hash of fonts used on this form.
|
28
|
+
#
|
29
|
+
# The keys are the font labels used within the form content stream.
|
30
|
+
#
|
31
|
+
# The values are PDF::Reader::Font instances that provide access
|
32
|
+
# to most available metrics for each font.
|
33
|
+
#
|
34
|
+
def fonts
|
35
|
+
raw_fonts = @objects.deref(resources[:Font] || {})
|
36
|
+
::Hash[raw_fonts.map { |label, font|
|
37
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
|
38
|
+
}]
|
39
|
+
end
|
40
|
+
|
41
|
+
# processes the raw content stream for this form in sequential order and
|
42
|
+
# passes callbacks to the receiver objects.
|
43
|
+
#
|
44
|
+
# See the comments on PDF::Reader::Page#walk for more detail.
|
45
|
+
#
|
46
|
+
def walk(*receivers)
|
47
|
+
content_stream(receivers, raw_content)
|
48
|
+
end
|
49
|
+
|
50
|
+
# returns the raw content stream for this form. This is plumbing, nothing to
|
51
|
+
# see here unless you're a PDF nerd like me.
|
52
|
+
#
|
53
|
+
def raw_content
|
54
|
+
@xobject.unfiltered_data
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def callback(receivers, name, params=[])
|
60
|
+
receivers.each do |receiver|
|
61
|
+
receiver.send(name, *params) if receiver.respond_to?(name)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def content_stream(receivers, instructions)
|
66
|
+
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
67
|
+
parser = Parser.new(buffer, @objects)
|
68
|
+
params = []
|
69
|
+
|
70
|
+
while (token = parser.parse_token(PagesStrategy::OPERATORS))
|
71
|
+
if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
|
72
|
+
callback(receivers, PagesStrategy::OPERATORS[token], params)
|
73
|
+
params.clear
|
74
|
+
else
|
75
|
+
params << token
|
76
|
+
end
|
77
|
+
end
|
78
|
+
rescue EOFError => e
|
79
|
+
raise MalformedPDFError, "End Of File while processing a content stream"
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2011 James Healy (jimmy@deefa.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
class GlyphHash # :nodoc:
|
28
|
+
def initialize
|
29
|
+
@adobe = load_adobe_glyph_mapping
|
30
|
+
end
|
31
|
+
|
32
|
+
# attempt to convert a PDF Name to a unicode codepoint. Returns nil
|
33
|
+
# if no conversion is possible.
|
34
|
+
#
|
35
|
+
# h = GlyphHash.new
|
36
|
+
#
|
37
|
+
# h[:A]
|
38
|
+
# => 65
|
39
|
+
#
|
40
|
+
# h[:Euro]
|
41
|
+
# => 8364
|
42
|
+
#
|
43
|
+
# h[:G30]
|
44
|
+
# => 48
|
45
|
+
#
|
46
|
+
# h[:34]
|
47
|
+
#
|
48
|
+
def [](name)
|
49
|
+
return nil unless name.is_a?(Symbol)
|
50
|
+
|
51
|
+
str = name.to_s
|
52
|
+
|
53
|
+
if @adobe.has_key?(name)
|
54
|
+
@adobe[name]
|
55
|
+
elsif str.match(/\Auni[A-F\d]{4}\Z/)
|
56
|
+
"0x#{str[3,4]}".hex
|
57
|
+
elsif str.match(/\Au[A-F\d]{4,6}\Z/)
|
58
|
+
"0x#{str[1,6]}".hex
|
59
|
+
elsif str.match(/\A[A-Za-z]\d{2,4}\Z/)
|
60
|
+
str[1,4].to_i
|
61
|
+
elsif str.match(/\A[A-Za-z]{2}\d{2,4}\Z/)
|
62
|
+
str[2,4].to_i
|
63
|
+
else
|
64
|
+
nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
71
|
+
# a text file supplied by Adobe at:
|
72
|
+
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
73
|
+
def load_adobe_glyph_mapping
|
74
|
+
glyphs = {}
|
75
|
+
|
76
|
+
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
77
|
+
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
78
|
+
f.each do |l|
|
79
|
+
m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
80
|
+
glyphs[name.to_sym] = "0x#{code}".hex if name
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
glyphs
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|