pdf-reader 0.11.0.alpha → 0.12.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +17 -1
- data/README.rdoc +31 -1
- data/bin/pdf_list_callbacks +2 -0
- data/examples/callbacks.rb +2 -1
- data/examples/extract_bates.rb +3 -2
- data/examples/extract_images.rb +146 -23
- data/examples/hash.rb +5 -5
- data/examples/metadata.rb +5 -16
- data/examples/page_count.rb +13 -0
- data/examples/rspec.rb +17 -41
- data/examples/text.rb +4 -29
- data/examples/version.rb +3 -15
- data/lib/pdf/reader.rb +45 -27
- data/lib/pdf/reader/encoding.rb +3 -3
- data/lib/pdf/reader/error.rb +1 -0
- data/lib/pdf/reader/filter.rb +64 -9
- data/lib/pdf/reader/font.rb +0 -17
- data/lib/pdf/reader/form_xobject.rb +83 -0
- data/lib/pdf/reader/glyph_hash.rb +88 -0
- data/lib/pdf/reader/glyphlist.txt +1 -1
- data/lib/pdf/reader/object_hash.rb +42 -12
- data/lib/pdf/reader/page.rb +63 -17
- data/lib/pdf/reader/page_text_receiver.rb +38 -4
- data/lib/pdf/reader/standard_security_handler.rb +186 -0
- data/lib/pdf/reader/stream.rb +2 -2
- metadata +39 -9
- data/examples/page_counter_improved.rb +0 -23
- data/examples/page_counter_naive.rb +0 -24
data/examples/text.rb
CHANGED
@@ -6,35 +6,10 @@
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'pdf/reader'
|
8
8
|
|
9
|
-
|
10
|
-
attr_accessor :content
|
9
|
+
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-unicode.pdf"
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# Called when page parsing starts
|
17
|
-
def begin_page(arg = nil)
|
18
|
-
@content << ""
|
19
|
-
end
|
20
|
-
|
21
|
-
# record text that is drawn on the page
|
22
|
-
def show_text(string, *params)
|
23
|
-
@content.last << string.strip
|
24
|
-
end
|
25
|
-
|
26
|
-
# there's a few text callbacks, so make sure we process them all
|
27
|
-
alias :super_show_text :show_text
|
28
|
-
alias :move_to_next_line_and_show_text :show_text
|
29
|
-
alias :set_spacing_next_line_show_text :show_text
|
30
|
-
|
31
|
-
# this final text callback takes slightly different arguments
|
32
|
-
def show_text_with_positioning(*params)
|
33
|
-
params = params.first
|
34
|
-
params.each { |str| show_text(str) if str.kind_of?(String)}
|
11
|
+
PDF::Reader.open(filename) do |reader|
|
12
|
+
reader.pages.each do |page|
|
13
|
+
puts page.text
|
35
14
|
end
|
36
15
|
end
|
37
|
-
|
38
|
-
receiver = PageTextReceiver.new
|
39
|
-
pdf = PDF::Reader.file("somefile.pdf", receiver)
|
40
|
-
puts receiver.content.inspect
|
data/examples/version.rb
CHANGED
@@ -6,20 +6,8 @@
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'pdf/reader'
|
8
8
|
|
9
|
-
|
10
|
-
attr_accessor :version
|
11
|
-
|
12
|
-
def initialize
|
13
|
-
@version = nil
|
14
|
-
end
|
15
|
-
|
16
|
-
# Called when document parsing starts
|
17
|
-
def pdf_version(arg = nil)
|
18
|
-
@version = arg
|
19
|
-
end
|
9
|
+
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-basic.pdf"
|
20
10
|
|
11
|
+
PDF::Reader.open(filename) do |reader|
|
12
|
+
puts reader.pdf_version
|
21
13
|
end
|
22
|
-
|
23
|
-
receiver = VersionReceiver.new
|
24
|
-
pdf = PDF::Reader.file(ARGV.shift, receiver)
|
25
|
-
puts receiver.version
|
data/lib/pdf/reader.rb
CHANGED
@@ -84,13 +84,18 @@ module PDF
|
|
84
84
|
# page = reader.page(1)
|
85
85
|
# page.walk(receiver)
|
86
86
|
#
|
87
|
+
# == Encrypted Files
|
88
|
+
#
|
89
|
+
# Depending on the algorithm it may be possible to parse an encrypted file.
|
90
|
+
# For standard PDF encryption you'll need the :password option
|
91
|
+
#
|
92
|
+
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
93
|
+
#
|
87
94
|
class Reader
|
88
95
|
|
89
96
|
# lowlevel hash-like access to all objects in the underlying PDF
|
90
97
|
attr_reader :objects
|
91
98
|
|
92
|
-
attr_reader :page_count, :pdf_version, :info, :metadata
|
93
|
-
|
94
99
|
# creates a new document reader for the provided PDF.
|
95
100
|
#
|
96
101
|
# input can be an IO-ish object (StringIO, File, etc) containing a PDF
|
@@ -102,16 +107,34 @@ module PDF
|
|
102
107
|
# reader = PDF::Reader.new(file)
|
103
108
|
# end
|
104
109
|
#
|
105
|
-
|
110
|
+
# If the source file is encrypted you can provide a password for decrypting
|
111
|
+
#
|
112
|
+
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
113
|
+
#
|
114
|
+
def initialize(input = nil, opts = {})
|
106
115
|
if input # support the deprecated Reader API
|
107
|
-
@objects = PDF::Reader::ObjectHash.new(input)
|
108
|
-
@page_count = get_page_count
|
109
|
-
@pdf_version = @objects.pdf_version
|
110
|
-
@info = @objects.deref(@objects.trailer[:Info])
|
111
|
-
@metadata = get_metadata
|
116
|
+
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
112
117
|
end
|
113
118
|
end
|
114
119
|
|
120
|
+
# Looks up the :Info entry of the file trailer and resolves it through the
# object hash. Returns whatever deref yields for that entry.
def info
  info_ref = @objects.trailer[:Info]
  @objects.deref(info_ref)
end
|
123
|
+
|
124
|
+
# Resolves the root object's :Metadata entry and returns the unfiltered
# data of the resulting stream, or nil when the entry resolves to nothing.
def metadata
  meta_stream = @objects.deref(root[:Metadata])
  return nil unless meta_stream

  meta_stream.unfiltered_data
end
|
128
|
+
|
129
|
+
# Returns the :Count entry of the object referenced by the root's :Pages
# entry.
#
# The whole lookup is memoized, so the Pages object is only dereferenced the
# first time this is called. That matters because page() consults this
# method for its range check and pages() uses it to build its range.
def page_count
  @page_count ||= @objects.deref(root[:Pages])[:Count]
end
|
133
|
+
|
134
|
+
# Reports the PDF version exactly as the underlying object hash exposes it.
def pdf_version
  objects.pdf_version
end
|
137
|
+
|
115
138
|
# syntactic sugar for opening a PDF file. Accepts the same arguments
|
116
139
|
# as new().
|
117
140
|
#
|
@@ -119,8 +142,14 @@ module PDF
|
|
119
142
|
# puts reader.pdf_version
|
120
143
|
# end
|
121
144
|
#
|
122
|
-
|
123
|
-
|
145
|
+
# or
|
146
|
+
#
|
147
|
+
# PDF::Reader.open("somefile.pdf", :password => "apples") do |reader|
|
148
|
+
# puts reader.pdf_version
|
149
|
+
# end
|
150
|
+
#
|
151
|
+
# Builds a PDF::Reader from input and opts (the same arguments new()
# accepts) and yields it to the caller's block. The value of the block is
# returned.
def self.open(input, opts = {}, &block)
  reader = PDF::Reader.new(input, opts)
  yield reader
end
|
125
154
|
|
126
155
|
# DEPRECATED: this method was deprecated in version 0.11.0 and will
|
@@ -185,7 +214,7 @@ module PDF
|
|
185
214
|
# methods available on each page
|
186
215
|
#
|
187
216
|
def pages
|
188
|
-
(1
|
217
|
+
(1..self.page_count).map { |num|
|
189
218
|
PDF::Reader::Page.new(@objects, num)
|
190
219
|
}
|
191
220
|
end
|
@@ -204,7 +233,7 @@ module PDF
|
|
204
233
|
#
|
205
234
|
def page(num)
|
206
235
|
num = num.to_i
|
207
|
-
raise ArgumentError, "valid pages are 1 .. #{
|
236
|
+
raise ArgumentError, "valid pages are 1 .. #{self.page_count}" if num < 1 || num > self.page_count
|
208
237
|
PDF::Reader::Page.new(@objects, num)
|
209
238
|
end
|
210
239
|
|
@@ -217,10 +246,6 @@ module PDF
|
|
217
246
|
def parse(io, receivers, opts = {})
|
218
247
|
ohash = ObjectHash.new(io)
|
219
248
|
|
220
|
-
if ohash.trailer[:Encrypt]
|
221
|
-
raise ::PDF::Reader::UnsupportedFeatureError, 'PDF::Reader cannot read encrypted PDF files'
|
222
|
-
end
|
223
|
-
|
224
249
|
options = {:pages => true, :raw_text => false, :metadata => true}
|
225
250
|
options.merge!(opts)
|
226
251
|
|
@@ -252,17 +277,7 @@ module PDF
|
|
252
277
|
end
|
253
278
|
|
254
279
|
def root
|
255
|
-
root ||= @objects.deref(@objects.trailer[:Root])
|
256
|
-
end
|
257
|
-
|
258
|
-
def get_metadata
|
259
|
-
stream = @objects.deref(root[:Metadata])
|
260
|
-
stream ? stream.unfiltered_data : nil
|
261
|
-
end
|
262
|
-
|
263
|
-
def get_page_count
|
264
|
-
pages = @objects.deref(root[:Pages])
|
265
|
-
pages[:Count]
|
280
|
+
@root ||= @objects.deref(@objects.trailer[:Root])
|
266
281
|
end
|
267
282
|
|
268
283
|
end
|
@@ -276,6 +291,8 @@ require 'pdf/reader/encoding'
|
|
276
291
|
require 'pdf/reader/error'
|
277
292
|
require 'pdf/reader/filter'
|
278
293
|
require 'pdf/reader/font'
|
294
|
+
require 'pdf/reader/form_xobject'
|
295
|
+
require 'pdf/reader/glyph_hash'
|
279
296
|
require 'pdf/reader/lzw'
|
280
297
|
require 'pdf/reader/metadata_strategy'
|
281
298
|
require 'pdf/reader/object_cache'
|
@@ -286,6 +303,7 @@ require 'pdf/reader/parser'
|
|
286
303
|
require 'pdf/reader/print_receiver'
|
287
304
|
require 'pdf/reader/reference'
|
288
305
|
require 'pdf/reader/register_receiver'
|
306
|
+
require 'pdf/reader/standard_security_handler'
|
289
307
|
require 'pdf/reader/stream'
|
290
308
|
require 'pdf/reader/text_receiver'
|
291
309
|
require 'pdf/reader/page_text_receiver'
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -97,7 +97,7 @@ class PDF::Reader
|
|
97
97
|
}.map { |num|
|
98
98
|
original_codepoint_to_unicode(num, tounicode)
|
99
99
|
}.map { |c|
|
100
|
-
|
100
|
+
names_to_unicode[c] || c
|
101
101
|
}.map { |c|
|
102
102
|
if c.nil? || !c.is_a?(Fixnum)
|
103
103
|
PDF::Reader::Encoding::UNKNOWN_CHAR
|
@@ -170,8 +170,8 @@ class PDF::Reader
|
|
170
170
|
mapping.size > 0
|
171
171
|
end
|
172
172
|
|
173
|
-
def
|
174
|
-
@
|
173
|
+
def names_to_unicode
|
174
|
+
@names_to_unicode ||= PDF::Reader::GlyphHash.new
|
175
175
|
end
|
176
176
|
|
177
177
|
def load_mapping(file)
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -49,5 +49,6 @@ class PDF::Reader
|
|
49
49
|
class MalformedPDFError < RuntimeError; end
|
50
50
|
class InvalidObjectError < MalformedPDFError; end
|
51
51
|
class UnsupportedFeatureError < RuntimeError; end
|
52
|
+
class EncryptedPDFError < UnsupportedFeatureError; end
|
52
53
|
end
|
53
54
|
################################################################################
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -31,6 +31,7 @@ class PDF::Reader
|
|
31
31
|
# content.
|
32
32
|
#
|
33
33
|
class Filter # :nodoc:
|
34
|
+
|
34
35
|
################################################################################
|
35
36
|
# creates a new filter for decoding content.
|
36
37
|
#
|
@@ -41,14 +42,16 @@ class PDF::Reader
|
|
41
42
|
@options = options
|
42
43
|
|
43
44
|
case name.to_sym
|
44
|
-
when :ASCII85Decode
|
45
|
-
when :ASCIIHexDecode
|
46
|
-
when :CCITTFaxDecode
|
47
|
-
when :DCTDecode
|
48
|
-
when :FlateDecode
|
49
|
-
when :JBIG2Decode
|
50
|
-
when :LZWDecode
|
51
|
-
|
45
|
+
when :ASCII85Decode then @filter = :ascii85
|
46
|
+
when :ASCIIHexDecode then @filter = :asciihex
|
47
|
+
when :CCITTFaxDecode then @filter = nil
|
48
|
+
when :DCTDecode then @filter = nil
|
49
|
+
when :FlateDecode then @filter = :flate
|
50
|
+
when :JBIG2Decode then @filter = nil
|
51
|
+
when :LZWDecode then @filter = :lzw
|
52
|
+
when :RunLengthDecode then @filter = :runlength
|
53
|
+
else
|
54
|
+
raise UnsupportedFeatureError, "Unknown filter: #{name}"
|
52
55
|
end
|
53
56
|
end
|
54
57
|
################################################################################
|
@@ -117,6 +120,36 @@ class PDF::Reader
|
|
117
120
|
depredict(data, @options)
|
118
121
|
end
|
119
122
|
################################################################################
|
123
|
+
# Decode the specified data with the RunLengthDecode compression algorithm.
#
# data - a String holding the run-length encoded bytes
#
# The input is a sequence of runs, each introduced by a length byte:
#
#   0..127   - the next (length + 1) bytes are copied literally
#   129..255 - the next single byte is repeated (257 - length) times
#   128      - end of data; decoding stops and anything after it is ignored
#
# Returns a String containing the decoded bytes. The caller's string is
# never modified.
def runlength(data)
  # getbyte addresses bytes, while String#[] and String#length address
  # characters. On ruby 1.9+ those only agree for binary strings, so decode
  # from a binary-tagged copy when the input carries any other encoding.
  if data.respond_to?(:force_encoding) && data.encoding != Encoding::BINARY
    data = data.dup.force_encoding("BINARY")
  end

  pos = 0
  out = ""

  while pos < data.length
    length = data.getbyte(pos)
    pos += 1

    case
    when length == 128
      break
    when length < 128
      # When the length is < 128, we copy the following length+1 bytes
      # literally. pos is advanced by length here and once more below.
      out << data[pos, length + 1]
      pos += length
    else
      # When the length is > 128, we copy the next byte (257 - length)
      # times; i.e., "\xFA\x00" ([250, 0]) will expand to
      # "\x00\x00\x00\x00\x00\x00\x00".
      out << data[pos, 1] * (257 - length)
    end

    pos += 1
  end

  out
end
|
152
|
+
################################################################################
|
120
153
|
def depredict(data, opts = {})
|
121
154
|
predictor = (opts || {})[:Predictor].to_i
|
122
155
|
|
@@ -133,7 +166,29 @@ class PDF::Reader
|
|
133
166
|
end
|
134
167
|
################################################################################
|
135
168
|
# Reverses the TIFF horizontal-differencing predictor.
#
# data - a String of predicted bytes
# opts - the decode parameters; the keys read are :BitsPerComponent
#        (defaults to 8), :Colors (defaults to 1) and :Columns (defaults
#        to 1). nil is treated as an empty hash.
#
# Each row is (Colors * Columns) bytes long. Within a row every byte has the
# already-decoded byte one pixel to its left added to it, modulo 256; the
# first pixel of a row is left as is. A short final row is decoded the same
# way.
#
# Returns the decoded bytes as a String.
#
# Raises UnsupportedFeatureError when BitsPerComponent is not 8, and
# MalformedPDFError when the parameters describe a row of less than one byte.
def tiff_depredict(data, opts = {})
  opts    ||= {}
  bpc     = opts[:BitsPerComponent] || 8
  colors  = opts[:Colors]  || 1
  columns = opts[:Columns] || 1

  # reject unsupported depths before doing any arithmetic with them
  if bpc != 8
    raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
  end

  pixel_bytes = (bpc * colors) / 8
  line_len    = pixel_bytes * columns

  # a row length below one byte can never make progress through the data
  if line_len < 1
    raise MalformedPDFError, "TIFF predictor requires positive Colors and Columns values"
  end

  unfiltered = []
  data.unpack("C*").each_slice(line_len) do |row_data|
    row_data.each_with_index do |byte, index|
      left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
      row_data[index] = (byte + left) % 256
    end
    # concat appends in place instead of building a new array for every row
    unfiltered.concat(row_data)
  end

  unfiltered.pack("C*")
end
|
138
193
|
################################################################################
|
139
194
|
def png_depredict(data, opts = {})
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -41,23 +41,6 @@ class PDF::Reader
|
|
41
41
|
extract_descendants(obj)
|
42
42
|
end
|
43
43
|
|
44
|
-
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
45
|
-
# a text file supplied by Adobe at:
|
46
|
-
# http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
|
47
|
-
def self.glyphnames
|
48
|
-
glyphs = {}
|
49
|
-
|
50
|
-
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
51
|
-
File.open(File.dirname(__FILE__) + "/glyphlist.txt",mode) do |f|
|
52
|
-
f.each do |l|
|
53
|
-
m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
54
|
-
glyphs[name.to_sym] = "0x#{code}".hex if name
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
glyphs
|
59
|
-
end
|
60
|
-
|
61
44
|
def basefont=(font)
|
62
45
|
# setup a default encoding for the selected font. It can always be overridden
|
63
46
|
# with encoding= if required
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module PDF
|
4
|
+
class Reader
|
5
|
+
|
6
|
+
# High level representation of a single PDF form xobject. Form xobjects
|
7
|
+
# are contained pieces of content that can be inserted onto multiple
|
8
|
+
# pages. They're generally used as a space efficient way to store
|
9
|
+
# repetitive content (like logos, headers, footers, etc).
|
10
|
+
#
|
11
|
+
# This behaves and looks much like a limited PDF::Reader::Page class.
|
12
|
+
#
|
13
|
+
class FormXObject

  # page    - the object this form belongs to; only its #objects is read here
  # xobject - the form xobject, or something the object hash can deref to it
  def initialize(page, xobject)
    @page    = page
    @objects = page.objects
    @xobject = @objects.deref(xobject)
  end

  # Returns the resources that accompany this form: the dereferenced
  # :Resources entry of the xobject's hash, or an empty hash when there is
  # none. The result is memoized.
  #
  def resources
    @resources ||= (@objects.deref(@xobject.hash[:Resources]) || {})
  end

  # Returns a hash of the fonts listed in this form's resources.
  #
  # The keys are the labels found under the :Font resource entry.
  #
  # The values are PDF::Reader::Font instances built from the dereferenced
  # font object for each label.
  #
  def fonts
    raw_fonts = @objects.deref(resources[:Font] || {})
    raw_fonts.inject({}) do |by_label, (label, font)|
      by_label[label] = PDF::Reader::Font.new(@objects, @objects.deref(font))
      by_label
    end
  end

  # Processes the raw content stream for this form in sequential order and
  # passes callbacks to the receiver objects.
  #
  # See the comments on PDF::Reader::Page#walk for more detail.
  #
  def walk(*receivers)
    content_stream(receivers, raw_content)
  end

  # Returns the raw content stream for this form, i.e. the unfiltered data
  # of the underlying xobject. This is plumbing.
  #
  def raw_content
    @xobject.unfiltered_data
  end

  private

  # Sends name with params to every receiver that responds to it; receivers
  # that do not respond are silently skipped.
  def callback(receivers, name, params=[])
    receivers.each do |receiver|
      next unless receiver.respond_to?(name)

      receiver.send(name, *params)
    end
  end

  # Tokenises instructions and fires one callback per recognised operator.
  # Tokens that are not operators accumulate as operands for the next
  # operator and are cleared once it has been dispatched.
  #
  # Raises MalformedPDFError when the parser raises EOFError.
  def content_stream(receivers, instructions)
    operators = PagesStrategy::OPERATORS
    buffer    = Buffer.new(StringIO.new(instructions), :content_stream => true)
    parser    = Parser.new(buffer, @objects)
    operands  = []

    while (token = parser.parse_token(operators))
      unless token.kind_of?(Token) && operators.has_key?(token)
        operands << token
        next
      end

      callback(receivers, operators[token], operands)
      operands.clear
    end
  rescue EOFError
    raise MalformedPDFError, "End Of File while processing a content stream"
  end
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2011 James Healy (jimmy@deefa.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
class GlyphHash # :nodoc:
  def initialize
    @adobe = load_adobe_glyph_mapping
  end

  # Attempt to convert a PDF Name to a unicode codepoint. Returns nil if no
  # conversion is possible, or if name is not a Symbol.
  #
  # Names present in the Adobe glyph list win and return the codepoint
  # recorded there:
  #
  #   h = GlyphHash.new
  #
  #   h[:A]
  #   => 65
  #
  #   h[:Euro]
  #   => 8364
  #
  # Otherwise the name is matched against these shapes, in order:
  #
  #   uniXXXX          (4 hex digits)     h[:uni20AC] => 8364
  #   uXXXX - uXXXXXX  (4-6 hex digits)   h[:u1F600]  => 128512
  #   one letter + 2-4 decimal digits     h[:G30]     => 30
  #   two letters + 2-4 decimal digits    h[:ab123]   => 123
  #
  # NOTE(review): the digits after a letter prefix are read as decimal, so
  # :G30 gives 30 rather than 0x30 (48) - confirm that is the intent. A name
  # made only of digits (e.g. :"34") matches none of the shapes and yields
  # nil unless the glyph list contains it.
  #
  def [](name)
    return nil unless name.is_a?(Symbol)
    return @adobe[name] if @adobe.has_key?(name)

    label = name.to_s

    case label
    when /\Auni[A-F\d]{4}\Z/      then "0x#{label[3,4]}".hex
    when /\Au[A-F\d]{4,6}\Z/      then "0x#{label[1,6]}".hex
    when /\A[A-Za-z]\d{2,4}\Z/    then label[1,4].to_i
    when /\A[A-Za-z]{2}\d{2,4}\Z/ then label[2,4].to_i
    end
  end

  private

  # Returns a hash that maps glyph names (as Symbols) to unicode codepoints.
  # The mapping is read from the glyphlist.txt file that sits next to this
  # source file, which is based on a text file supplied by Adobe at:
  # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
  #
  # Only lines containing "name;XXXX" (four upper-case hex digits) contribute
  # an entry; every other line is ignored.
  def load_adobe_glyph_mapping
    # ruby 1.9+ reads the file as binary; older rubies have no encodings
    mode    = RUBY_VERSION >= "1.9" ? "r:BINARY" : "r"
    path    = File.dirname(__FILE__) + "/glyphlist.txt"
    mapping = {}

    File.open(path, mode) do |io|
      io.each do |line|
        found = line.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
        next unless found

        mapping[found[1].to_sym] = "0x#{found[2]}".hex
      end
    end

    mapping
  end

end
|
88
|
+
end
|