pdf-reader 0.5.1 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/README +22 -0
- data/Rakefile +1 -1
- data/TODO +21 -3
- data/lib/pdf/reader.rb +4 -1
- data/lib/pdf/reader/buffer.rb +3 -3
- data/lib/pdf/reader/cmap.rb +48 -0
- data/lib/pdf/reader/content.rb +37 -3
- data/lib/pdf/reader/encoding.rb +1012 -0
- data/lib/pdf/reader/explore.rb +1 -1
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +75 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/parser.rb +29 -27
- data/lib/pdf/reader/register_receiver.rb +48 -1
- data/lib/pdf/reader/xref.rb +1 -0
- metadata +6 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
v0.6.0 (xxx)
|
2
|
+
- all text is now transparently converted to UTF-8 before being passed to the callbacks.
|
3
|
+
before this version, text was just passed as a byte level copy of what was in the PDF file, which
|
4
|
+
was mildly annoying with some encodings, and resulted in garbled text for Unicode encoded text.
|
5
|
+
- Fonts that use a difference table are now handled correctly
|
6
|
+
- fixed some 1.9 incompatible syntax
|
7
|
+
- expanded RegisterReceiver class to record extra info
|
8
|
+
- tweaked a README example
|
9
|
+
|
1
10
|
v0.5.1 (1st January 2008)
|
2
11
|
- Several documentation tweaks
|
3
12
|
- Improve support for parsing PDFs under windows (thanks to Jari Williamsson)
|
data/README
CHANGED
@@ -29,6 +29,12 @@ For a full list of the supported callback methods and a description of when they
|
|
29
29
|
will be called, refer to PDF::Reader::Content. See the code examples below for a
|
30
30
|
way to print a list of all the callbacks generated by a file to STDOUT.
|
31
31
|
|
32
|
+
= Text Encoding
|
33
|
+
|
34
|
+
Internally, text can be stored inside a PDF in various encodings, including
|
35
|
+
ZapfDingbats, win-1252, mac roman and a form of Unicode. To avoid confusion, all
|
36
|
+
text will be converted to UTF-8 before it is passed back from PDF::Reader.
|
37
|
+
|
32
38
|
= Exceptions
|
33
39
|
|
34
40
|
There are two key exceptions that you will need to watch out for when processing a
|
@@ -47,6 +53,12 @@ us with future code improvements.
|
|
47
53
|
- Peter Jones <mailto:pjones@pmade.com>
|
48
54
|
- James Healy <mailto:jimmy@deefa.com>
|
49
55
|
|
56
|
+
= Mailing List
|
57
|
+
|
58
|
+
Any questions or feedback should be sent to the PDF::Reader google group.
|
59
|
+
|
60
|
+
http://groups.google.com/group/pdf-reader
|
61
|
+
|
50
62
|
= Examples
|
51
63
|
|
52
64
|
The easiest way to explain how this works in practice is to show some examples.
|
@@ -117,6 +129,10 @@ it through less or to a text file.
|
|
117
129
|
alias :move_to_next_line_and_show_text :show_text
|
118
130
|
alias :set_spacing_next_line_show_text :show_text
|
119
131
|
|
132
|
+
def show_text_with_positioning(*params)
|
133
|
+
params = params.first
|
134
|
+
params.each { |str| show_text(str) if str.kind_of?(String)}
|
135
|
+
end
|
120
136
|
end
|
121
137
|
|
122
138
|
context "My generated PDF" do
|
@@ -183,6 +199,12 @@ Requires the rbook-isbn gem.
|
|
183
199
|
receiver = ISBNReceiver.new
|
184
200
|
PDF::Reader.file("somefile.pdf", receiver)
|
185
201
|
|
202
|
+
= Known Limitations
|
203
|
+
|
204
|
+
The order of the callbacks is unpredictable, and is dependent on the internal
|
205
|
+
layout of the file, not the order objects are displayed to the user. As a
|
206
|
+
consequence of this it is highly unlikely that text will be completely in
|
207
|
+
order.
|
186
208
|
|
187
209
|
= Resources
|
188
210
|
|
data/Rakefile
CHANGED
data/TODO
CHANGED
@@ -1,10 +1,28 @@
|
|
1
|
-
|
2
|
-
-
|
3
|
-
interested in meta data, there's no point in walking the pages tree.
|
1
|
+
v0.7
|
2
|
+
- Allow the user to only process certain aspects of the PDF file. For example, if they're only
|
3
|
+
interested in meta data or bookmarks, there's no point in walking the pages tree.
|
4
|
+
- maybe a third option to Reader.parse?
|
5
|
+
parse(io, receiver, {:pages => true, :fonts => false, :metadata => true, :bookmarks => false})
|
4
6
|
|
7
|
+
- Tweak encoding mappings to differentiate between bytes that are invalid for an encoding, and bytes that are unchanged.
|
8
|
+
poppler seems to do this in a quite reasonable way. Original Encoding -> Glyph Names -> Unicode. As of 0.6 we go straight
|
9
|
+
from the Original encoding to Unicode.
|
10
|
+
|
11
|
+
v0.9
|
12
|
+
- Support for CJK text (convert to UTF-8 like all other encodings)
|
13
|
+
- Add a way to extract raster images
|
14
|
+
|
15
|
+
|
16
|
+
Sometime
|
5
17
|
- Ship some extra receivers in the standard package, particularly ones that are useful for running
|
6
18
|
rspec over generated PDF files
|
7
19
|
|
8
20
|
- Improve metadata support
|
9
21
|
|
10
22
|
- Add support for additional filters: ASCIIHexDecode, ASCII85Decode, LZWDecode, RunLengthDecode, CCITTFaxDecode, JBIG2Decode, DCTDecode, JPXDecode, Crypt?
|
23
|
+
|
24
|
+
- Add support for additional encodings:
|
25
|
+
- PDFDocEncoding
|
26
|
+
- Identity-V (I *think* this relates to vertical text. Not sure how we'd support it sensibly)
|
27
|
+
|
28
|
+
- Investigate how R->L text is handled
|
data/lib/pdf/reader.rb
CHANGED
@@ -43,7 +43,7 @@ module PDF
|
|
43
43
|
#
|
44
44
|
# This is useful for processing a PDF that is already in memory
|
45
45
|
#
|
46
|
-
# PDF::Reader.string(
|
46
|
+
# PDF::Reader.string(pdf_string, receiver)
|
47
47
|
#
|
48
48
|
# = Parsing an IO object
|
49
49
|
#
|
@@ -73,9 +73,12 @@ end
|
|
73
73
|
################################################################################
|
74
74
|
require 'pdf/reader/explore'
|
75
75
|
require 'pdf/reader/buffer'
|
76
|
+
require 'pdf/reader/cmap'
|
76
77
|
require 'pdf/reader/content'
|
78
|
+
require 'pdf/reader/encoding'
|
77
79
|
require 'pdf/reader/error'
|
78
80
|
require 'pdf/reader/filter'
|
81
|
+
require 'pdf/reader/font'
|
79
82
|
require 'pdf/reader/name'
|
80
83
|
require 'pdf/reader/parser'
|
81
84
|
require 'pdf/reader/reference'
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -89,9 +89,9 @@ class PDF::Reader
|
|
89
89
|
i = @buffer.index(/[\[\]()<>{}\s\/]/) || @buffer.size
|
90
90
|
|
91
91
|
token_chars =
|
92
|
-
if i == 0 and @buffer[i,2] == "<<"
|
93
|
-
elsif i == 0 and @buffer[i,2] == ">>"
|
94
|
-
elsif i == 0
|
92
|
+
if i == 0 and @buffer[i,2] == "<<" then 2
|
93
|
+
elsif i == 0 and @buffer[i,2] == ">>" then 2
|
94
|
+
elsif i == 0 then 1
|
95
95
|
else i
|
96
96
|
end
|
97
97
|
|
@@ -0,0 +1,48 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
class CMap
|
28
|
+
|
29
|
+
def initialize(data)
|
30
|
+
@map = {}
|
31
|
+
inmap = false
|
32
|
+
data.each_line do |l|
|
33
|
+
inmap = true if l.include?("beginbfchar")
|
34
|
+
if inmap
|
35
|
+
m, find, replace = *l.match(/<([0-9a-fA-F]+)> <([0-9a-fA-F]+)>/)
|
36
|
+
@map["0x#{find}".hex] = "0x#{replace}".hex if find && replace
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def decode(c)
|
42
|
+
# TODO: implement the conversion
|
43
|
+
Error.assert_equal(c.class, Fixnum)
|
44
|
+
@map[c]
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
data/lib/pdf/reader/content.rb
CHANGED
@@ -55,6 +55,12 @@ class PDF::Reader
|
|
55
55
|
# puts params.inspect
|
56
56
|
#
|
57
57
|
# == Text Callbacks
|
58
|
+
#
|
59
|
+
# All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
|
60
|
+
# PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be careful
|
61
|
+
# when doing a comparison on strings returned from PDF::Reader (when doing unit tests for example). The
|
62
|
+
# string may not be byte-by-byte identical with the string that was originally written to the PDF.
|
63
|
+
#
|
58
64
|
# - end_text_object
|
59
65
|
# - move_to_start_of_next_line
|
60
66
|
# - set_character_spacing
|
@@ -221,6 +227,7 @@ class PDF::Reader
|
|
221
227
|
def initialize (receiver, xref)
|
222
228
|
@receiver = receiver
|
223
229
|
@xref = xref
|
230
|
+
@fonts ||= {}
|
224
231
|
end
|
225
232
|
################################################################################
|
226
233
|
# Begin processing the document
|
@@ -233,6 +240,9 @@ class PDF::Reader
|
|
233
240
|
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
234
241
|
# its content
|
235
242
|
def walk_pages (page)
|
243
|
+
resolve_resources(@xref.object(page['Resources'])) if page['Resources']
|
244
|
+
|
245
|
+
# extract page content
|
236
246
|
if page['Type'] == "Pages"
|
237
247
|
callback(:begin_page_container, [page])
|
238
248
|
page['Kids'].each {|child| walk_pages(@xref.object(child))}
|
@@ -262,7 +272,12 @@ class PDF::Reader
|
|
262
272
|
token = @parser.parse_token(OPERATORS)
|
263
273
|
|
264
274
|
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
265
|
-
|
275
|
+
@current_font = @params.first if OPERATORS[token] == :set_text_font_and_size
|
276
|
+
|
277
|
+
# convert any text to utf-8
|
278
|
+
if OPERATORS[token].to_s.include?("show_text") && @fonts[@current_font]
|
279
|
+
@params = @fonts[@current_font].to_utf8(@params)
|
280
|
+
end
|
266
281
|
callback(OPERATORS[token], @params)
|
267
282
|
@params.clear
|
268
283
|
break
|
@@ -274,8 +289,27 @@ class PDF::Reader
|
|
274
289
|
rescue EOFError => e
|
275
290
|
end
|
276
291
|
################################################################################
|
277
|
-
def resolve_resources
|
278
|
-
#
|
292
|
+
def resolve_resources(resources)
|
293
|
+
# extract any font information
|
294
|
+
if resources['Font']
|
295
|
+
@xref.object(resources['Font']).each do |label, desc|
|
296
|
+
desc = @xref.object(desc)
|
297
|
+
@fonts[label] = PDF::Reader::Font.new
|
298
|
+
@fonts[label].label = label
|
299
|
+
@fonts[label].subtype = desc['Subtype'] if desc['Subtype']
|
300
|
+
@fonts[label].basefont = desc['BaseFont'] if desc['BaseFont']
|
301
|
+
@fonts[label].encoding = PDF::Reader::Encoding.factory(@xref.object(desc['Encoding']))
|
302
|
+
@fonts[label].descendantfonts = desc['DescendantFonts'] if desc['DescendantFonts']
|
303
|
+
if desc['ToUnicode']
|
304
|
+
@fonts[label].tounicode = desc['ToUnicode']
|
305
|
+
@fonts[label].tounicode = @xref.object(@fonts[label].tounicode)
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
#@fonts.each do |key,val|
|
310
|
+
# puts "#{key}: #{val.inspect}"
|
311
|
+
# puts
|
312
|
+
#end
|
279
313
|
end
|
280
314
|
################################################################################
|
281
315
|
# calls the name callback method on the receiver class with params as the arguments
|
@@ -0,0 +1,1012 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
require 'enumerator'
|
27
|
+
|
28
|
+
class PDF::Reader
|
29
|
+
class Encoding
|
30
|
+
|
31
|
+
attr_reader :differences
|
32
|
+
|
33
|
+
# set the differences table for this encoding. should be an array in the following format:
|
34
|
+
#
|
35
|
+
# [25, "A", 26, "B"]
|
36
|
+
#
|
37
|
+
# The array alternates between a decimal byte number and a glyph name to map to that byte
|
38
|
+
#
|
39
|
+
# To save space the following array is also valid and equivalent to the previous one
|
40
|
+
#
|
41
|
+
# [25, "A", "B"]
|
42
|
+
def differences=(diff)
|
43
|
+
raise ArgumentError, "diff must be an array" unless diff.kind_of?(Array)
|
44
|
+
|
45
|
+
@differences = {}
|
46
|
+
byte = 0
|
47
|
+
diff.each do |val|
|
48
|
+
if val.kind_of?(Numeric)
|
49
|
+
byte = val.to_i
|
50
|
+
else
|
51
|
+
@differences[byte] = val
|
52
|
+
byte += 1
|
53
|
+
end
|
54
|
+
end
|
55
|
+
@differences
|
56
|
+
end
|
57
|
+
|
58
|
+
# Takes the "Encoding" value of a Font dictionary and builds a PDF::Reader::Encoding object
|
59
|
+
def self.factory(enc)
|
60
|
+
if enc.kind_of?(Hash)
|
61
|
+
diff = enc['Differences']
|
62
|
+
enc = enc['Encoding'] || enc['BaseEncoding']
|
63
|
+
elsif enc != nil
|
64
|
+
enc = enc.to_s
|
65
|
+
end
|
66
|
+
|
67
|
+
case enc
|
68
|
+
when nil then enc = PDF::Reader::Encoding::StandardEncoding.new
|
69
|
+
when "Identity-H" then enc = PDF::Reader::Encoding::IdentityH.new
|
70
|
+
when "MacRomanEncoding" then enc = PDF::Reader::Encoding::MacRomanEncoding.new
|
71
|
+
when "MacExpertEncoding" then enc = PDF::Reader::Encoding::MacExpertEncoding.new
|
72
|
+
when "StandardEncoding" then enc = PDF::Reader::Encoding::StandardEncoding.new
|
73
|
+
when "SymbolEncoding" then enc = PDF::Reader::Encoding::SymbolEncoding.new
|
74
|
+
when "WinAnsiEncoding" then enc = PDF::Reader::Encoding::WinAnsiEncoding.new
|
75
|
+
when "ZapfDingbatsEncoding" then enc = PDF::Reader::Encoding::ZapfDingbatsEncoding.new
|
76
|
+
else raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
|
77
|
+
end
|
78
|
+
|
79
|
+
enc.differences = diff if enc && diff
|
80
|
+
|
81
|
+
return enc
|
82
|
+
end
|
83
|
+
|
84
|
+
def to_utf8(str, tounicode = nil)
|
85
|
+
# abstract method, of sorts
|
86
|
+
raise RuntimeError, "Called abstract method"
|
87
|
+
end
|
88
|
+
|
89
|
+
# accepts an array of byte numbers, and replaces any that have entries in the differences table
|
90
|
+
# with a glyph name
|
91
|
+
def process_differences(arr)
|
92
|
+
@differences ||= {}
|
93
|
+
arr.collect! { |n| @differences[n].nil? ? n : @differences[n]}
|
94
|
+
end
|
95
|
+
protected :process_differences
|
96
|
+
|
97
|
+
# accepts an array of unicode code points and glyphnames, and converts any glyph names to codepoints
|
98
|
+
def process_glyphnames(arr)
|
99
|
+
@differences ||= {}
|
100
|
+
arr.collect! { |n| n.kind_of?(Numeric) ? n : PDF::Reader::Font.glyphnames[n]}
|
101
|
+
end
|
102
|
+
protected :process_glyphnames
|
103
|
+
|
104
|
+
class IdentityH < Encoding
|
105
|
+
def to_utf8(str, map = nil)
|
106
|
+
raise ArgumentError, "a ToUnicode cmap is required to decode an IdentityH string" if map.nil?
|
107
|
+
|
108
|
+
array_enc = []
|
109
|
+
|
110
|
+
# iterate over string, reading it in 2 byte chunks and interpreting those
|
111
|
+
# chunks as ints
|
112
|
+
str.unpack("n*").each do |c|
|
113
|
+
# convert the int to a unicode codepoint
|
114
|
+
array_enc << map.decode(c)
|
115
|
+
end
|
116
|
+
|
117
|
+
# pack all our Unicode codepoints into a UTF-8 string
|
118
|
+
ret = array_enc.pack("U*")
|
119
|
+
|
120
|
+
# set the string's encoding correctly under ruby 1.9+
|
121
|
+
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
|
122
|
+
|
123
|
+
return ret
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
class MacExpertEncoding < Encoding
|
128
|
+
# convert a MacExpertEncoding string into UTF-8
|
129
|
+
def to_utf8(str, tounicode = nil)
|
130
|
+
array_expert = str.unpack('C*')
|
131
|
+
array_expert = self.process_differences(array_expert)
|
132
|
+
array_enc = []
|
133
|
+
array_expert.each do |num|
|
134
|
+
case num
|
135
|
+
# change necessary characters to equivalent Unicode codepoints
|
136
|
+
when 0x21; array_enc << 0xF721
|
137
|
+
when 0x22; array_enc << 0xF6F8 # Hungarumlautsmall
|
138
|
+
when 0x23; array_enc << 0xF7A2
|
139
|
+
when 0x24; array_enc << 0xF724
|
140
|
+
when 0x25; array_enc << 0xF6E4
|
141
|
+
when 0x26; array_enc << 0xF726
|
142
|
+
when 0x27; array_enc << 0xF7B4
|
143
|
+
when 0x28; array_enc << 0x207D
|
144
|
+
when 0x29; array_enc << 0xF07E
|
145
|
+
when 0x2A; array_enc << 0x2025
|
146
|
+
when 0x2B; array_enc << 0x2024
|
147
|
+
when 0x2F; array_enc << 0x2044
|
148
|
+
when 0x30; array_enc << 0xF730
|
149
|
+
when 0x31; array_enc << 0xF731
|
150
|
+
when 0x32; array_enc << 0xF732
|
151
|
+
when 0x33; array_enc << 0xF733
|
152
|
+
when 0x34; array_enc << 0xF734
|
153
|
+
when 0x35; array_enc << 0xF735
|
154
|
+
when 0x36; array_enc << 0xF736
|
155
|
+
when 0x37; array_enc << 0xF737
|
156
|
+
when 0x38; array_enc << 0xF738
|
157
|
+
when 0x39; array_enc << 0xF739
|
158
|
+
when 0x3D; array_enc << 0xF6DE
|
159
|
+
when 0x3F; array_enc << 0xF73F
|
160
|
+
when 0x44; array_enc << 0xF7F0
|
161
|
+
when 0x47; array_enc << 0x00BC
|
162
|
+
when 0x48; array_enc << 0x00BD
|
163
|
+
when 0x49; array_enc << 0x00BE
|
164
|
+
when 0x4A; array_enc << 0x215B
|
165
|
+
when 0x4B; array_enc << 0x215C
|
166
|
+
when 0x4C; array_enc << 0x215D
|
167
|
+
when 0x4D; array_enc << 0x215E
|
168
|
+
when 0x4E; array_enc << 0x2153
|
169
|
+
when 0x4F; array_enc << 0x2154
|
170
|
+
when 0x56; array_enc << 0xFB00
|
171
|
+
when 0x57; array_enc << 0xFB01
|
172
|
+
when 0x58; array_enc << 0xFB02
|
173
|
+
when 0x59; array_enc << 0xFB03
|
174
|
+
when 0x5A; array_enc << 0xFB04
|
175
|
+
when 0x5B; array_enc << 0x208D
|
176
|
+
when 0x5D; array_enc << 0x208E
|
177
|
+
when 0x5E; array_enc << 0xF6F6
|
178
|
+
when 0x5F; array_enc << 0xF6E5
|
179
|
+
when 0x60; array_enc << 0xF760
|
180
|
+
when 0x61; array_enc << 0xF761
|
181
|
+
when 0x62; array_enc << 0xF762
|
182
|
+
when 0x63; array_enc << 0xF763
|
183
|
+
when 0x64; array_enc << 0xF764
|
184
|
+
when 0x65; array_enc << 0xF765
|
185
|
+
when 0x66; array_enc << 0xF766
|
186
|
+
when 0x67; array_enc << 0xF767
|
187
|
+
when 0x68; array_enc << 0xF768
|
188
|
+
when 0x69; array_enc << 0xF769
|
189
|
+
when 0x6A; array_enc << 0xF76A
|
190
|
+
when 0x6B; array_enc << 0xF76B
|
191
|
+
when 0x6C; array_enc << 0xF76C
|
192
|
+
when 0x6D; array_enc << 0xF76D
|
193
|
+
when 0x6E; array_enc << 0xF76E
|
194
|
+
when 0x6F; array_enc << 0xF76F
|
195
|
+
when 0x70; array_enc << 0xF770
|
196
|
+
when 0x71; array_enc << 0xF771
|
197
|
+
when 0x72; array_enc << 0xF772
|
198
|
+
when 0x73; array_enc << 0xF773
|
199
|
+
when 0x74; array_enc << 0xF774
|
200
|
+
when 0x75; array_enc << 0xF775
|
201
|
+
when 0x76; array_enc << 0xF776
|
202
|
+
when 0x77; array_enc << 0xF777
|
203
|
+
when 0x78; array_enc << 0xF778
|
204
|
+
when 0x79; array_enc << 0xF779
|
205
|
+
when 0x7A; array_enc << 0xF77A
|
206
|
+
when 0x7B; array_enc << 0x20A1
|
207
|
+
when 0x7C; array_enc << 0xF6DC
|
208
|
+
when 0x7D; array_enc << 0xF6DD
|
209
|
+
when 0x7E; array_enc << 0xF6FE
|
210
|
+
when 0x81; array_enc << 0xF6E9
|
211
|
+
when 0x82; array_enc << 0xF6E0
|
212
|
+
when 0x87; array_enc << 0xF7E1 # Acircumflexsmall
|
213
|
+
when 0x88; array_enc << 0xF7E0
|
214
|
+
when 0x89; array_enc << 0xF7E2 # Acutesmall
|
215
|
+
when 0x8A; array_enc << 0xF7E4
|
216
|
+
when 0x8B; array_enc << 0xF7E3
|
217
|
+
when 0x8C; array_enc << 0xF7E5
|
218
|
+
when 0x8D; array_enc << 0xF7E7
|
219
|
+
when 0x8E; array_enc << 0xF7E9
|
220
|
+
when 0x8F; array_enc << 0xF7E8
|
221
|
+
when 0x90; array_enc << 0xF7E4
|
222
|
+
when 0x91; array_enc << 0xF7EB
|
223
|
+
when 0x92; array_enc << 0xF7ED
|
224
|
+
when 0x93; array_enc << 0xF7EC
|
225
|
+
when 0x94; array_enc << 0xF7EE
|
226
|
+
when 0x95; array_enc << 0xF7EF
|
227
|
+
when 0x96; array_enc << 0xF7F1
|
228
|
+
when 0x97; array_enc << 0xF7F3
|
229
|
+
when 0x98; array_enc << 0xF7F2
|
230
|
+
when 0x99; array_enc << 0xF7F4
|
231
|
+
when 0x9A; array_enc << 0xF7F6
|
232
|
+
when 0x9B; array_enc << 0xF7F5
|
233
|
+
when 0x9C; array_enc << 0xF7FA
|
234
|
+
when 0x9D; array_enc << 0xF7F9
|
235
|
+
when 0x9E; array_enc << 0xF7FB
|
236
|
+
when 0x9F; array_enc << 0xF7FC
|
237
|
+
when 0xA1; array_enc << 0x2078
|
238
|
+
when 0xA2; array_enc << 0x2084
|
239
|
+
when 0xA3; array_enc << 0x2083
|
240
|
+
when 0xA4; array_enc << 0x2086
|
241
|
+
when 0xA5; array_enc << 0x2088
|
242
|
+
when 0xA6; array_enc << 0x2087
|
243
|
+
when 0xA7; array_enc << 0xF6FD
|
244
|
+
when 0xA9; array_enc << 0xF6DF
|
245
|
+
when 0xAA; array_enc << 0x2082
|
246
|
+
when 0xAC; array_enc << 0xF7A8
|
247
|
+
when 0xAE; array_enc << 0xF6F5
|
248
|
+
when 0xAF; array_enc << 0xF6F0
|
249
|
+
when 0xB0; array_enc << 0x2085
|
250
|
+
when 0xB2; array_enc << 0xF6E1
|
251
|
+
when 0xB3; array_enc << 0xF6E7
|
252
|
+
when 0xB4; array_enc << 0xF7FD
|
253
|
+
when 0xB6; array_enc << 0xF6E3
|
254
|
+
when 0xB9; array_enc << 0xF7FE
|
255
|
+
when 0xBB; array_enc << 0x2089
|
256
|
+
when 0xBC; array_enc << 0x2080
|
257
|
+
when 0xBD; array_enc << 0xF6FF
|
258
|
+
when 0xBE; array_enc << 0xF7E6 # AEsmall
|
259
|
+
when 0xBF; array_enc << 0xF7F8
|
260
|
+
when 0xC0; array_enc << 0xF7BF
|
261
|
+
when 0xC1; array_enc << 0x2081
|
262
|
+
when 0xC2; array_enc << 0xF6F9
|
263
|
+
when 0xC9; array_enc << 0xF7B8
|
264
|
+
when 0xCF; array_enc << 0xF6FA
|
265
|
+
when 0xD0; array_enc << 0x2012
|
266
|
+
when 0xD1; array_enc << 0xF6E6
|
267
|
+
when 0xD6; array_enc << 0xF7A1
|
268
|
+
when 0xD8; array_enc << 0xF7FF
|
269
|
+
when 0xDA; array_enc << 0x00B9
|
270
|
+
when 0xDB; array_enc << 0x00B2
|
271
|
+
when 0xDC; array_enc << 0x00B3
|
272
|
+
when 0xDD; array_enc << 0x2074
|
273
|
+
when 0xDE; array_enc << 0x2075
|
274
|
+
when 0xDF; array_enc << 0x2076
|
275
|
+
when 0xE0; array_enc << 0x2077
|
276
|
+
when 0xE1; array_enc << 0x2079
|
277
|
+
when 0xE2; array_enc << 0x2070
|
278
|
+
when 0xE4; array_enc << 0xF6EC
|
279
|
+
when 0xE5; array_enc << 0xF6F1
|
280
|
+
when 0xE6; array_enc << 0xF6F3
|
281
|
+
when 0xE9; array_enc << 0xF6ED
|
282
|
+
when 0xEA; array_enc << 0xF6F2
|
283
|
+
when 0xEB; array_enc << 0xF6EB
|
284
|
+
when 0xF1; array_enc << 0xF6EE
|
285
|
+
when 0xF2; array_enc << 0xF6FB
|
286
|
+
when 0xF3; array_enc << 0xF6F4
|
287
|
+
when 0xF4; array_enc << 0xF7AF
|
288
|
+
when 0xF5; array_enc << 0xF6EF
|
289
|
+
when 0xF6; array_enc << 0x207F
|
290
|
+
when 0xF7; array_enc << 0xF6EF
|
291
|
+
when 0xF8; array_enc << 0xF6E2
|
292
|
+
when 0xF9; array_enc << 0xF6E8
|
293
|
+
when 0xFA; array_enc << 0xF6F7
|
294
|
+
when 0xFB; array_enc << 0xF6FC
|
295
|
+
else
|
296
|
+
array_enc << num
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
# convert any glyph names to unicode codepoints
|
301
|
+
array_enc = self.process_glyphnames(array_enc)
|
302
|
+
|
303
|
+
# pack all our Unicode codepoints into a UTF-8 string
|
304
|
+
ret = array_enc.pack("U*")
|
305
|
+
|
306
|
+
# set the string's encoding correctly under ruby 1.9+
|
307
|
+
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
|
308
|
+
|
309
|
+
return ret
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
# The default encoding for Mac OS <= v9 (i.e. classic Mac OS, before OS X)
|
314
|
+
# see: http://en.wikipedia.org/wiki/Mac_OS_Roman
|
315
|
+
class MacRomanEncoding < Encoding
|
316
|
+
# convert a MacRomanEncoding string into UTF-8
|
317
|
+
def to_utf8(str, tounicode = nil)
|
318
|
+
# content of this method borrowed from REXML::Encoding.decode_cp1252
|
319
|
+
array_mac = str.unpack('C*')
|
320
|
+
array_mac = self.process_differences(array_mac)
|
321
|
+
array_enc = []
|
322
|
+
array_mac.each do |num|
|
323
|
+
case num
|
324
|
+
# change necessary characters to equivalent Unicode codepoints
|
325
|
+
when 0x80; array_enc << 0x00C4
|
326
|
+
when 0x81; array_enc << 0x00C5
|
327
|
+
when 0x82; array_enc << 0x00C7
|
328
|
+
when 0x83; array_enc << 0x00C9
|
329
|
+
when 0x84; array_enc << 0x00D1
|
330
|
+
when 0x85; array_enc << 0x00D6
|
331
|
+
when 0x86; array_enc << 0x00DC
|
332
|
+
when 0x87; array_enc << 0x00E1
|
333
|
+
when 0x88; array_enc << 0x00E0
|
334
|
+
when 0x89; array_enc << 0x00E2
|
335
|
+
when 0x8A; array_enc << 0x00E4
|
336
|
+
when 0x8B; array_enc << 0x00E3
|
337
|
+
when 0x8C; array_enc << 0x00E5
|
338
|
+
when 0x8D; array_enc << 0x00E7
|
339
|
+
when 0x8E; array_enc << 0x00E9
|
340
|
+
when 0x8F; array_enc << 0x00E8
|
341
|
+
when 0x90; array_enc << 0x00EA
|
342
|
+
when 0x91; array_enc << 0x00EB
|
343
|
+
when 0x92; array_enc << 0x00ED
|
344
|
+
when 0x93; array_enc << 0x00EC
|
345
|
+
when 0x94; array_enc << 0x00EE
|
346
|
+
when 0x95; array_enc << 0x00EF
|
347
|
+
when 0x96; array_enc << 0x00F1
|
348
|
+
when 0x97; array_enc << 0x00F3
|
349
|
+
when 0x98; array_enc << 0x00F2
|
350
|
+
when 0x99; array_enc << 0x00F4
|
351
|
+
when 0x9A; array_enc << 0x00F6
|
352
|
+
when 0x9B; array_enc << 0x00F5
|
353
|
+
when 0x9C; array_enc << 0x00FA
|
354
|
+
when 0x9D; array_enc << 0x00F9
|
355
|
+
when 0x9E; array_enc << 0x00FB
|
356
|
+
when 0x9F; array_enc << 0x00FC
|
357
|
+
when 0xA0; array_enc << 0x2020
|
358
|
+
when 0xA1; array_enc << 0x00B0
|
359
|
+
when 0xA2; array_enc << 0x00A2
|
360
|
+
when 0xA3; array_enc << 0x00A3
|
361
|
+
when 0xA4; array_enc << 0x00A7
|
362
|
+
when 0xA5; array_enc << 0x2022
|
363
|
+
when 0xA6; array_enc << 0x00B6
|
364
|
+
when 0xA7; array_enc << 0x00DF
|
365
|
+
when 0xA8; array_enc << 0x00AE
|
366
|
+
when 0xA9; array_enc << 0x00A9
|
367
|
+
when 0xAA; array_enc << 0x2122
|
368
|
+
when 0xAB; array_enc << 0x00B4
|
369
|
+
when 0xAC; array_enc << 0x00A8
|
370
|
+
when 0xAD; array_enc << 0x2260
|
371
|
+
when 0xAE; array_enc << 0x00C6
|
372
|
+
when 0xAF; array_enc << 0x00D8
|
373
|
+
when 0xB0; array_enc << 0x221E
|
374
|
+
when 0xB1; array_enc << 0x00B1
|
375
|
+
when 0xB2; array_enc << 0x2264
|
376
|
+
when 0xB3; array_enc << 0x2265
|
377
|
+
when 0xB4; array_enc << 0x00A5
|
378
|
+
when 0xB5; array_enc << 0x00B5
|
379
|
+
when 0xB6; array_enc << 0x2202
|
380
|
+
when 0xB7; array_enc << 0x2211
|
381
|
+
when 0xB8; array_enc << 0x220F
|
382
|
+
when 0xB9; array_enc << 0x03C0
|
383
|
+
when 0xBA; array_enc << 0x222B
|
384
|
+
when 0xBB; array_enc << 0x00AA
|
385
|
+
when 0xBC; array_enc << 0x00BA
|
386
|
+
when 0xBD; array_enc << 0x03A9
|
387
|
+
when 0xBE; array_enc << 0x00E6
|
388
|
+
when 0xBF; array_enc << 0x00F8
|
389
|
+
when 0xC0; array_enc << 0x00BF
|
390
|
+
when 0xC1; array_enc << 0x00A1
|
391
|
+
when 0xC2; array_enc << 0x00AC
|
392
|
+
when 0xC3; array_enc << 0x221A
|
393
|
+
when 0xC4; array_enc << 0x0192
|
394
|
+
when 0xC5; array_enc << 0x2248
|
395
|
+
when 0xC6; array_enc << 0x2206
|
396
|
+
when 0xC7; array_enc << 0x00AB
|
397
|
+
when 0xC8; array_enc << 0x00BB
|
398
|
+
when 0xC9; array_enc << 0x2026
|
399
|
+
when 0xCA; array_enc << 0x00A0
|
400
|
+
when 0xCB; array_enc << 0x00C0
|
401
|
+
when 0xCC; array_enc << 0x00C3
|
402
|
+
when 0xCD; array_enc << 0x00D5
|
403
|
+
when 0xCE; array_enc << 0x0152
|
404
|
+
when 0xCF; array_enc << 0x0153
|
405
|
+
when 0xD0; array_enc << 0x2013
|
406
|
+
when 0xD1; array_enc << 0x2014
|
407
|
+
when 0xD2; array_enc << 0x201C
|
408
|
+
when 0xD3; array_enc << 0x201D
|
409
|
+
when 0xD4; array_enc << 0x2018
|
410
|
+
when 0xD5; array_enc << 0x2019
|
411
|
+
when 0xD6; array_enc << 0x00F7
|
412
|
+
when 0xD7; array_enc << 0x25CA
|
413
|
+
when 0xD8; array_enc << 0x00FF
|
414
|
+
when 0xD9; array_enc << 0x0178
|
415
|
+
when 0xDA; array_enc << 0x2044
|
416
|
+
when 0xDB; array_enc << 0x20AC
|
417
|
+
when 0xDC; array_enc << 0x2039
|
418
|
+
when 0xDD; array_enc << 0x203A
|
419
|
+
when 0xDE; array_enc << 0xFB01
|
420
|
+
when 0xDF; array_enc << 0xFB02
|
421
|
+
when 0xE0; array_enc << 0x2021
|
422
|
+
when 0xE1; array_enc << 0x00B7
|
423
|
+
when 0xE2; array_enc << 0x201A
|
424
|
+
when 0xE3; array_enc << 0x201E
|
425
|
+
when 0xE4; array_enc << 0x2030
|
426
|
+
when 0xE5; array_enc << 0x00C2
|
427
|
+
when 0xE6; array_enc << 0x00CA
|
428
|
+
when 0xE7; array_enc << 0x00C1
|
429
|
+
when 0xE8; array_enc << 0x00CB
|
430
|
+
when 0xE9; array_enc << 0x00C8
|
431
|
+
when 0xEA; array_enc << 0x00CD
|
432
|
+
when 0xEB; array_enc << 0x00CE
|
433
|
+
when 0xEC; array_enc << 0x00CF
|
434
|
+
when 0xED; array_enc << 0x00CC
|
435
|
+
when 0xEE; array_enc << 0x00D3
|
436
|
+
when 0xEF; array_enc << 0x00D4
|
437
|
+
when 0xF0; array_enc << 0xF8FF
|
438
|
+
when 0xF1; array_enc << 0x00D2
|
439
|
+
when 0xF2; array_enc << 0x00DA
|
440
|
+
when 0xF3; array_enc << 0x00D8
|
441
|
+
when 0xF4; array_enc << 0x00D9
|
442
|
+
when 0xF5; array_enc << 0x0131
|
443
|
+
when 0xF6; array_enc << 0x02C6
|
444
|
+
when 0xF7; array_enc << 0x02DC
|
445
|
+
when 0xF8; array_enc << 0x00AF
|
446
|
+
when 0xF9; array_enc << 0x02D8
|
447
|
+
when 0xFA; array_enc << 0x02D9
|
448
|
+
when 0xFB; array_enc << 0x02DA
|
449
|
+
when 0xFC; array_enc << 0x00B8
|
450
|
+
when 0xFD; array_enc << 0x02DD
|
451
|
+
when 0xFE; array_enc << 0x02DB
|
452
|
+
when 0xFF; array_enc << 0x02C7
|
453
|
+
else
|
454
|
+
array_enc << num
|
455
|
+
end
|
456
|
+
end
|
457
|
+
|
458
|
+
# convert any glyph names to unicode codepoints
|
459
|
+
array_enc = self.process_glyphnames(array_enc)
|
460
|
+
|
461
|
+
# pack all our Unicode codepoints into a UTF-8 string
|
462
|
+
ret = array_enc.pack("U*")
|
463
|
+
|
464
|
+
# set the string's encoding correctly under ruby 1.9+
|
465
|
+
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
|
466
|
+
|
467
|
+
return ret
|
468
|
+
end
|
469
|
+
end
|
470
|
+
|
471
|
+
class StandardEncoding < Encoding
  # Decode a string of Adobe StandardEncoding bytes into a UTF-8 string.
  #
  # The byte -> codepoint pairs below follow the mapping published at:
  #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
  #
  # str       - the raw string to decode (read one unsigned byte at a time)
  # tounicode - accepted so every encoding shares one signature; it is not
  #             consulted by this method
  #
  # Returns the decoded string.
  def to_utf8(str, tounicode = nil)
    bytes = self.process_differences(str.unpack('C*'))

    # swap each value for its Unicode codepoint; anything without an explicit
    # entry below is passed through unchanged
    codepoints = bytes.map do |byte|
      case byte
      when 0x27 then 0x2019
      when 0x60 then 0x2018
      when 0xA4 then 0x2044
      when 0xA6 then 0x0192
      when 0xA8 then 0x00A4
      when 0xA9 then 0x0027
      when 0xAA then 0x201C
      when 0xAC then 0x2039
      when 0xAD then 0x203A
      when 0xAE then 0xFB01
      when 0xAF then 0xFB02
      when 0xB1 then 0x2013
      when 0xB2 then 0x2020
      when 0xB3 then 0x2021
      when 0xB4 then 0x00B7
      when 0xB7 then 0x2022
      when 0xB8 then 0x201A
      when 0xB9 then 0x201E
      when 0xBA then 0x201D
      when 0xBC then 0x2026
      when 0xBD then 0x2030
      when 0xC1 then 0x0060
      when 0xC2 then 0x00B4
      when 0xC3 then 0x02C6
      when 0xC4 then 0x02DC
      when 0xC5 then 0x00AF
      when 0xC6 then 0x02D8
      when 0xC7 then 0x02D9
      when 0xC8 then 0x00A8
      when 0xCA then 0x02DA
      when 0xCB then 0x00B8
      when 0xCD then 0x02DD
      when 0xCE then 0x02DB
      when 0xCF then 0x02C7
      when 0xD0 then 0x2014
      when 0xE1 then 0x00C6
      when 0xE3 then 0x00AA
      when 0xE8 then 0x0141
      when 0xE9 then 0x00D8
      when 0xEA then 0x0152
      when 0xEB then 0x00BA
      when 0xF1 then 0x00E6
      when 0xF5 then 0x0131
      when 0xF8 then 0x0142
      when 0xF9 then 0x00F8
      when 0xFA then 0x0153
      when 0xFB then 0x00DF
      else byte
      end
    end

    # convert any glyph names to unicode codepoints
    codepoints = self.process_glyphnames(codepoints)

    # pack the codepoints into a UTF-8 string and, on rubies whose strings
    # carry an encoding (1.9+), tag the result as UTF-8
    utf8 = codepoints.pack("U*")
    utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
    utf8
  end
end
|
545
|
+
|
546
|
+
class SymbolEncoding < Encoding
  # Decode a string of SymbolEncoding bytes into a UTF-8 string.
  #
  # str       - the raw string to decode (read one unsigned byte at a time)
  # tounicode - accepted so every encoding shares one signature; it is not
  #             consulted by this method
  #
  # Returns the decoded string.
  def to_utf8(str, tounicode = nil)
    bytes = self.process_differences(str.unpack('C*'))

    # swap each value for its Unicode codepoint; anything without an explicit
    # entry below is passed through unchanged
    codepoints = bytes.map do |byte|
      case byte
      when 0x22 then 0x2200
      when 0x24 then 0x2203
      when 0x27 then 0x220B
      when 0x2A then 0x2217
      when 0x2D then 0x2212
      when 0x40 then 0x2245
      when 0x41 then 0x0391
      when 0x42 then 0x0392
      when 0x43 then 0x03A7
      when 0x44 then 0x0394
      when 0x45 then 0x0395
      when 0x46 then 0x03A6
      when 0x47 then 0x0393
      when 0x48 then 0x0397
      when 0x49 then 0x0399
      when 0x4A then 0x03D1
      when 0x4B then 0x039A
      when 0x4C then 0x039B
      when 0x4D then 0x039C
      when 0x4E then 0x039D
      when 0x4F then 0x039F
      when 0x50 then 0x03A0
      when 0x51 then 0x0398
      when 0x52 then 0x03A1
      when 0x53 then 0x03A3
      when 0x54 then 0x03A4
      when 0x55 then 0x03A5
      when 0x56 then 0x03C2
      when 0x57 then 0x03A9
      when 0x58 then 0x039E
      when 0x59 then 0x03A8
      when 0x5A then 0x0396
      when 0x5C then 0x2234
      when 0x5E then 0x22A5
      when 0x60 then 0xF8E5
      when 0x61 then 0x03B1
      when 0x62 then 0x03B2
      when 0x63 then 0x03C7
      when 0x64 then 0x03B4
      when 0x65 then 0x03B5
      when 0x66 then 0x03C6
      when 0x67 then 0x03B3
      when 0x68 then 0x03B7
      when 0x69 then 0x03B9
      when 0x6A then 0x03D5
      when 0x6B then 0x03BA
      when 0x6C then 0x03BB
      when 0x6D then 0x03BC
      when 0x6E then 0x03BD
      when 0x6F then 0x03BF
      when 0x70 then 0x03C0
      when 0x71 then 0x03B8
      when 0x72 then 0x03C1
      when 0x73 then 0x03C3
      when 0x74 then 0x03C4
      when 0x75 then 0x03C5
      when 0x76 then 0x03D6
      when 0x77 then 0x03C9
      when 0x78 then 0x03BE
      when 0x79 then 0x03C8
      when 0x7A then 0x03B6
      when 0x7E then 0x223C
      when 0xA0 then 0x20AC
      when 0xA1 then 0x03D2
      when 0xA2 then 0x2032
      when 0xA3 then 0x2264
      when 0xA4 then 0x2215
      when 0xA5 then 0x221E
      when 0xA6 then 0x0192
      when 0xA7 then 0x2663
      when 0xA8 then 0x2666
      when 0xA9 then 0x2665
      when 0xAA then 0x2660
      when 0xAB then 0x2194
      when 0xAC then 0x2190
      when 0xAD then 0x2191
      when 0xAE then 0x2192
      when 0xAF then 0x2193
      when 0xB2 then 0x2033
      when 0xB3 then 0x2265
      when 0xB4 then 0x00D7
      when 0xB5 then 0x221D
      when 0xB6 then 0x2202
      when 0xB7 then 0x2022
      when 0xB8 then 0x00F7
      when 0xB9 then 0x2260
      when 0xBA then 0x2261
      when 0xBB then 0x2248
      when 0xBC then 0x2026
      when 0xBD then 0xF8E6
      when 0xBE then 0xF8E7
      when 0xBF then 0x21B5
      when 0xC0 then 0x2135
      when 0xC1 then 0x2111
      when 0xC2 then 0x211C
      when 0xC3 then 0x2118
      when 0xC4 then 0x2297
      when 0xC5 then 0x2295
      when 0xC6 then 0x2205
      when 0xC7 then 0x2229
      when 0xC8 then 0x222A
      when 0xC9 then 0x2283
      when 0xCA then 0x2287
      when 0xCB then 0x2284
      when 0xCC then 0x2282
      when 0xCD then 0x2286
      when 0xCE then 0x2208
      when 0xCF then 0x2209
      when 0xD0 then 0x2220
      when 0xD1 then 0x2207
      when 0xD2 then 0xF6DA
      when 0xD3 then 0xF6D9
      when 0xD4 then 0xF6DB
      when 0xD5 then 0x220F
      when 0xD6 then 0x221A
      when 0xD7 then 0x22C5
      when 0xD8 then 0x00AC
      when 0xD9 then 0x2227
      when 0xDA then 0x2228
      when 0xDB then 0x21D4
      when 0xDC then 0x21D0
      when 0xDD then 0x21D1
      when 0xDE then 0x21D2
      when 0xDF then 0x21D3
      when 0xE0 then 0x25CA
      when 0xE1 then 0x2329
      when 0xE2 then 0xF8E8
      when 0xE3 then 0xF8E9
      when 0xE4 then 0xF8EA
      when 0xE5 then 0x2211
      when 0xE6 then 0xF8EB
      when 0xE7 then 0xF8EC
      when 0xE8 then 0xF8ED
      when 0xE9 then 0xF8EE
      when 0xEA then 0xF8EF
      when 0xEB then 0xF8F0
      when 0xEC then 0xF8F1
      when 0xED then 0xF8F2
      when 0xEE then 0xF8F3
      when 0xEF then 0xF8F4
      when 0xF1 then 0x232A
      when 0xF2 then 0x222B
      when 0xF3 then 0x2320
      when 0xF4 then 0xF8F5
      when 0xF5 then 0x2321
      when 0xF6 then 0xF8F6
      when 0xF7 then 0xF8F7
      when 0xF8 then 0xF8F8
      when 0xF9 then 0xF8F9
      when 0xFA then 0xF8FA
      when 0xFB then 0xF8FB
      when 0xFC then 0xF8FC
      when 0xFD then 0xF8FD
      when 0xFE then 0xF8FE
      else byte
      end
    end

    # convert any glyph names to unicode codepoints
    codepoints = self.process_glyphnames(codepoints)

    # pack the codepoints into a UTF-8 string and, on rubies whose strings
    # carry an encoding (1.9+), tag the result as UTF-8
    utf8 = codepoints.pack("U*")
    utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
    utf8
  end
end
|
725
|
+
|
726
|
+
class WinAnsiEncoding < Encoding
  # Byte values that this class remaps, keyed by byte and mapped to the
  # Unicode codepoint to emit. These are the characters added compared to
  # iso-8859-1; every other value is passed through as-is by #to_utf8.
  #
  # Table borrowed from REXML::Encoding.decode_cp1252. For further reading:
  #   http://www.intertwingly.net/stories/2004/04/14/i18n.html
  CP1252_TO_UNICODE = {
    0x80 => 0x20AC, # euro sign                     (utf-8: e2 82 ac)
    0x82 => 0x201A, # single low-9 quotation mark   (utf-8: e2 80 9a)
    0x83 => 0x0192, # small f with hook             (utf-8: c6 92)
    0x84 => 0x201E, # double low-9 quotation mark   (utf-8: e2 80 9e)
    0x85 => 0x2026, # horizontal ellipsis           (utf-8: e2 80 a6)
    0x86 => 0x2020, # dagger                        (utf-8: e2 80 a0)
    0x87 => 0x2021, # double dagger                 (utf-8: e2 80 a1)
    0x88 => 0x02C6, # modifier circumflex accent    (utf-8: cb 86)
    0x89 => 0x2030, # per mille sign                (utf-8: e2 80 b0)
    0x8A => 0x0160, # capital S with caron          (utf-8: c5 a0)
    0x8B => 0x2039, # single left angle quote       (utf-8: e2 80 b9)
    0x8C => 0x0152, # capital ligature OE           (utf-8: c5 92)
    0x8E => 0x017D, # capital Z with caron          (utf-8: c5 bd)
    0x91 => 0x2018, # left single quotation mark    (utf-8: e2 80 98)
    0x92 => 0x2019, # right single quotation mark   (utf-8: e2 80 99)
    0x93 => 0x201C, # left double quotation mark
    0x94 => 0x201D, # right double quotation mark
    0x95 => 0x2022, # bullet
    0x96 => 0x2013, # en dash
    0x97 => 0x2014, # em dash
    0x98 => 0x02DC, # small tilde
    0x99 => 0x2122, # trade mark sign
    0x9A => 0x0161, # small s with caron
    0x9B => 0x203A, # single right angle quote
    # 0x9C is the *small* oe ligature. It was previously mapped to U+0152,
    # the capital form already used for 0x8C.
    0x9C => 0x0153, # small ligature oe             (utf-8: c5 93)
    0x9E => 0x017E, # small z with caron            (utf-8: c5 be)
    0x9F => 0x0178  # capital Y with diaeresis
  }.freeze

  # Decode a string of WinAnsiEncoding bytes into a UTF-8 string.
  #
  # str       - the raw string to decode (read one unsigned byte at a time)
  # tounicode - accepted so every encoding shares one signature; it is not
  #             consulted by this method
  #
  # Returns the decoded string.
  def to_utf8(str, tounicode = nil)
    bytes = self.process_differences(str.unpack('C*'))

    # values with no table entry (including anything that is not a plain
    # byte, e.g. entries introduced by process_differences) pass through
    codepoints = bytes.map { |byte| CP1252_TO_UNICODE.fetch(byte, byte) }

    # convert any glyph names to unicode codepoints
    codepoints = self.process_glyphnames(codepoints)

    # pack the codepoints into a UTF-8 string and, on rubies whose strings
    # carry an encoding (1.9+), tag the result as UTF-8
    utf8 = codepoints.pack("U*")
    utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
    utf8
  end
end
|
782
|
+
|
783
|
+
class ZapfDingbatsEncoding < Encoding
  # Byte value => Unicode codepoint, taken from:
  #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
  #
  # Most of the table is sequential, but several slots are deliberately NOT
  # sequential. The previous implementation got the following wrong by
  # assuming they were:
  #   0x3F -> 0x271F (was a duplicate of 0x3E's 0x271E)
  #   0x73 -> 0x25B2, 0x74 -> 0x25BC, 0x75 -> 0x25C6, 0x77 -> 0x25D7
  #   0xD5 -> 0x2192, 0xD6 -> 0x2194, 0xD7 -> 0x2195
  #
  # Values with no entry here are passed through unchanged by #to_utf8.
  DINGBATS_TO_UNICODE = {
    0x21 => 0x2701, 0x22 => 0x2702, 0x23 => 0x2703, 0x24 => 0x2704,
    0x25 => 0x260E, 0x26 => 0x2706, 0x27 => 0x2707, 0x28 => 0x2708,
    0x29 => 0x2709, 0x2A => 0x261B, 0x2B => 0x261E, 0x2C => 0x270C,
    0x2D => 0x270D, 0x2E => 0x270E, 0x2F => 0x270F, 0x30 => 0x2710,
    0x31 => 0x2711, 0x32 => 0x2712, 0x33 => 0x2713, 0x34 => 0x2714,
    0x35 => 0x2715, 0x36 => 0x2716, 0x37 => 0x2717, 0x38 => 0x2718,
    0x39 => 0x2719, 0x3A => 0x271A, 0x3B => 0x271B, 0x3C => 0x271C,
    0x3D => 0x271D, 0x3E => 0x271E, 0x3F => 0x271F, 0x40 => 0x2720,
    0x41 => 0x2721, 0x42 => 0x2722, 0x43 => 0x2723, 0x44 => 0x2724,
    0x45 => 0x2725, 0x46 => 0x2726, 0x47 => 0x2727, 0x48 => 0x2605,
    0x49 => 0x2729, 0x4A => 0x272A, 0x4B => 0x272B, 0x4C => 0x272C,
    0x4D => 0x272D, 0x4E => 0x272E, 0x4F => 0x272F, 0x50 => 0x2730,
    0x51 => 0x2731, 0x52 => 0x2732, 0x53 => 0x2733, 0x54 => 0x2734,
    0x55 => 0x2735, 0x56 => 0x2736, 0x57 => 0x2737, 0x58 => 0x2738,
    0x59 => 0x2739, 0x5A => 0x273A, 0x5B => 0x273B, 0x5C => 0x273C,
    0x5D => 0x273D, 0x5E => 0x273E, 0x5F => 0x273F, 0x60 => 0x2740,
    0x61 => 0x2741, 0x62 => 0x2742, 0x63 => 0x2743, 0x64 => 0x2744,
    0x65 => 0x2745, 0x66 => 0x2746, 0x67 => 0x2747, 0x68 => 0x2748,
    0x69 => 0x2749, 0x6A => 0x274A, 0x6B => 0x274B, 0x6C => 0x25CF,
    0x6D => 0x274D, 0x6E => 0x25A0, 0x6F => 0x274F, 0x70 => 0x2750,
    0x71 => 0x2751, 0x72 => 0x2752, 0x73 => 0x25B2, 0x74 => 0x25BC,
    0x75 => 0x25C6, 0x76 => 0x2756, 0x77 => 0x25D7, 0x78 => 0x2758,
    0x79 => 0x2759, 0x7A => 0x275A, 0x7B => 0x275B, 0x7C => 0x275C,
    0x7D => 0x275D, 0x7E => 0x275E,

    0x80 => 0xF8D7, 0x81 => 0xF8D8, 0x82 => 0xF8D9, 0x83 => 0xF8DA,
    0x84 => 0xF8DB, 0x85 => 0xF8DC, 0x86 => 0xF8DD, 0x87 => 0xF8DE,
    0x88 => 0xF8DF, 0x89 => 0xF8E0, 0x8A => 0xF8E1, 0x8B => 0xF8E2,
    0x8C => 0xF8E3, 0x8D => 0xF8E4,

    0xA1 => 0x2761, 0xA2 => 0x2762, 0xA3 => 0x2763, 0xA4 => 0x2764,
    0xA5 => 0x2765, 0xA6 => 0x2766, 0xA7 => 0x2767, 0xA8 => 0x2663,
    0xA9 => 0x2666, 0xAA => 0x2665, 0xAB => 0x2660, 0xAC => 0x2460,
    0xAD => 0x2461, 0xAE => 0x2462, 0xAF => 0x2463, 0xB0 => 0x2464,
    0xB1 => 0x2465, 0xB2 => 0x2466, 0xB3 => 0x2467, 0xB4 => 0x2468,
    0xB5 => 0x2469, 0xB6 => 0x2776, 0xB7 => 0x2777, 0xB8 => 0x2778,
    0xB9 => 0x2779, 0xBA => 0x277A, 0xBB => 0x277B, 0xBC => 0x277C,
    0xBD => 0x277D, 0xBE => 0x277E, 0xBF => 0x277F, 0xC0 => 0x2780,
    0xC1 => 0x2781, 0xC2 => 0x2782, 0xC3 => 0x2783, 0xC4 => 0x2784,
    0xC5 => 0x2785, 0xC6 => 0x2786, 0xC7 => 0x2787, 0xC8 => 0x2788,
    0xC9 => 0x2789, 0xCA => 0x278A, 0xCB => 0x278B, 0xCC => 0x278C,
    0xCD => 0x278D, 0xCE => 0x278E, 0xCF => 0x278F, 0xD0 => 0x2790,
    0xD1 => 0x2791, 0xD2 => 0x2792, 0xD3 => 0x2793, 0xD4 => 0x2794,
    0xD5 => 0x2192, 0xD6 => 0x2194, 0xD7 => 0x2195, 0xD8 => 0x2798,
    0xD9 => 0x2799, 0xDA => 0x279A, 0xDB => 0x279B, 0xDC => 0x279C,
    0xDD => 0x279D, 0xDE => 0x279E, 0xDF => 0x279F, 0xE0 => 0x27A0,
    0xE1 => 0x27A1, 0xE2 => 0x27A2, 0xE3 => 0x27A3, 0xE4 => 0x27A4,
    0xE5 => 0x27A5, 0xE6 => 0x27A6, 0xE7 => 0x27A7, 0xE8 => 0x27A8,
    0xE9 => 0x27A9, 0xEA => 0x27AA, 0xEB => 0x27AB, 0xEC => 0x27AC,
    0xED => 0x27AD, 0xEE => 0x27AE, 0xEF => 0x27AF,

    0xF1 => 0x27B1, 0xF2 => 0x27B2, 0xF3 => 0x27B3, 0xF4 => 0x27B4,
    0xF5 => 0x27B5, 0xF6 => 0x27B6, 0xF7 => 0x27B7, 0xF8 => 0x27B8,
    0xF9 => 0x27B9, 0xFA => 0x27BA, 0xFB => 0x27BB, 0xFC => 0x27BC,
    0xFD => 0x27BD, 0xFE => 0x27BE
  }.freeze

  # Decode a string of ZapfDingbatsEncoding bytes into a UTF-8 string.
  #
  # str       - the raw string to decode (read one unsigned byte at a time)
  # tounicode - accepted so every encoding shares one signature; it is not
  #             consulted by this method
  #
  # Returns the decoded string.
  def to_utf8(str, tounicode = nil)
    bytes = self.process_differences(str.unpack('C*'))

    # values with no table entry (including anything that is not a plain
    # byte, e.g. entries introduced by process_differences) pass through
    codepoints = bytes.map { |byte| DINGBATS_TO_UNICODE.fetch(byte, byte) }

    # convert any glyph names to unicode codepoints
    codepoints = self.process_glyphnames(codepoints)

    # pack the codepoints into a UTF-8 string and, on rubies whose strings
    # carry an encoding (1.9+), tag the result as UTF-8
    utf8 = codepoints.pack("U*")
    utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
    utf8
  end
end
|
1011
|
+
end
|
1012
|
+
end
|