pdf-reader 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,48 @@ class PDF::Reader
32
32
 
33
33
  attr_reader :differences
34
34
 
35
# Build an encoding from the "Encoding" value of a font dictionary.
#
# enc may be:
# * nil                - treated the same as :StandardEncoding
# * a Hash             - :Differences (if present) is passed to differences=,
#                        and the base encoding name is read from :Encoding,
#                        falling back to :BaseEncoding
# * anything else      - converted to a Symbol with to_sym and used as the
#                        encoding name
#
# Raises UnsupportedFeatureError when the resulting name is not one of the
# encodings handled below.
def initialize(enc)
  if enc.kind_of?(Hash)
    self.differences = enc[:Differences] if enc[:Differences]
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  # encodings that are unpacked one byte at a time and translated through a
  # mapping file stored in the encodings/ directory next to this source file
  table_files = {
    nil                   => "standard.txt",
    :MacRomanEncoding     => "mac_roman.txt",
    :MacExpertEncoding    => "mac_expert.txt",
    :PDFDocEncoding       => "pdf_doc.txt",
    :StandardEncoding     => "standard.txt",
    :SymbolEncoding       => "symbol.txt",
    :WinAnsiEncoding      => "win_ansi.txt",
    :ZapfDingbatsEncoding => "zapf_dingbats.txt"
  }

  if table_files.has_key?(enc)
    load_mapping File.dirname(__FILE__) + "/encodings/" + table_files[enc]
    @unpack = "C*"
  elsif enc == :"Identity-H"
    # 16-bit codes; to_utf8 emits UNKNOWN_CHAR for these unless it is handed
    # a tounicode object
    @unpack = "n*"
    @to_unicode_required = true
  elsif enc == :UTF16Encoding
    # 16-bit big-endian code units, no mapping table
    @unpack = "n*"
  else
    raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
  end
end
76
+
35
77
  # set the differences table for this encoding. should be an array in the following format:
36
78
  #
37
79
  # [25, :A, 26, :B]
@@ -57,1117 +99,69 @@ class PDF::Reader
57
99
  @differences
58
100
  end
59
101
 
60
- # Takes the "Encoding" value of a Font dictionary and builds a PDF::Reader::Encoding object
61
- def self.factory(enc)
62
- if enc.kind_of?(Hash)
63
- diff = enc[:Differences]
64
- enc = enc[:Encoding] || enc[:BaseEncoding]
65
- elsif enc != nil
66
- enc = enc.to_sym
67
- end
102
+ # convert the specified string to utf8
103
# Convert the specified string to UTF-8.
#
# str       - the raw string to convert; it is unpacked with the template
#             chosen by the constructor (@unpack)
# tounicode - optional object responding to decode(code). It is expected to
#             return a Unicode codepoint for a character code, or nil/false
#             when it has no entry (NOTE(review): confirm against the CMap
#             class used by callers)
#
# Each character code is resolved in this order:
# 1. if tounicode is given and decodes the raw code, that codepoint is used
# 2. if tounicode is given but has no entry, or no tounicode was supplied
#    and this encoding requires one, UNKNOWN_CHAR is used
# 3. otherwise the differences table and then the mapping table are applied
#
# Returns a String of UTF-8 encoded text.
def to_utf8(str, tounicode = nil)

  # unpack the raw character codes
  codes = str.unpack(@unpack)

  # replace any relevant codes with a glyph name. process_differences
  # modifies its argument in place, so give it a copy: the tounicode lookup
  # below must see the original character code, not a glyph name
  array_orig = process_differences(codes.dup)

  # replace any remaining bytes with a unicode codepoint
  array_enc = []
  codes.each_with_index do |code, idx|
    num = array_orig[idx]
    if tounicode && (uni = tounicode.decode(code))
      array_enc << uni
    elsif tounicode || (tounicode.nil? && @to_unicode_required)
      array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
    elsif @mapping && @mapping[num]
      array_enc << @mapping[num]
    else
      array_enc << num
    end
  end

  # convert any glyph names to unicode codepoints
  array_enc = process_glyphnames(array_enc)

  # replace characters that didn't convert to unicode nicely with something valid
  array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack all our Unicode codepoints into a UTF-8 string
  ret = array_enc.pack("U*")

  # set the strings encoding correctly under ruby 1.9+
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

  return ret
end
90
139
 
140
+ private
141
+
91
142
  # accepts an array of byte numbers, and replaces any that have entries in the differences table
92
143
  # with a glyph name
93
144
  def process_differences(arr)
94
145
  @differences ||= {}
95
146
  arr.collect! { |n| @differences[n].nil? ? n : @differences[n]}
96
147
  end
97
- protected :process_differences
98
148
 
99
149
  # accepts an array of unicode code points and glyphnames, and converts any glyph names to codepoints
100
150
  def process_glyphnames(arr)
101
151
  @differences ||= {}
102
152
  arr.collect! { |n| n.kind_of?(Numeric) ? n : PDF::Reader::Font.glyphnames[n]}
103
153
  end
104
- protected :process_glyphnames
105
-
106
- class IdentityH < Encoding
107
- def to_utf8(str, tounicode = nil)
108
-
109
- array_enc = []
110
-
111
- # iterate over string, reading it in 2 byte chunks and interpreting those
112
- # chunks as ints
113
- str.unpack("n*").each do |num|
114
-
115
- # convert the int to a unicode codepoint if possible.
116
- # without a ToUnicode CMap, it's impossible to reliably convert this text
117
- # to unicode, so just replace each character with a little box. Big smacks
118
- # the the PDF producing app.
119
- if tounicode && (code = tounicode.decode(num))
120
- array_enc << code
121
- else
122
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
123
- end
124
- end
125
-
126
- # replace charcters that didn't convert to unicode nicely with something valid
127
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
128
-
129
- # pack all our Unicode codepoints into a UTF-8 string
130
- ret = array_enc.pack("U*")
131
-
132
- # set the strings encoding correctly under ruby 1.9+
133
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
134
-
135
- return ret
136
- end
137
- end
138
-
139
- class MacExpertEncoding < Encoding
140
- # convert a MacExpertEncoding string into UTF-8
141
- def to_utf8(str, tounicode = nil)
142
- array_expert = str.unpack('C*')
143
- array_expert = self.process_differences(array_expert)
144
- array_enc = []
145
- array_expert.each do |num|
146
- if tounicode && (code = tounicode.decode(num))
147
- array_enc << code
148
- elsif tounicode
149
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
150
- else
151
- case num
152
- # change necesary characters to equivilant Unicode codepoints
153
- when 0x21; array_enc << 0xF721
154
- when 0x22; array_enc << 0xF6F8 # Hungarumlautsmall
155
- when 0x23; array_enc << 0xF7A2
156
- when 0x24; array_enc << 0xF724
157
- when 0x25; array_enc << 0xF6E4
158
- when 0x26; array_enc << 0xF726
159
- when 0x27; array_enc << 0xF7B4
160
- when 0x28; array_enc << 0x207D
161
- when 0x29; array_enc << 0xF07E
162
- when 0x2A; array_enc << 0x2025
163
- when 0x2B; array_enc << 0x2024
164
- when 0x2F; array_enc << 0x2044
165
- when 0x30; array_enc << 0xF730
166
- when 0x31; array_enc << 0xF731
167
- when 0x32; array_enc << 0xF732
168
- when 0x33; array_enc << 0xF733
169
- when 0x34; array_enc << 0xF734
170
- when 0x35; array_enc << 0xF735
171
- when 0x36; array_enc << 0xF736
172
- when 0x37; array_enc << 0xF737
173
- when 0x38; array_enc << 0xF738
174
- when 0x39; array_enc << 0xF739
175
- when 0x3D; array_enc << 0xF6DE
176
- when 0x3F; array_enc << 0xF73F
177
- when 0x44; array_enc << 0xF7F0
178
- when 0x47; array_enc << 0x00BC
179
- when 0x48; array_enc << 0x00BD
180
- when 0x49; array_enc << 0x00BE
181
- when 0x4A; array_enc << 0x215B
182
- when 0x4B; array_enc << 0x215C
183
- when 0x4C; array_enc << 0x215D
184
- when 0x4D; array_enc << 0x215E
185
- when 0x4E; array_enc << 0x2153
186
- when 0x4F; array_enc << 0x2154
187
- when 0x56; array_enc << 0xFB00
188
- when 0x57; array_enc << 0xFB01
189
- when 0x58; array_enc << 0xFB02
190
- when 0x59; array_enc << 0xFB03
191
- when 0x5A; array_enc << 0xFB04
192
- when 0x5B; array_enc << 0x208D
193
- when 0x5D; array_enc << 0x208E
194
- when 0x5E; array_enc << 0xF6F6
195
- when 0x5F; array_enc << 0xF6E5
196
- when 0x60; array_enc << 0xF760
197
- when 0x61; array_enc << 0xF761
198
- when 0x62; array_enc << 0xF762
199
- when 0x63; array_enc << 0xF763
200
- when 0x64; array_enc << 0xF764
201
- when 0x65; array_enc << 0xF765
202
- when 0x66; array_enc << 0xF766
203
- when 0x67; array_enc << 0xF767
204
- when 0x68; array_enc << 0xF768
205
- when 0x69; array_enc << 0xF769
206
- when 0x6A; array_enc << 0xF76A
207
- when 0x6B; array_enc << 0xF76B
208
- when 0x6C; array_enc << 0xF76C
209
- when 0x6D; array_enc << 0xF76D
210
- when 0x6E; array_enc << 0xF76E
211
- when 0x6F; array_enc << 0xF76F
212
- when 0x70; array_enc << 0xF770
213
- when 0x71; array_enc << 0xF771
214
- when 0x72; array_enc << 0xF772
215
- when 0x73; array_enc << 0xF773
216
- when 0x74; array_enc << 0xF774
217
- when 0x75; array_enc << 0xF775
218
- when 0x76; array_enc << 0xF776
219
- when 0x77; array_enc << 0xF777
220
- when 0x78; array_enc << 0xF778
221
- when 0x79; array_enc << 0xF779
222
- when 0x7A; array_enc << 0xF77A
223
- when 0x7B; array_enc << 0x20A1
224
- when 0x7C; array_enc << 0xF6DC
225
- when 0x7D; array_enc << 0xF6DD
226
- when 0x7E; array_enc << 0xF6FE
227
- when 0x81; array_enc << 0xF6E9
228
- when 0x82; array_enc << 0xF6E0
229
- when 0x87; array_enc << 0xF7E1 # Acircumflexsmall
230
- when 0x88; array_enc << 0xF7E0
231
- when 0x89; array_enc << 0xF7E2 # Acutesmall
232
- when 0x8A; array_enc << 0xF7E4
233
- when 0x8B; array_enc << 0xF7E3
234
- when 0x8C; array_enc << 0xF7E5
235
- when 0x8D; array_enc << 0xF7E7
236
- when 0x8E; array_enc << 0xF7E9
237
- when 0x8F; array_enc << 0xF7E8
238
- when 0x90; array_enc << 0xF7E4
239
- when 0x91; array_enc << 0xF7EB
240
- when 0x92; array_enc << 0xF7ED
241
- when 0x93; array_enc << 0xF7EC
242
- when 0x94; array_enc << 0xF7EE
243
- when 0x95; array_enc << 0xF7EF
244
- when 0x96; array_enc << 0xF7F1
245
- when 0x97; array_enc << 0xF7F3
246
- when 0x98; array_enc << 0xF7F2
247
- when 0x99; array_enc << 0xF7F4
248
- when 0x9A; array_enc << 0xF7F6
249
- when 0x9B; array_enc << 0xF7F5
250
- when 0x9C; array_enc << 0xF7FA
251
- when 0x9D; array_enc << 0xF7F9
252
- when 0x9E; array_enc << 0xF7FB
253
- when 0x9F; array_enc << 0xF7FC
254
- when 0xA1; array_enc << 0x2078
255
- when 0xA2; array_enc << 0x2084
256
- when 0xA3; array_enc << 0x2083
257
- when 0xA4; array_enc << 0x2086
258
- when 0xA5; array_enc << 0x2088
259
- when 0xA6; array_enc << 0x2087
260
- when 0xA7; array_enc << 0xF6FD
261
- when 0xA9; array_enc << 0xF6DF
262
- when 0xAA; array_enc << 0x2082
263
- when 0xAC; array_enc << 0xF7A8
264
- when 0xAE; array_enc << 0xF6F5
265
- when 0xAF; array_enc << 0xF6F0
266
- when 0xB0; array_enc << 0x2085
267
- when 0xB2; array_enc << 0xF6E1
268
- when 0xB3; array_enc << 0xF6E7
269
- when 0xB4; array_enc << 0xF7FD
270
- when 0xB6; array_enc << 0xF6E3
271
- when 0xB9; array_enc << 0xF7FE
272
- when 0xBB; array_enc << 0x2089
273
- when 0xBC; array_enc << 0x2080
274
- when 0xBD; array_enc << 0xF6FF
275
- when 0xBE; array_enc << 0xF7E6 # AEsmall
276
- when 0xBF; array_enc << 0xF7F8
277
- when 0xC0; array_enc << 0xF7BF
278
- when 0xC1; array_enc << 0x2081
279
- when 0xC2; array_enc << 0xF6F9
280
- when 0xC9; array_enc << 0xF7B8
281
- when 0xCF; array_enc << 0xF6FA
282
- when 0xD0; array_enc << 0x2012
283
- when 0xD1; array_enc << 0xF6E6
284
- when 0xD6; array_enc << 0xF7A1
285
- when 0xD8; array_enc << 0xF7FF
286
- when 0xDA; array_enc << 0x00B9
287
- when 0xDB; array_enc << 0x00B2
288
- when 0xDC; array_enc << 0x00B3
289
- when 0xDD; array_enc << 0x2074
290
- when 0xDE; array_enc << 0x2075
291
- when 0xDF; array_enc << 0x2076
292
- when 0xE0; array_enc << 0x2077
293
- when 0xE1; array_enc << 0x2079
294
- when 0xE2; array_enc << 0x2070
295
- when 0xE4; array_enc << 0xF6EC
296
- when 0xE5; array_enc << 0xF6F1
297
- when 0xE6; array_enc << 0xF6F3
298
- when 0xE9; array_enc << 0xF6ED
299
- when 0xEA; array_enc << 0xF6F2
300
- when 0xEB; array_enc << 0xF6EB
301
- when 0xF1; array_enc << 0xF6EE
302
- when 0xF2; array_enc << 0xF6FB
303
- when 0xF3; array_enc << 0xF6F4
304
- when 0xF4; array_enc << 0xF7AF
305
- when 0xF5; array_enc << 0xF6EF
306
- when 0xF6; array_enc << 0x207F
307
- when 0xF7; array_enc << 0xF6EF
308
- when 0xF8; array_enc << 0xF6E2
309
- when 0xF9; array_enc << 0xF6E8
310
- when 0xFA; array_enc << 0xF6F7
311
- when 0xFB; array_enc << 0xF6FC
312
- else
313
- array_enc << num
314
- end
315
- end
316
- end
317
-
318
- # convert any glyph names to unicode codepoints
319
- array_enc = self.process_glyphnames(array_enc)
320
-
321
- # replace charcters that didn't convert to unicode nicely with something valid
322
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
323
-
324
- # pack all our Unicode codepoints into a UTF-8 string
325
- ret = array_enc.pack("U*")
326
-
327
- # set the strings encoding correctly under ruby 1.9+
328
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
329
-
330
- return ret
331
- end
332
- end
333
-
334
- # The default encoding for OSX <= v9
335
- # see: http://en.wikipedia.org/wiki/Mac_OS_Roman
336
- class MacRomanEncoding < Encoding
337
- # convert a MacRomanEncoding string into UTF-8
338
- def to_utf8(str, tounicode = nil)
339
- # content of this method borrowed from REXML::Encoding.decode_cp1252
340
- array_mac = str.unpack('C*')
341
- array_mac = self.process_differences(array_mac)
342
- array_enc = []
343
- array_mac.each do |num|
344
- if tounicode && (code = tounicode.decode(num))
345
- array_enc << code
346
- elsif tounicode
347
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
348
- else
349
- case num
350
- # change necesary characters to equivilant Unicode codepoints
351
- when 0x80; array_enc << 0x00C4
352
- when 0x81; array_enc << 0x00C5
353
- when 0x82; array_enc << 0x00C7
354
- when 0x83; array_enc << 0x00C9
355
- when 0x84; array_enc << 0x00D1
356
- when 0x85; array_enc << 0x00D6
357
- when 0x86; array_enc << 0x00DC
358
- when 0x87; array_enc << 0x00E1
359
- when 0x88; array_enc << 0x00E0
360
- when 0x89; array_enc << 0x00E2
361
- when 0x8A; array_enc << 0x00E4
362
- when 0x8B; array_enc << 0x00E3
363
- when 0x8C; array_enc << 0x00E5
364
- when 0x8D; array_enc << 0x00E7
365
- when 0x8E; array_enc << 0x00E9
366
- when 0x8F; array_enc << 0x00E8
367
- when 0x90; array_enc << 0x00EA
368
- when 0x91; array_enc << 0x00EB
369
- when 0x92; array_enc << 0x00ED
370
- when 0x93; array_enc << 0x00EC
371
- when 0x94; array_enc << 0x00EE
372
- when 0x95; array_enc << 0x00EF
373
- when 0x96; array_enc << 0x00F1
374
- when 0x97; array_enc << 0x00F3
375
- when 0x98; array_enc << 0x00F2
376
- when 0x99; array_enc << 0x00F4
377
- when 0x9A; array_enc << 0x00F6
378
- when 0x9B; array_enc << 0x00F5
379
- when 0x9C; array_enc << 0x00FA
380
- when 0x9D; array_enc << 0x00F9
381
- when 0x9E; array_enc << 0x00FB
382
- when 0x9F; array_enc << 0x00FC
383
- when 0xA0; array_enc << 0x2020
384
- when 0xA1; array_enc << 0x00B0
385
- when 0xA2; array_enc << 0x00A2
386
- when 0xA3; array_enc << 0x00A3
387
- when 0xA4; array_enc << 0x00A7
388
- when 0xA5; array_enc << 0x2022
389
- when 0xA6; array_enc << 0x00B6
390
- when 0xA7; array_enc << 0x00DF
391
- when 0xA8; array_enc << 0x00AE
392
- when 0xA9; array_enc << 0x00A9
393
- when 0xAA; array_enc << 0x2122
394
- when 0xAB; array_enc << 0x00B4
395
- when 0xAC; array_enc << 0x00A8
396
- when 0xAD; array_enc << 0x2260
397
- when 0xAE; array_enc << 0x00C6
398
- when 0xAF; array_enc << 0x00D8
399
- when 0xB0; array_enc << 0x221E
400
- when 0xB1; array_enc << 0x00B1
401
- when 0xB2; array_enc << 0x2264
402
- when 0xB3; array_enc << 0x2265
403
- when 0xB4; array_enc << 0x00A5
404
- when 0xB5; array_enc << 0x00B5
405
- when 0xB6; array_enc << 0x2202
406
- when 0xB7; array_enc << 0x2211
407
- when 0xB8; array_enc << 0x220F
408
- when 0xB9; array_enc << 0x03C0
409
- when 0xBA; array_enc << 0x222B
410
- when 0xBB; array_enc << 0x00AA
411
- when 0xBC; array_enc << 0x00BA
412
- when 0xBD; array_enc << 0x03A9
413
- when 0xBE; array_enc << 0x00E6
414
- when 0xBF; array_enc << 0x00F8
415
- when 0xC0; array_enc << 0x00BF
416
- when 0xC1; array_enc << 0x00A1
417
- when 0xC2; array_enc << 0x00AC
418
- when 0xC3; array_enc << 0x221A
419
- when 0xC4; array_enc << 0x0192
420
- when 0xC5; array_enc << 0x2248
421
- when 0xC6; array_enc << 0x2206
422
- when 0xC7; array_enc << 0x00AB
423
- when 0xC8; array_enc << 0x00BB
424
- when 0xC9; array_enc << 0x2026
425
- when 0xCA; array_enc << 0x00A0
426
- when 0xCB; array_enc << 0x00C0
427
- when 0xCC; array_enc << 0x00C3
428
- when 0xCD; array_enc << 0x00D5
429
- when 0xCE; array_enc << 0x0152
430
- when 0xCF; array_enc << 0x0153
431
- when 0xD0; array_enc << 0x2013
432
- when 0xD1; array_enc << 0x2014
433
- when 0xD2; array_enc << 0x201C
434
- when 0xD3; array_enc << 0x201D
435
- when 0xD4; array_enc << 0x2018
436
- when 0xD5; array_enc << 0x2019
437
- when 0xD6; array_enc << 0x00F7
438
- when 0xD7; array_enc << 0x25CA
439
- when 0xD8; array_enc << 0x00FF
440
- when 0xD9; array_enc << 0x0178
441
- when 0xDA; array_enc << 0x2044
442
- when 0xDB; array_enc << 0x20AC
443
- when 0xDC; array_enc << 0x2039
444
- when 0xDD; array_enc << 0x203A
445
- when 0xDE; array_enc << 0xFB01
446
- when 0xDF; array_enc << 0xFB02
447
- when 0xE0; array_enc << 0x2021
448
- when 0xE1; array_enc << 0x00B7
449
- when 0xE2; array_enc << 0x201A
450
- when 0xE3; array_enc << 0x201E
451
- when 0xE4; array_enc << 0x2030
452
- when 0xE5; array_enc << 0x00C2
453
- when 0xE6; array_enc << 0x00CA
454
- when 0xE7; array_enc << 0x00C1
455
- when 0xE8; array_enc << 0x00CB
456
- when 0xE9; array_enc << 0x00C8
457
- when 0xEA; array_enc << 0x00CD
458
- when 0xEB; array_enc << 0x00CE
459
- when 0xEC; array_enc << 0x00CF
460
- when 0xED; array_enc << 0x00CC
461
- when 0xEE; array_enc << 0x00D3
462
- when 0xEF; array_enc << 0x00D4
463
- when 0xF0; array_enc << 0xF8FF
464
- when 0xF1; array_enc << 0x00D2
465
- when 0xF2; array_enc << 0x00DA
466
- when 0xF3; array_enc << 0x00D8
467
- when 0xF4; array_enc << 0x00D9
468
- when 0xF5; array_enc << 0x0131
469
- when 0xF6; array_enc << 0x02C6
470
- when 0xF7; array_enc << 0x02DC
471
- when 0xF8; array_enc << 0x00AF
472
- when 0xF9; array_enc << 0x02D8
473
- when 0xFA; array_enc << 0x02D9
474
- when 0xFB; array_enc << 0x02DA
475
- when 0xFC; array_enc << 0x00B8
476
- when 0xFD; array_enc << 0x02DD
477
- when 0xFE; array_enc << 0x02DB
478
- when 0xFF; array_enc << 0x02C7
479
- else
480
- array_enc << num
481
- end
482
- end
483
- end
484
-
485
- # convert any glyph names to unicode codepoints
486
- array_enc = self.process_glyphnames(array_enc)
487
-
488
- # replace charcters that didn't convert to unicode nicely with something valid
489
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
490
-
491
- # pack all our Unicode codepoints into a UTF-8 string
492
- ret = array_enc.pack("U*")
493
-
494
- # set the strings encoding correctly under ruby 1.9+
495
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
496
-
497
- return ret
498
- end
499
- end
500
-
501
- class PDFDocEncoding < Encoding
502
- # convert a PDFDocEncoding string into UTF-8
503
- def to_utf8(str, tounicode = nil)
504
- array_pdf = str.unpack('C*')
505
- array_pdf = self.process_differences(array_pdf)
506
- array_enc = []
507
- array_pdf.each do |num|
508
- if tounicode && (code = tounicode.decode(num))
509
- array_enc << code
510
- elsif tounicode
511
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
512
- else
513
- case num
514
- # change necesary characters to equivilant Unicode codepoints
515
- when 0x18; array_enc << 0x02D8
516
- when 0x19; array_enc << 0x02C7
517
- when 0x1A; array_enc << 0x02C6
518
- when 0x1B; array_enc << 0x02D9
519
- when 0x1C; array_enc << 0x02DD
520
- when 0x1D; array_enc << 0x02DB
521
- when 0x1E; array_enc << 0x02DA
522
- when 0x1F; array_enc << 0x02DC
523
- when 0x7F; array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR # Undefined
524
- when 0x80; array_enc << 0x2022
525
- when 0x81; array_enc << 0x2020
526
- when 0x82; array_enc << 0x2021
527
- when 0x83; array_enc << 0x2026
528
- when 0x84; array_enc << 0x2014
529
- when 0x85; array_enc << 0x2013
530
- when 0x86; array_enc << 0x0192
531
- when 0x87; array_enc << 0x2044
532
- when 0x88; array_enc << 0x2039
533
- when 0x89; array_enc << 0x203A
534
- when 0x8A; array_enc << 0x2212
535
- when 0x8B; array_enc << 0x2030
536
- when 0x8C; array_enc << 0x201E
537
- when 0x8D; array_enc << 0x201C
538
- when 0x8E; array_enc << 0x201D
539
- when 0x8F; array_enc << 0x2018
540
- when 0x90; array_enc << 0x2019
541
- when 0x91; array_enc << 0x201A
542
- when 0x92; array_enc << 0x2122
543
- when 0x93; array_enc << 0xFB01
544
- when 0x94; array_enc << 0xFB02
545
- when 0x95; array_enc << 0x0141
546
- when 0x96; array_enc << 0x0152
547
- when 0x97; array_enc << 0x0160
548
- when 0x98; array_enc << 0x0178
549
- when 0x99; array_enc << 0x017D
550
- when 0x9A; array_enc << 0x0131
551
- when 0x9B; array_enc << 0x0142
552
- when 0x9C; array_enc << 0x0153
553
- when 0x9D; array_enc << 0x0161
554
- when 0x9E; array_enc << 0x017E
555
- when 0x9F; array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR # Undefined
556
- when 0xA0; array_enc << 0x20AC
557
- else
558
- array_enc << num
559
- end
560
- end
561
- end
562
-
563
- # convert any glyph names to unicode codepoints
564
- array_enc = self.process_glyphnames(array_enc)
565
-
566
- # replace charcters that didn't convert to unicode nicely with something valid
567
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
568
-
569
- # pack all our Unicode codepoints into a UTF-8 string
570
- ret = array_enc.pack("U*")
571
-
572
- # set the strings encoding correctly under ruby 1.9+
573
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
574
-
575
- return ret
576
- end
577
- end
578
154
 
579
- class StandardEncoding < Encoding
580
- # convert an Adobe Standard Encoding string into UTF-8
581
- def to_utf8(str, tounicode = nil)
582
- # based on mapping described at:
583
- # http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
584
- array_std = str.unpack('C*')
585
- array_std = self.process_differences(array_std)
586
- array_enc = []
587
- array_std.each do |num|
588
- if tounicode && (code = tounicode.decode(num))
589
- array_enc << code
590
- elsif tounicode
591
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
592
- else
593
- case num
594
- when 0x27; array_enc << 0x2019
595
- when 0x60; array_enc << 0x2018
596
- when 0xA4; array_enc << 0x2044
597
- when 0xA6; array_enc << 0x0192
598
- when 0xA8; array_enc << 0x00A4
599
- when 0xA9; array_enc << 0x0027
600
- when 0xAA; array_enc << 0x201C
601
- when 0xAC; array_enc << 0x2039
602
- when 0xAD; array_enc << 0x203A
603
- when 0xAE; array_enc << 0xFB01
604
- when 0xAF; array_enc << 0xFB02
605
- when 0xB1; array_enc << 0x2013
606
- when 0xB2; array_enc << 0x2020
607
- when 0xB3; array_enc << 0x2021
608
- when 0xB4; array_enc << 0x00B7
609
- when 0xB7; array_enc << 0x2022
610
- when 0xB8; array_enc << 0x201A
611
- when 0xB9; array_enc << 0x201E
612
- when 0xBA; array_enc << 0x201D
613
- when 0xBC; array_enc << 0x2026
614
- when 0xBD; array_enc << 0x2030
615
- when 0xC1; array_enc << 0x0060
616
- when 0xC2; array_enc << 0x00B4
617
- when 0xC3; array_enc << 0x02C6
618
- when 0xC4; array_enc << 0x02DC
619
- when 0xC5; array_enc << 0x00AF
620
- when 0xC6; array_enc << 0x02D8
621
- when 0xC7; array_enc << 0x02D9
622
- when 0xC8; array_enc << 0x00A8
623
- when 0xCA; array_enc << 0x02DA
624
- when 0xCB; array_enc << 0x00B8
625
- when 0xCD; array_enc << 0x02DD
626
- when 0xCE; array_enc << 0x02DB
627
- when 0xCF; array_enc << 0x02C7
628
- when 0xD0; array_enc << 0x2014
629
- when 0xE1; array_enc << 0x00C6
630
- when 0xE3; array_enc << 0x00AA
631
- when 0xE8; array_enc << 0x0141
632
- when 0xE9; array_enc << 0x00D8
633
- when 0xEA; array_enc << 0x0152
634
- when 0xEB; array_enc << 0x00BA
635
- when 0xF1; array_enc << 0x00E6
636
- when 0xF5; array_enc << 0x0131
637
- when 0xF8; array_enc << 0x0142
638
- when 0xF9; array_enc << 0x00F8
639
- when 0xFA; array_enc << 0x0153
640
- when 0xFB; array_enc << 0x00DF
641
- else
642
- array_enc << num
643
- end
644
- end
155
# Read a character-code-to-unicode mapping table from disk into @mapping.
#
# file - path of the mapping file to read
#
# Every line containing "<code>;<unicode>" contributes one entry, where
# <code> is a run of letters/digits read as hexadecimal and <unicode> is
# exactly four uppercase hexadecimal digits. Lines that do not match are
# ignored. Any previously loaded mapping is discarded.
def load_mapping(file)
  @mapping = {}

  # ruby 1.9+ needs to be told to read the file as raw bytes
  mode = (RUBY_VERSION >= "1.9") ? "r:BINARY" : "r"
  entry_pattern = /([0-9A-Za-z]+);([0-9A-F]{4})/

  File.open(file, mode) do |io|
    io.each do |line|
      found = entry_pattern.match(line)
      next unless found && found[1]
      @mapping["0x#{found[1]}".hex] = "0x#{found[2]}".hex
    end
  end
end
662
165
 
663
- class SymbolEncoding < Encoding
664
- # convert a SymbolEncoding string into UTF-8
665
- def to_utf8(str, tounicode = nil)
666
- array_symbol = str.unpack('C*')
667
- array_symbol = self.process_differences(array_symbol)
668
- array_enc = []
669
- array_symbol.each do |num|
670
- if tounicode && (code = tounicode.decode(num))
671
- array_enc << code
672
- elsif tounicode
673
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
674
- else
675
- case num
676
- when 0x22; array_enc << 0x2200
677
- when 0x24; array_enc << 0x2203
678
- when 0x27; array_enc << 0x220B
679
- when 0x2A; array_enc << 0x2217
680
- when 0x2D; array_enc << 0x2212
681
- when 0x40; array_enc << 0x2245
682
- when 0x41; array_enc << 0x0391
683
- when 0x42; array_enc << 0x0392
684
- when 0x43; array_enc << 0x03A7
685
- when 0x44; array_enc << 0x0394
686
- when 0x45; array_enc << 0x0395
687
- when 0x46; array_enc << 0x03A6
688
- when 0x47; array_enc << 0x0393
689
- when 0x48; array_enc << 0x0397
690
- when 0x49; array_enc << 0x0399
691
- when 0x4A; array_enc << 0x03D1
692
- when 0x4B; array_enc << 0x039A
693
- when 0x4C; array_enc << 0x039B
694
- when 0x4D; array_enc << 0x039C
695
- when 0x4E; array_enc << 0x039D
696
- when 0x4F; array_enc << 0x039F
697
- when 0x50; array_enc << 0x03A0
698
- when 0x51; array_enc << 0x0398
699
- when 0x52; array_enc << 0x03A1
700
- when 0x53; array_enc << 0x03A3
701
- when 0x54; array_enc << 0x03A4
702
- when 0x55; array_enc << 0x03A5
703
- when 0x56; array_enc << 0x03C2
704
- when 0x57; array_enc << 0x03A9
705
- when 0x58; array_enc << 0x039E
706
- when 0x59; array_enc << 0x03A8
707
- when 0x5A; array_enc << 0x0396
708
- when 0x5C; array_enc << 0x2234
709
- when 0x5E; array_enc << 0x22A5
710
- when 0x60; array_enc << 0xF8E5
711
- when 0x61; array_enc << 0x03B1
712
- when 0x62; array_enc << 0x03B2
713
- when 0x63; array_enc << 0x03C7
714
- when 0x64; array_enc << 0x03B4
715
- when 0x65; array_enc << 0x03B5
716
- when 0x66; array_enc << 0x03C6
717
- when 0x67; array_enc << 0x03B3
718
- when 0x68; array_enc << 0x03B7
719
- when 0x69; array_enc << 0x03B9
720
- when 0x6A; array_enc << 0x03D5
721
- when 0x6B; array_enc << 0x03BA
722
- when 0x6C; array_enc << 0x03BB
723
- when 0x6D; array_enc << 0x03BC
724
- when 0x6E; array_enc << 0x03BD
725
- when 0x6F; array_enc << 0x03BF
726
- when 0x70; array_enc << 0x03C0
727
- when 0x71; array_enc << 0x03B8
728
- when 0x72; array_enc << 0x03C1
729
- when 0x73; array_enc << 0x03C3
730
- when 0x74; array_enc << 0x03C4
731
- when 0x75; array_enc << 0x03C5
732
- when 0x76; array_enc << 0x03D6
733
- when 0x77; array_enc << 0x03C9
734
- when 0x78; array_enc << 0x03BE
735
- when 0x79; array_enc << 0x03C8
736
- when 0x7A; array_enc << 0x03B6
737
- when 0x7E; array_enc << 0x223C
738
- when 0xA0; array_enc << 0x20AC
739
- when 0xA1; array_enc << 0x03D2
740
- when 0xA2; array_enc << 0x2032
741
- when 0xA3; array_enc << 0x2264
742
- when 0xA4; array_enc << 0x2215
743
- when 0xA5; array_enc << 0x221E
744
- when 0xA6; array_enc << 0x0192
745
- when 0xA7; array_enc << 0x2663
746
- when 0xA8; array_enc << 0x2666
747
- when 0xA9; array_enc << 0x2665
748
- when 0xAA; array_enc << 0x2660
749
- when 0xAB; array_enc << 0x2194
750
- when 0xAC; array_enc << 0x2190
751
- when 0xAD; array_enc << 0x2191
752
- when 0xAE; array_enc << 0x2192
753
- when 0xAF; array_enc << 0x2193
754
- when 0xB2; array_enc << 0x2033
755
- when 0xB3; array_enc << 0x2265
756
- when 0xB4; array_enc << 0x00D7
757
- when 0xB5; array_enc << 0x221D
758
- when 0xB6; array_enc << 0x2202
759
- when 0xB7; array_enc << 0x2022
760
- when 0xB8; array_enc << 0x00F7
761
- when 0xB9; array_enc << 0x2260
762
- when 0xBA; array_enc << 0x2261
763
- when 0xBB; array_enc << 0x2248
764
- when 0xBC; array_enc << 0x2026
765
- when 0xBD; array_enc << 0xF8E6
766
- when 0xBE; array_enc << 0xF8E7
767
- when 0xBF; array_enc << 0x21B5
768
- when 0xC0; array_enc << 0x2135
769
- when 0xC1; array_enc << 0x2111
770
- when 0xC2; array_enc << 0x211C
771
- when 0xC3; array_enc << 0x2118
772
- when 0xC4; array_enc << 0x2297
773
- when 0xC5; array_enc << 0x2295
774
- when 0xC6; array_enc << 0x2205
775
- when 0xC7; array_enc << 0x2229
776
- when 0xC8; array_enc << 0x222A
777
- when 0xC9; array_enc << 0x2283
778
- when 0xCA; array_enc << 0x2287
779
- when 0xCB; array_enc << 0x2284
780
- when 0xCC; array_enc << 0x2282
781
- when 0xCD; array_enc << 0x2286
782
- when 0xCE; array_enc << 0x2208
783
- when 0xCF; array_enc << 0x2209
784
- when 0xD0; array_enc << 0x2220
785
- when 0xD1; array_enc << 0x2207
786
- when 0xD2; array_enc << 0xF6DA
787
- when 0xD3; array_enc << 0xF6D9
788
- when 0xD4; array_enc << 0xF6DB
789
- when 0xD5; array_enc << 0x220F
790
- when 0xD6; array_enc << 0x221A
791
- when 0xD7; array_enc << 0x22C5
792
- when 0xD8; array_enc << 0x00AC
793
- when 0xD9; array_enc << 0x2227
794
- when 0xDA; array_enc << 0x2228
795
- when 0xDB; array_enc << 0x21D4
796
- when 0xDC; array_enc << 0x21D0
797
- when 0xDD; array_enc << 0x21D1
798
- when 0xDE; array_enc << 0x21D2
799
- when 0xDF; array_enc << 0x21D3
800
- when 0xE0; array_enc << 0x25CA
801
- when 0xE1; array_enc << 0x2329
802
- when 0xE2; array_enc << 0xF8E8
803
- when 0xE3; array_enc << 0xF8E9
804
- when 0xE4; array_enc << 0xF8EA
805
- when 0xE5; array_enc << 0x2211
806
- when 0xE6; array_enc << 0xF8EB
807
- when 0xE7; array_enc << 0xF8EC
808
- when 0xE8; array_enc << 0xF8ED
809
- when 0xE9; array_enc << 0xF8EE
810
- when 0xEA; array_enc << 0xF8EF
811
- when 0xEB; array_enc << 0xF8F0
812
- when 0xEC; array_enc << 0xF8F1
813
- when 0xED; array_enc << 0xF8F2
814
- when 0xEE; array_enc << 0xF8F3
815
- when 0xEF; array_enc << 0xF8F4
816
- when 0xF1; array_enc << 0x232A
817
- when 0xF2; array_enc << 0x222B
818
- when 0xF3; array_enc << 0x2320
819
- when 0xF4; array_enc << 0xF8F5
820
- when 0xF5; array_enc << 0x2321
821
- when 0xF6; array_enc << 0xF8F6
822
- when 0xF7; array_enc << 0xF8F7
823
- when 0xF8; array_enc << 0xF8F8
824
- when 0xF9; array_enc << 0xF8F9
825
- when 0xFA; array_enc << 0xF8FA
826
- when 0xFB; array_enc << 0xF8FB
827
- when 0xFC; array_enc << 0xF8FC
828
- when 0xFD; array_enc << 0xF8FD
829
- when 0xFE; array_enc << 0xF8FE
830
- else
831
- array_enc << num
832
- end
833
- end
834
- end
835
-
836
- # replace charcters that didn't convert to unicode nicely with something valid
837
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
838
-
839
- # convert any glyph names to unicode codepoints
840
- array_enc = self.process_glyphnames(array_enc)
841
-
842
- # pack all our Unicode codepoints into a UTF-8 string
843
- ret = array_enc.pack("U*")
844
-
845
- # set the strings encoding correctly under ruby 1.9+
846
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
847
-
848
- return ret
849
- end
850
- end
851
-
852
class UTF16Encoding < Encoding
  # Code unit of the big-endian UTF-16 Byte Order Mark (bytes 0xFE 0xFF).
  BYTE_ORDER_MARK = 0xFEFF

  # Convert a big-endian UTF-16 string into UTF-8.
  #
  # str       - the raw bytes to convert; read as 16-bit big-endian code units.
  # tounicode - accepted for signature compatibility with the sibling
  #             encodings; this implementation does not consult it.
  #
  # Returns a new String holding the UTF-8 encoded text. +str+ is not modified.
  def to_utf8(str, tounicode = nil)
    units = str.unpack("n*")

    # remove the UTF-16 Byte Order Mark if it exists. Checking the unpacked
    # code unit (instead of comparing byte strings) keeps the test
    # independent of the encoding ruby 1.9+ has tagged +str+ with.
    units.shift if units.first == BYTE_ORDER_MARK

    # pack all our Unicode codepoints into a UTF-8 string
    ret = self.class.merge_surrogates(units).pack("U*")

    # set the strings encoding correctly under ruby 1.9+
    ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

    ret
  end

  # Collapse UTF-16 surrogate pairs into single Unicode codepoints.
  #
  # units - Array of 16-bit code units.
  #
  # Returns a new Array of codepoints. A high surrogate (0xD800-0xDBFF)
  # immediately followed by a low surrogate (0xDC00-0xDFFF) becomes one
  # codepoint above 0xFFFF; every other unit, including an unpaired
  # surrogate, is copied through unchanged.
  def self.merge_surrogates(units)
    merged = []
    i = 0
    while i < units.size
      high = units[i]
      low  = units[i + 1]
      if high >= 0xD800 && high <= 0xDBFF && low && low >= 0xDC00 && low <= 0xDFFF
        merged << (0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00))
        i += 2
      else
        merged << high
        i += 1
      end
    end
    merged
  end
end
868
-
869
class WinAnsiEncoding < Encoding
  # Bytes in the 0x80-0x9F range whose Unicode codepoint differs from the
  # byte value (i.e. the characters added compared to iso-8859-1). Any
  # value not listed here is passed through unchanged by #to_utf8.
  #
  # table borrowed from REXML::Encoding.decode_cp1252
  # for further reading:
  # http://www.intertwingly.net/stories/2004/04/14/i18n.html
  CP1252_TO_UNICODE = {
    0x80 => 0x20AC, # euro sign
    0x82 => 0x201A, # single low-9 quotation mark
    0x83 => 0x0192, # latin small letter f with hook
    0x84 => 0x201E, # double low-9 quotation mark
    0x85 => 0x2026, # horizontal ellipsis
    0x86 => 0x2020, # dagger
    0x87 => 0x2021, # double dagger
    0x88 => 0x02C6, # modifier letter circumflex accent
    0x89 => 0x2030, # per mille sign
    0x8A => 0x0160, # latin capital letter S with caron
    0x8B => 0x2039, # single left-pointing angle quotation mark
    0x8C => 0x0152, # latin capital ligature OE
    0x8E => 0x017D, # latin capital letter Z with caron
    0x91 => 0x2018, # left single quotation mark
    0x92 => 0x2019, # right single quotation mark
    0x93 => 0x201C, # left double quotation mark
    0x94 => 0x201D, # right double quotation mark
    0x95 => 0x2022, # bullet
    0x96 => 0x2013, # en dash
    0x97 => 0x2014, # em dash
    0x98 => 0x02DC, # small tilde
    0x99 => 0x2122, # trade mark sign
    0x9A => 0x0161, # latin small letter s with caron
    0x9B => 0x203A, # single right-pointing angle quotation mark
    0x9C => 0x0153, # latin small ligature oe (was wrongly 0x0152, the capital)
    0x9E => 0x017E, # latin small letter z with caron
    0x9F => 0x0178  # latin capital letter Y with diaeresis
  }.freeze

  # convert a WinAnsiEncoding string into UTF-8
  #
  # str       - the raw bytes to convert; read one byte per character.
  # tounicode - optional object responding to +decode(code)+. When given it
  #             takes over the mapping entirely: a code it cannot decode
  #             becomes PDF::Reader::Encoding::UNKNOWN_CHAR and the built-in
  #             table is not consulted.
  #
  # Returns a new String holding the UTF-8 encoded text.
  def to_utf8(str, tounicode = nil)
    codes = self.process_differences(str.unpack('C*'))

    array_enc = codes.collect do |num|
      if tounicode
        tounicode.decode(num) || PDF::Reader::Encoding::UNKNOWN_CHAR
      else
        # anything outside the table is passed through untouched
        CP1252_TO_UNICODE.fetch(num, num)
      end
    end

    # convert any glyph names to unicode codepoints
    array_enc = self.process_glyphnames(array_enc)

    # replace characters that didn't convert to unicode nicely with something valid
    array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }

    # pack all our Unicode codepoints into a UTF-8 string
    ret = array_enc.pack("U*")

    # set the strings encoding correctly under ruby 1.9+
    ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

    ret
  end
end
934
-
935
class ZapfDingbatsEncoding < Encoding
  # Lookup table from ZapfDingbats character codes to Unicode codepoints.
  #
  # mapping to unicode taken from:
  #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
  #
  # Most codes sit in contiguous runs at a fixed distance from their
  # codepoint, so the table is generated from [codes, offset] pairs and the
  # handful of glyphs that live elsewhere in Unicode are patched in
  # afterwards. Codes that are absent from the table (0x00-0x20, 0x7F,
  # 0x8E-0xA0, 0xF0, 0xFF) are passed through unchanged by #to_utf8.
  ZAPF_TO_UNICODE = begin
    table = {}

    [
      [0x21..0x7E, 0x26E0], # U+2701..U+275E
      [0x80..0x8D, 0xF857], # U+F8D7..U+F8E4 (private use area)
      [0xA1..0xA7, 0x26C0], # U+2761..U+2767
      [0xAC..0xB5, 0x23B4], # U+2460..U+2469
      [0xB6..0xEF, 0x26C0], # U+2776..U+27AF
      [0xF1..0xFE, 0x26C0]  # U+27B1..U+27BE
    ].each do |codes, offset|
      codes.each { |code| table[code] = code + offset }
    end

    # glyphs whose codepoint lies outside the run they would otherwise
    # belong to
    table.update(
      0x25 => 0x260E,
      0x2A => 0x261B,
      0x2B => 0x261E,
      0x48 => 0x2605,
      0x6C => 0x25CF,
      0x6E => 0x25A0,
      0x73 => 0x25B2,
      0x74 => 0x25BC,
      0x75 => 0x25C6,
      0x77 => 0x25D7,
      0xA8 => 0x2663,
      0xA9 => 0x2666,
      0xAA => 0x2665,
      0xAB => 0x2660,
      0xD5 => 0x2192,
      0xD6 => 0x2194,
      0xD7 => 0x2195
    )

    table.freeze
  end

  # convert a ZapfDingbatsEncoding string into UTF-8
  #
  # str       - the raw bytes to convert; read one byte per character.
  # tounicode - optional object responding to +decode(code)+. When given it
  #             takes over the mapping entirely: a code it cannot decode
  #             becomes PDF::Reader::Encoding::UNKNOWN_CHAR and the built-in
  #             table is not consulted.
  #
  # Returns a new String holding the UTF-8 encoded text.
  def to_utf8(str, tounicode = nil)
    codes = self.process_differences(str.unpack('C*'))

    array_enc = codes.collect do |num|
      if tounicode
        tounicode.decode(num) || PDF::Reader::Encoding::UNKNOWN_CHAR
      else
        # anything outside the table is passed through untouched
        ZAPF_TO_UNICODE.fetch(num, num)
      end
    end

    # convert any glyph names to unicode codepoints
    array_enc = self.process_glyphnames(array_enc)

    # replace characters that didn't convert to unicode nicely with something valid
    array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }

    # pack all our Unicode codepoints into a UTF-8 string
    ret = array_enc.pack("U*")

    # set the strings encoding correctly under ruby 1.9+
    ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

    ret
  end
end
1172
166
  end
1173
167
  end