pdf-reader 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -32,6 +32,48 @@ class PDF::Reader
32
32
 
33
33
  attr_reader :differences
34
34
 
35
+ def initialize(enc)
36
+ if enc.kind_of?(Hash)
37
+ self.differences=enc[:Differences] if enc[:Differences]
38
+ enc = enc[:Encoding] || enc[:BaseEncoding]
39
+ elsif enc != nil
40
+ enc = enc.to_sym
41
+ end
42
+
43
+ case enc
44
+ when nil then
45
+ load_mapping File.dirname(__FILE__) + "/encodings/standard.txt"
46
+ @unpack = "C*"
47
+ when "Identity-H".to_sym then
48
+ @unpack = "n*"
49
+ @to_unicode_required = true
50
+ when :MacRomanEncoding then
51
+ load_mapping File.dirname(__FILE__) + "/encodings/mac_roman.txt"
52
+ @unpack = "C*"
53
+ when :MacExpertEncoding then
54
+ load_mapping File.dirname(__FILE__) + "/encodings/mac_expert.txt"
55
+ @unpack = "C*"
56
+ when :PDFDocEncoding then
57
+ load_mapping File.dirname(__FILE__) + "/encodings/pdf_doc.txt"
58
+ @unpack = "C*"
59
+ when :StandardEncoding then
60
+ load_mapping File.dirname(__FILE__) + "/encodings/standard.txt"
61
+ @unpack = "C*"
62
+ when :SymbolEncoding then
63
+ load_mapping File.dirname(__FILE__) + "/encodings/symbol.txt"
64
+ @unpack = "C*"
65
+ when :UTF16Encoding then
66
+ @unpack = "n*"
67
+ when :WinAnsiEncoding then
68
+ load_mapping File.dirname(__FILE__) + "/encodings/win_ansi.txt"
69
+ @unpack = "C*"
70
+ when :ZapfDingbatsEncoding then
71
+ load_mapping File.dirname(__FILE__) + "/encodings/zapf_dingbats.txt"
72
+ @unpack = "C*"
73
+ else raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
74
+ end
75
+ end
76
+
35
77
  # set the differences table for this encoding. should be an array in the following format:
36
78
  #
37
79
  # [25, :A, 26, :B]
@@ -57,1117 +99,69 @@ class PDF::Reader
57
99
  @differences
58
100
  end
59
101
 
60
- # Takes the "Encoding" value of a Font dictionary and builds a PDF::Reader::Encoding object
61
- def self.factory(enc)
62
- if enc.kind_of?(Hash)
63
- diff = enc[:Differences]
64
- enc = enc[:Encoding] || enc[:BaseEncoding]
65
- elsif enc != nil
66
- enc = enc.to_sym
67
- end
102
+ # convert the specified string to utf8
103
+ def to_utf8(str, tounicode = nil)
68
104
 
69
- case enc
70
- when nil then enc = PDF::Reader::Encoding::StandardEncoding.new
71
- when "Identity-H".to_sym then enc = PDF::Reader::Encoding::IdentityH.new
72
- when :MacRomanEncoding then enc = PDF::Reader::Encoding::MacRomanEncoding.new
73
- when :MacExpertEncoding then enc = PDF::Reader::Encoding::MacExpertEncoding.new
74
- when :StandardEncoding then enc = PDF::Reader::Encoding::StandardEncoding.new
75
- when :SymbolEncoding then enc = PDF::Reader::Encoding::SymbolEncoding.new
76
- when :WinAnsiEncoding then enc = PDF::Reader::Encoding::WinAnsiEncoding.new
77
- when :ZapfDingbatsEncoding then enc = PDF::Reader::Encoding::ZapfDingbatsEncoding.new
78
- else raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
105
+ # unpack the single bytes
106
+ array_orig = str.unpack(@unpack)
107
+
108
+ # replace any relevant bytes with a glyph name
109
+ array_orig = process_differences(array_orig)
110
+
111
+ # replace any remaining bytes with a unicode codepoint
112
+ array_enc = []
113
+ array_orig.each do |num|
114
+ if tounicode && (code = tounicode.decode(num))
115
+ array_enc << code
116
+ elsif tounicode || (tounicode.nil? && @to_unicode_required)
117
+ array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
118
+ elsif @mapping && @mapping[num]
119
+ array_enc << @mapping[num]
120
+ else
121
+ array_enc << num
122
+ end
79
123
  end
80
124
 
81
- enc.differences = diff if enc && diff
125
+ # convert any glyph names to unicode codepoints
126
+ array_enc = process_glyphnames(array_enc)
82
127
 
83
- return enc
84
- end
128
+ # replace charcters that didn't convert to unicode nicely with something valid
129
+ array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
85
130
 
86
- def to_utf8(str, tounicode = nil)
87
- # abstract method, of sorts
88
- raise RuntimeError, "Called abstract method"
131
+ # pack all our Unicode codepoints into a UTF-8 string
132
+ ret = array_enc.pack("U*")
133
+
134
+ # set the strings encoding correctly under ruby 1.9+
135
+ ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
136
+
137
+ return ret
89
138
  end
90
139
 
140
+ private
141
+
91
142
  # accepts an array of byte numbers, and replaces any that have entries in the differences table
92
143
  # with a glyph name
93
144
  def process_differences(arr)
94
145
  @differences ||= {}
95
146
  arr.collect! { |n| @differences[n].nil? ? n : @differences[n]}
96
147
  end
97
- protected :process_differences
98
148
 
99
149
  # accepts an array of unicode code points and glyphnames, and converts any glyph names to codepoints
100
150
  def process_glyphnames(arr)
101
151
  @differences ||= {}
102
152
  arr.collect! { |n| n.kind_of?(Numeric) ? n : PDF::Reader::Font.glyphnames[n]}
103
153
  end
104
- protected :process_glyphnames
105
-
106
- class IdentityH < Encoding
107
- def to_utf8(str, tounicode = nil)
108
-
109
- array_enc = []
110
-
111
- # iterate over string, reading it in 2 byte chunks and interpreting those
112
- # chunks as ints
113
- str.unpack("n*").each do |num|
114
-
115
- # convert the int to a unicode codepoint if possible.
116
- # without a ToUnicode CMap, it's impossible to reliably convert this text
117
- # to unicode, so just replace each character with a little box. Big smacks
118
- # the the PDF producing app.
119
- if tounicode && (code = tounicode.decode(num))
120
- array_enc << code
121
- else
122
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
123
- end
124
- end
125
-
126
- # replace charcters that didn't convert to unicode nicely with something valid
127
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
128
-
129
- # pack all our Unicode codepoints into a UTF-8 string
130
- ret = array_enc.pack("U*")
131
-
132
- # set the strings encoding correctly under ruby 1.9+
133
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
134
-
135
- return ret
136
- end
137
- end
138
-
139
- class MacExpertEncoding < Encoding
140
- # convert a MacExpertEncoding string into UTF-8
141
- def to_utf8(str, tounicode = nil)
142
- array_expert = str.unpack('C*')
143
- array_expert = self.process_differences(array_expert)
144
- array_enc = []
145
- array_expert.each do |num|
146
- if tounicode && (code = tounicode.decode(num))
147
- array_enc << code
148
- elsif tounicode
149
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
150
- else
151
- case num
152
- # change necesary characters to equivilant Unicode codepoints
153
- when 0x21; array_enc << 0xF721
154
- when 0x22; array_enc << 0xF6F8 # Hungarumlautsmall
155
- when 0x23; array_enc << 0xF7A2
156
- when 0x24; array_enc << 0xF724
157
- when 0x25; array_enc << 0xF6E4
158
- when 0x26; array_enc << 0xF726
159
- when 0x27; array_enc << 0xF7B4
160
- when 0x28; array_enc << 0x207D
161
- when 0x29; array_enc << 0xF07E
162
- when 0x2A; array_enc << 0x2025
163
- when 0x2B; array_enc << 0x2024
164
- when 0x2F; array_enc << 0x2044
165
- when 0x30; array_enc << 0xF730
166
- when 0x31; array_enc << 0xF731
167
- when 0x32; array_enc << 0xF732
168
- when 0x33; array_enc << 0xF733
169
- when 0x34; array_enc << 0xF734
170
- when 0x35; array_enc << 0xF735
171
- when 0x36; array_enc << 0xF736
172
- when 0x37; array_enc << 0xF737
173
- when 0x38; array_enc << 0xF738
174
- when 0x39; array_enc << 0xF739
175
- when 0x3D; array_enc << 0xF6DE
176
- when 0x3F; array_enc << 0xF73F
177
- when 0x44; array_enc << 0xF7F0
178
- when 0x47; array_enc << 0x00BC
179
- when 0x48; array_enc << 0x00BD
180
- when 0x49; array_enc << 0x00BE
181
- when 0x4A; array_enc << 0x215B
182
- when 0x4B; array_enc << 0x215C
183
- when 0x4C; array_enc << 0x215D
184
- when 0x4D; array_enc << 0x215E
185
- when 0x4E; array_enc << 0x2153
186
- when 0x4F; array_enc << 0x2154
187
- when 0x56; array_enc << 0xFB00
188
- when 0x57; array_enc << 0xFB01
189
- when 0x58; array_enc << 0xFB02
190
- when 0x59; array_enc << 0xFB03
191
- when 0x5A; array_enc << 0xFB04
192
- when 0x5B; array_enc << 0x208D
193
- when 0x5D; array_enc << 0x208E
194
- when 0x5E; array_enc << 0xF6F6
195
- when 0x5F; array_enc << 0xF6E5
196
- when 0x60; array_enc << 0xF760
197
- when 0x61; array_enc << 0xF761
198
- when 0x62; array_enc << 0xF762
199
- when 0x63; array_enc << 0xF763
200
- when 0x64; array_enc << 0xF764
201
- when 0x65; array_enc << 0xF765
202
- when 0x66; array_enc << 0xF766
203
- when 0x67; array_enc << 0xF767
204
- when 0x68; array_enc << 0xF768
205
- when 0x69; array_enc << 0xF769
206
- when 0x6A; array_enc << 0xF76A
207
- when 0x6B; array_enc << 0xF76B
208
- when 0x6C; array_enc << 0xF76C
209
- when 0x6D; array_enc << 0xF76D
210
- when 0x6E; array_enc << 0xF76E
211
- when 0x6F; array_enc << 0xF76F
212
- when 0x70; array_enc << 0xF770
213
- when 0x71; array_enc << 0xF771
214
- when 0x72; array_enc << 0xF772
215
- when 0x73; array_enc << 0xF773
216
- when 0x74; array_enc << 0xF774
217
- when 0x75; array_enc << 0xF775
218
- when 0x76; array_enc << 0xF776
219
- when 0x77; array_enc << 0xF777
220
- when 0x78; array_enc << 0xF778
221
- when 0x79; array_enc << 0xF779
222
- when 0x7A; array_enc << 0xF77A
223
- when 0x7B; array_enc << 0x20A1
224
- when 0x7C; array_enc << 0xF6DC
225
- when 0x7D; array_enc << 0xF6DD
226
- when 0x7E; array_enc << 0xF6FE
227
- when 0x81; array_enc << 0xF6E9
228
- when 0x82; array_enc << 0xF6E0
229
- when 0x87; array_enc << 0xF7E1 # Acircumflexsmall
230
- when 0x88; array_enc << 0xF7E0
231
- when 0x89; array_enc << 0xF7E2 # Acutesmall
232
- when 0x8A; array_enc << 0xF7E4
233
- when 0x8B; array_enc << 0xF7E3
234
- when 0x8C; array_enc << 0xF7E5
235
- when 0x8D; array_enc << 0xF7E7
236
- when 0x8E; array_enc << 0xF7E9
237
- when 0x8F; array_enc << 0xF7E8
238
- when 0x90; array_enc << 0xF7E4
239
- when 0x91; array_enc << 0xF7EB
240
- when 0x92; array_enc << 0xF7ED
241
- when 0x93; array_enc << 0xF7EC
242
- when 0x94; array_enc << 0xF7EE
243
- when 0x95; array_enc << 0xF7EF
244
- when 0x96; array_enc << 0xF7F1
245
- when 0x97; array_enc << 0xF7F3
246
- when 0x98; array_enc << 0xF7F2
247
- when 0x99; array_enc << 0xF7F4
248
- when 0x9A; array_enc << 0xF7F6
249
- when 0x9B; array_enc << 0xF7F5
250
- when 0x9C; array_enc << 0xF7FA
251
- when 0x9D; array_enc << 0xF7F9
252
- when 0x9E; array_enc << 0xF7FB
253
- when 0x9F; array_enc << 0xF7FC
254
- when 0xA1; array_enc << 0x2078
255
- when 0xA2; array_enc << 0x2084
256
- when 0xA3; array_enc << 0x2083
257
- when 0xA4; array_enc << 0x2086
258
- when 0xA5; array_enc << 0x2088
259
- when 0xA6; array_enc << 0x2087
260
- when 0xA7; array_enc << 0xF6FD
261
- when 0xA9; array_enc << 0xF6DF
262
- when 0xAA; array_enc << 0x2082
263
- when 0xAC; array_enc << 0xF7A8
264
- when 0xAE; array_enc << 0xF6F5
265
- when 0xAF; array_enc << 0xF6F0
266
- when 0xB0; array_enc << 0x2085
267
- when 0xB2; array_enc << 0xF6E1
268
- when 0xB3; array_enc << 0xF6E7
269
- when 0xB4; array_enc << 0xF7FD
270
- when 0xB6; array_enc << 0xF6E3
271
- when 0xB9; array_enc << 0xF7FE
272
- when 0xBB; array_enc << 0x2089
273
- when 0xBC; array_enc << 0x2080
274
- when 0xBD; array_enc << 0xF6FF
275
- when 0xBE; array_enc << 0xF7E6 # AEsmall
276
- when 0xBF; array_enc << 0xF7F8
277
- when 0xC0; array_enc << 0xF7BF
278
- when 0xC1; array_enc << 0x2081
279
- when 0xC2; array_enc << 0xF6F9
280
- when 0xC9; array_enc << 0xF7B8
281
- when 0xCF; array_enc << 0xF6FA
282
- when 0xD0; array_enc << 0x2012
283
- when 0xD1; array_enc << 0xF6E6
284
- when 0xD6; array_enc << 0xF7A1
285
- when 0xD8; array_enc << 0xF7FF
286
- when 0xDA; array_enc << 0x00B9
287
- when 0xDB; array_enc << 0x00B2
288
- when 0xDC; array_enc << 0x00B3
289
- when 0xDD; array_enc << 0x2074
290
- when 0xDE; array_enc << 0x2075
291
- when 0xDF; array_enc << 0x2076
292
- when 0xE0; array_enc << 0x2077
293
- when 0xE1; array_enc << 0x2079
294
- when 0xE2; array_enc << 0x2070
295
- when 0xE4; array_enc << 0xF6EC
296
- when 0xE5; array_enc << 0xF6F1
297
- when 0xE6; array_enc << 0xF6F3
298
- when 0xE9; array_enc << 0xF6ED
299
- when 0xEA; array_enc << 0xF6F2
300
- when 0xEB; array_enc << 0xF6EB
301
- when 0xF1; array_enc << 0xF6EE
302
- when 0xF2; array_enc << 0xF6FB
303
- when 0xF3; array_enc << 0xF6F4
304
- when 0xF4; array_enc << 0xF7AF
305
- when 0xF5; array_enc << 0xF6EF
306
- when 0xF6; array_enc << 0x207F
307
- when 0xF7; array_enc << 0xF6EF
308
- when 0xF8; array_enc << 0xF6E2
309
- when 0xF9; array_enc << 0xF6E8
310
- when 0xFA; array_enc << 0xF6F7
311
- when 0xFB; array_enc << 0xF6FC
312
- else
313
- array_enc << num
314
- end
315
- end
316
- end
317
-
318
- # convert any glyph names to unicode codepoints
319
- array_enc = self.process_glyphnames(array_enc)
320
-
321
- # replace charcters that didn't convert to unicode nicely with something valid
322
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
323
-
324
- # pack all our Unicode codepoints into a UTF-8 string
325
- ret = array_enc.pack("U*")
326
-
327
- # set the strings encoding correctly under ruby 1.9+
328
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
329
-
330
- return ret
331
- end
332
- end
333
-
334
- # The default encoding for OSX <= v9
335
- # see: http://en.wikipedia.org/wiki/Mac_OS_Roman
336
- class MacRomanEncoding < Encoding
337
- # convert a MacRomanEncoding string into UTF-8
338
- def to_utf8(str, tounicode = nil)
339
- # content of this method borrowed from REXML::Encoding.decode_cp1252
340
- array_mac = str.unpack('C*')
341
- array_mac = self.process_differences(array_mac)
342
- array_enc = []
343
- array_mac.each do |num|
344
- if tounicode && (code = tounicode.decode(num))
345
- array_enc << code
346
- elsif tounicode
347
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
348
- else
349
- case num
350
- # change necesary characters to equivilant Unicode codepoints
351
- when 0x80; array_enc << 0x00C4
352
- when 0x81; array_enc << 0x00C5
353
- when 0x82; array_enc << 0x00C7
354
- when 0x83; array_enc << 0x00C9
355
- when 0x84; array_enc << 0x00D1
356
- when 0x85; array_enc << 0x00D6
357
- when 0x86; array_enc << 0x00DC
358
- when 0x87; array_enc << 0x00E1
359
- when 0x88; array_enc << 0x00E0
360
- when 0x89; array_enc << 0x00E2
361
- when 0x8A; array_enc << 0x00E4
362
- when 0x8B; array_enc << 0x00E3
363
- when 0x8C; array_enc << 0x00E5
364
- when 0x8D; array_enc << 0x00E7
365
- when 0x8E; array_enc << 0x00E9
366
- when 0x8F; array_enc << 0x00E8
367
- when 0x90; array_enc << 0x00EA
368
- when 0x91; array_enc << 0x00EB
369
- when 0x92; array_enc << 0x00ED
370
- when 0x93; array_enc << 0x00EC
371
- when 0x94; array_enc << 0x00EE
372
- when 0x95; array_enc << 0x00EF
373
- when 0x96; array_enc << 0x00F1
374
- when 0x97; array_enc << 0x00F3
375
- when 0x98; array_enc << 0x00F2
376
- when 0x99; array_enc << 0x00F4
377
- when 0x9A; array_enc << 0x00F6
378
- when 0x9B; array_enc << 0x00F5
379
- when 0x9C; array_enc << 0x00FA
380
- when 0x9D; array_enc << 0x00F9
381
- when 0x9E; array_enc << 0x00FB
382
- when 0x9F; array_enc << 0x00FC
383
- when 0xA0; array_enc << 0x2020
384
- when 0xA1; array_enc << 0x00B0
385
- when 0xA2; array_enc << 0x00A2
386
- when 0xA3; array_enc << 0x00A3
387
- when 0xA4; array_enc << 0x00A7
388
- when 0xA5; array_enc << 0x2022
389
- when 0xA6; array_enc << 0x00B6
390
- when 0xA7; array_enc << 0x00DF
391
- when 0xA8; array_enc << 0x00AE
392
- when 0xA9; array_enc << 0x00A9
393
- when 0xAA; array_enc << 0x2122
394
- when 0xAB; array_enc << 0x00B4
395
- when 0xAC; array_enc << 0x00A8
396
- when 0xAD; array_enc << 0x2260
397
- when 0xAE; array_enc << 0x00C6
398
- when 0xAF; array_enc << 0x00D8
399
- when 0xB0; array_enc << 0x221E
400
- when 0xB1; array_enc << 0x00B1
401
- when 0xB2; array_enc << 0x2264
402
- when 0xB3; array_enc << 0x2265
403
- when 0xB4; array_enc << 0x00A5
404
- when 0xB5; array_enc << 0x00B5
405
- when 0xB6; array_enc << 0x2202
406
- when 0xB7; array_enc << 0x2211
407
- when 0xB8; array_enc << 0x220F
408
- when 0xB9; array_enc << 0x03C0
409
- when 0xBA; array_enc << 0x222B
410
- when 0xBB; array_enc << 0x00AA
411
- when 0xBC; array_enc << 0x00BA
412
- when 0xBD; array_enc << 0x03A9
413
- when 0xBE; array_enc << 0x00E6
414
- when 0xBF; array_enc << 0x00F8
415
- when 0xC0; array_enc << 0x00BF
416
- when 0xC1; array_enc << 0x00A1
417
- when 0xC2; array_enc << 0x00AC
418
- when 0xC3; array_enc << 0x221A
419
- when 0xC4; array_enc << 0x0192
420
- when 0xC5; array_enc << 0x2248
421
- when 0xC6; array_enc << 0x2206
422
- when 0xC7; array_enc << 0x00AB
423
- when 0xC8; array_enc << 0x00BB
424
- when 0xC9; array_enc << 0x2026
425
- when 0xCA; array_enc << 0x00A0
426
- when 0xCB; array_enc << 0x00C0
427
- when 0xCC; array_enc << 0x00C3
428
- when 0xCD; array_enc << 0x00D5
429
- when 0xCE; array_enc << 0x0152
430
- when 0xCF; array_enc << 0x0153
431
- when 0xD0; array_enc << 0x2013
432
- when 0xD1; array_enc << 0x2014
433
- when 0xD2; array_enc << 0x201C
434
- when 0xD3; array_enc << 0x201D
435
- when 0xD4; array_enc << 0x2018
436
- when 0xD5; array_enc << 0x2019
437
- when 0xD6; array_enc << 0x00F7
438
- when 0xD7; array_enc << 0x25CA
439
- when 0xD8; array_enc << 0x00FF
440
- when 0xD9; array_enc << 0x0178
441
- when 0xDA; array_enc << 0x2044
442
- when 0xDB; array_enc << 0x20AC
443
- when 0xDC; array_enc << 0x2039
444
- when 0xDD; array_enc << 0x203A
445
- when 0xDE; array_enc << 0xFB01
446
- when 0xDF; array_enc << 0xFB02
447
- when 0xE0; array_enc << 0x2021
448
- when 0xE1; array_enc << 0x00B7
449
- when 0xE2; array_enc << 0x201A
450
- when 0xE3; array_enc << 0x201E
451
- when 0xE4; array_enc << 0x2030
452
- when 0xE5; array_enc << 0x00C2
453
- when 0xE6; array_enc << 0x00CA
454
- when 0xE7; array_enc << 0x00C1
455
- when 0xE8; array_enc << 0x00CB
456
- when 0xE9; array_enc << 0x00C8
457
- when 0xEA; array_enc << 0x00CD
458
- when 0xEB; array_enc << 0x00CE
459
- when 0xEC; array_enc << 0x00CF
460
- when 0xED; array_enc << 0x00CC
461
- when 0xEE; array_enc << 0x00D3
462
- when 0xEF; array_enc << 0x00D4
463
- when 0xF0; array_enc << 0xF8FF
464
- when 0xF1; array_enc << 0x00D2
465
- when 0xF2; array_enc << 0x00DA
466
- when 0xF3; array_enc << 0x00D8
467
- when 0xF4; array_enc << 0x00D9
468
- when 0xF5; array_enc << 0x0131
469
- when 0xF6; array_enc << 0x02C6
470
- when 0xF7; array_enc << 0x02DC
471
- when 0xF8; array_enc << 0x00AF
472
- when 0xF9; array_enc << 0x02D8
473
- when 0xFA; array_enc << 0x02D9
474
- when 0xFB; array_enc << 0x02DA
475
- when 0xFC; array_enc << 0x00B8
476
- when 0xFD; array_enc << 0x02DD
477
- when 0xFE; array_enc << 0x02DB
478
- when 0xFF; array_enc << 0x02C7
479
- else
480
- array_enc << num
481
- end
482
- end
483
- end
484
-
485
- # convert any glyph names to unicode codepoints
486
- array_enc = self.process_glyphnames(array_enc)
487
-
488
- # replace charcters that didn't convert to unicode nicely with something valid
489
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
490
-
491
- # pack all our Unicode codepoints into a UTF-8 string
492
- ret = array_enc.pack("U*")
493
-
494
- # set the strings encoding correctly under ruby 1.9+
495
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
496
-
497
- return ret
498
- end
499
- end
500
-
501
- class PDFDocEncoding < Encoding
502
- # convert a PDFDocEncoding string into UTF-8
503
- def to_utf8(str, tounicode = nil)
504
- array_pdf = str.unpack('C*')
505
- array_pdf = self.process_differences(array_pdf)
506
- array_enc = []
507
- array_pdf.each do |num|
508
- if tounicode && (code = tounicode.decode(num))
509
- array_enc << code
510
- elsif tounicode
511
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
512
- else
513
- case num
514
- # change necesary characters to equivilant Unicode codepoints
515
- when 0x18; array_enc << 0x02D8
516
- when 0x19; array_enc << 0x02C7
517
- when 0x1A; array_enc << 0x02C6
518
- when 0x1B; array_enc << 0x02D9
519
- when 0x1C; array_enc << 0x02DD
520
- when 0x1D; array_enc << 0x02DB
521
- when 0x1E; array_enc << 0x02DA
522
- when 0x1F; array_enc << 0x02DC
523
- when 0x7F; array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR # Undefined
524
- when 0x80; array_enc << 0x2022
525
- when 0x81; array_enc << 0x2020
526
- when 0x82; array_enc << 0x2021
527
- when 0x83; array_enc << 0x2026
528
- when 0x84; array_enc << 0x2014
529
- when 0x85; array_enc << 0x2013
530
- when 0x86; array_enc << 0x0192
531
- when 0x87; array_enc << 0x2044
532
- when 0x88; array_enc << 0x2039
533
- when 0x89; array_enc << 0x203A
534
- when 0x8A; array_enc << 0x2212
535
- when 0x8B; array_enc << 0x2030
536
- when 0x8C; array_enc << 0x201E
537
- when 0x8D; array_enc << 0x201C
538
- when 0x8E; array_enc << 0x201D
539
- when 0x8F; array_enc << 0x2018
540
- when 0x90; array_enc << 0x2019
541
- when 0x91; array_enc << 0x201A
542
- when 0x92; array_enc << 0x2122
543
- when 0x93; array_enc << 0xFB01
544
- when 0x94; array_enc << 0xFB02
545
- when 0x95; array_enc << 0x0141
546
- when 0x96; array_enc << 0x0152
547
- when 0x97; array_enc << 0x0160
548
- when 0x98; array_enc << 0x0178
549
- when 0x99; array_enc << 0x017D
550
- when 0x9A; array_enc << 0x0131
551
- when 0x9B; array_enc << 0x0142
552
- when 0x9C; array_enc << 0x0153
553
- when 0x9D; array_enc << 0x0161
554
- when 0x9E; array_enc << 0x017E
555
- when 0x9F; array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR # Undefined
556
- when 0xA0; array_enc << 0x20AC
557
- else
558
- array_enc << num
559
- end
560
- end
561
- end
562
-
563
- # convert any glyph names to unicode codepoints
564
- array_enc = self.process_glyphnames(array_enc)
565
-
566
- # replace charcters that didn't convert to unicode nicely with something valid
567
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
568
-
569
- # pack all our Unicode codepoints into a UTF-8 string
570
- ret = array_enc.pack("U*")
571
-
572
- # set the strings encoding correctly under ruby 1.9+
573
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
574
-
575
- return ret
576
- end
577
- end
578
154
 
579
- class StandardEncoding < Encoding
580
- # convert an Adobe Standard Encoding string into UTF-8
581
- def to_utf8(str, tounicode = nil)
582
- # based on mapping described at:
583
- # http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
584
- array_std = str.unpack('C*')
585
- array_std = self.process_differences(array_std)
586
- array_enc = []
587
- array_std.each do |num|
588
- if tounicode && (code = tounicode.decode(num))
589
- array_enc << code
590
- elsif tounicode
591
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
592
- else
593
- case num
594
- when 0x27; array_enc << 0x2019
595
- when 0x60; array_enc << 0x2018
596
- when 0xA4; array_enc << 0x2044
597
- when 0xA6; array_enc << 0x0192
598
- when 0xA8; array_enc << 0x00A4
599
- when 0xA9; array_enc << 0x0027
600
- when 0xAA; array_enc << 0x201C
601
- when 0xAC; array_enc << 0x2039
602
- when 0xAD; array_enc << 0x203A
603
- when 0xAE; array_enc << 0xFB01
604
- when 0xAF; array_enc << 0xFB02
605
- when 0xB1; array_enc << 0x2013
606
- when 0xB2; array_enc << 0x2020
607
- when 0xB3; array_enc << 0x2021
608
- when 0xB4; array_enc << 0x00B7
609
- when 0xB7; array_enc << 0x2022
610
- when 0xB8; array_enc << 0x201A
611
- when 0xB9; array_enc << 0x201E
612
- when 0xBA; array_enc << 0x201D
613
- when 0xBC; array_enc << 0x2026
614
- when 0xBD; array_enc << 0x2030
615
- when 0xC1; array_enc << 0x0060
616
- when 0xC2; array_enc << 0x00B4
617
- when 0xC3; array_enc << 0x02C6
618
- when 0xC4; array_enc << 0x02DC
619
- when 0xC5; array_enc << 0x00AF
620
- when 0xC6; array_enc << 0x02D8
621
- when 0xC7; array_enc << 0x02D9
622
- when 0xC8; array_enc << 0x00A8
623
- when 0xCA; array_enc << 0x02DA
624
- when 0xCB; array_enc << 0x00B8
625
- when 0xCD; array_enc << 0x02DD
626
- when 0xCE; array_enc << 0x02DB
627
- when 0xCF; array_enc << 0x02C7
628
- when 0xD0; array_enc << 0x2014
629
- when 0xE1; array_enc << 0x00C6
630
- when 0xE3; array_enc << 0x00AA
631
- when 0xE8; array_enc << 0x0141
632
- when 0xE9; array_enc << 0x00D8
633
- when 0xEA; array_enc << 0x0152
634
- when 0xEB; array_enc << 0x00BA
635
- when 0xF1; array_enc << 0x00E6
636
- when 0xF5; array_enc << 0x0131
637
- when 0xF8; array_enc << 0x0142
638
- when 0xF9; array_enc << 0x00F8
639
- when 0xFA; array_enc << 0x0153
640
- when 0xFB; array_enc << 0x00DF
641
- else
642
- array_enc << num
643
- end
644
- end
155
+ def load_mapping(file)
156
+ @mapping = {}
157
+ RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
158
+ File.open(file, mode) do |f|
159
+ f.each do |l|
160
+ m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
161
+ @mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
645
162
  end
646
-
647
- # convert any glyph names to unicode codepoints
648
- array_enc = self.process_glyphnames(array_enc)
649
-
650
- # replace charcters that didn't convert to unicode nicely with something valid
651
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
652
-
653
- # pack all our Unicode codepoints into a UTF-8 string
654
- ret = array_enc.pack("U*")
655
-
656
- # set the strings encoding correctly under ruby 1.9+
657
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
658
-
659
- return ret
660
163
  end
661
164
  end
662
165
 
663
- class SymbolEncoding < Encoding
664
- # convert a SymbolEncoding string into UTF-8
665
- def to_utf8(str, tounicode = nil)
666
- array_symbol = str.unpack('C*')
667
- array_symbol = self.process_differences(array_symbol)
668
- array_enc = []
669
- array_symbol.each do |num|
670
- if tounicode && (code = tounicode.decode(num))
671
- array_enc << code
672
- elsif tounicode
673
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
674
- else
675
- case num
676
- when 0x22; array_enc << 0x2200
677
- when 0x24; array_enc << 0x2203
678
- when 0x27; array_enc << 0x220B
679
- when 0x2A; array_enc << 0x2217
680
- when 0x2D; array_enc << 0x2212
681
- when 0x40; array_enc << 0x2245
682
- when 0x41; array_enc << 0x0391
683
- when 0x42; array_enc << 0x0392
684
- when 0x43; array_enc << 0x03A7
685
- when 0x44; array_enc << 0x0394
686
- when 0x45; array_enc << 0x0395
687
- when 0x46; array_enc << 0x03A6
688
- when 0x47; array_enc << 0x0393
689
- when 0x48; array_enc << 0x0397
690
- when 0x49; array_enc << 0x0399
691
- when 0x4A; array_enc << 0x03D1
692
- when 0x4B; array_enc << 0x039A
693
- when 0x4C; array_enc << 0x039B
694
- when 0x4D; array_enc << 0x039C
695
- when 0x4E; array_enc << 0x039D
696
- when 0x4F; array_enc << 0x039F
697
- when 0x50; array_enc << 0x03A0
698
- when 0x51; array_enc << 0x0398
699
- when 0x52; array_enc << 0x03A1
700
- when 0x53; array_enc << 0x03A3
701
- when 0x54; array_enc << 0x03A4
702
- when 0x55; array_enc << 0x03A5
703
- when 0x56; array_enc << 0x03C2
704
- when 0x57; array_enc << 0x03A9
705
- when 0x58; array_enc << 0x039E
706
- when 0x59; array_enc << 0x03A8
707
- when 0x5A; array_enc << 0x0396
708
- when 0x5C; array_enc << 0x2234
709
- when 0x5E; array_enc << 0x22A5
710
- when 0x60; array_enc << 0xF8E5
711
- when 0x61; array_enc << 0x03B1
712
- when 0x62; array_enc << 0x03B2
713
- when 0x63; array_enc << 0x03C7
714
- when 0x64; array_enc << 0x03B4
715
- when 0x65; array_enc << 0x03B5
716
- when 0x66; array_enc << 0x03C6
717
- when 0x67; array_enc << 0x03B3
718
- when 0x68; array_enc << 0x03B7
719
- when 0x69; array_enc << 0x03B9
720
- when 0x6A; array_enc << 0x03D5
721
- when 0x6B; array_enc << 0x03BA
722
- when 0x6C; array_enc << 0x03BB
723
- when 0x6D; array_enc << 0x03BC
724
- when 0x6E; array_enc << 0x03BD
725
- when 0x6F; array_enc << 0x03BF
726
- when 0x70; array_enc << 0x03C0
727
- when 0x71; array_enc << 0x03B8
728
- when 0x72; array_enc << 0x03C1
729
- when 0x73; array_enc << 0x03C3
730
- when 0x74; array_enc << 0x03C4
731
- when 0x75; array_enc << 0x03C5
732
- when 0x76; array_enc << 0x03D6
733
- when 0x77; array_enc << 0x03C9
734
- when 0x78; array_enc << 0x03BE
735
- when 0x79; array_enc << 0x03C8
736
- when 0x7A; array_enc << 0x03B6
737
- when 0x7E; array_enc << 0x223C
738
- when 0xA0; array_enc << 0x20AC
739
- when 0xA1; array_enc << 0x03D2
740
- when 0xA2; array_enc << 0x2032
741
- when 0xA3; array_enc << 0x2264
742
- when 0xA4; array_enc << 0x2215
743
- when 0xA5; array_enc << 0x221E
744
- when 0xA6; array_enc << 0x0192
745
- when 0xA7; array_enc << 0x2663
746
- when 0xA8; array_enc << 0x2666
747
- when 0xA9; array_enc << 0x2665
748
- when 0xAA; array_enc << 0x2660
749
- when 0xAB; array_enc << 0x2194
750
- when 0xAC; array_enc << 0x2190
751
- when 0xAD; array_enc << 0x2191
752
- when 0xAE; array_enc << 0x2192
753
- when 0xAF; array_enc << 0x2193
754
- when 0xB2; array_enc << 0x2033
755
- when 0xB3; array_enc << 0x2265
756
- when 0xB4; array_enc << 0x00D7
757
- when 0xB5; array_enc << 0x221D
758
- when 0xB6; array_enc << 0x2202
759
- when 0xB7; array_enc << 0x2022
760
- when 0xB8; array_enc << 0x00F7
761
- when 0xB9; array_enc << 0x2260
762
- when 0xBA; array_enc << 0x2261
763
- when 0xBB; array_enc << 0x2248
764
- when 0xBC; array_enc << 0x2026
765
- when 0xBD; array_enc << 0xF8E6
766
- when 0xBE; array_enc << 0xF8E7
767
- when 0xBF; array_enc << 0x21B5
768
- when 0xC0; array_enc << 0x2135
769
- when 0xC1; array_enc << 0x2111
770
- when 0xC2; array_enc << 0x211C
771
- when 0xC3; array_enc << 0x2118
772
- when 0xC4; array_enc << 0x2297
773
- when 0xC5; array_enc << 0x2295
774
- when 0xC6; array_enc << 0x2205
775
- when 0xC7; array_enc << 0x2229
776
- when 0xC8; array_enc << 0x222A
777
- when 0xC9; array_enc << 0x2283
778
- when 0xCA; array_enc << 0x2287
779
- when 0xCB; array_enc << 0x2284
780
- when 0xCC; array_enc << 0x2282
781
- when 0xCD; array_enc << 0x2286
782
- when 0xCE; array_enc << 0x2208
783
- when 0xCF; array_enc << 0x2209
784
- when 0xD0; array_enc << 0x2220
785
- when 0xD1; array_enc << 0x2207
786
- when 0xD2; array_enc << 0xF6DA
787
- when 0xD3; array_enc << 0xF6D9
788
- when 0xD4; array_enc << 0xF6DB
789
- when 0xD5; array_enc << 0x220F
790
- when 0xD6; array_enc << 0x221A
791
- when 0xD7; array_enc << 0x22C5
792
- when 0xD8; array_enc << 0x00AC
793
- when 0xD9; array_enc << 0x2227
794
- when 0xDA; array_enc << 0x2228
795
- when 0xDB; array_enc << 0x21D4
796
- when 0xDC; array_enc << 0x21D0
797
- when 0xDD; array_enc << 0x21D1
798
- when 0xDE; array_enc << 0x21D2
799
- when 0xDF; array_enc << 0x21D3
800
- when 0xE0; array_enc << 0x25CA
801
- when 0xE1; array_enc << 0x2329
802
- when 0xE2; array_enc << 0xF8E8
803
- when 0xE3; array_enc << 0xF8E9
804
- when 0xE4; array_enc << 0xF8EA
805
- when 0xE5; array_enc << 0x2211
806
- when 0xE6; array_enc << 0xF8EB
807
- when 0xE7; array_enc << 0xF8EC
808
- when 0xE8; array_enc << 0xF8ED
809
- when 0xE9; array_enc << 0xF8EE
810
- when 0xEA; array_enc << 0xF8EF
811
- when 0xEB; array_enc << 0xF8F0
812
- when 0xEC; array_enc << 0xF8F1
813
- when 0xED; array_enc << 0xF8F2
814
- when 0xEE; array_enc << 0xF8F3
815
- when 0xEF; array_enc << 0xF8F4
816
- when 0xF1; array_enc << 0x232A
817
- when 0xF2; array_enc << 0x222B
818
- when 0xF3; array_enc << 0x2320
819
- when 0xF4; array_enc << 0xF8F5
820
- when 0xF5; array_enc << 0x2321
821
- when 0xF6; array_enc << 0xF8F6
822
- when 0xF7; array_enc << 0xF8F7
823
- when 0xF8; array_enc << 0xF8F8
824
- when 0xF9; array_enc << 0xF8F9
825
- when 0xFA; array_enc << 0xF8FA
826
- when 0xFB; array_enc << 0xF8FB
827
- when 0xFC; array_enc << 0xF8FC
828
- when 0xFD; array_enc << 0xF8FD
829
- when 0xFE; array_enc << 0xF8FE
830
- else
831
- array_enc << num
832
- end
833
- end
834
- end
835
-
836
- # replace charcters that didn't convert to unicode nicely with something valid
837
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
838
-
839
- # convert any glyph names to unicode codepoints
840
- array_enc = self.process_glyphnames(array_enc)
841
-
842
- # pack all our Unicode codepoints into a UTF-8 string
843
- ret = array_enc.pack("U*")
844
-
845
- # set the strings encoding correctly under ruby 1.9+
846
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
847
-
848
- return ret
849
- end
850
- end
851
-
852
- class UTF16Encoding < Encoding
853
- # convert a UTF-16 string into UTF-8
854
- def to_utf8(str, tounicode = nil)
855
-
856
- # remove the UTF-16 Byte Order Mark if it exists
857
- str = str[2, str.size-2] if str[0,2] == "\376\377"
858
-
859
- # convert away
860
- str = str.unpack("n*").pack("U*")
861
-
862
- # set the strings encoding correctly under ruby 1.9+
863
- str.force_encoding("UTF-8") if str.respond_to?(:force_encoding)
864
-
865
- return str
866
- end
867
- end
868
-
869
- class WinAnsiEncoding < Encoding
870
- # convert a WinAnsiEncoding string into UTF-8
871
- def to_utf8(str, tounicode = nil)
872
- # content of this method borrowed from REXML::Encoding.decode_cp1252
873
- # for further reading:
874
- # http://www.intertwingly.net/stories/2004/04/14/i18n.html
875
- array_latin9 = str.unpack('C*')
876
- array_latin9 = self.process_differences(array_latin9)
877
- array_enc = []
878
- array_latin9.each do |num|
879
- if tounicode && (code = tounicode.decode(num))
880
- array_enc << code
881
- elsif tounicode
882
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
883
- else
884
- case num
885
- # characters that added compared to iso-8859-1
886
- when 0x80; array_enc << 0x20AC # 0xe2 0x82 0xac
887
- when 0x82; array_enc << 0x201A # 0xe2 0x82 0x9a
888
- when 0x83; array_enc << 0x0192 # 0xc6 0x92
889
- when 0x84; array_enc << 0x201E # 0xe2 0x82 0x9e
890
- when 0x85; array_enc << 0x2026 # 0xe2 0x80 0xa6
891
- when 0x86; array_enc << 0x2020 # 0xe2 0x80 0xa0
892
- when 0x87; array_enc << 0x2021 # 0xe2 0x80 0xa1
893
- when 0x88; array_enc << 0x02C6 # 0xcb 0x86
894
- when 0x89; array_enc << 0x2030 # 0xe2 0x80 0xb0
895
- when 0x8A; array_enc << 0x0160 # 0xc5 0xa0
896
- when 0x8B; array_enc << 0x2039 # 0xe2 0x80 0xb9
897
- when 0x8C; array_enc << 0x0152 # 0xc5 0x92
898
- when 0x8E; array_enc << 0x017D # 0xc5 0xbd
899
- when 0x91; array_enc << 0x2018 # 0xe2 0x80 0x98
900
- when 0x92; array_enc << 0x2019 # 0xe2 0x80 0x99
901
- when 0x93; array_enc << 0x201C
902
- when 0x94; array_enc << 0x201D
903
- when 0x95; array_enc << 0x2022
904
- when 0x96; array_enc << 0x2013
905
- when 0x97; array_enc << 0x2014
906
- when 0x98; array_enc << 0x02DC
907
- when 0x99; array_enc << 0x2122
908
- when 0x9A; array_enc << 0x0161
909
- when 0x9B; array_enc << 0x203A
910
- when 0x9C; array_enc << 0x0152 # 0xc5 0x93
911
- when 0x9E; array_enc << 0x017E # 0xc5 0xbe
912
- when 0x9F; array_enc << 0x0178
913
- else
914
- array_enc << num
915
- end
916
- end
917
- end
918
-
919
- # convert any glyph names to unicode codepoints
920
- array_enc = self.process_glyphnames(array_enc)
921
-
922
- # replace charcters that didn't convert to unicode nicely with something valid
923
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
924
-
925
- # pack all our Unicode codepoints into a UTF-8 string
926
- ret = array_enc.pack("U*")
927
-
928
- # set the strings encoding correctly under ruby 1.9+
929
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
930
-
931
- return ret
932
- end
933
- end
934
-
935
- class ZapfDingbatsEncoding < Encoding
936
- # convert a ZapfDingbatsEncoding string into UTF-8
937
- def to_utf8(str, tounicode = nil)
938
- # mapping to unicode taken from:
939
- # http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
940
- array_symbol = str.unpack('C*')
941
- array_symbol = self.process_differences(array_symbol)
942
- array_enc = []
943
- array_symbol.each do |num|
944
- if tounicode && (code = tounicode.decode(num))
945
- array_enc << code
946
- elsif tounicode
947
- array_enc << PDF::Reader::Encoding::UNKNOWN_CHAR
948
- else
949
- case num
950
- when 0x21; array_enc << 0x2701
951
- when 0x22; array_enc << 0x2702
952
- when 0x23; array_enc << 0x2703
953
- when 0x24; array_enc << 0x2704
954
- when 0x25; array_enc << 0x260E
955
- when 0x26; array_enc << 0x2706
956
- when 0x27; array_enc << 0x2707
957
- when 0x28; array_enc << 0x2708
958
- when 0x29; array_enc << 0x2709
959
- when 0x2A; array_enc << 0x261B
960
- when 0x2B; array_enc << 0x261E
961
- when 0x2C; array_enc << 0x270C
962
- when 0x2D; array_enc << 0x270D
963
- when 0x2E; array_enc << 0x270E
964
- when 0x2F; array_enc << 0x270F
965
- when 0x30; array_enc << 0x2710
966
- when 0x31; array_enc << 0x2711
967
- when 0x32; array_enc << 0x2712
968
- when 0x33; array_enc << 0x2713
969
- when 0x34; array_enc << 0x2714
970
- when 0x35; array_enc << 0x2715
971
- when 0x36; array_enc << 0x2716
972
- when 0x37; array_enc << 0x2717
973
- when 0x38; array_enc << 0x2718
974
- when 0x39; array_enc << 0x2719
975
- when 0x3A; array_enc << 0x271A
976
- when 0x3B; array_enc << 0x271B
977
- when 0x3C; array_enc << 0x271C
978
- when 0x3D; array_enc << 0x271D
979
- when 0x3E; array_enc << 0x271E
980
- when 0x3F; array_enc << 0x271E
981
- when 0x40; array_enc << 0x2720
982
- when 0x41; array_enc << 0x2721
983
- when 0x42; array_enc << 0x2722
984
- when 0x43; array_enc << 0x2723
985
- when 0x44; array_enc << 0x2724
986
- when 0x45; array_enc << 0x2725
987
- when 0x46; array_enc << 0x2726
988
- when 0x47; array_enc << 0x2727
989
- when 0x48; array_enc << 0x2605
990
- when 0x49; array_enc << 0x2729
991
- when 0x4A; array_enc << 0x272A
992
- when 0x4B; array_enc << 0x272B
993
- when 0x4C; array_enc << 0x272C
994
- when 0x4D; array_enc << 0x272D
995
- when 0x4E; array_enc << 0x272E
996
- when 0x4F; array_enc << 0x272F
997
- when 0x50; array_enc << 0x2730
998
- when 0x51; array_enc << 0x2731
999
- when 0x52; array_enc << 0x2732
1000
- when 0x53; array_enc << 0x2733
1001
- when 0x54; array_enc << 0x2734
1002
- when 0x55; array_enc << 0x2735
1003
- when 0x56; array_enc << 0x2736
1004
- when 0x57; array_enc << 0x2737
1005
- when 0x58; array_enc << 0x2738
1006
- when 0x59; array_enc << 0x2739
1007
- when 0x5A; array_enc << 0x273A
1008
- when 0x5B; array_enc << 0x273B
1009
- when 0x5C; array_enc << 0x273C
1010
- when 0x5D; array_enc << 0x273D
1011
- when 0x5E; array_enc << 0x273E
1012
- when 0x5F; array_enc << 0x273F
1013
- when 0x60; array_enc << 0x2740
1014
- when 0x61; array_enc << 0x2741
1015
- when 0x62; array_enc << 0x2742
1016
- when 0x63; array_enc << 0x2743
1017
- when 0x64; array_enc << 0x2744
1018
- when 0x65; array_enc << 0x2745
1019
- when 0x66; array_enc << 0x2746
1020
- when 0x67; array_enc << 0x2747
1021
- when 0x68; array_enc << 0x2748
1022
- when 0x69; array_enc << 0x2749
1023
- when 0x6A; array_enc << 0x274A
1024
- when 0x6B; array_enc << 0x274B
1025
- when 0x6C; array_enc << 0x25CF
1026
- when 0x6D; array_enc << 0x274D
1027
- when 0x6E; array_enc << 0x25A0
1028
- when 0x6F; array_enc << 0x274F
1029
- when 0x70; array_enc << 0x2750
1030
- when 0x71; array_enc << 0x2751
1031
- when 0x72; array_enc << 0x2752
1032
- when 0x73; array_enc << 0x2753
1033
- when 0x74; array_enc << 0x2754
1034
- when 0x75; array_enc << 0x2755
1035
- when 0x76; array_enc << 0x2756
1036
- when 0x77; array_enc << 0x2757
1037
- when 0x78; array_enc << 0x2758
1038
- when 0x79; array_enc << 0x2759
1039
- when 0x7A; array_enc << 0x275A
1040
- when 0x7B; array_enc << 0x275B
1041
- when 0x7C; array_enc << 0x275C
1042
- when 0x7D; array_enc << 0x275D
1043
- when 0x7E; array_enc << 0x275E
1044
- when 0x80; array_enc << 0xF8D7
1045
- when 0x81; array_enc << 0xF8D8
1046
- when 0x82; array_enc << 0xF8D9
1047
- when 0x83; array_enc << 0xF8DA
1048
- when 0x84; array_enc << 0xF8DB
1049
- when 0x85; array_enc << 0xF8DC
1050
- when 0x86; array_enc << 0xF8DD
1051
- when 0x87; array_enc << 0xF8DE
1052
- when 0x88; array_enc << 0xF8DF
1053
- when 0x89; array_enc << 0xF8E0
1054
- when 0x8A; array_enc << 0xF8E1
1055
- when 0x8B; array_enc << 0xF8E2
1056
- when 0x8C; array_enc << 0xF8E3
1057
- when 0x8D; array_enc << 0xF8E4
1058
- when 0xA1; array_enc << 0x2761
1059
- when 0xA2; array_enc << 0x2762
1060
- when 0xA3; array_enc << 0x2763
1061
- when 0xA4; array_enc << 0x2764
1062
- when 0xA5; array_enc << 0x2765
1063
- when 0xA6; array_enc << 0x2766
1064
- when 0xA7; array_enc << 0x2767
1065
- when 0xA8; array_enc << 0x2663
1066
- when 0xA9; array_enc << 0x2666
1067
- when 0xAA; array_enc << 0x2665
1068
- when 0xAB; array_enc << 0x2660
1069
- when 0xAC; array_enc << 0x2460
1070
- when 0xAD; array_enc << 0x2461
1071
- when 0xAE; array_enc << 0x2462
1072
- when 0xAF; array_enc << 0x2463
1073
- when 0xB0; array_enc << 0x2464
1074
- when 0xB1; array_enc << 0x2465
1075
- when 0xB2; array_enc << 0x2466
1076
- when 0xB3; array_enc << 0x2467
1077
- when 0xB4; array_enc << 0x2468
1078
- when 0xB5; array_enc << 0x2469
1079
- when 0xB6; array_enc << 0x2776
1080
- when 0xB7; array_enc << 0x2777
1081
- when 0xB8; array_enc << 0x2778
1082
- when 0xB9; array_enc << 0x2779
1083
- when 0xBA; array_enc << 0x277A
1084
- when 0xBB; array_enc << 0x277B
1085
- when 0xBC; array_enc << 0x277C
1086
- when 0xBD; array_enc << 0x277D
1087
- when 0xBE; array_enc << 0x277E
1088
- when 0xBF; array_enc << 0x277F
1089
- when 0xC0; array_enc << 0x2780
1090
- when 0xC1; array_enc << 0x2781
1091
- when 0xC2; array_enc << 0x2782
1092
- when 0xC3; array_enc << 0x2783
1093
- when 0xC4; array_enc << 0x2784
1094
- when 0xC5; array_enc << 0x2785
1095
- when 0xC6; array_enc << 0x2786
1096
- when 0xC7; array_enc << 0x2787
1097
- when 0xC8; array_enc << 0x2788
1098
- when 0xC9; array_enc << 0x2789
1099
- when 0xCA; array_enc << 0x278A
1100
- when 0xCB; array_enc << 0x278B
1101
- when 0xCC; array_enc << 0x278C
1102
- when 0xCD; array_enc << 0x278D
1103
- when 0xCE; array_enc << 0x278E
1104
- when 0xCF; array_enc << 0x278F
1105
- when 0xD0; array_enc << 0x2790
1106
- when 0xD1; array_enc << 0x2791
1107
- when 0xD2; array_enc << 0x2792
1108
- when 0xD3; array_enc << 0x2793
1109
- when 0xD4; array_enc << 0x2794
1110
- when 0xD5; array_enc << 0x2795
1111
- when 0xD6; array_enc << 0x2796
1112
- when 0xD7; array_enc << 0x2797
1113
- when 0xD8; array_enc << 0x2798
1114
- when 0xD9; array_enc << 0x2799
1115
- when 0xDA; array_enc << 0x279A
1116
- when 0xDB; array_enc << 0x279B
1117
- when 0xDC; array_enc << 0x279C
1118
- when 0xDD; array_enc << 0x279D
1119
- when 0xDE; array_enc << 0x279E
1120
- when 0xDF; array_enc << 0x279F
1121
- when 0xE0; array_enc << 0x27A0
1122
- when 0xE1; array_enc << 0x27A1
1123
- when 0xE2; array_enc << 0x27A2
1124
- when 0xE3; array_enc << 0x27A3
1125
- when 0xE4; array_enc << 0x27A4
1126
- when 0xE5; array_enc << 0x27A5
1127
- when 0xE6; array_enc << 0x27A6
1128
- when 0xE7; array_enc << 0x27A7
1129
- when 0xE8; array_enc << 0x27A8
1130
- when 0xE9; array_enc << 0x27A9
1131
- when 0xEA; array_enc << 0x27AA
1132
- when 0xEB; array_enc << 0x27AB
1133
- when 0xEC; array_enc << 0x27AC
1134
- when 0xED; array_enc << 0x27AD
1135
- when 0xEE; array_enc << 0x27AE
1136
- when 0xEF; array_enc << 0x27AF
1137
- when 0xF1; array_enc << 0x27B1
1138
- when 0xF2; array_enc << 0x27B2
1139
- when 0xF3; array_enc << 0x27B3
1140
- when 0xF4; array_enc << 0x27B4
1141
- when 0xF5; array_enc << 0x27B5
1142
- when 0xF6; array_enc << 0x27B6
1143
- when 0xF7; array_enc << 0x27B7
1144
- when 0xF8; array_enc << 0x27B8
1145
- when 0xF9; array_enc << 0x27B9
1146
- when 0xFA; array_enc << 0x27BA
1147
- when 0xFB; array_enc << 0x27BB
1148
- when 0xFC; array_enc << 0x27BC
1149
- when 0xFD; array_enc << 0x27BD
1150
- when 0xFE; array_enc << 0x27BE
1151
- else
1152
- array_enc << num
1153
- end
1154
- end
1155
- end
1156
-
1157
- # convert any glyph names to unicode codepoints
1158
- array_enc = self.process_glyphnames(array_enc)
1159
-
1160
- # replace charcters that didn't convert to unicode nicely with something valid
1161
- array_enc.collect! { |c| c ? c : PDF::Reader::Encoding::UNKNOWN_CHAR }
1162
-
1163
- # pack all our Unicode codepoints into a UTF-8 string
1164
- ret = array_enc.pack("U*")
1165
-
1166
- # set the strings encoding correctly under ruby 1.9+
1167
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
1168
-
1169
- return ret
1170
- end
1171
- end
1172
166
  end
1173
167
  end