rfeedparser 0.9.9 → 0.9.85

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,432 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
module FeedParserUtilities
  # Maps alternative spellings of character-encoding names (the hash keys)
  # to the single encoding name that should be used in their place (the
  # hash values).
  #
  # Adapted from python2.4's encodings/aliases.py.
  #
  # NOTE(review): every key is written in lowercase with '_' as the word
  # separator, so callers presumably normalise a name (downcase, '-' => '_')
  # before looking it up -- confirm against the lookup code.
  Encoding_Aliases = {
    # MacOSX does not have Unicode as a separate encoding, nor is it
    # aliased. Ubuntu ships it as a separate encoding (UNICODE.so), but the
    # source for it could not be located, so what it expects is unknown.
    # It is therefore aliased to utf-16, much like Python does when it is
    # built with --enable-unicode=ucs2. This could be seriously wrong.
    'unicode' => 'utf-16',

    # ascii codec
    '646' => 'ascii',
    'ansi_x3.4_1968' => 'ascii',
    'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986' => 'ascii',
    'cp367' => 'ascii',
    'csascii' => 'ascii',
    'ibm367' => 'ascii',
    'iso646_us' => 'ascii',
    'iso_646.irv_1991' => 'ascii',
    'iso_ir_6' => 'ascii',
    'us' => 'ascii',
    'us_ascii' => 'ascii',

    # big5 codec
    'big5_tw' => 'big5',
    'csbig5' => 'big5',

    # big5hkscs codec
    'big5_hkscs' => 'big5hkscs',
    'hkscs' => 'big5hkscs',

    # cp037 codec
    '037' => 'cp037',
    'csibm037' => 'cp037',
    'ebcdic_cp_ca' => 'cp037',
    'ebcdic_cp_nl' => 'cp037',
    'ebcdic_cp_us' => 'cp037',
    'ebcdic_cp_wt' => 'cp037',
    'ibm037' => 'cp037',
    'ibm039' => 'cp037',

    # cp1026 codec
    '1026' => 'cp1026',
    'csibm1026' => 'cp1026',
    'ibm1026' => 'cp1026',

    # cp1140 codec
    '1140' => 'cp1140',
    'ibm1140' => 'cp1140',

    # cp1250 codec
    '1250' => 'cp1250',
    'windows_1250' => 'cp1250',

    # cp1251 codec
    '1251' => 'cp1251',
    'windows_1251' => 'cp1251',

    # cp1252 codec
    '1252' => 'cp1252',
    'windows_1252' => 'cp1252',

    # cp1253 codec
    '1253' => 'cp1253',
    'windows_1253' => 'cp1253',

    # cp1254 codec
    '1254' => 'cp1254',
    'windows_1254' => 'cp1254',

    # cp1255 codec
    '1255' => 'cp1255',
    'windows_1255' => 'cp1255',

    # cp1256 codec
    '1256' => 'cp1256',
    'windows_1256' => 'cp1256',

    # cp1257 codec
    '1257' => 'cp1257',
    'windows_1257' => 'cp1257',

    # cp1258 codec
    '1258' => 'cp1258',
    'windows_1258' => 'cp1258',

    # cp424 codec
    '424' => 'cp424',
    'csibm424' => 'cp424',
    'ebcdic_cp_he' => 'cp424',
    'ibm424' => 'cp424',

    # cp437 codec
    '437' => 'cp437',
    'cspc8codepage437' => 'cp437',
    'ibm437' => 'cp437',

    # cp500 codec
    '500' => 'cp500',
    'csibm500' => 'cp500',
    'ebcdic_cp_be' => 'cp500',
    'ebcdic_cp_ch' => 'cp500',
    'ibm500' => 'cp500',

    # cp775 codec
    '775' => 'cp775',
    'cspc775baltic' => 'cp775',
    'ibm775' => 'cp775',

    # cp850 codec
    '850' => 'cp850',
    'cspc850multilingual' => 'cp850',
    'ibm850' => 'cp850',

    # cp852 codec
    '852' => 'cp852',
    'cspcp852' => 'cp852',
    'ibm852' => 'cp852',

    # cp855 codec
    '855' => 'cp855',
    'csibm855' => 'cp855',
    'ibm855' => 'cp855',

    # cp857 codec
    '857' => 'cp857',
    'csibm857' => 'cp857',
    'ibm857' => 'cp857',

    # cp860 codec
    '860' => 'cp860',
    'csibm860' => 'cp860',
    'ibm860' => 'cp860',

    # cp861 codec
    '861' => 'cp861',
    'cp_is' => 'cp861',
    'csibm861' => 'cp861',
    'ibm861' => 'cp861',

    # cp862 codec
    '862' => 'cp862',
    'cspc862latinhebrew' => 'cp862',
    'ibm862' => 'cp862',

    # cp863 codec
    '863' => 'cp863',
    'csibm863' => 'cp863',
    'ibm863' => 'cp863',

    # cp864 codec
    '864' => 'cp864',
    'csibm864' => 'cp864',
    'ibm864' => 'cp864',

    # cp865 codec
    '865' => 'cp865',
    'csibm865' => 'cp865',
    'ibm865' => 'cp865',

    # cp866 codec
    '866' => 'cp866',
    'csibm866' => 'cp866',
    'ibm866' => 'cp866',

    # cp869 codec
    '869' => 'cp869',
    'cp_gr' => 'cp869',
    'csibm869' => 'cp869',
    'ibm869' => 'cp869',

    # cp932 codec
    '932' => 'cp932',
    'ms932' => 'cp932',
    'mskanji' => 'cp932',
    'ms_kanji' => 'cp932',

    # cp949 codec
    '949' => 'cp949',
    'ms949' => 'cp949',
    'uhc' => 'cp949',

    # cp950 codec
    '950' => 'cp950',
    'ms950' => 'cp950',

    # euc_jp codec
    'euc_jp' => 'euc-jp',
    'eucjp' => 'euc-jp',
    'ujis' => 'euc-jp',
    'u_jis' => 'euc-jp',

    # euc_kr codec
    'euc_kr' => 'euc-kr',
    'euckr' => 'euc-kr',
    'korean' => 'euc-kr',
    'ksc5601' => 'euc-kr',
    'ks_c_5601' => 'euc-kr',
    'ks_c_5601_1987' => 'euc-kr',
    'ksx1001' => 'euc-kr',
    'ks_x_1001' => 'euc-kr',

    # gb18030 codec
    'gb18030_2000' => 'gb18030',

    # gb2312 codec
    'chinese' => 'gb2312',
    'csiso58gb231280' => 'gb2312',
    'euc_cn' => 'gb2312',
    'euccn' => 'gb2312',
    'eucgb2312_cn' => 'gb2312',
    'gb2312_1980' => 'gb2312',
    'gb2312_80' => 'gb2312',
    'iso_ir_58' => 'gb2312',

    # gbk codec
    '936' => 'gbk',
    'cp936' => 'gbk',
    'ms936' => 'gbk',

    # hp-roman8 codec
    'hp_roman8' => 'hp-roman8',
    'roman8' => 'hp-roman8',
    'r8' => 'hp-roman8',
    'csHPRoman8' => 'hp-roman8',
    # Lowercase spelling added because every other key in this table is
    # lowercase; the mixed-case key above is kept for existing callers.
    'cshproman8' => 'hp-roman8',

    # iso2022_jp codec
    'iso2022_jp' => 'iso-2022-jp',
    'csiso2022jp' => 'iso-2022-jp',
    'iso2022jp' => 'iso-2022-jp',
    'iso_2022_jp' => 'iso-2022-jp',

    # iso2022_jp_1 codec
    'iso2022_jp_1' => 'iso-2022-jp-1', # key was misspelled 'iso2002_jp_1'
    'iso2022jp_1' => 'iso-2022-jp-1',
    'iso_2022_jp_1' => 'iso-2022-jp-1',

    # iso2022_jp_2 codec
    'iso2022_jp_2' => 'iso-2022-jp-2', # value was misspelled 'iso-2002-jp-2'
    'iso2022jp_2' => 'iso-2022-jp-2',
    'iso_2022_jp_2' => 'iso-2022-jp-2',

    # iso2022_jp_3 codec
    'iso2022_jp_3' => 'iso-2022-jp-3', # key was misspelled 'iso2002_jp_3'
    'iso2022jp_3' => 'iso-2022-jp-3',
    'iso_2022_jp_3' => 'iso-2022-jp-3',

    # iso2022_kr codec
    'iso2022_kr' => 'iso-2022-kr',
    'csiso2022kr' => 'iso-2022-kr',
    'iso2022kr' => 'iso-2022-kr',
    'iso_2022_kr' => 'iso-2022-kr',

    # iso8859_10 codec
    'iso8859_10' => 'iso-8859-10',
    'csisolatin6' => 'iso-8859-10',
    'iso_8859_10' => 'iso-8859-10',
    'iso_8859_10_1992' => 'iso-8859-10',
    'iso_ir_157' => 'iso-8859-10',
    'l6' => 'iso-8859-10',
    'latin6' => 'iso-8859-10',

    # iso8859_13 codec
    'iso8859_13' => 'iso-8859-13',
    'iso_8859_13' => 'iso-8859-13',

    # iso8859_14 codec
    'iso8859_14' => 'iso-8859-14',
    'iso_8859_14' => 'iso-8859-14',
    'iso_8859_14_1998' => 'iso-8859-14',
    'iso_celtic' => 'iso-8859-14',
    'iso_ir_199' => 'iso-8859-14',
    'l8' => 'iso-8859-14',
    'latin8' => 'iso-8859-14',

    # iso8859_15 codec
    'iso8859_15' => 'iso-8859-15',
    'iso_8859_15' => 'iso-8859-15',

    # iso8859_1 codec
    'latin_1' => 'iso-8859-1',
    'cp819' => 'iso-8859-1',
    'csisolatin1' => 'iso-8859-1',
    'ibm819' => 'iso-8859-1',
    'iso8859' => 'iso-8859-1',
    'iso_8859_1' => 'iso-8859-1',
    'iso_8859_1_1987' => 'iso-8859-1',
    'iso_ir_100' => 'iso-8859-1',
    'l1' => 'iso-8859-1',
    'latin' => 'iso-8859-1',
    'latin1' => 'iso-8859-1',

    # iso8859_2 codec
    'iso8859_2' => 'iso-8859-2',
    'csisolatin2' => 'iso-8859-2',
    'iso_8859_2' => 'iso-8859-2',
    'iso_8859_2_1987' => 'iso-8859-2',
    'iso_ir_101' => 'iso-8859-2',
    'l2' => 'iso-8859-2',
    'latin2' => 'iso-8859-2',

    # iso8859_3 codec
    'iso8859_3' => 'iso-8859-3',
    'csisolatin3' => 'iso-8859-3',
    'iso_8859_3' => 'iso-8859-3',
    'iso_8859_3_1988' => 'iso-8859-3',
    'iso_ir_109' => 'iso-8859-3',
    'l3' => 'iso-8859-3',
    'latin3' => 'iso-8859-3',

    # iso8859_4 codec
    'iso8859_4' => 'iso-8859-4', # key was misspelled 'iso8849_4'
    'csisolatin4' => 'iso-8859-4',
    'iso_8859_4' => 'iso-8859-4',
    'iso_8859_4_1988' => 'iso-8859-4',
    'iso_ir_110' => 'iso-8859-4',
    'l4' => 'iso-8859-4',
    'latin4' => 'iso-8859-4',

    # iso8859_5 codec
    'iso8859_5' => 'iso-8859-5',
    'csisolatincyrillic' => 'iso-8859-5',
    'cyrillic' => 'iso-8859-5',
    'iso_8859_5' => 'iso-8859-5',
    'iso_8859_5_1988' => 'iso-8859-5',
    'iso_ir_144' => 'iso-8859-5',

    # iso8859_6 codec
    'iso8859_6' => 'iso-8859-6',
    'arabic' => 'iso-8859-6',
    'asmo_708' => 'iso-8859-6',
    'csisolatinarabic' => 'iso-8859-6',
    'ecma_114' => 'iso-8859-6',
    'iso_8859_6' => 'iso-8859-6',
    'iso_8859_6_1987' => 'iso-8859-6',
    'iso_ir_127' => 'iso-8859-6',

    # iso8859_7 codec
    'iso8859_7' => 'iso-8859-7',
    'csisolatingreek' => 'iso-8859-7',
    'ecma_118' => 'iso-8859-7',
    'elot_928' => 'iso-8859-7',
    'greek' => 'iso-8859-7',
    'greek8' => 'iso-8859-7',
    'iso_8859_7' => 'iso-8859-7',
    'iso_8859_7_1987' => 'iso-8859-7',
    'iso_ir_126' => 'iso-8859-7',

    # iso8859_8 codec
    # This entry used to read 'iso8859_9' => 'iso8859_8': a duplicate of the
    # iso8859_9 key below, with a value that is not a canonical name.
    'iso8859_8' => 'iso-8859-8',
    'csisolatinhebrew' => 'iso-8859-8',
    'hebrew' => 'iso-8859-8',
    'iso_8859_8' => 'iso-8859-8',
    'iso_8859_8_1988' => 'iso-8859-8',
    'iso_ir_138' => 'iso-8859-8',

    # iso8859_9 codec
    'iso8859_9' => 'iso-8859-9',
    'csisolatin5' => 'iso-8859-9',
    'iso_8859_9' => 'iso-8859-9',
    'iso_8859_9_1989' => 'iso-8859-9',
    'iso_ir_148' => 'iso-8859-9',
    'l5' => 'iso-8859-9',
    'latin5' => 'iso-8859-9',

    # iso8859_11 codec
    'iso8859_11' => 'iso-8859-11',
    'thai' => 'iso-8859-11',
    'iso_8859_11' => 'iso-8859-11',
    'iso_8859_11_2001' => 'iso-8859-11',

    # iso8859_16 codec
    'iso8859_16' => 'iso-8859-16',
    'iso_8859_16' => 'iso-8859-16',
    'iso_8859_16_2001' => 'iso-8859-16',
    'iso_ir_226' => 'iso-8859-16',
    'l10' => 'iso-8859-16',
    'latin10' => 'iso-8859-16',

    # cskoi8r codec
    'koi8_r' => 'cskoi8r',

    # mac_cyrillic codec
    'mac_cyrillic' => 'maccyrillic',

    # shift_jis codec
    'csshiftjis' => 'shift_jis',
    'shiftjis' => 'shift_jis',
    'sjis' => 'shift_jis',
    's_jis' => 'shift_jis',

    # shift_jisx0213 codec
    'shiftjisx0213' => 'shift_jisx0213',
    'sjisx0213' => 'shift_jisx0213',
    's_jisx0213' => 'shift_jisx0213',

    # utf_16 codec
    'utf_16' => 'utf-16',
    'u16' => 'utf-16',
    'utf16' => 'utf-16',

    # utf_16_be codec
    'utf_16_be' => 'utf-16be',
    'unicodebigunmarked' => 'utf-16be',
    'utf_16be' => 'utf-16be',

    # utf_16_le codec
    'utf_16_le' => 'utf-16le',
    'unicodelittleunmarked' => 'utf-16le',
    'utf_16le' => 'utf-16le',

    # utf_7 codec
    'utf_7' => 'utf-7',
    'u7' => 'utf-7',
    'utf7' => 'utf-7',

    # utf_8 codec
    'utf_8' => 'utf-8',
    'u8' => 'utf-8',
    'utf' => 'utf-8',
    'utf8' => 'utf-8',
    'utf8_ucs2' => 'utf-8',
    'utf8_ucs4' => 'utf-8',
  }
end
@@ -1,41 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- # Add some helper methods to make AttributeList (all of those damn attrs
4
- # and attrsD used by StrictFeedParser) act more like a Hash.
5
- # NOTE AttributeList is still Read-Only (AFAICT).
6
- # Monkey patching is terrible, and I have an addiction.
7
module XML
  module SAX
    # Read-only, Hash-like conveniences layered on top of the SAX
    # AttributeList accessors. Every method here is built solely on
    # getLength, getName(pos) and getValue(key_or_pos), which the including
    # object must provide.
    module AttributeList # in xml/sax.rb
      # Returns whatever getValue returns for +key+.
      def [](key)
        getValue(key)
      end

      # Yields a [name, value] pair for each position 0...getLength.
      # Returns an Enumerator when called without a block (previously this
      # raised LocalJumpError).
      def each
        return enum_for(:each) unless block_given?

        (0...getLength).each { |pos| yield [getName(pos), getValue(pos)] }
      end

      # Yields the attribute name at each position; Enumerator without a block.
      def each_key
        return enum_for(:each_key) unless block_given?

        (0...getLength).each { |pos| yield getName(pos) }
      end

      # Yields the attribute value at each position; Enumerator without a block.
      def each_value
        return enum_for(:each_value) unless block_given?

        (0...getLength).each { |pos| yield getValue(pos) }
      end

      # Returns an Array of [name, value] pairs in positional order.
      def to_a
        each.to_a
      end

      # Returns the pairs rendered as "{ name => value, ... }".
      def to_s
        rendered = each.map { |name, value| "#{name} => #{value}" }
        "{ " + rendered.join(", ") + " }"
      end
    end
  end
end
40
-
41
-