UnderpantsGnome-rfeedparser 0.9.960

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,432 @@
1
+ #!/usr/bin/env ruby
2
+
3
module FeedParserUtilities
  # Lookup table from encoding aliases to the encoding names stored as values.
  # Adapted from python2.4's encodings/aliases.py.
  #
  # Keys are alias spellings (lowercase, with underscores in place of
  # hyphens, except where noted); each section's heading names the codec
  # the aliases belong to.
  Encoding_Aliases = {
    # Original author's note: MacOSX does not have Unicode as a separate
    # encoding nor even aliased. An Ubuntu box has it as a separate encoding,
    # but the source for UNICODE.so could not be located, so what it expects
    # is unknown. It is aliased to utf-16, much like Python does when it is
    # built with --enable-unicode=ucs2. This could be seriously wrong.
    'unicode' => 'utf-16',

    # ascii codec
    '646' => 'ascii',
    'ansi_x3.4_1968' => 'ascii',
    'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986' => 'ascii',
    'cp367' => 'ascii',
    'csascii' => 'ascii',
    'ibm367' => 'ascii',
    'iso646_us' => 'ascii',
    'iso_646.irv_1991' => 'ascii',
    'iso_ir_6' => 'ascii',
    'us' => 'ascii',
    'us_ascii' => 'ascii',

    # big5 codec
    'big5_tw' => 'big5',
    'csbig5' => 'big5',

    # big5hkscs codec
    'big5_hkscs' => 'big5hkscs',
    'hkscs' => 'big5hkscs',

    # cp037 codec
    '037' => 'cp037',
    'csibm037' => 'cp037',
    'ebcdic_cp_ca' => 'cp037',
    'ebcdic_cp_nl' => 'cp037',
    'ebcdic_cp_us' => 'cp037',
    'ebcdic_cp_wt' => 'cp037',
    'ibm037' => 'cp037',
    'ibm039' => 'cp037',

    # cp1026 codec
    '1026' => 'cp1026',
    'csibm1026' => 'cp1026',
    'ibm1026' => 'cp1026',

    # cp1140 codec
    '1140' => 'cp1140',
    'ibm1140' => 'cp1140',

    # cp1250 codec
    '1250' => 'cp1250',
    'windows_1250' => 'cp1250',

    # cp1251 codec
    '1251' => 'cp1251',
    'windows_1251' => 'cp1251',

    # cp1252 codec
    '1252' => 'cp1252',
    'windows_1252' => 'cp1252',

    # cp1253 codec
    '1253' => 'cp1253',
    'windows_1253' => 'cp1253',

    # cp1254 codec
    '1254' => 'cp1254',
    'windows_1254' => 'cp1254',

    # cp1255 codec
    '1255' => 'cp1255',
    'windows_1255' => 'cp1255',

    # cp1256 codec
    '1256' => 'cp1256',
    'windows_1256' => 'cp1256',

    # cp1257 codec
    '1257' => 'cp1257',
    'windows_1257' => 'cp1257',

    # cp1258 codec
    '1258' => 'cp1258',
    'windows_1258' => 'cp1258',

    # cp424 codec
    '424' => 'cp424',
    'csibm424' => 'cp424',
    'ebcdic_cp_he' => 'cp424',
    'ibm424' => 'cp424',

    # cp437 codec
    '437' => 'cp437',
    'cspc8codepage437' => 'cp437',
    'ibm437' => 'cp437',

    # cp500 codec
    '500' => 'cp500',
    'csibm500' => 'cp500',
    'ebcdic_cp_be' => 'cp500',
    'ebcdic_cp_ch' => 'cp500',
    'ibm500' => 'cp500',

    # cp775 codec
    '775' => 'cp775',
    'cspc775baltic' => 'cp775',
    'ibm775' => 'cp775',

    # cp850 codec
    '850' => 'cp850',
    'cspc850multilingual' => 'cp850',
    'ibm850' => 'cp850',

    # cp852 codec
    '852' => 'cp852',
    'cspcp852' => 'cp852',
    'ibm852' => 'cp852',

    # cp855 codec
    '855' => 'cp855',
    'csibm855' => 'cp855',
    'ibm855' => 'cp855',

    # cp857 codec
    '857' => 'cp857',
    'csibm857' => 'cp857',
    'ibm857' => 'cp857',

    # cp860 codec
    '860' => 'cp860',
    'csibm860' => 'cp860',
    'ibm860' => 'cp860',

    # cp861 codec
    '861' => 'cp861',
    'cp_is' => 'cp861',
    'csibm861' => 'cp861',
    'ibm861' => 'cp861',

    # cp862 codec
    '862' => 'cp862',
    'cspc862latinhebrew' => 'cp862',
    'ibm862' => 'cp862',

    # cp863 codec
    '863' => 'cp863',
    'csibm863' => 'cp863',
    'ibm863' => 'cp863',

    # cp864 codec
    '864' => 'cp864',
    'csibm864' => 'cp864',
    'ibm864' => 'cp864',

    # cp865 codec
    '865' => 'cp865',
    'csibm865' => 'cp865',
    'ibm865' => 'cp865',

    # cp866 codec
    '866' => 'cp866',
    'csibm866' => 'cp866',
    'ibm866' => 'cp866',

    # cp869 codec
    '869' => 'cp869',
    'cp_gr' => 'cp869',
    'csibm869' => 'cp869',
    'ibm869' => 'cp869',

    # cp932 codec
    '932' => 'cp932',
    'ms932' => 'cp932',
    'mskanji' => 'cp932',
    'ms_kanji' => 'cp932',

    # cp949 codec
    '949' => 'cp949',
    'ms949' => 'cp949',
    'uhc' => 'cp949',

    # cp950 codec
    '950' => 'cp950',
    'ms950' => 'cp950',

    # euc_jp codec
    'euc_jp' => 'euc-jp',
    'eucjp' => 'euc-jp',
    'ujis' => 'euc-jp',
    'u_jis' => 'euc-jp',

    # euc_kr codec
    'euc_kr' => 'euc-kr',
    'euckr' => 'euc-kr',
    'korean' => 'euc-kr',
    'ksc5601' => 'euc-kr',
    'ks_c_5601' => 'euc-kr',
    'ks_c_5601_1987' => 'euc-kr',
    'ksx1001' => 'euc-kr',
    'ks_x_1001' => 'euc-kr',

    # gb18030 codec
    'gb18030_2000' => 'gb18030',

    # gb2312 codec
    'chinese' => 'gb2312',
    'csiso58gb231280' => 'gb2312',
    'euc_cn' => 'gb2312',
    'euccn' => 'gb2312',
    'eucgb2312_cn' => 'gb2312',
    'gb2312_1980' => 'gb2312',
    'gb2312_80' => 'gb2312',
    'iso_ir_58' => 'gb2312',

    # gbk codec
    '936' => 'gbk',
    'cp936' => 'gbk',
    'ms936' => 'gbk',

    # hp-roman8 codec
    'hp_roman8' => 'hp-roman8',
    'roman8' => 'hp-roman8',
    'r8' => 'hp-roman8',
    'csHPRoman8' => 'hp-roman8',
    # Every other key in this table is lowercase, so the lowercase spelling is
    # provided as well. NOTE(review): confirm whether callers downcase the
    # name before lookup; if they do, the mixed-case key above never matches.
    'cshproman8' => 'hp-roman8',

    # iso2022_jp codec
    'iso2022_jp' => 'iso-2022-jp',
    'csiso2022jp' => 'iso-2022-jp',
    'iso2022jp' => 'iso-2022-jp',
    'iso_2022_jp' => 'iso-2022-jp',

    # iso2022_jp_1 codec
    'iso2022_jp_1' => 'iso-2022-jp-1',
    'iso2022jp_1' => 'iso-2022-jp-1',
    'iso_2022_jp_1' => 'iso-2022-jp-1',

    # iso2022_jp_2 codec
    'iso2022_jp_2' => 'iso-2022-jp-2',
    'iso2022jp_2' => 'iso-2022-jp-2',
    'iso_2022_jp_2' => 'iso-2022-jp-2',

    # iso2022_jp_3 codec
    'iso2022_jp_3' => 'iso-2022-jp-3',
    'iso2022jp_3' => 'iso-2022-jp-3',
    'iso_2022_jp_3' => 'iso-2022-jp-3',

    # iso2022_kr codec
    'iso2022_kr' => 'iso-2022-kr',
    'csiso2022kr' => 'iso-2022-kr',
    'iso2022kr' => 'iso-2022-kr',
    'iso_2022_kr' => 'iso-2022-kr',

    # iso8859_10 codec
    'iso8859_10' => 'iso-8859-10',
    'csisolatin6' => 'iso-8859-10',
    'iso_8859_10' => 'iso-8859-10',
    'iso_8859_10_1992' => 'iso-8859-10',
    'iso_ir_157' => 'iso-8859-10',
    'l6' => 'iso-8859-10',
    'latin6' => 'iso-8859-10',

    # iso8859_13 codec
    'iso8859_13' => 'iso-8859-13',
    'iso_8859_13' => 'iso-8859-13',

    # iso8859_14 codec
    'iso8859_14' => 'iso-8859-14',
    'iso_8859_14' => 'iso-8859-14',
    'iso_8859_14_1998' => 'iso-8859-14',
    'iso_celtic' => 'iso-8859-14',
    'iso_ir_199' => 'iso-8859-14',
    'l8' => 'iso-8859-14',
    'latin8' => 'iso-8859-14',

    # iso8859_15 codec
    'iso8859_15' => 'iso-8859-15',
    'iso_8859_15' => 'iso-8859-15',

    # iso8859_1 codec
    'latin_1' => 'iso-8859-1',
    'cp819' => 'iso-8859-1',
    'csisolatin1' => 'iso-8859-1',
    'ibm819' => 'iso-8859-1',
    'iso8859' => 'iso-8859-1',
    'iso_8859_1' => 'iso-8859-1',
    'iso_8859_1_1987' => 'iso-8859-1',
    'iso_ir_100' => 'iso-8859-1',
    'l1' => 'iso-8859-1',
    'latin' => 'iso-8859-1',
    'latin1' => 'iso-8859-1',

    # iso8859_2 codec
    'iso8859_2' => 'iso-8859-2',
    'csisolatin2' => 'iso-8859-2',
    'iso_8859_2' => 'iso-8859-2',
    'iso_8859_2_1987' => 'iso-8859-2',
    'iso_ir_101' => 'iso-8859-2',
    'l2' => 'iso-8859-2',
    'latin2' => 'iso-8859-2',

    # iso8859_3 codec
    'iso8859_3' => 'iso-8859-3',
    'csisolatin3' => 'iso-8859-3',
    'iso_8859_3' => 'iso-8859-3',
    'iso_8859_3_1988' => 'iso-8859-3',
    'iso_ir_109' => 'iso-8859-3',
    'l3' => 'iso-8859-3',
    'latin3' => 'iso-8859-3',

    # iso8859_4 codec
    'iso8859_4' => 'iso-8859-4',
    'csisolatin4' => 'iso-8859-4',
    'iso_8859_4' => 'iso-8859-4',
    'iso_8859_4_1988' => 'iso-8859-4',
    'iso_ir_110' => 'iso-8859-4',
    'l4' => 'iso-8859-4',
    'latin4' => 'iso-8859-4',

    # iso8859_5 codec
    'iso8859_5' => 'iso-8859-5',
    'csisolatincyrillic' => 'iso-8859-5',
    'cyrillic' => 'iso-8859-5',
    'iso_8859_5' => 'iso-8859-5',
    'iso_8859_5_1988' => 'iso-8859-5',
    'iso_ir_144' => 'iso-8859-5',

    # iso8859_6 codec
    'iso8859_6' => 'iso-8859-6',
    'arabic' => 'iso-8859-6',
    'asmo_708' => 'iso-8859-6',
    'csisolatinarabic' => 'iso-8859-6',
    'ecma_114' => 'iso-8859-6',
    'iso_8859_6' => 'iso-8859-6',
    'iso_8859_6_1987' => 'iso-8859-6',
    'iso_ir_127' => 'iso-8859-6',

    # iso8859_7 codec
    'iso8859_7' => 'iso-8859-7',
    'csisolatingreek' => 'iso-8859-7',
    'ecma_118' => 'iso-8859-7',
    'elot_928' => 'iso-8859-7',
    'greek' => 'iso-8859-7',
    'greek8' => 'iso-8859-7',
    'iso_8859_7' => 'iso-8859-7',
    'iso_8859_7_1987' => 'iso-8859-7',
    'iso_ir_126' => 'iso-8859-7',

    # iso8859_8 codec
    'iso8859_8' => 'iso-8859-8',
    'csisolatinhebrew' => 'iso-8859-8',
    'hebrew' => 'iso-8859-8',
    'iso_8859_8' => 'iso-8859-8',
    'iso_8859_8_1988' => 'iso-8859-8',
    'iso_ir_138' => 'iso-8859-8',

    # iso8859_9 codec
    'iso8859_9' => 'iso-8859-9',
    'csisolatin5' => 'iso-8859-9',
    'iso_8859_9' => 'iso-8859-9',
    'iso_8859_9_1989' => 'iso-8859-9',
    'iso_ir_148' => 'iso-8859-9',
    'l5' => 'iso-8859-9',
    'latin5' => 'iso-8859-9',

    # iso8859_11 codec
    'iso8859_11' => 'iso-8859-11',
    'thai' => 'iso-8859-11',
    'iso_8859_11' => 'iso-8859-11',
    'iso_8859_11_2001' => 'iso-8859-11',

    # iso8859_16 codec
    'iso8859_16' => 'iso-8859-16',
    'iso_8859_16' => 'iso-8859-16',
    'iso_8859_16_2001' => 'iso-8859-16',
    'iso_ir_226' => 'iso-8859-16',
    'l10' => 'iso-8859-16',
    'latin10' => 'iso-8859-16',

    # cskoi8r codec
    'koi8_r' => 'cskoi8r',

    # mac_cyrillic codec
    'mac_cyrillic' => 'maccyrillic',

    # shift_jis codec
    'csshiftjis' => 'shift_jis',
    'shiftjis' => 'shift_jis',
    'sjis' => 'shift_jis',
    's_jis' => 'shift_jis',

    # shift_jisx0213 codec
    'shiftjisx0213' => 'shift_jisx0213',
    'sjisx0213' => 'shift_jisx0213',
    's_jisx0213' => 'shift_jisx0213',

    # utf_16 codec
    'utf_16' => 'utf-16',
    'u16' => 'utf-16',
    'utf16' => 'utf-16',

    # utf_16_be codec
    'utf_16_be' => 'utf-16be',
    'unicodebigunmarked' => 'utf-16be',
    'utf_16be' => 'utf-16be',

    # utf_16_le codec
    'utf_16_le' => 'utf-16le',
    'unicodelittleunmarked' => 'utf-16le',
    'utf_16le' => 'utf-16le',

    # utf_7 codec
    'utf_7' => 'utf-7',
    'u7' => 'utf-7',
    'utf7' => 'utf-7',

    # utf_8 codec
    'utf_8' => 'utf-8',
    'u8' => 'utf-8',
    'utf' => 'utf-8',
    'utf8' => 'utf-8',
    'utf8_ucs2' => 'utf-8',
    'utf8_ucs4' => 'utf-8',
  }
end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Add some helper methods to make AttributeList (the attrs and attrsD
# objects used by StrictFeedParser) act more like a Hash.
# NOTE AttributeList is still Read-Only (AFAICT).
# Monkey patching is terrible, and I have an addiction.
#
# Every helper is built only on getLength, getName(pos) and getValue, which
# the including object is expected to provide.
module XML
  module SAX
    module AttributeList # in xml/sax.rb
      # Hash-style read access; delegates straight to getValue(key).
      def [](key)
        getValue(key)
      end

      # Yields a [name, value] pair for each position from 0 up to
      # getLength - 1. Without a block, returns an enumerator over the same
      # pairs instead of raising LocalJumpError.
      def each
        return enum_for(:each) unless block_given?

        (0...getLength).each { |pos| yield [getName(pos), getValue(pos)] }
      end

      # Yields each attribute name in positional order; returns an
      # enumerator when no block is given.
      def each_key
        return enum_for(:each_key) unless block_given?

        (0...getLength).each { |pos| yield getName(pos) }
      end

      # Yields each attribute value in positional order; returns an
      # enumerator when no block is given.
      def each_value
        return enum_for(:each_value) unless block_given?

        (0...getLength).each { |pos| yield getValue(pos) }
      end

      # Returns a new Array of [name, value] pairs in positional order.
      def to_a
        (0...getLength).map { |pos| [getName(pos), getValue(pos)] }
      end

      # Renders the attributes as "{ name => value, ... }"; an empty list
      # renders as "{  }" (two spaces).
      def to_s
        pairs = (0...getLength).map { |pos| "#{getName(pos)} => #{getValue(pos)}" }
        "{ " + pairs.join(", ") + " }"
      end
    end
  end
end
40
+
41
+