rfeedparser 0.9.8 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,432 @@
1
+ #!/usr/bin/ruby
2
+
3
# Lookup data shared by the feed parser.
module FeedParserUtilities
  # Maps character-encoding aliases to the encoding name used for conversion.
  # Adapted from python2.4's encodings/aliases.py.
  #
  # Keys are alias spellings (lowercase, with underscores in place of
  # hyphens); values are the target encoding names. Each "codec" section
  # below lists the aliases that resolve to one encoding.
  # NOTE(review): callers presumably normalize a declared encoding
  # (downcase, '-' => '_') before looking it up here -- confirm at call sites.
  Encoding_Aliases = {
    # MacOSX does not have Unicode as a separate encoding nor even
    # aliased. The original author's Ubuntu box had it as a separate
    # encoding, but the source for UNICODE.so could not be located, so it is
    # unknown what it expects. It is aliased to utf-16, much like Python does
    # when it is built with --enable-unicode=ucs2. This could be wrong.
    'unicode' => 'utf-16',

    # ascii codec
    '646' => 'ascii',
    'ansi_x3.4_1968' => 'ascii',
    'ansi_x3_4_1968' => 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986' => 'ascii',
    'cp367' => 'ascii',
    'csascii' => 'ascii',
    'ibm367' => 'ascii',
    'iso646_us' => 'ascii',
    'iso_646.irv_1991' => 'ascii',
    'iso_ir_6' => 'ascii',
    'us' => 'ascii',
    'us_ascii' => 'ascii',

    # big5 codec
    'big5_tw' => 'big5',
    'csbig5' => 'big5',

    # big5hkscs codec
    'big5_hkscs' => 'big5hkscs',
    'hkscs' => 'big5hkscs',

    # cp037 codec
    '037' => 'cp037',
    'csibm037' => 'cp037',
    'ebcdic_cp_ca' => 'cp037',
    'ebcdic_cp_nl' => 'cp037',
    'ebcdic_cp_us' => 'cp037',
    'ebcdic_cp_wt' => 'cp037',
    'ibm037' => 'cp037',
    'ibm039' => 'cp037',

    # cp1026 codec
    '1026' => 'cp1026',
    'csibm1026' => 'cp1026',
    'ibm1026' => 'cp1026',

    # cp1140 codec
    '1140' => 'cp1140',
    'ibm1140' => 'cp1140',

    # cp1250 codec
    '1250' => 'cp1250',
    'windows_1250' => 'cp1250',

    # cp1251 codec
    '1251' => 'cp1251',
    'windows_1251' => 'cp1251',

    # cp1252 codec
    '1252' => 'cp1252',
    'windows_1252' => 'cp1252',

    # cp1253 codec
    '1253' => 'cp1253',
    'windows_1253' => 'cp1253',

    # cp1254 codec
    '1254' => 'cp1254',
    'windows_1254' => 'cp1254',

    # cp1255 codec
    '1255' => 'cp1255',
    'windows_1255' => 'cp1255',

    # cp1256 codec
    '1256' => 'cp1256',
    'windows_1256' => 'cp1256',

    # cp1257 codec
    '1257' => 'cp1257',
    'windows_1257' => 'cp1257',

    # cp1258 codec
    '1258' => 'cp1258',
    'windows_1258' => 'cp1258',

    # cp424 codec
    '424' => 'cp424',
    'csibm424' => 'cp424',
    'ebcdic_cp_he' => 'cp424',
    'ibm424' => 'cp424',

    # cp437 codec
    '437' => 'cp437',
    'cspc8codepage437' => 'cp437',
    'ibm437' => 'cp437',

    # cp500 codec
    '500' => 'cp500',
    'csibm500' => 'cp500',
    'ebcdic_cp_be' => 'cp500',
    'ebcdic_cp_ch' => 'cp500',
    'ibm500' => 'cp500',

    # cp775 codec
    '775' => 'cp775',
    'cspc775baltic' => 'cp775',
    'ibm775' => 'cp775',

    # cp850 codec
    '850' => 'cp850',
    'cspc850multilingual' => 'cp850',
    'ibm850' => 'cp850',

    # cp852 codec
    '852' => 'cp852',
    'cspcp852' => 'cp852',
    'ibm852' => 'cp852',

    # cp855 codec
    '855' => 'cp855',
    'csibm855' => 'cp855',
    'ibm855' => 'cp855',

    # cp857 codec
    '857' => 'cp857',
    'csibm857' => 'cp857',
    'ibm857' => 'cp857',

    # cp860 codec
    '860' => 'cp860',
    'csibm860' => 'cp860',
    'ibm860' => 'cp860',

    # cp861 codec
    '861' => 'cp861',
    'cp_is' => 'cp861',
    'csibm861' => 'cp861',
    'ibm861' => 'cp861',

    # cp862 codec
    '862' => 'cp862',
    'cspc862latinhebrew' => 'cp862',
    'ibm862' => 'cp862',

    # cp863 codec
    '863' => 'cp863',
    'csibm863' => 'cp863',
    'ibm863' => 'cp863',

    # cp864 codec
    '864' => 'cp864',
    'csibm864' => 'cp864',
    'ibm864' => 'cp864',

    # cp865 codec
    '865' => 'cp865',
    'csibm865' => 'cp865',
    'ibm865' => 'cp865',

    # cp866 codec
    '866' => 'cp866',
    'csibm866' => 'cp866',
    'ibm866' => 'cp866',

    # cp869 codec
    '869' => 'cp869',
    'cp_gr' => 'cp869',
    'csibm869' => 'cp869',
    'ibm869' => 'cp869',

    # cp932 codec
    '932' => 'cp932',
    'ms932' => 'cp932',
    'mskanji' => 'cp932',
    'ms_kanji' => 'cp932',

    # cp949 codec
    '949' => 'cp949',
    'ms949' => 'cp949',
    'uhc' => 'cp949',

    # cp950 codec
    '950' => 'cp950',
    'ms950' => 'cp950',

    # euc_jp codec
    'euc_jp' => 'euc-jp',
    'eucjp' => 'euc-jp',
    'ujis' => 'euc-jp',
    'u_jis' => 'euc-jp',

    # euc_kr codec
    'euc_kr' => 'euc-kr',
    'euckr' => 'euc-kr',
    'korean' => 'euc-kr',
    'ksc5601' => 'euc-kr',
    'ks_c_5601' => 'euc-kr',
    'ks_c_5601_1987' => 'euc-kr',
    'ksx1001' => 'euc-kr',
    'ks_x_1001' => 'euc-kr',

    # gb18030 codec
    'gb18030_2000' => 'gb18030',

    # gb2312 codec
    'chinese' => 'gb2312',
    'csiso58gb231280' => 'gb2312',
    'euc_cn' => 'gb2312',
    'euccn' => 'gb2312',
    'eucgb2312_cn' => 'gb2312',
    'gb2312_1980' => 'gb2312',
    'gb2312_80' => 'gb2312',
    'iso_ir_58' => 'gb2312',

    # gbk codec
    '936' => 'gbk',
    'cp936' => 'gbk',
    'ms936' => 'gbk',

    # hp-roman8 codec
    'hp_roman8' => 'hp-roman8',
    'roman8' => 'hp-roman8',
    'r8' => 'hp-roman8',
    'csHPRoman8' => 'hp-roman8', # mixed-case spelling kept for compatibility
    'cshproman8' => 'hp-roman8', # lowercase form, matching every other key

    # iso2022_jp codec
    'iso2022_jp' => 'iso-2022-jp',
    'csiso2022jp' => 'iso-2022-jp',
    'iso2022jp' => 'iso-2022-jp',
    'iso_2022_jp' => 'iso-2022-jp',

    # iso2022_jp_1 codec
    'iso2022_jp_1' => 'iso-2022-jp-1', # key was misspelled 'iso2002_jp_1'
    'iso2022jp_1' => 'iso-2022-jp-1',
    'iso_2022_jp_1' => 'iso-2022-jp-1',

    # iso2022_jp_2 codec
    'iso2022_jp_2' => 'iso-2022-jp-2', # value was misspelled 'iso-2002-jp-2'
    'iso2022jp_2' => 'iso-2022-jp-2',
    'iso_2022_jp_2' => 'iso-2022-jp-2',

    # iso2022_jp_3 codec
    'iso2022_jp_3' => 'iso-2022-jp-3', # key was misspelled 'iso2002_jp_3'
    'iso2022jp_3' => 'iso-2022-jp-3',
    'iso_2022_jp_3' => 'iso-2022-jp-3',

    # iso2022_kr codec
    'iso2022_kr' => 'iso-2022-kr',
    'csiso2022kr' => 'iso-2022-kr',
    'iso2022kr' => 'iso-2022-kr',
    'iso_2022_kr' => 'iso-2022-kr',

    # iso8859_10 codec
    'iso8859_10' => 'iso-8859-10',
    'csisolatin6' => 'iso-8859-10',
    'iso_8859_10' => 'iso-8859-10',
    'iso_8859_10_1992' => 'iso-8859-10',
    'iso_ir_157' => 'iso-8859-10',
    'l6' => 'iso-8859-10',
    'latin6' => 'iso-8859-10',

    # iso8859_13 codec
    'iso8859_13' => 'iso-8859-13',
    'iso_8859_13' => 'iso-8859-13',

    # iso8859_14 codec
    'iso8859_14' => 'iso-8859-14',
    'iso_8859_14' => 'iso-8859-14',
    'iso_8859_14_1998' => 'iso-8859-14',
    'iso_celtic' => 'iso-8859-14',
    'iso_ir_199' => 'iso-8859-14',
    'l8' => 'iso-8859-14',
    'latin8' => 'iso-8859-14',

    # iso8859_15 codec
    'iso8859_15' => 'iso-8859-15',
    'iso_8859_15' => 'iso-8859-15',

    # iso8859_1 codec
    'latin_1' => 'iso-8859-1',
    'cp819' => 'iso-8859-1',
    'csisolatin1' => 'iso-8859-1',
    'ibm819' => 'iso-8859-1',
    'iso8859' => 'iso-8859-1',
    'iso_8859_1' => 'iso-8859-1',
    'iso_8859_1_1987' => 'iso-8859-1',
    'iso_ir_100' => 'iso-8859-1',
    'l1' => 'iso-8859-1',
    'latin' => 'iso-8859-1',
    'latin1' => 'iso-8859-1',

    # iso8859_2 codec
    'iso8859_2' => 'iso-8859-2',
    'csisolatin2' => 'iso-8859-2',
    'iso_8859_2' => 'iso-8859-2',
    'iso_8859_2_1987' => 'iso-8859-2',
    'iso_ir_101' => 'iso-8859-2',
    'l2' => 'iso-8859-2',
    'latin2' => 'iso-8859-2',

    # iso8859_3 codec
    'iso8859_3' => 'iso-8859-3',
    'csisolatin3' => 'iso-8859-3',
    'iso_8859_3' => 'iso-8859-3',
    'iso_8859_3_1988' => 'iso-8859-3',
    'iso_ir_109' => 'iso-8859-3',
    'l3' => 'iso-8859-3',
    'latin3' => 'iso-8859-3',

    # iso8859_4 codec
    'iso8859_4' => 'iso-8859-4', # key was misspelled 'iso8849_4'
    'csisolatin4' => 'iso-8859-4',
    'iso_8859_4' => 'iso-8859-4',
    'iso_8859_4_1988' => 'iso-8859-4',
    'iso_ir_110' => 'iso-8859-4',
    'l4' => 'iso-8859-4',
    'latin4' => 'iso-8859-4',

    # iso8859_5 codec
    'iso8859_5' => 'iso-8859-5',
    'csisolatincyrillic' => 'iso-8859-5',
    'cyrillic' => 'iso-8859-5',
    'iso_8859_5' => 'iso-8859-5',
    'iso_8859_5_1988' => 'iso-8859-5',
    'iso_ir_144' => 'iso-8859-5',

    # iso8859_6 codec
    'iso8859_6' => 'iso-8859-6',
    'arabic' => 'iso-8859-6',
    'asmo_708' => 'iso-8859-6',
    'csisolatinarabic' => 'iso-8859-6',
    'ecma_114' => 'iso-8859-6',
    'iso_8859_6' => 'iso-8859-6',
    'iso_8859_6_1987' => 'iso-8859-6',
    'iso_ir_127' => 'iso-8859-6',

    # iso8859_7 codec
    'iso8859_7' => 'iso-8859-7',
    'csisolatingreek' => 'iso-8859-7',
    'ecma_118' => 'iso-8859-7',
    'elot_928' => 'iso-8859-7',
    'greek' => 'iso-8859-7',
    'greek8' => 'iso-8859-7',
    'iso_8859_7' => 'iso-8859-7',
    'iso_8859_7_1987' => 'iso-8859-7',
    'iso_ir_126' => 'iso-8859-7',

    # iso8859_8 codec
    # This entry used to read 'iso8859_9' => 'iso8859_8': a duplicate key that
    # the iso8859_9 section overwrote, leaving 'iso8859_8' unmapped.
    'iso8859_8' => 'iso-8859-8',
    'csisolatinhebrew' => 'iso-8859-8',
    'hebrew' => 'iso-8859-8',
    'iso_8859_8' => 'iso-8859-8',
    'iso_8859_8_1988' => 'iso-8859-8',
    'iso_ir_138' => 'iso-8859-8',

    # iso8859_9 codec
    'iso8859_9' => 'iso-8859-9',
    'csisolatin5' => 'iso-8859-9',
    'iso_8859_9' => 'iso-8859-9',
    'iso_8859_9_1989' => 'iso-8859-9',
    'iso_ir_148' => 'iso-8859-9',
    'l5' => 'iso-8859-9',
    'latin5' => 'iso-8859-9',

    # iso8859_11 codec
    'iso8859_11' => 'iso-8859-11',
    'thai' => 'iso-8859-11',
    'iso_8859_11' => 'iso-8859-11',
    'iso_8859_11_2001' => 'iso-8859-11',

    # iso8859_16 codec
    'iso8859_16' => 'iso-8859-16',
    'iso_8859_16' => 'iso-8859-16',
    'iso_8859_16_2001' => 'iso-8859-16',
    'iso_ir_226' => 'iso-8859-16',
    'l10' => 'iso-8859-16',
    'latin10' => 'iso-8859-16',

    # cskoi8r codec
    'koi8_r' => 'cskoi8r',

    # mac_cyrillic codec
    'mac_cyrillic' => 'maccyrillic',

    # shift_jis codec
    'csshiftjis' => 'shift_jis',
    'shiftjis' => 'shift_jis',
    'sjis' => 'shift_jis',
    's_jis' => 'shift_jis',

    # shift_jisx0213 codec
    'shiftjisx0213' => 'shift_jisx0213',
    'sjisx0213' => 'shift_jisx0213',
    's_jisx0213' => 'shift_jisx0213',

    # utf_16 codec
    'utf_16' => 'utf-16',
    'u16' => 'utf-16',
    'utf16' => 'utf-16',

    # utf_16_be codec
    'utf_16_be' => 'utf-16be',
    'unicodebigunmarked' => 'utf-16be',
    'utf_16be' => 'utf-16be',

    # utf_16_le codec
    'utf_16_le' => 'utf-16le',
    'unicodelittleunmarked' => 'utf-16le',
    'utf_16le' => 'utf-16le',

    # utf_7 codec
    'utf_7' => 'utf-7',
    'u7' => 'utf-7',
    'utf7' => 'utf-7',

    # utf_8 codec
    'utf_8' => 'utf-8',
    'u8' => 'utf-8',
    'utf' => 'utf-8',
    'utf8' => 'utf-8',
    'utf8_ucs2' => 'utf-8',
    'utf8_ucs4' => 'utf-8',
  }
end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/ruby
2
+
3
+ # Add some helper methods to make AttributeList (all of those damn attrs
4
+ # and attrsD used by StrictFeedParser) act more like a Hash.
5
+ # NOTE AttributeList is still Read-Only (AFAICT).
6
+ # Monkey patching is terrible, and I have an addiction.
7
module XML
  module SAX
    # Read-only, Hash-like convenience methods layered on top of the SAX
    # AttributeList interface (defined in xml/sax.rb).
    #
    # Every method here is built only from the three accessors the including
    # object is expected to provide: getLength, getName(pos) and
    # getValue(pos_or_key).
    # NOTE(review): getValue is called below with both an attribute name
    # (in #[]) and an integer position (in the iterators) -- confirm the
    # underlying implementation accepts both.
    module AttributeList # in xml/sax.rb
      # Returns the value of the attribute identified by +key+
      # (delegates straight to getValue).
      def [](key)
        getValue(key)
      end

      # Yields each attribute as a [name, value] pair, in positional order.
      # Without a block, returns an Enumerator instead of raising
      # LocalJumpError.
      def each
        return enum_for(:each) unless block_given?

        (0...getLength).each { |pos| yield [getName(pos), getValue(pos)] }
      end

      # Yields each attribute name, in positional order.
      # Without a block, returns an Enumerator.
      def each_key
        return enum_for(:each_key) unless block_given?

        (0...getLength).each { |pos| yield getName(pos) }
      end

      # Yields each attribute value, in positional order.
      # Without a block, returns an Enumerator.
      def each_value
        return enum_for(:each_value) unless block_given?

        (0...getLength).each { |pos| yield getValue(pos) }
      end

      # Returns an Array of [name, value] pairs, in positional order.
      def to_a
        each.to_a
      end

      # Returns a Hash-looking string such as "{ a => 1, b => 2 }".
      # An empty list renders as "{  }" (two spaces), as it always has.
      def to_s
        rendered = each.map { |name, value| "#{name} => #{value}" }
        "{ " + rendered.join(", ") + " }"
      end
    end
  end
end
40
+
41
+