kindle-feeds 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2008-08-22
2
+
3
+ * 1 major enhancement
4
+
5
+ * Initial release.
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,14 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/kindle-feeds
6
+ lib/kindle-feeds.rb
7
+ lib/htmlentities.rb
8
+ lib/htmlentities/html4.rb
9
+ lib/htmlentities/legacy.rb
10
+ lib/htmlentities/string.rb
11
+ lib/htmlentities/xhtml1.rb
12
+ lib/kindle-feeds.erb.html
13
+
14
+
data/README.txt ADDED
@@ -0,0 +1,48 @@
1
+ = kindle-feeds
2
+
3
+ * http://danielchoi.com/software/kindle-feeds.html
4
+
5
+ == DESCRIPTION:
6
+
7
+ Format Atom and RSS feeds for the Kindle.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * feed-normalizer (>= 1.5.1)
+ * hpricot (>= 0.6)
20
+
21
+ == INSTALL:
22
+
23
+ * sudo gem install kindle-feeds
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2008 Daniel Choi
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ $: << 'lib/'
6
+ require './lib/kindle-feeds.rb'
7
+ require './lib/htmlentities.rb'
8
+
9
# Gem specification for kindle-feeds, declared through Hoe.
# The gem version is taken from KindleFeeds::VERSION.
Hoe.new('kindle-feeds', KindleFeeds::VERSION) do |spec|
  # spec.rubyforge_name = 'kindle-feedsx' # if different than lowercase project name
  spec.author = 'Daniel Choi'
  spec.email = 'dhchoi@gmail.com'
  spec.url = "http://danielchoi.com/software/kindle-feeds.html"

  # Summary and description intentionally carry the same text.
  spec.summary = "Format Atom and RSS feeds for the Kindle."
  spec.description = "Format Atom and RSS feeds for the Kindle."

  # Runtime dependencies, appended in the same order as before.
  [
    ['feed-normalizer', '>= 1.5.1'],
    ['hpricot', '>= 0.6']
  ].each { |dependency| spec.extra_deps << dependency }

  spec.post_install_message = 'Type kindle-feeds -h for instructions.'
end
20
+
21
+ # vim: syntax=Ruby
data/bin/kindle-feeds ADDED
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Command-line entry point: loads the library and hands the raw
# command-line arguments to KindleFeeds.run.
require 'kindle-feeds'
KindleFeeds.run(ARGV)
4
+
@@ -0,0 +1,257 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['html4'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'Aring' => 197,
20
+ 'aring' => 229,
21
+ 'asymp' => 8776,
22
+ 'Atilde' => 195,
23
+ 'atilde' => 227,
24
+ 'Auml' => 196,
25
+ 'auml' => 228,
26
+ 'bdquo' => 8222,
27
+ 'Beta' => 914,
28
+ 'beta' => 946,
29
+ 'brvbar' => 166,
30
+ 'bull' => 8226,
31
+ 'cap' => 8745,
32
+ 'Ccedil' => 199,
33
+ 'ccedil' => 231,
34
+ 'cedil' => 184,
35
+ 'cent' => 162,
36
+ 'Chi' => 935,
37
+ 'chi' => 967,
38
+ 'circ' => 710,
39
+ 'clubs' => 9827,
40
+ 'cong' => 8773,
41
+ 'copy' => 169,
42
+ 'crarr' => 8629,
43
+ 'cup' => 8746,
44
+ 'curren' => 164,
45
+ 'Dagger' => 8225,
46
+ 'dagger' => 8224,
47
+ 'dArr' => 8659,
48
+ 'darr' => 8595,
49
+ 'deg' => 176,
50
+ 'Delta' => 916,
51
+ 'delta' => 948,
52
+ 'diams' => 9830,
53
+ 'divide' => 247,
54
+ 'Eacute' => 201,
55
+ 'eacute' => 233,
56
+ 'Ecirc' => 202,
57
+ 'ecirc' => 234,
58
+ 'Egrave' => 200,
59
+ 'egrave' => 232,
60
+ 'empty' => 8709,
61
+ 'emsp' => 8195,
62
+ 'ensp' => 8194,
63
+ 'Epsilon' => 917,
64
+ 'epsilon' => 949,
65
+ 'equiv' => 8801,
66
+ 'Eta' => 919,
67
+ 'eta' => 951,
68
+ 'ETH' => 208,
69
+ 'eth' => 240,
70
+ 'Euml' => 203,
71
+ 'euml' => 235,
72
+ 'euro' => 8364,
73
+ 'exist' => 8707,
74
+ 'fnof' => 402,
75
+ 'forall' => 8704,
76
+ 'frac12' => 189,
77
+ 'frac14' => 188,
78
+ 'frac34' => 190,
79
+ 'frasl' => 8260,
80
+ 'Gamma' => 915,
81
+ 'gamma' => 947,
82
+ 'ge' => 8805,
83
+ 'gt' => 62,
84
+ 'hArr' => 8660,
85
+ 'harr' => 8596,
86
+ 'hearts' => 9829,
87
+ 'hellip' => 8230,
88
+ 'Iacute' => 205,
89
+ 'iacute' => 237,
90
+ 'Icirc' => 206,
91
+ 'icirc' => 238,
92
+ 'iexcl' => 161,
93
+ 'Igrave' => 204,
94
+ 'igrave' => 236,
95
+ 'image' => 8465,
96
+ 'infin' => 8734,
97
+ 'int' => 8747,
98
+ 'Iota' => 921,
99
+ 'iota' => 953,
100
+ 'iquest' => 191,
101
+ 'isin' => 8712,
102
+ 'Iuml' => 207,
103
+ 'iuml' => 239,
104
+ 'Kappa' => 922,
105
+ 'kappa' => 954,
106
+ 'Lambda' => 923,
107
+ 'lambda' => 955,
108
+ 'lang' => 9001,
109
+ 'laquo' => 171,
110
+ 'lArr' => 8656,
111
+ 'larr' => 8592,
112
+ 'lceil' => 8968,
113
+ 'ldquo' => 8220,
114
+ 'le' => 8804,
115
+ 'lfloor' => 8970,
116
+ 'lowast' => 8727,
117
+ 'loz' => 9674,
118
+ 'lrm' => 8206,
119
+ 'lsaquo' => 8249,
120
+ 'lsquo' => 8216,
121
+ 'lt' => 60,
122
+ 'macr' => 175,
123
+ 'mdash' => 8212,
124
+ 'micro' => 181,
125
+ 'middot' => 183,
126
+ 'minus' => 8722,
127
+ 'Mu' => 924,
128
+ 'mu' => 956,
129
+ 'nabla' => 8711,
130
+ 'nbsp' => 160,
131
+ 'ndash' => 8211,
132
+ 'ne' => 8800,
133
+ 'ni' => 8715,
134
+ 'not' => 172,
135
+ 'notin' => 8713,
136
+ 'nsub' => 8836,
137
+ 'Ntilde' => 209,
138
+ 'ntilde' => 241,
139
+ 'Nu' => 925,
140
+ 'nu' => 957,
141
+ 'Oacute' => 211,
142
+ 'oacute' => 243,
143
+ 'Ocirc' => 212,
144
+ 'ocirc' => 244,
145
+ 'OElig' => 338,
146
+ 'oelig' => 339,
147
+ 'Ograve' => 210,
148
+ 'ograve' => 242,
149
+ 'oline' => 8254,
150
+ 'Omega' => 937,
151
+ 'omega' => 969,
152
+ 'Omicron' => 927,
153
+ 'omicron' => 959,
154
+ 'oplus' => 8853,
155
+ 'or' => 8744,
156
+ 'ordf' => 170,
157
+ 'ordm' => 186,
158
+ 'Oslash' => 216,
159
+ 'oslash' => 248,
160
+ 'Otilde' => 213,
161
+ 'otilde' => 245,
162
+ 'otimes' => 8855,
163
+ 'Ouml' => 214,
164
+ 'ouml' => 246,
165
+ 'para' => 182,
166
+ 'part' => 8706,
167
+ 'permil' => 8240,
168
+ 'perp' => 8869,
169
+ 'Phi' => 934,
170
+ 'phi' => 966,
171
+ 'Pi' => 928,
172
+ 'pi' => 960,
173
+ 'piv' => 982,
174
+ 'plusmn' => 177,
175
+ 'pound' => 163,
176
+ 'Prime' => 8243,
177
+ 'prime' => 8242,
178
+ 'prod' => 8719,
179
+ 'prop' => 8733,
180
+ 'Psi' => 936,
181
+ 'psi' => 968,
182
+ 'quot' => 34,
183
+ 'radic' => 8730,
184
+ 'rang' => 9002,
185
+ 'raquo' => 187,
186
+ 'rArr' => 8658,
187
+ 'rarr' => 8594,
188
+ 'rceil' => 8969,
189
+ 'rdquo' => 8221,
190
+ 'real' => 8476,
191
+ 'reg' => 174,
192
+ 'rfloor' => 8971,
193
+ 'Rho' => 929,
194
+ 'rho' => 961,
195
+ 'rlm' => 8207,
196
+ 'rsaquo' => 8250,
197
+ 'rsquo' => 8217,
198
+ 'sbquo' => 8218,
199
+ 'Scaron' => 352,
200
+ 'scaron' => 353,
201
+ 'sdot' => 8901,
202
+ 'sect' => 167,
203
+ 'shy' => 173,
204
+ 'Sigma' => 931,
205
+ 'sigma' => 963,
206
+ 'sigmaf' => 962,
207
+ 'sim' => 8764,
208
+ 'spades' => 9824,
209
+ 'sub' => 8834,
210
+ 'sube' => 8838,
211
+ 'sum' => 8721,
212
+ 'sup' => 8835,
213
+ 'sup1' => 185,
214
+ 'sup2' => 178,
215
+ 'sup3' => 179,
216
+ 'supe' => 8839,
217
+ 'szlig' => 223,
218
+ 'Tau' => 932,
219
+ 'tau' => 964,
220
+ 'there4' => 8756,
221
+ 'Theta' => 920,
222
+ 'theta' => 952,
223
+ 'thetasym' => 977,
224
+ 'thinsp' => 8201,
225
+ 'THORN' => 222,
226
+ 'thorn' => 254,
227
+ 'tilde' => 732,
228
+ 'times' => 215,
229
+ 'trade' => 8482,
230
+ 'Uacute' => 218,
231
+ 'uacute' => 250,
232
+ 'uArr' => 8657,
233
+ 'uarr' => 8593,
234
+ 'Ucirc' => 219,
235
+ 'ucirc' => 251,
236
+ 'Ugrave' => 217,
237
+ 'ugrave' => 249,
238
+ 'uml' => 168,
239
+ 'upsih' => 978,
240
+ 'Upsilon' => 933,
241
+ 'upsilon' => 965,
242
+ 'Uuml' => 220,
243
+ 'uuml' => 252,
244
+ 'weierp' => 8472,
245
+ 'Xi' => 926,
246
+ 'xi' => 958,
247
+ 'Yacute' => 221,
248
+ 'yacute' => 253,
249
+ 'yen' => 165,
250
+ 'Yuml' => 376,
251
+ 'yuml' => 255,
252
+ 'Zeta' => 918,
253
+ 'zeta' => 950,
254
+ 'zwj' => 8205,
255
+ 'zwnj' => 8204
256
+ }
257
+ end
@@ -0,0 +1,27 @@
1
class HTMLEntities

  #
  # Legacy class-level shortcut: encode +args+ with a shared XHTML1 coder.
  # Arguments are passed straight through to HTMLEntities#encode.
  #
  def self.encode_entities(*args)
    xhtml1_entities.encode(*args)
  end

  #
  # Legacy class-level shortcut: decode +args+ with a shared XHTML1 coder.
  # Arguments are passed straight through to HTMLEntities#decode.
  #
  def self.decode_entities(*args)
    xhtml1_entities.decode(*args)
  end

  # Lazily builds the single 'xhtml1' coder instance used by both shortcuts.
  def self.xhtml1_entities
    @xhtml1_entities ||= new('xhtml1')
  end
  private_class_method :xhtml1_entities

end
@@ -0,0 +1,26 @@
1
+ require 'htmlentities'
2
+
3
+ #
4
+ # This file extends the String class with methods to allow encoding and decoding of
5
+ # HTML/XML entities from/to their corresponding UTF-8 codepoints.
6
+ #
7
class String

  #
  # Returns this string with XML and HTML 4.01 entities replaced by their
  # UTF-8 equivalents. Delegates to HTMLEntities.decode_entities.
  #
  def decode_entities
    HTMLEntities.decode_entities(self)
  end

  #
  # Returns this string with codepoints replaced by entities. The optional
  # +instructions+ are forwarded unchanged to HTMLEntities.encode_entities,
  # whose documentation lists the accepted values.
  #
  def encode_entities(*instructions)
    HTMLEntities.encode_entities(self, *instructions)
  end

end
@@ -0,0 +1,258 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['xhtml1'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'apos' => 39,
20
+ 'Aring' => 197,
21
+ 'aring' => 229,
22
+ 'asymp' => 8776,
23
+ 'Atilde' => 195,
24
+ 'atilde' => 227,
25
+ 'Auml' => 196,
26
+ 'auml' => 228,
27
+ 'bdquo' => 8222,
28
+ 'Beta' => 914,
29
+ 'beta' => 946,
30
+ 'brvbar' => 166,
31
+ 'bull' => 8226,
32
+ 'cap' => 8745,
33
+ 'Ccedil' => 199,
34
+ 'ccedil' => 231,
35
+ 'cedil' => 184,
36
+ 'cent' => 162,
37
+ 'Chi' => 935,
38
+ 'chi' => 967,
39
+ 'circ' => 710,
40
+ 'clubs' => 9827,
41
+ 'cong' => 8773,
42
+ 'copy' => 169,
43
+ 'crarr' => 8629,
44
+ 'cup' => 8746,
45
+ 'curren' => 164,
46
+ 'Dagger' => 8225,
47
+ 'dagger' => 8224,
48
+ 'dArr' => 8659,
49
+ 'darr' => 8595,
50
+ 'deg' => 176,
51
+ 'Delta' => 916,
52
+ 'delta' => 948,
53
+ 'diams' => 9830,
54
+ 'divide' => 247,
55
+ 'Eacute' => 201,
56
+ 'eacute' => 233,
57
+ 'Ecirc' => 202,
58
+ 'ecirc' => 234,
59
+ 'Egrave' => 200,
60
+ 'egrave' => 232,
61
+ 'empty' => 8709,
62
+ 'emsp' => 8195,
63
+ 'ensp' => 8194,
64
+ 'Epsilon' => 917,
65
+ 'epsilon' => 949,
66
+ 'equiv' => 8801,
67
+ 'Eta' => 919,
68
+ 'eta' => 951,
69
+ 'ETH' => 208,
70
+ 'eth' => 240,
71
+ 'Euml' => 203,
72
+ 'euml' => 235,
73
+ 'euro' => 8364,
74
+ 'exist' => 8707,
75
+ 'fnof' => 402,
76
+ 'forall' => 8704,
77
+ 'frac12' => 189,
78
+ 'frac14' => 188,
79
+ 'frac34' => 190,
80
+ 'frasl' => 8260,
81
+ 'Gamma' => 915,
82
+ 'gamma' => 947,
83
+ 'ge' => 8805,
84
+ 'gt' => 62,
85
+ 'hArr' => 8660,
86
+ 'harr' => 8596,
87
+ 'hearts' => 9829,
88
+ 'hellip' => 8230,
89
+ 'Iacute' => 205,
90
+ 'iacute' => 237,
91
+ 'Icirc' => 206,
92
+ 'icirc' => 238,
93
+ 'iexcl' => 161,
94
+ 'Igrave' => 204,
95
+ 'igrave' => 236,
96
+ 'image' => 8465,
97
+ 'infin' => 8734,
98
+ 'int' => 8747,
99
+ 'Iota' => 921,
100
+ 'iota' => 953,
101
+ 'iquest' => 191,
102
+ 'isin' => 8712,
103
+ 'Iuml' => 207,
104
+ 'iuml' => 239,
105
+ 'Kappa' => 922,
106
+ 'kappa' => 954,
107
+ 'Lambda' => 923,
108
+ 'lambda' => 955,
109
+ 'lang' => 9001,
110
+ 'laquo' => 171,
111
+ 'lArr' => 8656,
112
+ 'larr' => 8592,
113
+ 'lceil' => 8968,
114
+ 'ldquo' => 8220,
115
+ 'le' => 8804,
116
+ 'lfloor' => 8970,
117
+ 'lowast' => 8727,
118
+ 'loz' => 9674,
119
+ 'lrm' => 8206,
120
+ 'lsaquo' => 8249,
121
+ 'lsquo' => 8216,
122
+ 'lt' => 60,
123
+ 'macr' => 175,
124
+ 'mdash' => 8212,
125
+ 'micro' => 181,
126
+ 'middot' => 183,
127
+ 'minus' => 8722,
128
+ 'Mu' => 924,
129
+ 'mu' => 956,
130
+ 'nabla' => 8711,
131
+ 'nbsp' => 160,
132
+ 'ndash' => 8211,
133
+ 'ne' => 8800,
134
+ 'ni' => 8715,
135
+ 'not' => 172,
136
+ 'notin' => 8713,
137
+ 'nsub' => 8836,
138
+ 'Ntilde' => 209,
139
+ 'ntilde' => 241,
140
+ 'Nu' => 925,
141
+ 'nu' => 957,
142
+ 'Oacute' => 211,
143
+ 'oacute' => 243,
144
+ 'Ocirc' => 212,
145
+ 'ocirc' => 244,
146
+ 'OElig' => 338,
147
+ 'oelig' => 339,
148
+ 'Ograve' => 210,
149
+ 'ograve' => 242,
150
+ 'oline' => 8254,
151
+ 'Omega' => 937,
152
+ 'omega' => 969,
153
+ 'Omicron' => 927,
154
+ 'omicron' => 959,
155
+ 'oplus' => 8853,
156
+ 'or' => 8744,
157
+ 'ordf' => 170,
158
+ 'ordm' => 186,
159
+ 'Oslash' => 216,
160
+ 'oslash' => 248,
161
+ 'Otilde' => 213,
162
+ 'otilde' => 245,
163
+ 'otimes' => 8855,
164
+ 'Ouml' => 214,
165
+ 'ouml' => 246,
166
+ 'para' => 182,
167
+ 'part' => 8706,
168
+ 'permil' => 8240,
169
+ 'perp' => 8869,
170
+ 'Phi' => 934,
171
+ 'phi' => 966,
172
+ 'Pi' => 928,
173
+ 'pi' => 960,
174
+ 'piv' => 982,
175
+ 'plusmn' => 177,
176
+ 'pound' => 163,
177
+ 'Prime' => 8243,
178
+ 'prime' => 8242,
179
+ 'prod' => 8719,
180
+ 'prop' => 8733,
181
+ 'Psi' => 936,
182
+ 'psi' => 968,
183
+ 'quot' => 34,
184
+ 'radic' => 8730,
185
+ 'rang' => 9002,
186
+ 'raquo' => 187,
187
+ 'rArr' => 8658,
188
+ 'rarr' => 8594,
189
+ 'rceil' => 8969,
190
+ 'rdquo' => 8221,
191
+ 'real' => 8476,
192
+ 'reg' => 174,
193
+ 'rfloor' => 8971,
194
+ 'Rho' => 929,
195
+ 'rho' => 961,
196
+ 'rlm' => 8207,
197
+ 'rsaquo' => 8250,
198
+ 'rsquo' => 8217,
199
+ 'sbquo' => 8218,
200
+ 'Scaron' => 352,
201
+ 'scaron' => 353,
202
+ 'sdot' => 8901,
203
+ 'sect' => 167,
204
+ 'shy' => 173,
205
+ 'Sigma' => 931,
206
+ 'sigma' => 963,
207
+ 'sigmaf' => 962,
208
+ 'sim' => 8764,
209
+ 'spades' => 9824,
210
+ 'sub' => 8834,
211
+ 'sube' => 8838,
212
+ 'sum' => 8721,
213
+ 'sup' => 8835,
214
+ 'sup1' => 185,
215
+ 'sup2' => 178,
216
+ 'sup3' => 179,
217
+ 'supe' => 8839,
218
+ 'szlig' => 223,
219
+ 'Tau' => 932,
220
+ 'tau' => 964,
221
+ 'there4' => 8756,
222
+ 'Theta' => 920,
223
+ 'theta' => 952,
224
+ 'thetasym' => 977,
225
+ 'thinsp' => 8201,
226
+ 'THORN' => 222,
227
+ 'thorn' => 254,
228
+ 'tilde' => 732,
229
+ 'times' => 215,
230
+ 'trade' => 8482,
231
+ 'Uacute' => 218,
232
+ 'uacute' => 250,
233
+ 'uArr' => 8657,
234
+ 'uarr' => 8593,
235
+ 'Ucirc' => 219,
236
+ 'ucirc' => 251,
237
+ 'Ugrave' => 217,
238
+ 'ugrave' => 249,
239
+ 'uml' => 168,
240
+ 'upsih' => 978,
241
+ 'Upsilon' => 933,
242
+ 'upsilon' => 965,
243
+ 'Uuml' => 220,
244
+ 'uuml' => 252,
245
+ 'weierp' => 8472,
246
+ 'Xi' => 926,
247
+ 'xi' => 958,
248
+ 'Yacute' => 221,
249
+ 'yacute' => 253,
250
+ 'yen' => 165,
251
+ 'Yuml' => 376,
252
+ 'yuml' => 255,
253
+ 'Zeta' => 918,
254
+ 'zeta' => 950,
255
+ 'zwj' => 8205,
256
+ 'zwnj' => 8204
257
+ }
258
+ end
@@ -0,0 +1,165 @@
1
+ require 'htmlentities/legacy'
2
+
3
+ #
4
+ # HTML entity encoding and decoding for Ruby
5
+ #
6
+
7
class HTMLEntities

  VERSION = '4.0.0'
  FLAVORS = %w[html4 xhtml1]
  INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal]

  # Raised by #encode when an instruction outside INSTRUCTIONS is given.
  class InstructionError < RuntimeError
  end
  # Raised by #initialize when the flavor is not listed in FLAVORS.
  class UnknownFlavor < RuntimeError
  end

  #
  # Create a new HTMLEntities coder for the specified flavor.
  # Available flavors are 'html4' and 'xhtml1' (the default).
  # The only difference in functionality between the two is in the handling of the apos
  # (apostrophe) named entity, which is not defined in HTML4.
  #
  # Raises UnknownFlavor for any other flavor (comparison is case-insensitive).
  #
  def initialize(flavor='xhtml1')
    @flavor = flavor.to_s.downcase
    raise UnknownFlavor, "Unknown flavor #{flavor}" unless FLAVORS.include?(@flavor)
  end

  #
  # Decode entities in a string into their UTF-8
  # equivalents. Obviously, if your string is not already in UTF-8, you'd
  # better convert it before using this method, or the output will be mixed
  # up.
  #
  # Unknown named entities will not be converted
  #
  def decode(source)
    # First pass: named entities; anything not in the map is left untouched.
    named_decoded = source.to_s.gsub(named_entity_regexp) {
      (cp = map[$1]) ? [cp].pack('U') : $&
    }
    # Second pass: decimal ($1) and hexadecimal ($2) numeric entities.
    named_decoded.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
      $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
    }
  end

  #
  # Encode codepoints into their corresponding entities. Various operations
  # are possible, and may be specified in order:
  #
  # :basic :: Convert the five XML entities ('"<>&)
  # :named :: Convert non-ASCII characters to their named HTML 4.01 equivalent
  # :decimal :: Convert non-ASCII characters to decimal entities (e.g. &#1234;)
  # :hexadecimal :: Convert non-ASCII characters to hexadecimal entities (e.g. &#x12ab;)
  #
  # You can specify the commands in any order, but they will be executed in
  # the order listed above to ensure that entity ampersands are not
  # clobbered and that named entities are replaced before numeric ones.
  #
  # If no instructions are specified, :basic will be used.
  #
  # Examples:
  # encode_entities(str) - XML-safe
  # encode_entities(str, :basic, :decimal) - XML-safe and 7-bit clean
  # encode_entities(str, :basic, :named, :decimal) - 7-bit clean, with all
  # non-ASCII characters replaced with their named entity where possible, and
  # decimal equivalents otherwise.
  #
  # Note: It is the program's responsibility to ensure that the source
  # contains valid UTF-8 before calling this method.
  #
  # Raises InstructionError when an unknown instruction is passed.
  #
  def encode(source, *instructions)
    string = source.to_s.dup
    if instructions.empty?
      instructions = [:basic]
    elsif (unknown_instructions = instructions - INSTRUCTIONS) != []
      raise InstructionError,
        "unknown encode_entities command(s): #{unknown_instructions.inspect}"
    end

    # The basic characters are always escaped, using the "strongest"
    # representation that was requested.
    basic_entity_encoder =
      if instructions.include?(:basic) || instructions.include?(:named)
        :encode_named
      elsif instructions.include?(:decimal)
        :encode_decimal
      else
        # After validation the only instruction left is :hexadecimal.
        :encode_hexadecimal
      end
    string.gsub!(basic_entity_regexp){ __send__(basic_entity_encoder, $&) }

    # Extended characters: try the named form first (if requested), then
    # fall back to one numeric form; decimal wins over hexadecimal.
    extended_entity_encoders = []
    if instructions.include?(:named)
      extended_entity_encoders << :encode_named
    end
    if instructions.include?(:decimal)
      extended_entity_encoders << :encode_decimal
    elsif instructions.include?(:hexadecimal)
      extended_entity_encoders << :encode_hexadecimal
    end
    unless extended_entity_encoders.empty?
      string.gsub!(extended_entity_regexp){
        encode_extended(extended_entity_encoders, $&)
      }
    end

    return string
  end

private

  # Entity-name => codepoint table for the current flavor, loaded on first use.
  def map
    @map ||= (require "htmlentities/#{@flavor}"; HTMLEntities::MAPPINGS[@flavor])
  end

  # Characters escaped by every encode call; the apostrophe is only included
  # for non-HTML flavors.
  def basic_entity_regexp
    @basic_entity_regexp ||= (
      case @flavor
      when /^html/
        /[<>"&]/
      else
        /[<>'"&]/
      end
    )
  end

  # Matches control characters and non-ASCII characters (plus the apostrophe
  # for html4, which has no named apos entity).
  def extended_entity_regexp
    @extended_entity_regexp ||= (
      regexp =
        if ''.respond_to?(:encoding)
          # Encoding-aware Rubies match whole characters, so the byte-level
          # lead/continuation pattern below is replaced by "any non-ASCII char".
          '[\x00-\x1f]|[^\x00-\x7f]'
        else
          # Byte-oriented Rubies: UTF-8 lead byte followed by continuation bytes.
          '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+'
        end
      regexp += "|'" if @flavor == 'html4'
      Regexp.new(regexp)
    )
  end

  # Matches &name; where the name length is bounded by the shortest and
  # longest names present in the flavor's map.
  def named_entity_regexp
    @named_entity_regexp ||= (
      lengths = map.keys.map{ |name| name.length }
      min_length = lengths.min
      max_length = lengths.max
      /&([a-z][a-z0-9]{#{min_length-1},#{max_length-1}});/i
    )
  end

  # Codepoint => entity-name table, the inverse of #map.
  def reverse_map
    @reverse_map ||= map.invert
  end

  # Returns "&name;" for +char+, or nil when the flavor has no name for it.
  def encode_named(char)
    cp = char.unpack('U')[0]
    (e = reverse_map[cp]) && "&#{e};"
  end

  # Returns the decimal numeric entity for +char+.
  def encode_decimal(char)
    "&##{char.unpack('U')[0]};"
  end

  # Returns the hexadecimal numeric entity for +char+.
  def encode_hexadecimal(char)
    "&#x#{char.unpack('U')[0].to_s(16)};"
  end

  # Tries each encoder in order and returns the first non-nil result;
  # +char+ is returned unchanged when no encoder can represent it.
  def encode_extended(encoders, char)
    encoders.each do |encoder|
      encoded = __send__(encoder, char)
      return encoded if encoded
    end
    return char
  end

end
@@ -0,0 +1,76 @@
1
+ <html>
2
+ <head>
3
+ <title>kindle feeds</title>
4
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
5
+ </head>
6
+ <body>
7
+ <a name="start"></a>
8
+
9
+ <h1>Kindle Feeds</h1>
10
+ <p>copyright 2008 Daniel Choi</p>
11
+ <p>contact: dhchoi@gmail.com</p>
12
+ <p>open source license: <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a></p>
13
+ <p>homepage: <a href="http://danielchoi.com/software/kindle-feeds.html">http://danielchoi.com/software/kindle-feeds.html</a></p>
14
+ <br/>
15
+ <br/>
16
+
17
+ <a name="toc"></a>
18
+ <h2>Table of Contents</h2>
19
+ <ul>
20
+ <% @sections.each_with_index do |x, i| %>
21
+ <li><a href="#section-<%= i %>"><%= x.title %></a></li>
22
+ <% end %>
23
+ </ul>
24
+ <% @sections.each_with_index do |s, i| %>
25
+
26
+ <br/>
27
+ <br/>
28
+ <div class="section details">
29
+ <a name="section-<%= i %>" ></a>
30
+ <div style="text-align:right">
31
+ <ul style="list-style-type:none">
32
+ <li><a href="#toc">table of contents</a></li>
33
+ </ul>
34
+ </div>
35
+ <h2>Section: <%= s.title %></h2>
36
+ <p>Feeds in this section:</p>
37
+ <ol>
38
+ <% s.feeds.each_with_index do |f, m| %>
39
+ <li><a href="#section-<%= i %>-feed-<%= m %>"><%= f.title %></a></li>
40
+ <% end %>
41
+ </ol>
42
+ <% s.feeds.each_with_index do |f, j| %>
43
+ <a name="section-<%= i%>-feed-<%= j %>"></a>
44
+ <h2>Feed: <a href="<%= f.url %>"><%= f.title %></a></h2>
45
+ <p><%= f.entries.size %> Entries:</p>
46
+ <ol>
47
+ <% f.entries.each_with_index do |e, n| %>
48
+ <li><a href="#section-<%= i %>-feed-<%= j %>-entry-<%= n %>"><%= e.title %></a></li>
49
+ <% end %>
50
+ </ol>
51
+
52
+ <% f.entries.each_with_index do |e, n| %>
53
+ <a name="section-<%= i %>-feed-<%= j %>-entry-<%= n %>"></a>
54
+ <h3>(<%= n + 1 %>/<%= f.entries.size %>) <a href="<%= e.url %>"><%= e.title %></a></h3>
55
+ <div style="text-align:right"><i><%= e.date_published ? e.date_published.strftime('%B %d, %Y') : nil %></i></div>
56
+ <div>
57
+ <%= e.content %>
58
+ </div>
59
+ <div style="text-align:right">
60
+ <ul style="list-style-type:none">
61
+ <li><a href="#toc">table of contents</a></li>
62
+ <li><a href="#section-<%= i%>"><%= s.title %></a></li>
63
+ <li><a href="#section-<%= i%>-feed-<%= j %>"><%= f.title %></a></li>
64
+ </ul>
65
+ </div>
66
+ <br/>
67
+ <br/>
68
+ <% end %>
69
+ <% end %>
70
+ </div>
71
+
72
+ <% end %>
73
+ </body>
74
+ </html>
75
+
76
+
@@ -0,0 +1,267 @@
1
+ # kindle-feeds
2
+
3
+ # copyright 2008 Daniel Choi
4
+ # dhchoi@gmail.com
5
+ # License: MIT
6
+
7
+ require 'rubygems'
8
+ require 'optparse'
9
+ require 'open-uri'
10
+ require 'feed-normalizer'
11
+ require 'htmlentities'
12
+ require 'iconv'
13
+ require 'erb'
14
+ require 'hpricot'
15
# Name of the feed-list file; a relative path, so it is resolved against the
# current working directory.
CONFIGFILE = "kindle_feeds.conf"

# Path of the ERB template that sits next to this library file.
ERB_TEMPLATE = "#{File.dirname(__FILE__)}/kindle-feeds.erb.html"

# Contents of the stub feed list written out when CONFIGFILE does not exist.
DEFAULT_FEEDS = <<END
# kindle-feeds feed list
#
# Please edit this file so that it contains the section titles and feed URLs
# that you want. Follow the format of the example: section titles immediately
# followed by a list of URLs. The URLs should either be URLs of RSS or Atom
# feeds or URLs of web pages that contain links to a RSS or Atom feed. The
# 'http://' at the beginning of the URL is optional. Sections should be separated
# by exactly one blank line.

General News
nytimes.com
slate.com

Tech News
techcrunch.com
http://readwriteweb.com
slashdot.org

Apple
macworld.com
macintouch.com

Ebook
teleread.org/blog
END
43
+
44
# Finds the feed advertised by a web page through a <link> element in its
# <head>, e.g.
#   <link rel="alternate" type="application/rss+xml" title="RSS"
#         href="http://feeds.feedburner.com/TheRssBlog">
class Autodiscovery
  # page_html is handed to Hpricot as-is for parsing.
  def initialize(page_html)
    @doc = Hpricot(page_html)
  end

  # Returns the href of the first matching link, trying RSS, then Atom, then
  # generic text/xml; returns nil when the page advertises none of them.
  def discover
    # Tricky: Hpricot CSS attribute selectors are written like XPath selectors
    selectors = [:rss, :atom].map { |flavor| "head link[@type=application/#{flavor}+xml]" }
    selectors << "head link[@type=text/xml]"
    selectors.each do |selector|
      link = @doc.at(selector)
      return link[:href] if link
    end
    nil
  end
end
67
+
68
# Downloads, discovers and cleans up feeds. All methods are class methods.
class Feed
  # Parses +xml+ with FeedNormalizer and cleans every entry.
  #
  # For each entry the title and content are entity-decoded and converted
  # from UTF-8 to ISO-8859-1 (transliterating, or dropping characters when
  # transliteration fails); in the content, h1-h3 headings become h4, font
  # tags are replaced by their text, and img/svg/object/embed/script
  # elements are removed.
  #
  # +feed_url+ is accepted for interface compatibility; it is not used here.
  # Returns the parsed feed, or nil when +xml+ does not parse as a feed.
  def self.create_feed(xml, feed_url)
    feed = FeedNormalizer::FeedNormalizer.parse(xml)
    return nil unless feed.is_a?(FeedNormalizer::Feed)
    # clean up entries:
    ic = Iconv.new('ISO-8859-1//TRANSLIT', 'utf-8')
    ic2 = Iconv.new('ISO-8859-1//IGNORE', 'utf-8')
    coder = HTMLEntities.new
    puts "#{feed.entries.size} entries downloaded."
    puts
    feed.entries.each do |e|
      e.title = coder.decode(e.title)
      e.content = coder.decode(e.content)

      # Prefer transliteration; fall back to dropping unconvertible characters.
      begin
        e.title = ic.iconv(e.title)
      rescue
        e.title = ic2.iconv(e.title)
      end
      begin
        e.content = ic.iconv(e.content)
      rescue
        e.content = ic2.iconv(e.content)
      end
      doc = Hpricot(e.content)
      doc.search('h1, h2, h3') do |h|
        h.swap("<h4>#{h.inner_text}</h4>")
      end
      doc.search('//font') do |font|
        font.swap(font.inner_text)
      end
      doc.search('//img').remove
      doc.search('svg, object, embed').remove
      doc.search('script').remove
      e.content = doc.to_s
    end
    return feed
  end

  # Downloads the feed at +feed_url+ and returns it cleaned up.
  #
  # A URL without an http:// or https:// scheme gets "http://" prepended.
  # When the resource is not itself a feed, it is treated as a web page and
  # searched for an advertised feed, which is then downloaded instead.
  # Returns nil (after printing a message) when nothing usable is found.
  def self.subscribe(feed_url)
    feed_url = normalize_url(feed_url)
    puts "Downloading #{feed_url}"
    begin
      xml = fetch(feed_url)
    rescue SocketError
      puts "Error trying to load page at #{feed_url}"
      return
    end
    if xml.nil?
      puts "Can't find any resource at #{feed_url}"
      return
    end
    feed = Feed.create_feed(xml, feed_url)
    if feed.nil?
      puts "#{feed_url}: Attempting autodiscovery..."
      # Reuse the page that was just downloaded instead of fetching it again.
      discovered_url = auto_discover_and_subscribe(feed_url, xml)
      discovered_xml = discovered_url && fetch(discovered_url)
      # A failed download of the discovered feed yields nil, not a parser error.
      feed = Feed.create_feed(discovered_xml, discovered_url) if discovered_xml
    end
    feed
  end

  # Looks for a feed advertised by the page at +url+.
  #
  # +page_html+ may be supplied when the page has already been downloaded;
  # otherwise the page is fetched. The discovered href is resolved against
  # +url+, so relative links work. Returns the absolute feed URL as a string,
  # or nil when the page cannot be loaded or advertises no feed.
  def self.auto_discover_and_subscribe(url, page_html = nil)
    uri = URI.parse(url)
    page_html ||= fetch(url)
    feed_url = page_html && Autodiscovery.new(page_html).discover
    if feed_url
      feed_url = uri.merge(feed_url).to_s
      puts "Found feed: #{feed_url}"
      return feed_url
    else
      puts "Can't find feed for #{url}"
      return nil
    end
  end

  # a simple wrapper over open-uri call. Easier to mock in testing.
  #
  # Returns the body as a string, or nil (after printing a message) when the
  # download times out or fails. Only StandardError is rescued, so Ctrl-C and
  # exit requests are no longer swallowed.
  def self.fetch(url)
    # Block form closes the handle as soon as the body has been read.
    open(url) { |io| io.read }
  rescue Timeout::Error
    puts "-> attempt to fetch #{url} timed out"
    nil
  rescue StandardError => e
    puts "-> error trying to fetch #{url}: #{e}"
    nil
  end

  # Strips surrounding whitespace and prepends "http://" unless the URL
  # already starts with an http or https scheme.
  def self.normalize_url(feed_url)
    url = feed_url.strip
    url =~ %r{\Ahttps?://}i ? url : "http://" + url
  end
  private_class_method :normalize_url
end
159
+
160
# A titled group of feeds. Building a Section subscribes to every URI given.
class Section
  attr_accessor :title, :uris, :feeds

  # title: the section heading; uris: the feed or page URLs listed under it.
  # Each URI is passed to Feed.subscribe in order; URIs for which no feed
  # comes back are left out of #feeds.
  def initialize(title, uris)
    @title = title
    @uris = uris
    @feeds = @uris.map { |uri| Feed.subscribe(uri) }.select { |feed| feed }
  end
end
174
+
175
# Reads a feed list, downloads the feeds section by section and renders them
# into a single HTML file through the ERB template.
class KindleFeeds
  VERSION = "1.0.0"
  attr_accessor :sections

  # config is the text of the feed list: blocks separated by blank lines,
  # each block being a section title followed by one URL per line.
  # Prints the parsed list, then builds one Section per block (which
  # downloads that section's feeds).
  def initialize(config)
    entries = KindleFeeds.parse_config(config)
    puts "Fetching feeds:"
    entries.each do |title, *urls|
      puts "- " + title
      urls.each do |url|
        puts " - " + url
      end
    end
    puts
    # subscribe
    @sections = entries.map { |title, *urls| Section.new(title, urls) }
  end

  # Splits the feed-list text into an array of arrays, each composed of a
  # section title followed by the urls of its feeds. Blocks are separated by
  # one or more blank (or whitespace-only) lines; empty blocks are skipped so
  # stray blank lines cannot crash the parser.
  def self.parse_config(config)
    chunks = config.split(/\n\s*\n/)
    parsed = chunks.map { |chunk| chunk.strip.split("\n").map { |line| line.strip } }
    parsed.reject { |lines| lines.empty? }
  end

  # Name of the generated HTML file for the given time: month-day-year.
  def self.output_filename(now = Time.now)
    "Kindle Feeds #{now.strftime('%m-%d-%Y')}.html"
  end

  # Renders the sections through ERB_TEMPLATE and writes the result to a
  # dated file in the current directory, then prints delivery instructions.
  def to_html
    puts "Converting feeds into Kindle-compatible and optimized HTML..."
    puts
    erb = ERB.new(File.read(ERB_TEMPLATE))
    # The template reads @sections through this binding.
    out = erb.result(binding())
    outfile = KindleFeeds.output_filename
    File.open(outfile, "w") do |f|
      f.write out
    end
    puts "Output written to file:"
    puts outfile
    puts
    puts "Email this file as an attachment to YOUR_KINDLE_USERNAME@kindle.com or YOUR_KINDLE_USERNAME@free.kindle.com."
    puts
    puts "Visit http://www.amazon.com/gp/help/customer/display.html?nodeId=200140600 for more help."
    puts "Done."
  end

  # Command-line entry point. Parses options (-h / --version), writes a stub
  # CONFIGFILE and exits when none exists, otherwise reads it (ignoring lines
  # that start with '#'), downloads the feeds and writes the HTML file.
  def self.run(argv=ARGV)
    opts = OptionParser.new do |opt|
      opt.program_name = File.basename $0
      opt.version = KindleFeeds::VERSION
      opt.banner = <<-EOT
Usage: #{opt.program_name}

kindle-feeds reads a feed list from #{CONFIGFILE}, downloads the feeds, and
generates a Kindle-compatible and optimized HTML file that can be sent to
YOUR_KINDLE_USERNAME@kindle.com or YOUR_KINDLE_USERNAME@free.kindle.com for conversion
into an .azw file for reading on the Kindle.

The first time kindle-feeds is run, it will generate a stub #{CONFIGFILE} file
in the same directory. Please edit this file to specify the feeds you want to
download and convert for Kindle reading. Further instructions can be found at the
top of #{CONFIGFILE} once it is generated.

Project homepage:
http://danielchoi.com/software/kindle-feeds.html
      EOT
    end
    opts.parse! argv

    if ! File.exist?(CONFIGFILE)
      puts "Can't find #{CONFIGFILE}. Generating..."
      File.open(CONFIGFILE, "w") do |f|
        f.write DEFAULT_FEEDS
      end
      puts "Please edit #{CONFIGFILE} before running kindle-feeds again."
      exit
    end
    puts "Reading #{CONFIGFILE} for feed URLs."
    puts
    # File.readlines closes the file itself; comment lines are dropped.
    config = File.readlines(CONFIGFILE).reject { |line| line =~ /^#/ }.join
    kf = KindleFeeds.new(config)
    kf.to_html
  end
end
264
+
265
# Run the command-line interface only when this file is executed directly,
# not when it is loaded with require.
KindleFeeds.run ARGV if __FILE__ == $0
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kindle-feeds
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Choi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-08-22 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: feed-normalizer
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.5.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0.6"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hoe
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.7.0
44
+ version:
45
+ description: Format Atom and RSS feeds for the Kindle.
46
+ email: dhchoi@gmail.com
47
+ executables:
48
+ - kindle-feeds
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - History.txt
53
+ - Manifest.txt
54
+ - README.txt
55
+ files:
56
+ - History.txt
57
+ - Manifest.txt
58
+ - README.txt
59
+ - Rakefile
60
+ - bin/kindle-feeds
61
+ - lib/kindle-feeds.rb
62
+ - lib/htmlentities.rb
63
+ - lib/htmlentities/html4.rb
64
+ - lib/htmlentities/legacy.rb
65
+ - lib/htmlentities/string.rb
66
+ - lib/htmlentities/xhtml1.rb
67
+ - lib/kindle-feeds.erb.html
68
+ has_rdoc: true
69
+ homepage: http://danielchoi.com/software/kindle-feeds.html
70
+ post_install_message: Type kindle-feeds -h for instructions.
71
+ rdoc_options:
72
+ - --main
73
+ - README.txt
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: "0"
81
+ version:
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ requirements: []
89
+
90
+ rubyforge_project: kindle-feeds
91
+ rubygems_version: 1.2.0
92
+ signing_key:
93
+ specification_version: 2
94
+ summary: Format Atom and RSS feeds for the Kindle.
95
+ test_files: []
96
+