kindle-feeds 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2008-08-22
2
+
3
+ * 1 major enhancement
4
+
5
+ * Initial release.
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,14 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/kindle-feeds
6
+ lib/kindle-feeds.rb
7
+ lib/htmlentities.rb
8
+ lib/htmlentities/html4.rb
9
+ lib/htmlentities/legacy.rb
10
+ lib/htmlentities/string.rb
11
+ lib/htmlentities/xhtml1.rb
12
+ lib/kindle-feeds.erb.html
13
+
14
+
data/README.txt ADDED
@@ -0,0 +1,48 @@
1
+ = kindle-feeds
2
+
3
+ * http://danielchoi.com/software/kindle-feeds.html
4
+
5
+ == DESCRIPTION:
6
+
7
+ Format Atom and RSS feeds for the Kindle.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Downloads the RSS and Atom feeds listed in kindle_feeds.conf and combines them into a single Kindle-friendly HTML file
12
+
13
+ == SYNOPSIS:
14
+
15
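+ Run kindle-feeds in the directory that contains kindle_feeds.conf (a stub file is generated on the first run); type kindle-feeds -h for instructions.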
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * feed-normalizer (>= 1.5.1), hpricot (>= 0.6)
20
+
21
+ == INSTALL:
22
+
23
+ * sudo gem install kindle-feeds
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2008 Daniel Choi
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ $: << 'lib/'
6
+ require './lib/kindle-feeds.rb'
7
+ require './lib/htmlentities.rb'
8
+
9
+ Hoe.new('kindle-feeds', KindleFeeds::VERSION) do |p|
10
+ # p.rubyforge_name = 'kindle-feedsx' # if different than lowercase project name
11
+ p.author = 'Daniel Choi'
12
+ p.email = 'dhchoi@gmail.com'
13
+ p.description = "Format Atom and RSS feeds for the Kindle."
14
+ p.summary = "Format Atom and RSS feeds for the Kindle."
15
+ p.url = "http://danielchoi.com/software/kindle-feeds.html"
16
+ p.extra_deps << ['feed-normalizer', '>= 1.5.1']
17
+ p.extra_deps << ['hpricot', '>= 0.6']
18
+ p.post_install_message = 'Type kindle-feeds -h for instructions.'
19
+ end
20
+
21
+ # vim: syntax=Ruby
data/bin/kindle-feeds ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'kindle-feeds'
3
+ KindleFeeds.run ARGV
4
+
@@ -0,0 +1,257 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['html4'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'Aring' => 197,
20
+ 'aring' => 229,
21
+ 'asymp' => 8776,
22
+ 'Atilde' => 195,
23
+ 'atilde' => 227,
24
+ 'Auml' => 196,
25
+ 'auml' => 228,
26
+ 'bdquo' => 8222,
27
+ 'Beta' => 914,
28
+ 'beta' => 946,
29
+ 'brvbar' => 166,
30
+ 'bull' => 8226,
31
+ 'cap' => 8745,
32
+ 'Ccedil' => 199,
33
+ 'ccedil' => 231,
34
+ 'cedil' => 184,
35
+ 'cent' => 162,
36
+ 'Chi' => 935,
37
+ 'chi' => 967,
38
+ 'circ' => 710,
39
+ 'clubs' => 9827,
40
+ 'cong' => 8773,
41
+ 'copy' => 169,
42
+ 'crarr' => 8629,
43
+ 'cup' => 8746,
44
+ 'curren' => 164,
45
+ 'Dagger' => 8225,
46
+ 'dagger' => 8224,
47
+ 'dArr' => 8659,
48
+ 'darr' => 8595,
49
+ 'deg' => 176,
50
+ 'Delta' => 916,
51
+ 'delta' => 948,
52
+ 'diams' => 9830,
53
+ 'divide' => 247,
54
+ 'Eacute' => 201,
55
+ 'eacute' => 233,
56
+ 'Ecirc' => 202,
57
+ 'ecirc' => 234,
58
+ 'Egrave' => 200,
59
+ 'egrave' => 232,
60
+ 'empty' => 8709,
61
+ 'emsp' => 8195,
62
+ 'ensp' => 8194,
63
+ 'Epsilon' => 917,
64
+ 'epsilon' => 949,
65
+ 'equiv' => 8801,
66
+ 'Eta' => 919,
67
+ 'eta' => 951,
68
+ 'ETH' => 208,
69
+ 'eth' => 240,
70
+ 'Euml' => 203,
71
+ 'euml' => 235,
72
+ 'euro' => 8364,
73
+ 'exist' => 8707,
74
+ 'fnof' => 402,
75
+ 'forall' => 8704,
76
+ 'frac12' => 189,
77
+ 'frac14' => 188,
78
+ 'frac34' => 190,
79
+ 'frasl' => 8260,
80
+ 'Gamma' => 915,
81
+ 'gamma' => 947,
82
+ 'ge' => 8805,
83
+ 'gt' => 62,
84
+ 'hArr' => 8660,
85
+ 'harr' => 8596,
86
+ 'hearts' => 9829,
87
+ 'hellip' => 8230,
88
+ 'Iacute' => 205,
89
+ 'iacute' => 237,
90
+ 'Icirc' => 206,
91
+ 'icirc' => 238,
92
+ 'iexcl' => 161,
93
+ 'Igrave' => 204,
94
+ 'igrave' => 236,
95
+ 'image' => 8465,
96
+ 'infin' => 8734,
97
+ 'int' => 8747,
98
+ 'Iota' => 921,
99
+ 'iota' => 953,
100
+ 'iquest' => 191,
101
+ 'isin' => 8712,
102
+ 'Iuml' => 207,
103
+ 'iuml' => 239,
104
+ 'Kappa' => 922,
105
+ 'kappa' => 954,
106
+ 'Lambda' => 923,
107
+ 'lambda' => 955,
108
+ 'lang' => 9001,
109
+ 'laquo' => 171,
110
+ 'lArr' => 8656,
111
+ 'larr' => 8592,
112
+ 'lceil' => 8968,
113
+ 'ldquo' => 8220,
114
+ 'le' => 8804,
115
+ 'lfloor' => 8970,
116
+ 'lowast' => 8727,
117
+ 'loz' => 9674,
118
+ 'lrm' => 8206,
119
+ 'lsaquo' => 8249,
120
+ 'lsquo' => 8216,
121
+ 'lt' => 60,
122
+ 'macr' => 175,
123
+ 'mdash' => 8212,
124
+ 'micro' => 181,
125
+ 'middot' => 183,
126
+ 'minus' => 8722,
127
+ 'Mu' => 924,
128
+ 'mu' => 956,
129
+ 'nabla' => 8711,
130
+ 'nbsp' => 160,
131
+ 'ndash' => 8211,
132
+ 'ne' => 8800,
133
+ 'ni' => 8715,
134
+ 'not' => 172,
135
+ 'notin' => 8713,
136
+ 'nsub' => 8836,
137
+ 'Ntilde' => 209,
138
+ 'ntilde' => 241,
139
+ 'Nu' => 925,
140
+ 'nu' => 957,
141
+ 'Oacute' => 211,
142
+ 'oacute' => 243,
143
+ 'Ocirc' => 212,
144
+ 'ocirc' => 244,
145
+ 'OElig' => 338,
146
+ 'oelig' => 339,
147
+ 'Ograve' => 210,
148
+ 'ograve' => 242,
149
+ 'oline' => 8254,
150
+ 'Omega' => 937,
151
+ 'omega' => 969,
152
+ 'Omicron' => 927,
153
+ 'omicron' => 959,
154
+ 'oplus' => 8853,
155
+ 'or' => 8744,
156
+ 'ordf' => 170,
157
+ 'ordm' => 186,
158
+ 'Oslash' => 216,
159
+ 'oslash' => 248,
160
+ 'Otilde' => 213,
161
+ 'otilde' => 245,
162
+ 'otimes' => 8855,
163
+ 'Ouml' => 214,
164
+ 'ouml' => 246,
165
+ 'para' => 182,
166
+ 'part' => 8706,
167
+ 'permil' => 8240,
168
+ 'perp' => 8869,
169
+ 'Phi' => 934,
170
+ 'phi' => 966,
171
+ 'Pi' => 928,
172
+ 'pi' => 960,
173
+ 'piv' => 982,
174
+ 'plusmn' => 177,
175
+ 'pound' => 163,
176
+ 'Prime' => 8243,
177
+ 'prime' => 8242,
178
+ 'prod' => 8719,
179
+ 'prop' => 8733,
180
+ 'Psi' => 936,
181
+ 'psi' => 968,
182
+ 'quot' => 34,
183
+ 'radic' => 8730,
184
+ 'rang' => 9002,
185
+ 'raquo' => 187,
186
+ 'rArr' => 8658,
187
+ 'rarr' => 8594,
188
+ 'rceil' => 8969,
189
+ 'rdquo' => 8221,
190
+ 'real' => 8476,
191
+ 'reg' => 174,
192
+ 'rfloor' => 8971,
193
+ 'Rho' => 929,
194
+ 'rho' => 961,
195
+ 'rlm' => 8207,
196
+ 'rsaquo' => 8250,
197
+ 'rsquo' => 8217,
198
+ 'sbquo' => 8218,
199
+ 'Scaron' => 352,
200
+ 'scaron' => 353,
201
+ 'sdot' => 8901,
202
+ 'sect' => 167,
203
+ 'shy' => 173,
204
+ 'Sigma' => 931,
205
+ 'sigma' => 963,
206
+ 'sigmaf' => 962,
207
+ 'sim' => 8764,
208
+ 'spades' => 9824,
209
+ 'sub' => 8834,
210
+ 'sube' => 8838,
211
+ 'sum' => 8721,
212
+ 'sup' => 8835,
213
+ 'sup1' => 185,
214
+ 'sup2' => 178,
215
+ 'sup3' => 179,
216
+ 'supe' => 8839,
217
+ 'szlig' => 223,
218
+ 'Tau' => 932,
219
+ 'tau' => 964,
220
+ 'there4' => 8756,
221
+ 'Theta' => 920,
222
+ 'theta' => 952,
223
+ 'thetasym' => 977,
224
+ 'thinsp' => 8201,
225
+ 'THORN' => 222,
226
+ 'thorn' => 254,
227
+ 'tilde' => 732,
228
+ 'times' => 215,
229
+ 'trade' => 8482,
230
+ 'Uacute' => 218,
231
+ 'uacute' => 250,
232
+ 'uArr' => 8657,
233
+ 'uarr' => 8593,
234
+ 'Ucirc' => 219,
235
+ 'ucirc' => 251,
236
+ 'Ugrave' => 217,
237
+ 'ugrave' => 249,
238
+ 'uml' => 168,
239
+ 'upsih' => 978,
240
+ 'Upsilon' => 933,
241
+ 'upsilon' => 965,
242
+ 'Uuml' => 220,
243
+ 'uuml' => 252,
244
+ 'weierp' => 8472,
245
+ 'Xi' => 926,
246
+ 'xi' => 958,
247
+ 'Yacute' => 221,
248
+ 'yacute' => 253,
249
+ 'yen' => 165,
250
+ 'Yuml' => 376,
251
+ 'yuml' => 255,
252
+ 'Zeta' => 918,
253
+ 'zeta' => 950,
254
+ 'zwj' => 8205,
255
+ 'zwnj' => 8204
256
+ }
257
+ end
@@ -0,0 +1,27 @@
1
+ class HTMLEntities
2
+ class << self
3
+
4
+ #
5
+ # Legacy compatibility class method allowing direct encoding of XHTML1 entities.
6
+ # See HTMLEntities#encode for description of parameters.
7
+ #
8
+ def encode_entities(*args)
9
+ xhtml1_entities.encode(*args)
10
+ end
11
+
12
+ #
13
+ # Legacy compatibility class method allowing direct decoding of XHTML1 entities.
14
+ # See HTMLEntities#decode for description of parameters.
15
+ #
16
+ def decode_entities(*args)
17
+ xhtml1_entities.decode(*args)
18
+ end
19
+
20
+ private
21
+
22
+ def xhtml1_entities
23
+ @xhtml1_entities ||= new('xhtml1')
24
+ end
25
+
26
+ end
27
+ end
@@ -0,0 +1,26 @@
1
+ require 'htmlentities'
2
+
3
+ #
4
+ # This file extends the String class with methods to allow encoding and decoding of
5
+ # HTML/XML entities from/to their corresponding UTF-8 codepoints.
6
+ #
7
+ class String
8
+
9
+ #
10
+ # Decode XML and HTML 4.01 entities in a string into their UTF-8
11
+ # equivalents.
12
+ #
13
+ def decode_entities
14
+ return HTMLEntities.decode_entities(self)
15
+ end
16
+
17
+ #
18
+ # Encode codepoints in a string into their corresponding entities. See
19
+ # the documentation of HTMLEntities.encode_entities for a list of possible
20
+ # instructions.
21
+ #
22
+ def encode_entities(*instructions)
23
+ return HTMLEntities.encode_entities(self, *instructions)
24
+ end
25
+
26
+ end
@@ -0,0 +1,258 @@
1
+ class HTMLEntities
2
+ MAPPINGS = {} unless defined? MAPPINGS
3
+ MAPPINGS['xhtml1'] = {
4
+ 'Aacute' => 193,
5
+ 'aacute' => 225,
6
+ 'Acirc' => 194,
7
+ 'acirc' => 226,
8
+ 'acute' => 180,
9
+ 'AElig' => 198,
10
+ 'aelig' => 230,
11
+ 'Agrave' => 192,
12
+ 'agrave' => 224,
13
+ 'alefsym' => 8501,
14
+ 'Alpha' => 913,
15
+ 'alpha' => 945,
16
+ 'amp' => 38,
17
+ 'and' => 8743,
18
+ 'ang' => 8736,
19
+ 'apos' => 39,
20
+ 'Aring' => 197,
21
+ 'aring' => 229,
22
+ 'asymp' => 8776,
23
+ 'Atilde' => 195,
24
+ 'atilde' => 227,
25
+ 'Auml' => 196,
26
+ 'auml' => 228,
27
+ 'bdquo' => 8222,
28
+ 'Beta' => 914,
29
+ 'beta' => 946,
30
+ 'brvbar' => 166,
31
+ 'bull' => 8226,
32
+ 'cap' => 8745,
33
+ 'Ccedil' => 199,
34
+ 'ccedil' => 231,
35
+ 'cedil' => 184,
36
+ 'cent' => 162,
37
+ 'Chi' => 935,
38
+ 'chi' => 967,
39
+ 'circ' => 710,
40
+ 'clubs' => 9827,
41
+ 'cong' => 8773,
42
+ 'copy' => 169,
43
+ 'crarr' => 8629,
44
+ 'cup' => 8746,
45
+ 'curren' => 164,
46
+ 'Dagger' => 8225,
47
+ 'dagger' => 8224,
48
+ 'dArr' => 8659,
49
+ 'darr' => 8595,
50
+ 'deg' => 176,
51
+ 'Delta' => 916,
52
+ 'delta' => 948,
53
+ 'diams' => 9830,
54
+ 'divide' => 247,
55
+ 'Eacute' => 201,
56
+ 'eacute' => 233,
57
+ 'Ecirc' => 202,
58
+ 'ecirc' => 234,
59
+ 'Egrave' => 200,
60
+ 'egrave' => 232,
61
+ 'empty' => 8709,
62
+ 'emsp' => 8195,
63
+ 'ensp' => 8194,
64
+ 'Epsilon' => 917,
65
+ 'epsilon' => 949,
66
+ 'equiv' => 8801,
67
+ 'Eta' => 919,
68
+ 'eta' => 951,
69
+ 'ETH' => 208,
70
+ 'eth' => 240,
71
+ 'Euml' => 203,
72
+ 'euml' => 235,
73
+ 'euro' => 8364,
74
+ 'exist' => 8707,
75
+ 'fnof' => 402,
76
+ 'forall' => 8704,
77
+ 'frac12' => 189,
78
+ 'frac14' => 188,
79
+ 'frac34' => 190,
80
+ 'frasl' => 8260,
81
+ 'Gamma' => 915,
82
+ 'gamma' => 947,
83
+ 'ge' => 8805,
84
+ 'gt' => 62,
85
+ 'hArr' => 8660,
86
+ 'harr' => 8596,
87
+ 'hearts' => 9829,
88
+ 'hellip' => 8230,
89
+ 'Iacute' => 205,
90
+ 'iacute' => 237,
91
+ 'Icirc' => 206,
92
+ 'icirc' => 238,
93
+ 'iexcl' => 161,
94
+ 'Igrave' => 204,
95
+ 'igrave' => 236,
96
+ 'image' => 8465,
97
+ 'infin' => 8734,
98
+ 'int' => 8747,
99
+ 'Iota' => 921,
100
+ 'iota' => 953,
101
+ 'iquest' => 191,
102
+ 'isin' => 8712,
103
+ 'Iuml' => 207,
104
+ 'iuml' => 239,
105
+ 'Kappa' => 922,
106
+ 'kappa' => 954,
107
+ 'Lambda' => 923,
108
+ 'lambda' => 955,
109
+ 'lang' => 9001,
110
+ 'laquo' => 171,
111
+ 'lArr' => 8656,
112
+ 'larr' => 8592,
113
+ 'lceil' => 8968,
114
+ 'ldquo' => 8220,
115
+ 'le' => 8804,
116
+ 'lfloor' => 8970,
117
+ 'lowast' => 8727,
118
+ 'loz' => 9674,
119
+ 'lrm' => 8206,
120
+ 'lsaquo' => 8249,
121
+ 'lsquo' => 8216,
122
+ 'lt' => 60,
123
+ 'macr' => 175,
124
+ 'mdash' => 8212,
125
+ 'micro' => 181,
126
+ 'middot' => 183,
127
+ 'minus' => 8722,
128
+ 'Mu' => 924,
129
+ 'mu' => 956,
130
+ 'nabla' => 8711,
131
+ 'nbsp' => 160,
132
+ 'ndash' => 8211,
133
+ 'ne' => 8800,
134
+ 'ni' => 8715,
135
+ 'not' => 172,
136
+ 'notin' => 8713,
137
+ 'nsub' => 8836,
138
+ 'Ntilde' => 209,
139
+ 'ntilde' => 241,
140
+ 'Nu' => 925,
141
+ 'nu' => 957,
142
+ 'Oacute' => 211,
143
+ 'oacute' => 243,
144
+ 'Ocirc' => 212,
145
+ 'ocirc' => 244,
146
+ 'OElig' => 338,
147
+ 'oelig' => 339,
148
+ 'Ograve' => 210,
149
+ 'ograve' => 242,
150
+ 'oline' => 8254,
151
+ 'Omega' => 937,
152
+ 'omega' => 969,
153
+ 'Omicron' => 927,
154
+ 'omicron' => 959,
155
+ 'oplus' => 8853,
156
+ 'or' => 8744,
157
+ 'ordf' => 170,
158
+ 'ordm' => 186,
159
+ 'Oslash' => 216,
160
+ 'oslash' => 248,
161
+ 'Otilde' => 213,
162
+ 'otilde' => 245,
163
+ 'otimes' => 8855,
164
+ 'Ouml' => 214,
165
+ 'ouml' => 246,
166
+ 'para' => 182,
167
+ 'part' => 8706,
168
+ 'permil' => 8240,
169
+ 'perp' => 8869,
170
+ 'Phi' => 934,
171
+ 'phi' => 966,
172
+ 'Pi' => 928,
173
+ 'pi' => 960,
174
+ 'piv' => 982,
175
+ 'plusmn' => 177,
176
+ 'pound' => 163,
177
+ 'Prime' => 8243,
178
+ 'prime' => 8242,
179
+ 'prod' => 8719,
180
+ 'prop' => 8733,
181
+ 'Psi' => 936,
182
+ 'psi' => 968,
183
+ 'quot' => 34,
184
+ 'radic' => 8730,
185
+ 'rang' => 9002,
186
+ 'raquo' => 187,
187
+ 'rArr' => 8658,
188
+ 'rarr' => 8594,
189
+ 'rceil' => 8969,
190
+ 'rdquo' => 8221,
191
+ 'real' => 8476,
192
+ 'reg' => 174,
193
+ 'rfloor' => 8971,
194
+ 'Rho' => 929,
195
+ 'rho' => 961,
196
+ 'rlm' => 8207,
197
+ 'rsaquo' => 8250,
198
+ 'rsquo' => 8217,
199
+ 'sbquo' => 8218,
200
+ 'Scaron' => 352,
201
+ 'scaron' => 353,
202
+ 'sdot' => 8901,
203
+ 'sect' => 167,
204
+ 'shy' => 173,
205
+ 'Sigma' => 931,
206
+ 'sigma' => 963,
207
+ 'sigmaf' => 962,
208
+ 'sim' => 8764,
209
+ 'spades' => 9824,
210
+ 'sub' => 8834,
211
+ 'sube' => 8838,
212
+ 'sum' => 8721,
213
+ 'sup' => 8835,
214
+ 'sup1' => 185,
215
+ 'sup2' => 178,
216
+ 'sup3' => 179,
217
+ 'supe' => 8839,
218
+ 'szlig' => 223,
219
+ 'Tau' => 932,
220
+ 'tau' => 964,
221
+ 'there4' => 8756,
222
+ 'Theta' => 920,
223
+ 'theta' => 952,
224
+ 'thetasym' => 977,
225
+ 'thinsp' => 8201,
226
+ 'THORN' => 222,
227
+ 'thorn' => 254,
228
+ 'tilde' => 732,
229
+ 'times' => 215,
230
+ 'trade' => 8482,
231
+ 'Uacute' => 218,
232
+ 'uacute' => 250,
233
+ 'uArr' => 8657,
234
+ 'uarr' => 8593,
235
+ 'Ucirc' => 219,
236
+ 'ucirc' => 251,
237
+ 'Ugrave' => 217,
238
+ 'ugrave' => 249,
239
+ 'uml' => 168,
240
+ 'upsih' => 978,
241
+ 'Upsilon' => 933,
242
+ 'upsilon' => 965,
243
+ 'Uuml' => 220,
244
+ 'uuml' => 252,
245
+ 'weierp' => 8472,
246
+ 'Xi' => 926,
247
+ 'xi' => 958,
248
+ 'Yacute' => 221,
249
+ 'yacute' => 253,
250
+ 'yen' => 165,
251
+ 'Yuml' => 376,
252
+ 'yuml' => 255,
253
+ 'Zeta' => 918,
254
+ 'zeta' => 950,
255
+ 'zwj' => 8205,
256
+ 'zwnj' => 8204
257
+ }
258
+ end
@@ -0,0 +1,165 @@
1
+ require 'htmlentities/legacy'
2
+
3
+ #
4
+ # HTML entity encoding and decoding for Ruby
5
+ #
6
+
7
+ class HTMLEntities
8
+
9
+ VERSION = '4.0.0'
10
+ FLAVORS = %w[html4 xhtml1]
11
+ INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal]
12
+
13
+ class InstructionError < RuntimeError
14
+ end
15
+ class UnknownFlavor < RuntimeError
16
+ end
17
+
18
+ #
19
+ # Create a new HTMLEntities coder for the specified flavor.
20
+ # Available flavors are 'html4' and 'xhtml1' (the default).
21
+ # The only difference in functionality between the two is in the handling of the apos
22
+ # (apostrophe) named entity, which is not defined in HTML4.
23
+ #
24
+ def initialize(flavor='xhtml1')
25
+ @flavor = flavor.to_s.downcase
26
+ raise UnknownFlavor, "Unknown flavor #{flavor}" unless FLAVORS.include?(@flavor)
27
+ end
28
+
29
+ #
30
+ # Decode entities in a string into their UTF-8
31
+ # equivalents. Obviously, if your string is not already in UTF-8, you'd
32
+ # better convert it before using this method, or the output will be mixed
33
+ # up.
34
+ #
35
+ # Unknown named entities will not be converted
36
+ #
37
+ def decode(source)
38
+ return source.to_s.gsub(named_entity_regexp) {
39
+ (cp = map[$1]) ? [cp].pack('U') : $&
40
+ }.gsub(/&#([0-9]{1,7});|&#x([0-9a-f]{1,6});/i) {
41
+ $1 ? [$1.to_i].pack('U') : [$2.to_i(16)].pack('U')
42
+ }
43
+ end
44
+
45
+ #
46
+ # Encode codepoints into their corresponding entities. Various operations
47
+ # are possible, and may be specified in order:
48
+ #
49
+ # :basic :: Convert the five XML entities ('"<>&)
50
+ # :named :: Convert non-ASCII characters to their named HTML 4.01 equivalent
51
+ # :decimal :: Convert non-ASCII characters to decimal entities (e.g. &#1234;)
52
+ # :hexadecimal :: Convert non-ASCII characters to hexadecimal entities (e.g. &#x12ab;)
53
+ #
54
+ # You can specify the commands in any order, but they will be executed in
55
+ # the order listed above to ensure that entity ampersands are not
56
+ # clobbered and that named entities are replaced before numeric ones.
57
+ #
58
+ # If no instructions are specified, :basic will be used.
59
+ #
60
+ # Examples:
61
+ # encode_entities(str) - XML-safe
62
+ # encode_entities(str, :basic, :decimal) - XML-safe and 7-bit clean
63
+ # encode_entities(str, :basic, :named, :decimal) - 7-bit clean, with all
64
+ # non-ASCII characters replaced with their named entity where possible, and
65
+ # decimal equivalents otherwise.
66
+ #
67
+ # Note: It is the program's responsibility to ensure that the source
68
+ # contains valid UTF-8 before calling this method.
69
+ #
70
+ def encode(source, *instructions)
71
+ string = source.to_s.dup
72
+ if (instructions.empty?)
73
+ instructions = [:basic]
74
+ elsif (unknown_instructions = instructions - INSTRUCTIONS) != []
75
+ raise InstructionError,
76
+ "unknown encode_entities command(s): #{unknown_instructions.inspect}"
77
+ end
78
+
79
+ basic_entity_encoder =
80
+ if instructions.include?(:basic) || instructions.include?(:named)
81
+ :encode_named
82
+ elsif instructions.include?(:decimal)
83
+ :encode_decimal
84
+ else instructions.include?(:hexadecimal)
85
+ :encode_hexadecimal
86
+ end
87
+ string.gsub!(basic_entity_regexp){ __send__(basic_entity_encoder, $&) }
88
+
89
+ extended_entity_encoders = []
90
+ if instructions.include?(:named)
91
+ extended_entity_encoders << :encode_named
92
+ end
93
+ if instructions.include?(:decimal)
94
+ extended_entity_encoders << :encode_decimal
95
+ elsif instructions.include?(:hexadecimal)
96
+ extended_entity_encoders << :encode_hexadecimal
97
+ end
98
+ unless extended_entity_encoders.empty?
99
+ string.gsub!(extended_entity_regexp){
100
+ encode_extended(extended_entity_encoders, $&)
101
+ }
102
+ end
103
+
104
+ return string
105
+ end
106
+
107
+ private
108
+
109
+ def map
110
+ @map ||= (require "htmlentities/#{@flavor}"; HTMLEntities::MAPPINGS[@flavor])
111
+ end
112
+
113
+ def basic_entity_regexp
114
+ @basic_entity_regexp ||= (
115
+ case @flavor
116
+ when /^html/
117
+ /[<>"&]/
118
+ else
119
+ /[<>'"&]/
120
+ end
121
+ )
122
+ end
123
+
124
+ def extended_entity_regexp
125
+ @extended_entity_regexp ||= (
126
+ regexp = '[\x00-\x1f]|[\xc0-\xfd][\x80-\xbf]+'
127
+ regexp += "|'" if @flavor == 'html4'
128
+ Regexp.new(regexp)
129
+ )
130
+ end
131
+
132
+ def named_entity_regexp
133
+ @named_entity_regexp ||= (
134
+ min_length = map.keys.map{ |a| a.length }.min
135
+ max_length = map.keys.map{ |a| a.length }.max
136
+ /&([a-z][a-z0-9]{#{min_length-1},#{max_length-1}});/i
137
+ )
138
+ end
139
+
140
+ def reverse_map
141
+ @reverse_map ||= map.invert
142
+ end
143
+
144
+ def encode_named(char)
145
+ cp = char.unpack('U')[0]
146
+ (e = reverse_map[cp]) && "&#{e};"
147
+ end
148
+
149
+ def encode_decimal(char)
150
+ "&##{char.unpack('U')[0]};"
151
+ end
152
+
153
+ def encode_hexadecimal(char)
154
+ "&#x#{char.unpack('U')[0].to_s(16)};"
155
+ end
156
+
157
+ def encode_extended(encoders, char)
158
+ encoders.each do |encoder|
159
+ encoded = __send__(encoder, char)
160
+ return encoded if encoded
161
+ end
162
+ return char
163
+ end
164
+
165
+ end
@@ -0,0 +1,76 @@
1
+ <html>
2
+ <head>
3
+ <title>kindle feeds</title>
4
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
5
+ </head>
6
+ <body>
7
+ <a name="start"></a>
8
+
9
+ <h1>Kindle Feeds</h1>
10
+ <p>copyright 2008 Daniel Choi</p>
11
+ <p>contact: dhchoi@gmail.com</p>
12
+ <p>open source license: <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a></p>
13
+ <p>homepage: <a href="http://danielchoi.com/software/kindle-feeds.html">http://danielchoi.com/software/kindle-feeds.html</a></p>
14
+ <br/>
15
+ <br/>
16
+
17
+ <a name="toc"></a>
18
+ <h2>Table of Contents</h2>
19
+ <ul>
20
+ <% @sections.each_with_index do |x, i| %>
21
+ <li><a href="#section-<%= i %>"><%= x.title %></a></li>
22
+ <% end %>
23
+ </ul>
24
+ <% @sections.each_with_index do |s, i| %>
25
+
26
+ <br/>
27
+ <br/>
28
+ <div class="section details">
29
+ <a name="section-<%= i %>" ></a>
30
+ <div style="text-align:right">
31
+ <ul style="list-style-type:none">
32
+ <li><a href="#toc">table of contents</a></li>
33
+ </ul>
34
+ </div>
35
+ <h2>Section: <%= s.title %></h2>
36
+ <p>Feeds in this section:</p>
37
+ <ol>
38
+ <% s.feeds.each_with_index do |f, m| %>
39
+ <li><a href="#section-<%= i %>-feed-<%= m %>"><%= f.title %></a></li>
40
+ <% end %>
41
+ </ol>
42
+ <% s.feeds.each_with_index do |f, j| %>
43
+ <a name="section-<%= i%>-feed-<%= j %>"></a>
44
+ <h2>Feed: <a href="<%= f.url %>"><%= f.title %></a></h2>
45
+ <p><%= f.entries.size %> Entries:</p>
46
+ <ol>
47
+ <% f.entries.each_with_index do |e, n| %>
48
+ <li><a href="#section-<%= i %>-feed-<%= j %>-entry-<%= n %>"><%= e.title %></a></li>
49
+ <% end %>
50
+ </ol>
51
+
52
+ <% f.entries.each_with_index do |e, n| %>
53
+ <a name="section-<%= i %>-feed-<%= j %>-entry-<%= n %>"></a>
54
+ <h3>(<%= n + 1 %>/<%= f.entries.size %>) <a href="<%= e.url %>"><%= e.title %></a></h3>
55
+ <div style="text-align:right"><i><%= e.date_published ? e.date_published.strftime('%B %d, %Y') : nil %></i></div>
56
+ <div>
57
+ <%= e.content %>
58
+ </div>
59
+ <div style="text-align:right">
60
+ <ul style="list-style-type:none">
61
+ <li><a href="#toc">table of contents</a></li>
62
+ <li><a href="#section-<%= i%>"><%= s.title %></a></li>
63
+ <li><a href="#section-<%= i%>-feed-<%= j %>"><%= f.title %></a></li>
64
+ </ul>
65
+ </div>
66
+ <br/>
67
+ <br/>
68
+ <% end %>
69
+ <% end %>
70
+ </div>
71
+
72
+ <% end %>
73
+ </body>
74
+ </html>
75
+
76
+
@@ -0,0 +1,267 @@
1
+ # kindle-feeds
2
+
3
+ # copyright 2008 Daniel Choi
4
+ # dhchoi@gmail.com
5
+ # License: MIT
6
+
7
+ require 'rubygems'
8
+ require 'optparse'
9
+ require 'open-uri'
10
+ require 'feed-normalizer'
11
+ require 'htmlentities'
12
+ require 'iconv'
13
+ require 'erb'
14
+ require 'hpricot'
15
+ CONFIGFILE = "kindle_feeds.conf"
16
+ ERB_TEMPLATE = File.dirname(__FILE__) + "/kindle-feeds.erb.html"
17
+ DEFAULT_FEEDS = <<END
18
+ # kindle-feeds feed list
19
+ #
20
+ # Please edit this file so that it contains the section titles and feed URLs
21
+ # that you want. Follow the format of the example: section titles immediately
22
+ # followed by a list of URLs. The URLs should either be URLs of RSS or Atom
23
+ # feeds or URLs of web pages that contain links to a RSS or Atom feed. The
24
+ # 'http://' at the beginning of the URL is optional. Sections should be separated
25
+ # by exactly one blank line.
26
+
27
+ General News
28
+ nytimes.com
29
+ slate.com
30
+
31
+ Tech News
32
+ techcrunch.com
33
+ http://readwriteweb.com
34
+ slashdot.org
35
+
36
+ Apple
37
+ macworld.com
38
+ macintouch.com
39
+
40
+ Ebook
41
+ teleread.org/blog
42
+ END
43
+
44
+ class Autodiscovery
45
+ def initialize(page_html)
46
+ # Parse the page HTML exactly as given (no downcasing is performed here) into the Hpricot document that #discover queries
47
+ @doc = Hpricot(page_html)
48
+ end
49
+
50
+ # Returns the url of the feed, or nil if none found
51
+ def discover
52
+ # Look for rss link, e.g.
53
+ # <link rel="alternate" type="application/rss+xml" title="RSS"
54
+ # href="http://feeds.feedburner.com/TheRssBlog">
55
+ # Tricky: Hpricot CSS attribute selectors are written like XPath selectors
56
+ [:rss, :atom].each do |flavor|
57
+ if x=@doc.at("head link[@type=application/#{flavor}+xml]")
58
+ return x[:href]
59
+ end
60
+ end
61
+ if x=@doc.at("head link[@type=text/xml]")
62
+ return x[:href]
63
+ end
64
+ return nil
65
+ end
66
+ end
67
+
68
+ class Feed
69
+ def self.create_feed(xml, feed_url)
70
+ feed = FeedNormalizer::FeedNormalizer.parse(xml)
71
+ return nil unless feed.is_a?(FeedNormalizer::Feed)
72
+ # clean up entries:
73
+ ic = Iconv.new('ISO-8859-1//TRANSLIT', 'utf-8')
74
+ ic2 = Iconv.new('ISO-8859-1//IGNORE', 'utf-8')
75
+ #ic = Iconv.new('ASCII//TRANSLIT', 'utf-8')
76
+ coder = HTMLEntities.new
77
+ puts "#{feed.entries.size} entries downloaded."
78
+ puts
79
+ feed.entries.each do |e|
80
+ e.title = coder.decode(e.title)
81
+ e.content = coder.decode(e.content)
82
+
83
+ begin
84
+ e.title = ic.iconv(e.title)
85
+ rescue
86
+ e.title = ic2.iconv(e.title)
87
+ end
88
+ begin
89
+ e.content = ic.iconv(e.content)
90
+ rescue
91
+ e.content = ic2.iconv(e.content)
92
+ end
93
+ doc = Hpricot(e.content)
94
+ doc.search('h1, h2, h3') do |h|
95
+ h.swap("<h4>#{h.inner_text}</h4>")
96
+ end
97
+ doc.search('//font') do |font|
98
+ font.swap(font.inner_text)
99
+ end
100
+ doc.search('//img').remove
101
+ doc.search('svg, object, embed').remove
102
+ doc.search('script').remove
103
+ e.content = doc.to_s
104
+ end
105
+ return feed
106
+ end
107
+
108
+ def self.subscribe(feed_url) # try to repair the URL if possible
109
+ unless feed_url =~ /^http:\/\//
110
+ feed_url = "http://" + feed_url
111
+ end
112
+ puts "Downloading #{feed_url}"
113
+ begin
114
+ xml = fetch(feed_url)
115
+ rescue SocketError
116
+ puts "Error trying to load page at #{feed_url}"
117
+ return
118
+ end
119
+ if xml.nil?
120
+ puts "Can't find any resource at #{feed_url}"
121
+ return
122
+ end
123
+ feed = Feed.create_feed( xml, feed_url.strip )
124
+ if feed.nil?
125
+ puts "#{feed_url}: Attempting autodiscovery..."
126
+ feed_url = auto_discover_and_subscribe(feed_url)
127
+ if feed_url
128
+ xml = fetch(feed_url)
129
+ feed = Feed.create_feed( xml, feed_url.strip )
130
+ end
131
+ end
132
+ feed
133
+ end
134
+
135
+ def self.auto_discover_and_subscribe(url)
136
+ uri = URI.parse(url)
137
+ feed_url = Autodiscovery.new(fetch(url)).discover
138
+ if feed_url
139
+ feed_url = uri.merge(feed_url).to_s
140
+ puts "Found feed: #{feed_url}"
141
+ return feed_url
142
+ else
143
+ puts "Can't find feed for #{url}"
144
+ return nil
145
+ end
146
+ end
147
+
148
+ # a simple wrapper over open-uri call. Easier to mock in testing.
149
+ def self.fetch(url) # returns the response body as a String, or nil (after printing a message) on timeout or error
150
+ begin
151
+ open(url).read
152
+ rescue Timeout::Error
153
+ puts "-> attempt to fetch #{url} timed out"
154
+ rescue StandardError => e # not Exception: Interrupt (Ctrl-C) and SystemExit must propagate
155
+ puts "-> error trying to fetch #{url}: #{e}"
156
+ end
157
+ end
158
+ end
159
+
160
+ class Section
161
+ attr_accessor :title, :uris, :feeds
162
+ def initialize(title, uris)
163
+ @feeds = []
164
+ @title = title
165
+ @uris = uris
166
+ # generate the feeds
167
+ @uris.each do |uri|
168
+ if (feed=Feed.subscribe(uri))
169
+ @feeds << feed
170
+ end
171
+ end
172
+ end
173
+ end
174
+
175
+ class KindleFeeds
176
+ VERSION = "1.0.0"
177
+ attr_accessor :sections
178
+ # config is a text file with a certain format
179
+ def initialize(config)
180
+ @sections = []
181
+ raw_sections = config.split(/^\s*$/)
182
+ results = []
183
+ raw_sections.each do |section|
184
+ lines = section.strip.split("\n")
185
+ title = lines.shift.strip
186
+ urls = lines.map {|line| line.strip}
187
+ results << [title, *urls]
188
+ end
189
+ # an array of arrays. each array is composed of a section title followed by urls of the feeds
190
+ results
191
+ puts "Fetching feeds:"
192
+ results.each do |r|
193
+ puts "- " + r.first
194
+ r[1..-1].each do |x|
195
+ puts " - " + x
196
+ end
197
+ end
198
+ puts
199
+ # subscribe
200
+ results.each do |r|
201
+ @sections << Section.new(r.shift, r)
202
+ end
203
+ end
204
+
205
+ def to_html
206
+ puts "Converting feeds into Kindle-compatible and optimized HTML..."
207
+ puts
208
+ erb = ERB.new(File.read(ERB_TEMPLATE))
209
+ out = erb.result(binding())
210
+ # TODO put timestamp in filename
211
+ date = Time.now.strftime('%m-%H-%Y')
212
+ outfile = "Kindle Feeds #{date}.html"
213
+ File.open(outfile, "w") do |f|
214
+ f.write out
215
+ end
216
+ puts "Output written to file:"
217
+ puts outfile
218
+ puts
219
+ puts "Email this file as an attachment to YOUR_KINDLE_USERNAME@kindle.com or YOUR_KINDLE_USERNAME@free.kindle.com."
220
+ puts
221
+ puts "Visit http://www.amazon.com/gp/help/customer/display.html?nodeId=200140600 for more help."
222
+ puts "Done."
223
+ end
224
+
225
+ def self.run(argv=ARGV)
226
+ opts = OptionParser.new do |opt|
227
+ opt.program_name = File.basename $0
228
+ opt.version = KindleFeeds::VERSION
229
+ opt.banner = <<-EOT
230
+ Usage: #{opt.program_name}
231
+
232
+ kindle-feeds reads a feed list from #{CONFIGFILE}.conf, downloads the feeds, and
233
+ generates a Kindle-compatiable and optimized HTML file that can be sent to
234
+ YOUR_KINDLE_USERNAME@kindle.com or YOUR_KINDLE_USERNAME@free.kindle.com for conversion
235
+ into an .azw file for reading on the Kindle.
236
+
237
+ The first time kindle-feeds is run, it will generate a stub #{CONFIGFILE}.conf file
238
+ in the same directory. Please edit this file to specify the feeds you want to
239
+ download and convert for Kindle reading. Further instructions can be found at the
240
+ top of kindle-feeds.conf once it is generated.
241
+
242
+ Project homepage:
243
+ http://danielchoi.com/software/kindle-feeds.html
244
+ EOT
245
+ end
246
+ opts.parse! argv
247
+
248
+ if ! File.exist?(CONFIGFILE)
249
+ puts "Can't find #{CONFIGFILE}. Generating..."
250
+ File.open(CONFIGFILE, "w") do |f|
251
+ f.write DEFAULT_FEEDS
252
+ end
253
+ puts "Please edit #{CONFIGFILE} before running kindle-feeds again."
254
+ exit
255
+ end
256
+ puts "Reading #{CONFIGFILE} for feed URLs."
257
+ puts
258
+ configfile = File.open(CONFIGFILE).readlines
259
+ configfile = configfile.select {|line| line !~ /^#/}.join
260
+ kf = KindleFeeds.new(configfile)
261
+ kf.to_html
262
+ end
263
+ end
264
+
265
+ if __FILE__ == $0
266
+ KindleFeeds.run ARGV
267
+ end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kindle-feeds
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Choi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-08-22 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: feed-normalizer
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.5.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hpricot
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0.6"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hoe
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.7.0
44
+ version:
45
+ description: Format Atom and RSS feeds for the Kindle.
46
+ email: dhchoi@gmail.com
47
+ executables:
48
+ - kindle-feeds
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - History.txt
53
+ - Manifest.txt
54
+ - README.txt
55
+ files:
56
+ - History.txt
57
+ - Manifest.txt
58
+ - README.txt
59
+ - Rakefile
60
+ - bin/kindle-feeds
61
+ - lib/kindle-feeds.rb
62
+ - lib/htmlentities.rb
63
+ - lib/htmlentities/html4.rb
64
+ - lib/htmlentities/legacy.rb
65
+ - lib/htmlentities/string.rb
66
+ - lib/htmlentities/xhtml1.rb
67
+ - lib/kindle-feeds.erb.html
68
+ has_rdoc: true
69
+ homepage: http://danielchoi.com/software/kindle-feeds.html
70
+ post_install_message: Type kindle-feeds -h for instructions.
71
+ rdoc_options:
72
+ - --main
73
+ - README.txt
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: "0"
81
+ version:
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ requirements: []
89
+
90
+ rubyforge_project: kindle-feeds
91
+ rubygems_version: 1.2.0
92
+ signing_key:
93
+ specification_version: 2
94
+ summary: Format Atom and RSS feeds for the Kindle.
95
+ test_files: []
96
+