harvester 0.8.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/CHANGELOG.rdoc +45 -0
  2. data/README.rdoc +74 -0
  3. data/Rakefile +28 -0
  4. data/bin/harvester +13 -0
  5. data/bin/harvester-chart +5 -0
  6. data/bin/harvester-clock +35 -0
  7. data/bin/harvester-db +15 -0
  8. data/bin/harvester-fetch +5 -0
  9. data/bin/harvester-generate +5 -0
  10. data/bin/harvester-jabber +6 -0
  11. data/bin/harvester-new +25 -0
  12. data/bin/harvester-post +5 -0
  13. data/bin/harvester-run +14 -0
  14. data/collections.yaml +15 -0
  15. data/config.yaml +13 -0
  16. data/data/ent/HTMLlat1.ent +194 -0
  17. data/data/ent/HTMLspecial.ent +77 -0
  18. data/data/ent/HTMLsymbol.ent +241 -0
  19. data/data/sql/dbd-mysql-isotime.diff +11 -0
  20. data/data/sql/harvester-0.6-mysql.diff +59 -0
  21. data/data/sql/harvester-0.7-mysql.diff +39 -0
  22. data/data/sql/mysql/chart.sql +1 -0
  23. data/data/sql/mysql/create.table.enclosures.sql +9 -0
  24. data/data/sql/mysql/create.table.items.sql +8 -0
  25. data/data/sql/mysql/create.table.jabbersettings.sql +5 -0
  26. data/data/sql/mysql/create.table.jabbersubscriptions.sql +5 -0
  27. data/data/sql/mysql/create.table.sources.sql +9 -0
  28. data/data/sql/mysql/create.view.last48hours.sql +1 -0
  29. data/data/sql/postgresql/chart.sql +1 -0
  30. data/data/sql/postgresql/create.table.enclosures.sql +9 -0
  31. data/data/sql/postgresql/create.table.items.sql +8 -0
  32. data/data/sql/postgresql/create.table.jabbersettings.sql +5 -0
  33. data/data/sql/postgresql/create.table.jabbersubscriptions.sql +5 -0
  34. data/data/sql/postgresql/create.table.sources.sql +9 -0
  35. data/data/sql/postgresql/create.view.last48hours.sql +1 -0
  36. data/data/sql/sqlite3/chart.sql +1 -0
  37. data/data/sql/sqlite3/create.table.enclosures.sql +9 -0
  38. data/data/sql/sqlite3/create.table.items.sql +8 -0
  39. data/data/sql/sqlite3/create.table.jabbersettings.sql +5 -0
  40. data/data/sql/sqlite3/create.table.jabbersubscriptions.sql +5 -0
  41. data/data/sql/sqlite3/create.table.sources.sql +9 -0
  42. data/data/sql/sqlite3/create.view.last48hours.sql +1 -0
  43. data/data/templates/atom-all.xml +88 -0
  44. data/data/templates/atom.xml +88 -0
  45. data/data/templates/index.html +412 -0
  46. data/data/templates/rss-all.rdf +86 -0
  47. data/data/templates/rss.rdf +85 -0
  48. data/data/templates/static/harvester.css +365 -0
  49. data/data/templates/static/harvester.gif +0 -0
  50. data/data/templates/static/harvester_ie7.css +15 -0
  51. data/data/templates/static/harvester_lte_ie6.css +27 -0
  52. data/harvester.gemspec +35 -0
  53. data/lib/harvester.rb +132 -0
  54. data/lib/harvester/chart.rb +72 -0
  55. data/lib/harvester/db.rb +123 -0
  56. data/lib/harvester/fetch.rb +96 -0
  57. data/lib/harvester/generate.rb +152 -0
  58. data/lib/harvester/generator/entity_translator.rb +46 -0
  59. data/lib/harvester/generator/link_absolutizer.rb +39 -0
  60. data/lib/harvester/jabber.rb +443 -0
  61. data/lib/harvester/mrss.rb +355 -0
  62. data/lib/harvester/post.rb +19 -0
  63. metadata +237 -0
@@ -0,0 +1,86 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <xsl:stylesheet version="1.0"
3
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
4
+ xmlns:hv="http://astroblog.spaceboyz.net/harvester/xslt-functions"
5
+ xmlns:date="http://exslt.org/dates-and-times"
6
+ exclude-result-prefixes="xsl hv date">
7
+
8
+ <xsl:output method="xml"
9
+ version="1.0"
10
+ encoding="utf-8"
11
+ media-type="application/rss+xml"
12
+ cdata-section-elements="description"
13
+ indent="yes"/>
14
+
15
+ <xsl:template match="/collections">
16
+ <rss version="2.0">
17
+ <channel>
18
+ <title>Blog Harvester (all)</title>
19
+ <link>http://astroblog.spaceboyz.net/harvester/</link>
20
+ <description>&lt;p&gt;&lt;a href="http://astroblog.spaceboyz.net/harvester/"&gt;Blog Harvester&lt;/a&gt; sammelt Blogs von Menschen rund um Astro. Enthalten sind:&lt;/p&gt;&lt;ul&gt;
21
+ <xsl:for-each select="collection/feed">
22
+ <xsl:sort select="translate(title, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"/>
23
+ &lt;li&gt;&lt;a href="<xsl:value-of select="link"/>" title="<xsl:value-of select="description"/>"&gt;<xsl:value-of select="title"/>&lt;/a&gt;&lt;/li&gt;
24
+ </xsl:for-each>
25
+ &lt;/ul&gt;
26
+ &lt;img src="chart.jpg"/&gt;</description>
27
+
28
+ <xsl:variable name="collections" select="."/>
29
+ <xsl:for-each select="hv:collection-items('%')/item">
30
+ <xsl:variable name="rss" select="string(rss)"/>
31
+ <xsl:variable name="feed" select="$collections/collection/feed[rss=$rss]"/>
32
+ <item>
33
+ <title><xsl:value-of select="concat($feed/title,': ',title)"/></title>
34
+ <link><xsl:value-of select="link"/></link>
35
+ <guid isPermaLink="true"><xsl:value-of select="link"/></guid>
36
+ <pubDate>
37
+ <xsl:value-of select="concat(date:day-abbreviation(date),
38
+ ', ',
39
+ format-number(date:day-in-month(date), '00'),
40
+ ' ',
41
+ date:month-abbreviation(date),
42
+ ' ',
43
+ date:year(date),
44
+ ' ',
45
+ format-number(date:hour-in-day(date), '00'),
46
+ ':',
47
+ format-number(date:minute-in-hour(date), '00'),
48
+ ':',
49
+ format-number(date:second-in-minute(date), '00'),
50
+ ' GMT'
51
+ )"/>
52
+ </pubDate>
53
+ <description>
54
+ <xsl:value-of select="hv:item-description($rss, string(link))"/>
55
+ </description>
56
+ <source url="{$feed/link}"><xsl:value-of select="$feed/title"/></source>
57
+
58
+ <!-- Enclosures -->
59
+ <xsl:for-each select="hv:item-enclosures($rss, string(link))/enclosure">
60
+ <enclosure url="{href}">
61
+ <xsl:if test="mime">
62
+ <xsl:attribute name="type">
63
+ <xsl:value-of select="mime"/>
64
+ </xsl:attribute>
65
+ </xsl:if>
66
+ <xsl:if test="string-length(title) &gt; 0">
67
+ <xsl:attribute name="title">
68
+ <xsl:value-of select="title"/>
69
+ </xsl:attribute>
70
+ </xsl:if>
71
+ <xsl:if test="number(length)">
72
+ <xsl:attribute name="length">
73
+ <xsl:value-of select="number(length)"/>
74
+ </xsl:attribute>
75
+ </xsl:if>
76
+ </enclosure>
77
+ </xsl:for-each>
78
+
79
+ </item>
80
+ </xsl:for-each>
81
+
82
+ </channel>
83
+ </rss>
84
+ </xsl:template>
85
+
86
+ </xsl:stylesheet>
@@ -0,0 +1,85 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <xsl:stylesheet version="1.0"
3
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
4
+ xmlns:hv="http://astroblog.spaceboyz.net/harvester/xslt-functions"
5
+ xmlns:date="http://exslt.org/dates-and-times"
6
+ exclude-result-prefixes="xsl hv date">
7
+
8
+ <xsl:output method="xml"
9
+ version="1.0"
10
+ encoding="utf-8"
11
+ media-type="application/rss+xml"
12
+ cdata-section-elements="description"
13
+ indent="yes"/>
14
+
15
+ <xsl:template match="/collections">
16
+ <rss version="2.0">
17
+ <channel>
18
+ <title>Blog Harvester</title>
19
+ <link>http://astroblog.spaceboyz.net/harvester/</link>
20
+ <description>&lt;p&gt;&lt;a href="http://astroblog.spaceboyz.net/harvester/"&gt;Blog Harvester&lt;/a&gt; sammelt Blogs von Menschen rund um Astro. Enthalten sind:&lt;/p&gt;&lt;ul&gt;
21
+ <xsl:for-each select="collection[@name='blogs']/feed">
22
+ <xsl:sort select="translate(title, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"/>
23
+ &lt;li&gt;&lt;a href="<xsl:value-of select="link"/>" title="<xsl:value-of select="description"/>"&gt;<xsl:value-of select="title"/>&lt;/a&gt;&lt;/li&gt;
24
+ </xsl:for-each>
25
+ &lt;/ul&gt;
26
+ &lt;img src="chart.jpg"/&gt;</description>
27
+
28
+ <xsl:variable name="blogs" select="collection[@name='blogs']"/>
29
+ <xsl:for-each select="hv:collection-items('blogs')/item">
30
+ <xsl:variable name="rss" select="string(rss)"/>
31
+ <xsl:variable name="feed" select="$blogs/feed[rss=$rss]"/>
32
+ <item>
33
+ <title><xsl:value-of select="concat($feed/title,': ',title)"/></title>
34
+ <link><xsl:value-of select="link"/></link>
35
+ <guid isPermaLink="true"><xsl:value-of select="link"/></guid>
36
+ <pubDate>
37
+ <xsl:value-of select="concat(date:day-abbreviation(date),
38
+ ', ',
39
+ format-number(date:day-in-month(date), '00'),
40
+ ' ',
41
+ date:month-abbreviation(date),
42
+ ' ',
43
+ date:year(date),
44
+ ' ',
45
+ format-number(date:hour-in-day(date), '00'),
46
+ ':',
47
+ format-number(date:minute-in-hour(date), '00'),
48
+ ':',
49
+ format-number(date:second-in-minute(date), '00'),
50
+ ' GMT'
51
+ )"/>
52
+ </pubDate>
53
+ <description>
54
+ <xsl:value-of select="hv:item-description($rss, string(link))"/>
55
+ </description>
56
+ <source url="{$feed/link}"><xsl:value-of select="$feed/title"/></source>
57
+
58
+ <!-- Enclosures -->
59
+ <xsl:for-each select="hv:item-enclosures($rss, string(link))/enclosure">
60
+ <enclosure url="{href}">
61
+ <xsl:if test="mime">
62
+ <xsl:attribute name="type">
63
+ <xsl:value-of select="mime"/>
64
+ </xsl:attribute>
65
+ </xsl:if>
66
+ <xsl:if test="string-length(title) &gt; 0">
67
+ <xsl:attribute name="title">
68
+ <xsl:value-of select="title"/>
69
+ </xsl:attribute>
70
+ </xsl:if>
71
+ <xsl:if test="number(length)">
72
+ <xsl:attribute name="length">
73
+ <xsl:value-of select="number(length)"/>
74
+ </xsl:attribute>
75
+ </xsl:if>
76
+ </enclosure>
77
+ </xsl:for-each>
78
+ </item>
79
+ </xsl:for-each>
80
+
81
+ </channel>
82
+ </rss>
83
+ </xsl:template>
84
+
85
+ </xsl:stylesheet>
@@ -0,0 +1,365 @@
1
+ /*
2
+ * harvester.css
3
+ *
4
+ * Description: Stylesheet zur Webseite
5
+ *
6
+ * Website: Blog Harvester
7
+ * http://astroblog.spaceboyz.net/harvester/
8
+ * http://www.blog-harvester.de/
9
+ * Layout: 3-spaltig links fest, mittig variabel, rechts fest
10
+ * Textfarben #303030 #C00000 #FF6600 #6699CC
11
+ * Browser: Firefox, Opera (webkit), Safari, IE7
12
+ * (<= IE6: no chart, little problems)
13
+ * Author: Tigion, http://tigion.de/
14
+ * Version: 18.02.2009
15
+ */
16
+
17
+ /* ------------------------------------------------------ */
18
+ /* default */
19
+ /* ------------------------------------------------------ */
20
+
21
+ * {
22
+ border: 0 none;
23
+ margin: 0;
24
+ padding: 0;
25
+ }
26
+
27
+ /* body & Meta */
28
+
29
+ body {
30
+ font-size: 62.5%;
31
+ font-family: 'Lucida Grande', Verdana, Arial, Sans-Serif;
32
+ color: #303030;
33
+ background-color: #E0E0E0;
34
+ text-align: left;
35
+ /*min-width: 1024px;*/ /* TODO */
36
+ }
37
+
38
+ a {
39
+ text-decoration: none;
40
+ color: #C00000;
41
+ }
42
+
43
+ a:hover {
44
+ text-decoration: underline;
45
+ }
46
+
47
+ a:active {
48
+ color: #FFA500;
49
+ }
50
+
51
+ h1 {
52
+ background-color: #FFFFFF;
53
+ font-size: 3em;
54
+ text-align: center;
55
+ margin-bottom: 0.6em;
56
+ padding: 0.3em;
57
+ }
58
+
59
+ /* ------------------------------------------------------ */
60
+ /* Layout */
61
+ /* ------------------------------------------------------ */
62
+
63
+ /* left ticker - Harvesting, ... */
64
+ .tickers1 {
65
+ float: left;
66
+ width: 20em;
67
+ overflow: hidden;
68
+ margin: 0 2em 0 0;
69
+ }
70
+
71
+ /* right ticker - Photos, Delicious */
72
+ .tickers2 {
73
+ float: right;
74
+ width: 20em;
75
+ overflow: hidden;
76
+ margin: 0 0 0 2em;
77
+ }
78
+
79
+ /* right ticker - Microblogging */
80
+ .tickers3 {
81
+ float: right;
82
+ width: 20em;
83
+ overflow: hidden;
84
+ margin: 0 0 0 2em;
85
+ }
86
+
87
+ /* blog entries */
88
+ .hfeed {
89
+ overflow: hidden;
90
+ /*margin: 0 44em 0 22em;*/
91
+ /*min-width: 20em;*/
92
+ /*margin: 0 22em;*/
93
+ }
94
+
95
+ /* ------------------------------------------------------ */
96
+ /* blog entries */
97
+ /* ------------------------------------------------------ */
98
+
99
+ .entry {
100
+ background-color: #fff;
101
+ margin-bottom: 0.9em;
102
+ font-size: 1.2em;
103
+ overflow: hidden;
104
+ }
105
+
106
+ /* date - blog entry */
107
+ .entrydate {
108
+ float: right;
109
+ margin-top: -2em;
110
+ padding-right: 1em;
111
+ font-size: 0.8em;
112
+ text-align: right;
113
+ color: white;
114
+ }
115
+
116
+ /* text - blog entry */
117
+ .entrydesc {
118
+ /*text-align: justify;*/
119
+ font-size: 1.0em;
120
+ line-height: 1.4em;
121
+ padding: 0 0.8em;
122
+ }
123
+
124
+ /* link - blog entry */
125
+ .entryfoot {
126
+ font-size: 0.8em;
127
+ padding: 1em;
128
+ text-align: right;
129
+ }
130
+
131
+ /* title / link - blog */
132
+ .entry h2.blog-title {
133
+ font-size: 1.0em;
134
+ padding: 0.5em 0.8em;
135
+ background-color: #ff6600;
136
+ color: white;
137
+ }
138
+
139
+ .entry h2.blog-title a {
140
+ color: white;
141
+ }
142
+
143
+ /* title / link - blog entry */
144
+ .entry h3.entry-title {
145
+ font-size: 1.3em;
146
+ padding: 0.7em;
147
+ }
148
+
149
+ .entry h3.entry-title a {
150
+ color: #303030;
151
+ }
152
+
153
+ /* ------------------------------------------------------ */
154
+ /* blog entry content (description) */
155
+ /* ------------------------------------------------------ */
156
+
157
+ .entry-content h2, .entry-content h3 {
158
+ padding: 0;
159
+ margin: 0;
160
+ background-color: #fff;
161
+ color: #303030;
162
+ }
163
+
164
+ .entry-content h2 {
165
+ font-size: 1.2em;
166
+ margin: 1.0em 0 0.8em 0;
167
+ }
168
+
169
+ .entry-content h3 {
170
+ font-size: 1.0em;
171
+ margin: 1.0em 0 0.8em 0;
172
+ }
173
+
174
+ .entry-content p {
175
+ margin: 0 0 0.8em 0;
176
+ }
177
+
178
+ .entry-content ul, .entry-content ol {
179
+ margin: 0.8em 0 0.8em 1.8em;
180
+ }
181
+
182
+ .entry-content li {
183
+ padding-bottom: 0.5em;
184
+ }
185
+
186
+ .entry-content pre, .entry-content code { /* code samples: boxed, scrollable block in a fixed-width font */
187
+ background-color: #eee;
188
+ color: #666;
189
+ padding: 1em;
190
+ margin: 1em;
191
+ border: 1px solid #ddd;
192
+ display: block;
193
+ overflow: auto;
194
+ font: 0.9em "Andale Mono", "Courier New", Courier, Fixed, mono, monospace; /* "mono" is not a CSS generic family; end with "monospace" so a fixed-width fallback always exists */
195
+ }
196
+
197
+ .entry-content pre {
198
+ white-space: pre;
199
+ }
200
+
201
+ .entry-content code {
202
+ white-space: nowrap;
203
+ }
204
+
205
+ .entry-content blockquote {
206
+ margin: 1em;
207
+ padding-left: 1em;
208
+ border-left: 5px solid #e0e0e0;
209
+ color: #666;
210
+ }
211
+
212
+ /* more dynamic images */
213
+ /* no IE6 */
214
+ html>body .entry img {
215
+ height: auto !important;
216
+ max-width: 100% !important;
217
+ padding: 0.5em 0;
218
+ }
219
+
220
+ .entry .wp-caption {
221
+ max-width: 100% !important;
222
+ }
223
+
224
+ /* ------------------------------------------------------ */
225
+ /* tickers (sidebars) */
226
+ /* ------------------------------------------------------ */
227
+
228
+ .ticker {
229
+ background-color: white;
230
+ margin: 0 0 1em 0;
231
+ font-size: 0.9em;
232
+ }
233
+
234
+ .ticker h2 {
235
+ font-size: 1.3em;
236
+ padding: 0.5em 0.7em;
237
+ color: white;
238
+ }
239
+
240
+ .tickers1 .ticker h2, .tickers2 .ticker h2 {
241
+ background-color: #6699cc;
242
+ }
243
+
244
+ .tickers3 .ticker h2 {
245
+ background-color: #888;
246
+ }
247
+
248
+ .ticker h2 a {
249
+ color: white;
250
+ text-decoration: none;
251
+ }
252
+
253
+ .ticker p {
254
+ padding: 1.0em;
255
+ font-style: italic;
256
+ line-height: 1.4em;
257
+ }
258
+
259
+ .ticker p + p {
260
+ padding-top: 0;
261
+ }
262
+
263
+
264
+ .ticker ul {
265
+ padding: 1.0em;
266
+ }
267
+
268
+ .ticker ul li {
269
+ list-style: none;
270
+ line-height: 1.4em;
271
+ }
272
+
273
+ .ticker ul li + li {
274
+ padding: 0.3em 0 0 0;
275
+ }
276
+
277
+ /* microblogging */
278
+ .tickers3 .ticker ul li {
279
+ overflow: hidden;
280
+ font-style: italic;
281
+ }
282
+
283
+ .tickers3 .ticker ul li + li {
284
+ padding: 1em 0 0 0;
285
+ }
286
+
287
+ .tickers3 .ticker img.avatar {
288
+ margin: 3px 5px 2px 0;
289
+ height: 24px;
290
+ width: 24px;
291
+ float: left;
292
+ }
293
+
294
+
295
+ /* links */
296
+
297
+
298
+ .ticker .tickerlinks li a {
299
+ color: #303030;
300
+ text-decoration: none;
301
+ }
302
+
303
+ .ticker .tickerlinks li a:hover {
304
+ text-decoration: underline;
305
+ }
306
+
307
+ /* list */
308
+ .ticker .tickerlist {
309
+ list-style: none;
310
+ padding: 0.5em 1.0em;
311
+ }
312
+
313
+ .ticker .tickerlist li {
314
+ padding-bottom: 0.4em;
315
+ }
316
+
317
+ .ticker .tickerlist li a[href$=".bz2"] {
318
+ display: block;
319
+ }
320
+
321
+ /* photos */
322
+ .ticker ul.photos {
323
+ padding: 1em;
324
+ margin: 0;
325
+ text-align: center;
326
+ }
327
+
328
+ .ticker ul.photos li {
329
+ list-style: none;
330
+ margin: 0;
331
+ padding: 0;
332
+ }
333
+
334
+ .ticker ul.photos li + li {
335
+ padding: 1em 0 0 0;
336
+ }
337
+
338
+ .ticker ul.photos li a img {
339
+ border: 0;
340
+ margin: 0 auto;
341
+ width: 20em; /*max-height: 20em;*/
342
+ max-width: 20em;
343
+ border: 1px solid #e0e0e0;
344
+ }
345
+
346
+ /* ------------------------------------------------------ */
347
+ /* extras */
348
+ /* ------------------------------------------------------ */
349
+
350
+ /* chart */
351
+ .chart {
352
+ display: none;
353
+ position: absolute;
354
+ z-index: 1;
355
+ background-color: white;
356
+ right: 2px;
357
+ margin-top: 10px;
358
+ margin-right: 15px;
359
+ padding: 10px;
360
+ opacity: 0.85;
361
+ }
362
+
363
+ .head:hover .chart {
364
+ display: block;
365
+ }