blackwinter-rss2mail 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ = Revision history for rss2mail
2
+
3
+ == 0.0.1 [2008-12-18]
4
+
5
+ * Birthday :-)
data/README ADDED
@@ -0,0 +1,41 @@
1
+ = rss2mail - Send RSS feeds as e-mail
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to rss2mail version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ Sends new entries for configured RSS feeds as e-mail to any number of
11
+ recipients. See <tt>example/</tt> directory for a sample configuration file.
12
+
13
+
14
+ == LINKS
15
+
16
+ <b></b>
17
+ Documentation:: <http://rss2mail.rubyforge.org/>
18
+ Source code:: <http://github.com/blackwinter/rss2mail>
19
+ Rubyforge project:: <http://rubyforge.org/projects/rss2mail>
20
+
21
+
22
+ == AUTHORS
23
+
24
+ * Jens Wille <mailto:ww@blackwinter.de>
25
+
26
+
27
+ == LICENSE AND COPYRIGHT
28
+
29
+ Copyright (C) 2007-2008 Jens Wille
30
+
31
+ rss2mail is free software: you can redistribute it and/or modify it under
32
+ the terms of the GNU General Public License as published by the Free Software
33
+ Foundation, either version 3 of the License, or (at your option) any later
34
+ version.
35
+
36
+ rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY
37
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
38
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
39
+
40
+ You should have received a copy of the GNU General Public License along with
41
+ rss2mail. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,26 @@
1
# Load the version constant via a path relative to this Rakefile, so the
# require does not depend on the current directory being in the load path.
require File.expand_path('../lib/rss2mail/version', __FILE__)

begin
  require 'hen'

  # Project description handed to Hen: RubyForge settings plus the gem
  # specification values.
  Hen.lay! {{
    :rubyforge => {
      :project  => %q{rss2mail},
      :package  => %q{rss2mail},
      :rdoc_dir => nil
    },

    :gem => {
      :version      => RSS2Mail::VERSION,
      :summary      => %q{Send RSS feeds as e-mail},
      :homepage     => %q{http://rss2mail.rubyforge.org/},
      :files        => FileList['lib/**/*.rb', 'bin/*'].to_a,
      :extra_files  => FileList['[A-Z]*', 'example/*'].to_a,
      :dependencies => %w[simple-rss hpricot unidecode ruby-nuggets]
    }
  }}
rescue LoadError
  abort "Please install the 'hen' gem first."
end

### Place your custom Rake tasks here.
@@ -0,0 +1,104 @@
1
+ #! /usr/bin/ruby
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # rss2mail -- Send RSS feeds as e-mail #
7
+ # #
8
+ # Copyright (C) 2007-2008 Jens Wille #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <ww@blackwinter.de> #
12
+ # #
13
+ # rss2mail is free software; you can redistribute it and/or modify it under #
14
+ # the terms of the GNU General Public License as published by the Free #
15
+ # Software Foundation; either version 3 of the License, or (at your option) #
16
+ # any later version. #
17
+ # #
18
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'optparse'
30
+ require 'yaml'
31
+
32
# Parent of this script's directory; the library, the templates and the
# default feeds file are all looked up relative to it.
base = File.join(File.dirname(__FILE__), '..')
$: << File.join(base, 'lib')

require 'rss2mail'

USAGE = "Usage: #{$0} [options] <target>"
abort USAGE if ARGV.empty?

# Defaults; :files stays nil unless --directory is given.
options = {
  :files   => nil,
  :reload  => false,
  :verbose => false
}

OptionParser.new { |opts|
  opts.banner = USAGE
  opts.separator ''

  opts.on('-d', '--directory DIRECTORY', 'Process all feeds in directory') { |d|
    abort "Not a directory: #{d}"     unless File.directory?(d)
    abort "Can't read directory: #{d}" unless File.readable?(d)

    options[:files] = Dir[File.join(d, '*.yaml')]
  }

  opts.on('-r', '--reload', 'Reload feeds') {
    options[:reload] = true
  }

  opts.on('-v', '--verbose', 'Be verbose') {
    options[:verbose] = true
  }
}.parse!

# The first remaining argument names the feeds target (a top-level key of the
# feeds file).
if target = ARGV.shift
  target = target.to_sym
else
  abort "No feeds target given\n#{USAGE}"
end

# Lazily reads templates/<name>.erb below +base+ on first access. A missing
# template yields nil and is not cached.
templates = Hash.new { |h, k|
  t = File.join(base, 'templates', "#{k}.erb")

  begin
    h[k] = File.read(t)
  rescue Errno::ENOENT
    # silently ignore
  end
}

feeds_files = options.delete(:files) || [File.join(base, 'feeds.yaml')]
feeds_files.each { |feeds_file|
  feeds = begin
    YAML.load_file(feeds_file)
  rescue Errno::ENOENT
    warn "Feeds file not found: #{feeds_file}"
    next
  end

  # An empty file (or one whose top level is not a mapping) does not load as
  # a Hash; skip it instead of failing on the lookup below, and leave the
  # file untouched.
  unless feeds.is_a?(Hash)
    warn "Feeds file empty or malformed: #{feeds_file}"
    next
  end

  unless target_feeds = feeds[target]
    warn "Feeds target not found: #{target} (in #{feeds_file})"
    next
  end

  target_feeds.each { |feed|
    RSS2Mail::Feed.new(feed, options).deliver(templates) unless feed[:skip]
  }

  # write updated feed information
  File.open(feeds_file, 'w') { |file|
    YAML.dump(feeds, file)
  }
}
@@ -0,0 +1,30 @@
1
+ ---
2
+ :"twice-a-day":
3
+ - :url: http://feeds.feedburner.com/Bildblog
4
+ :to: my.secret@e.mail
5
+ :title: BILDblog
6
+ - :url: http://newsfeed.zeit.de/
7
+ :to: my.secret@e.mail
8
+ :title: DIE ZEIT
9
+ - :url: http://www.spiegel.de/schlagzeilen/rss/index.xml
10
+ :to: my.secret@e.mail
11
+ :title: SPIEGEL ONLINE
12
+ :hourly:
13
+ - :url: http://www.heise.de/newsticker/heise-atom.xml
14
+ :to: my.secret@e.mail
15
+ :title: heise online News
16
+ :body: heisetext
17
+ :body_encoding: ISO-8859-1
18
+ - :url: http://log.netbib.de/feed/
19
+ :to: my.secret@e.mail
20
+ :title: netbib weblog
21
+ :daily:
22
+ - :url: http://aktuell.de.selfhtml.org/weblog/rss-feed
23
+ :to: my.secret@e.mail
24
+ :title: SELFHTML Aktuell Weblog
25
+ - :url: http://rss.slashdot.org/Slashdot/slashdotLinux
26
+ :to: my.secret@e.mail
27
+ :title: "Slashdot: Linux"
28
+ - :url: http://rss.slashdot.org/Slashdot/slashdotBookReviews
29
+ :to: my.secret@e.mail
30
+ :title: "Slashdot: Book Reviews"
@@ -0,0 +1,30 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # rss2mail -- Send RSS feeds as e-mail #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'rss2mail/feed'
28
+
29
# Top-level namespace of the rss2mail library; declared empty here.
module RSS2Mail; end
@@ -0,0 +1,225 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'open-uri'
28
+ require 'erb'
29
+
30
+ require 'rubygems'
31
+ require 'hpricot'
32
+ require 'unidecode'
33
+ require 'nuggets/util/i18n'
34
+ require 'nuggets/string/evaluate'
35
+
36
+ require 'rss2mail/rss'
37
+
38
module RSS2Mail

  # One configured feed. Fetches the feed, determines which items have not
  # been sent yet and pipes each of them, rendered through an ERB template,
  # into <tt>/usr/bin/mail</tt>.
  #
  # The +feed+ hash given to ::new is updated in place: #get stores
  # <tt>:etag</tt>, <tt>:mtime</tt> or <tt>:updated</tt>, and #deliver
  # maintains the list of sent links in <tt>:sent</tt>.
  class Feed

    # Character sequences replaced in mail subjects before transliteration.
    SUBSTITUTIONS = {
      '–'     => '--',
      '«'     => '<<',
      '&amp;' => '&'
    }.freeze

    # Matches any key of SUBSTITUTIONS.
    SUBSTITUTIONS_RE = Regexp.union(*SUBSTITUTIONS.keys)

    # HTML tags that survive when an item's linked page is used as body.
    TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4].freeze

    attr_reader :feed, :verbose, :reload, :simple, :updated, :content, :rss

    # feed::    Hash describing the feed; <tt>:url</tt>, <tt>:to</tt> and
    #           <tt>:title</tt> are mandatory.
    # options:: Hash; <tt>:verbose</tt> and <tt>:reload</tt> are read.
    #
    # Raises TypeError if +feed+ is not a Hash and ArgumentError if any of
    # the mandatory keys is missing.
    def initialize(feed, options = {})
      raise TypeError, "Hash expected, got #{feed.class}" unless feed.is_a?(Hash)

      @feed    = feed
      @simple  = feed[:simple]
      @updated = feed[:updated]

      @verbose = options[:verbose]
      @reload  = options[:reload]

      missing = [:url, :to, :title].reject { |key| feed.has_key?(key) }

      raise ArgumentError, "feed incomplete: #{missing.join(', ')} missing" unless missing.empty?
    end

    # Fetches and parses the feed and mails every remaining item to the
    # recipients in <tt>feed[:to]</tt>.
    #
    # templates:: Hash-like object mapping a content subtype (the part after
    #             the slash of the content type, e.g. "html") to ERB source.
    #
    # Returns the number of items handed to the mail command, or nil if
    # nothing could be sent (no content, no new items, no recipients, or no
    # template).
    #
    # NOTE: The template and the mail command are evaluated against this
    # method's binding, so the local variable names used below are
    # presumably part of the template interface -- do not rename them
    # without checking the templates.
    def deliver(templates)
      unless get && parse
        warn "[#{feed[:title]}] Nothing to send" if verbose
        return
      end

      if rss.items.empty?
        warn "[#{feed[:title]}] No new items" if verbose
        return
      end

      to = [*feed[:to]]
      if to.empty?
        warn "[#{feed[:title]}] No one to send to" if verbose
        return
      end

      feed_title   = feed[:title]
      content_type = feed[:content_type] || 'text/html'
      encoding     = feed[:encoding]     || 'UTF-8'

      feed[:sent] ||= []

      content_type_header = "Content-type: #{content_type}; charset=#{encoding}"

      unless template = templates[content_type[/\/(.*)/, 1]]
        warn "[#{feed[:title]}] Template not found: #{content_type}" if verbose
        return
      end

      # The subject placeholder is deliberately left uninterpolated
      # (<tt>\#{subject}</tt>); it is filled in per item via String#evaluate.
      cmd = [
        '/usr/bin/mail',
        '-e',
        "-a '#{content_type_header}'",
        "-a 'From: rss2mail@blackwinter.de'",
        "-s '[#{feed_title}] \#{subject}'",
        *to
      ].join(' ')

      sent = 0

      rss.items.each { |item|
        title       = item.title
        link        = item.link
        description = item.description
        date        = item.date
        author      = item.author

        if description && feed[:unescape_html]
          description.gsub!(/&lt;/, '<')
          description.gsub!(/&gt;/, '>')
        end

        # Optionally use the linked page (or one element of it) as mail
        # body, stripped down to the tags in TAGS_TO_KEEP.
        if tag = feed[:body]
          body = case tag
            when true then open_url(link).read
            else Hpricot(open_url(link)).at(tag).to_s
          end.gsub(/<\/?(.*?)>/) { |m|
            # An empty tag ("<>", "</>") has no name; drop it.
            name = $1.split.first
            m if name && TAGS_TO_KEEP.include?(name.downcase)
          }.gsub(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')

          if body_encoding = feed[:body_encoding]
            body = to_utf8(body, body_encoding)
          end
        end

        subject = title ? clean_subject(title) : 'NO TITLE'

        _cmd = cmd.evaluate(binding)

        begin
          IO.popen(_cmd, 'w') { |mail| mail.puts ERB.new(template).result(binding) }
          feed[:sent] << link
          sent += 1
        rescue Errno::EPIPE => err
          warn "[#{feed[:title]}] Error while sending mail (#{err.class}): #{_cmd}"
        end
      }

      # only keep the last 100 entries
      feed[:sent].slice!(0...-100)

      warn "[#{feed[:title]}] #{sent} items sent" if verbose
      sent
    end

    private

    # Downloads the feed into @content and returns it (nil if nothing was
    # fetched). Unless +reload+ is set, a conditional request is made using
    # the stored <tt>:etag</tt> or <tt>:mtime</tt>. The response's ETag or
    # Last-Modified value -- or, lacking both, the current time -- is
    # recorded in the feed hash.
    def get(reload = self.reload)
      if reload
        @content   = nil
        conditions = {}
      elsif etag = feed[:etag]
        conditions = { 'If-None-Match' => etag }
      elsif mtime = feed[:mtime]
        conditions = { 'If-Modified-Since' => mtime }
      else
        conditions = {}
      end

      begin
        open_url(feed[:url], conditions) { |uri|
          if etag = uri.meta['etag']
            feed[:etag] = etag
          elsif mtime = uri.last_modified
            feed[:mtime] = mtime.rfc822
          else
            feed[:updated] = Time.now
          end

          @content ||= uri.read
        }
      rescue OpenURI::HTTPError
        warn "[#{feed[:title]}] Feed not found or unchanged" if verbose
      rescue Timeout::Error, Errno::ETIMEDOUT, Errno::ECONNRESET => err
        warn "[#{feed[:title]}] Error while getting feed: #{err} (#{err.class})"
      end

      @content
    end

    # Parses @content into @rss (kept from a previous call unless +reload+
    # is set) and returns it. Unless reloading, items that are not newer
    # than +updated+ or whose link is already listed in <tt>feed[:sent]</tt>
    # are removed.
    def parse(reload = self.reload)
      @rss = nil if reload

      if content
        @rss ||= begin
          RSS2Mail::RSS.new(content, simple)
        rescue SimpleRSSError => err
          warn "[#{feed[:title]}] Error while parsing feed: #{err} (#{err.class})"
          nil
        end
      end

      if content && @rss
        sent = feed[:sent]

        unless reload
          @rss.items.delete_if { |item|
            if updated && date = item.date
              date <= updated
            else
              sent && sent.include?(item.link)
            end
          }
        end
      else
        warn "[#{feed[:title]}] Nothing to parse" if verbose
      end

      @rss
    end

    # Makes +string+ usable inside the single-quoted subject argument of the
    # mail command: applies SUBSTITUTIONS, reduces the text to ASCII and
    # escapes single quotes for the shell.
    def clean_subject(string)
      string.
        replace_diacritics.
        gsub(SUBSTITUTIONS_RE) { |m| SUBSTITUTIONS[m] }.
        to_ascii.
        gsub(/'/, "'\\\\''")
    end

    # Opens +url+ through open-uri's URI#open instead of Kernel#open, so a
    # value taken from a remote feed can never be interpreted as a local
    # file name or as a "|command" pipe. Extra arguments and the block are
    # passed through.
    def open_url(url, *args, &block)
      URI.parse(url).open(*args, &block)
    end

    # Converts +string+ from +encoding+ to UTF-8. Uses String#encode where
    # available and falls back to Iconv otherwise.
    def to_utf8(string, encoding)
      if string.respond_to?(:encode)
        string.encode('UTF-8', encoding)
      else
        require 'iconv'
        Iconv.conv('UTF-8', encoding, string)
      end
    end

  end

end