rss2mail 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,41 @@
1
+ = rss2mail - Send RSS feeds as e-mail
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to rss2mail version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ Sends new entries for configured RSS feeds as e-mail to any number of
11
+ recipients. See the <tt>example/</tt> directory for a sample configuration file.
12
+
13
+
14
+ == LINKS
15
+
16
+ <b></b>
17
+ Documentation:: <http://rss2mail.rubyforge.org/>
18
+ Source code:: <http://github.com/blackwinter/rss2mail>
19
+ Rubyforge project:: <http://rubyforge.org/projects/rss2mail>
20
+
21
+
22
+ == AUTHORS
23
+
24
+ * Jens Wille <mailto:ww@blackwinter.de>
25
+
26
+
27
+ == LICENSE AND COPYRIGHT
28
+
29
+ Copyright (C) 2007-2008 Jens Wille
30
+
31
+ rss2mail is free software: you can redistribute it and/or modify it under
32
+ the terms of the GNU General Public License as published by the Free Software
33
+ Foundation, either version 3 of the License, or (at your option) any later
34
+ version.
35
+
36
+ rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY
37
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
38
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
39
+
40
+ You should have received a copy of the GNU General Public License along with
41
+ rss2mail. If not, see <http://www.gnu.org/licenses/>.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Load the version constant relative to this Rakefile. The current directory
# is not part of $LOAD_PATH on Ruby >= 1.9.2, so a bare 'lib/...' require
# would raise LoadError there.
require File.expand_path('../lib/rss2mail/version', __FILE__)

begin
  require 'hen'

  # The block handed to Hen.lay! returns the project configuration hash.
  Hen.lay! {{
    :rubyforge => {
      :project  => %q{rss2mail},
      :package  => %q{rss2mail},
      :rdoc_dir => nil
    },

    :gem => {
      :version      => RSS2Mail::VERSION,
      :summary      => %q{Send RSS feeds as e-mail},
      :homepage     => %q{http://rss2mail.rubyforge.org/},
      :files        => FileList['lib/**/*.rb', 'bin/*'].to_a,
      :extra_files  => FileList['[A-Z]*', 'example/*'].to_a,
      :dependencies => %w[simple-rss hpricot unidecode ruby-nuggets]
    }
  }}
rescue LoadError
  abort "Please install the 'hen' gem first."
end
25
+
26
+ ### Place your custom Rake tasks here.
data/bin/rss2mail ADDED
@@ -0,0 +1,104 @@
1
+ #! /usr/bin/ruby
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # rss2mail -- Send RSS feeds as e-mail #
7
+ # #
8
+ # Copyright (C) 2007-2008 Jens Wille #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <ww@blackwinter.de> #
12
+ # #
13
+ # rss2mail is free software; you can redistribute it and/or modify it under #
14
+ # the terms of the GNU General Public License as published by the Free #
15
+ # Software Foundation; either version 3 of the License, or (at your option) #
16
+ # any later version. #
17
+ # #
18
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'optparse'
30
+ require 'yaml'
31
+
32
# Make the bundled library loadable when running straight from a checkout.
base = File.join(File.dirname(__FILE__), '..')
$: << File.join(base, 'lib')

require 'rss2mail'

USAGE = "Usage: #{$0} [options] <target>"
abort USAGE if ARGV.empty?

options = {
  :files   => nil,
  :reload  => false,
  :verbose => false
}

OptionParser.new { |opts|
  opts.banner = USAGE
  opts.separator ''

  opts.on('-d', '--directory DIRECTORY', 'Process all feeds in directory') { |d|
    abort "Not a directory: #{d}"      unless File.directory?(d)
    abort "Can't read directory: #{d}" unless File.readable?(d)

    options[:files] = Dir[File.join(d, '*.yaml')]
  }

  opts.on('-r', '--reload', 'Reload feeds') {
    options[:reload] = true
  }

  opts.on('-v', '--verbose', 'Be verbose') {
    options[:verbose] = true
  }
}.parse!

# The first remaining argument names the group of feeds to process; it is
# looked up as a symbol key in each feeds file.
if target = ARGV.shift
  target = target.to_sym
else
  abort "No feeds target given\n#{USAGE}"
end

# Lazily loads templates/<name>.erb on first access and caches the content.
# A missing template yields nil and is not cached.
templates = Hash.new { |h, k|
  t = File.join(base, 'templates', "#{k}.erb")

  begin
    h[k] = File.read(t)
  rescue Errno::ENOENT
    # silently ignore
  end
}

# Without -d, fall back to the single feeds.yaml next to the installation.
feeds_files = options.delete(:files) || [File.join(base, 'feeds.yaml')]
feeds_files.each { |feeds_file|
  feeds = begin
    YAML.load_file(feeds_file)
  rescue Errno::ENOENT
    warn "Feeds file not found: #{feeds_file}"
    next
  end

  # An empty or malformed file does not load as a Hash; skip it instead of
  # failing with NoMethodError on the target lookup below.
  unless feeds.is_a?(Hash)
    warn "Invalid feeds file: #{feeds_file}"
    next
  end

  unless target_feeds = feeds[target]
    warn "Feeds target not found: #{target} (in #{feeds_file})"
    next
  end

  target_feeds.each { |feed|
    RSS2Mail::Feed.new(feed, options).deliver(templates) unless feed[:skip]
  }

  # write updated feed information
  File.open(feeds_file, 'w') { |file|
    YAML.dump(feeds, file)
  }
}
@@ -0,0 +1,30 @@
1
+ ---
2
+ :"twice-a-day":
3
+ - :url: http://feeds.feedburner.com/Bildblog
4
+ :to: my.secret@e.mail
5
+ :title: BILDblog
6
+ - :url: http://newsfeed.zeit.de/
7
+ :to: my.secret@e.mail
8
+ :title: DIE ZEIT
9
+ - :url: http://www.spiegel.de/schlagzeilen/rss/index.xml
10
+ :to: my.secret@e.mail
11
+ :title: SPIEGEL ONLINE
12
+ :hourly:
13
+ - :url: http://www.heise.de/newsticker/heise-atom.xml
14
+ :to: my.secret@e.mail
15
+ :title: heise online News
16
+ :body: heisetext
17
+ :body_encoding: ISO-8859-1
18
+ - :url: http://log.netbib.de/feed/
19
+ :to: my.secret@e.mail
20
+ :title: netbib weblog
21
+ :daily:
22
+ - :url: http://aktuell.de.selfhtml.org/weblog/rss-feed
23
+ :to: my.secret@e.mail
24
+ :title: SELFHTML Aktuell Weblog
25
+ - :url: http://rss.slashdot.org/Slashdot/slashdotLinux
26
+ :to: my.secret@e.mail
27
+ :title: "Slashdot: Linux"
28
+ - :url: http://rss.slashdot.org/Slashdot/slashdotBookReviews
29
+ :to: my.secret@e.mail
30
+ :title: "Slashdot: Book Reviews"
@@ -0,0 +1,225 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'open-uri'
28
+ require 'erb'
29
+
30
+ require 'rubygems'
31
+ require 'hpricot'
32
+ require 'unidecode'
33
+ require 'nuggets/util/i18n'
34
+ require 'nuggets/string/evaluate'
35
+
36
+ require 'rss2mail/rss'
37
+
38
module RSS2Mail

  # A single configured feed. Fetches the feed, parses it, drops the items
  # that were already handled and pipes each remaining item to /usr/bin/mail.
  #
  # The feed configuration hash is updated in place (:etag, :mtime, :updated,
  # :sent), so the caller can persist it afterwards.
  class Feed

    # Character sequences replaced in subjects before ASCII transliteration.
    SUBSTITUTIONS = {
      '–'     => '--',
      '«'     => '<<',
      '&amp;' => '&'
    }

    # Matches any key of SUBSTITUTIONS.
    SUBSTITUTIONS_RE = Regexp.union(*SUBSTITUTIONS.keys)

    # HTML tags that survive when a linked page is used as the mail body.
    TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4]

    attr_reader :feed, :verbose, :reload, :simple, :updated, :content, :rss

    # feed::    configuration hash; must contain :url, :to and :title
    # options:: :verbose and :reload flags
    #
    # Raises TypeError unless +feed+ is a Hash, and ArgumentError when a
    # required key is missing.
    def initialize(feed, options = {})
      raise TypeError, "Hash expected, got #{feed.class}" unless feed.is_a?(Hash)

      @feed    = feed
      @simple  = feed[:simple]
      @updated = feed[:updated]

      @verbose = options[:verbose]
      @reload  = options[:reload]

      missing = [:url, :to, :title].reject { |key| feed.has_key?(key) }
      raise ArgumentError, "feed incomplete: #{missing.join(', ')} missing" unless missing.empty?
    end

    # Sends one mail per new item, rendered with the template that matches
    # the feed's content subtype (e.g. +templates['html']+ for text/html).
    #
    # Returns the number of items sent, or nil if there was nothing to send.
    #
    # The local variables of this method are handed to the ERB template via
    # +binding+, so their names are part of the template contract.
    def deliver(templates)
      unless get && parse
        warn "[#{feed[:title]}] Nothing to send" if verbose
        return
      end

      if rss.items.empty?
        warn "[#{feed[:title]}] No new items" if verbose
        return
      end

      to = [*feed[:to]]
      if to.empty?
        warn "[#{feed[:title]}] No one to send to" if verbose
        return
      end

      feed_title   = feed[:title]
      content_type = feed[:content_type] || 'text/html'
      encoding     = feed[:encoding]     || 'UTF-8'

      feed[:sent] ||= []

      content_type_header = "Content-type: #{content_type}; charset=#{encoding}"

      unless template = templates[content_type[/\/(.*)/, 1]]
        warn "[#{feed[:title]}] Template not found: #{content_type}" if verbose
        return
      end

      # The escaped "\#{subject}" placeholder is filled in per item below.
      # NOTE(review): feed title and recipients come from the configuration
      # and are NOT shell-escaped here; only the subject is (clean_subject).
      cmd = [
        '/usr/bin/mail',
        '-e',
        "-a '#{content_type_header}'",
        "-a 'From: rss2mail@blackwinter.de'",
        "-s '[#{feed_title}] \#{subject}'",
        *to
      ].join(' ')

      sent = 0

      rss.items.each { |item|
        title       = item.title
        link        = item.link
        description = item.description
        date        = item.date
        author      = item.author

        if description && feed[:unescape_html]
          description.gsub!(/&lt;/, '<')
          description.gsub!(/&gt;/, '>')
        end

        if tag = feed[:body]
          # The item link is untrusted feed content: fetch it through URI so
          # that it can never be interpreted as a pipe command or local path.
          body = case tag
            when true then open_url(link).read
            else Hpricot(open_url(link)).at(tag).to_s
          end.gsub(/<\/?(.*?)>/) { |m|
            # Tag name without attributes or a trailing slash; nil for "<>".
            name = $1[/\A[^\s\/]+/]
            m if name && TAGS_TO_KEEP.include?(name.downcase)
          }.gsub(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')

          if body_encoding = feed[:body_encoding]
            # Iconv is gone from Ruby >= 2.0; prefer String#encode if present.
            body = if body.respond_to?(:encode)
              body.encode('UTF-8', body_encoding)
            else
              Iconv.conv('UTF-8', body_encoding, body)
            end
          end
        end

        subject = title ? clean_subject(title) : 'NO TITLE'

        _cmd = cmd.evaluate(binding)

        begin
          IO.popen(_cmd, 'w') { |mail| mail.puts ERB.new(template).result(binding) }
          feed[:sent] << link
          sent += 1
        rescue Errno::EPIPE => err
          warn "[#{feed[:title]}] Error while sending mail (#{err.class}): #{_cmd}"
        end
      }

      # only keep the last 100 entries
      feed[:sent].slice!(0...-100)

      warn "[#{feed[:title]}] #{sent} items sent" if verbose
      sent
    end

    private

    # Opens +url+ via open-uri. Unlike Kernel#open this never spawns a
    # subprocess for strings starting with "|".
    def open_url(url, *args, &block)
      URI.parse(url).open(*args, &block)
    end

    # Downloads the feed into @content and returns it (nil on failure or when
    # the server reports the feed as unchanged). Unless reloading, a
    # conditional request is made from the stored :etag or :mtime.
    #
    # The default is spelled self.reload because a bare "reload = reload"
    # refers to the (nil) parameter itself on Ruby >= 1.9.
    def get(reload = self.reload)
      if reload
        @content = nil
        conditions = {}
      else
        conditions = case
          when etag  = feed[:etag]  then { 'If-None-Match'     => etag  }
          when mtime = feed[:mtime] then { 'If-Modified-Since' => mtime }
          else {}
        end
      end

      begin
        open_url(feed[:url], conditions) { |uri|
          case
            when etag  = uri.meta['etag']   then feed[:etag]  = etag
            when mtime = uri.last_modified  then feed[:mtime] = mtime.rfc822
            else feed[:updated] = Time.now
          end

          @content ||= uri.read
        }
      rescue OpenURI::HTTPError
        warn "[#{feed[:title]}] Feed not found or unchanged" if verbose
      rescue Timeout::Error, Errno::ETIMEDOUT, Errno::ECONNRESET => err
        warn "[#{feed[:title]}] Error while getting feed: #{err} (#{err.class})"
      end

      @content
    end

    # Parses @content into @rss and, unless reloading, removes the items that
    # are not newer than :updated or whose link is already listed in :sent.
    # Returns @rss (nil if there is nothing to parse or parsing failed).
    def parse(reload = self.reload)
      @rss = nil if reload

      if content && @rss ||= begin
        RSS2Mail::RSS.new(content, simple)
      rescue SimpleRSSError => err
        warn "[#{feed[:title]}] Error while parsing feed: #{err} (#{err.class})"
      end
        sent = feed[:sent]

        unless reload
          @rss.items.delete_if { |item|
            if updated && date = item.date
              date <= updated
            else
              sent && sent.include?(item.link)
            end
          }
        end
      else
        warn "[#{feed[:title]}] Nothing to parse" if verbose
      end

      @rss
    end

    # Turns an item title into an ASCII subject that is safe inside a
    # single-quoted shell argument (each ' becomes '\'').
    def clean_subject(string)
      string.
        replace_diacritics.
        gsub(SUBSTITUTIONS_RE) { |m| SUBSTITUTIONS[m] }.
        to_ascii.
        gsub(/'/, "'\\\\''")
    end

  end

end
@@ -0,0 +1,95 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'rss'
28
+
29
+ require 'rubygems'
30
+ require 'simple-rss'
31
+
32
module RSS2Mail

  # Parsed representation of a feed document. Uses the standard library
  # parser unless +simple+ is set (or that parser returns nothing), in which
  # case SimpleRSS is used.
  class RSS

    attr_reader :content, :rss

    # content:: the raw feed document
    # simple::  when true, go straight to SimpleRSS
    def initialize(content, simple = false)
      @content = content
      @simple  = simple

      @rss = simple ? simple_parse : parse
    end

    # Whether SimpleRSS was explicitly requested.
    def simple?
      @simple
    end

    # The feed's items, each wrapped in an Item; memoized.
    def items
      @items ||= rss.items.map { |item| Item.new(item) }
    end

    # Parses with ::RSS::Parser (validation off), falling back to SimpleRSS
    # when that returns nil.
    def parse
      ::RSS::Parser.parse(content, false) || simple_parse
    end

    def simple_parse
      SimpleRSS.parse(content)
    end

    # Uniform read access to an item of either parser: each attribute is
    # looked up under its own name first and then under its aliases.
    class Item

      ALIASES = {
        :title       => %w[],
        :link        => %w[],
        :description => %w[summary content],
        :date        => %w[pubDate updated],
        :author      => %w[dc_creator]
      }

      def initialize(item)
        @item = item
      end

      # Answers the attributes listed in ALIASES with the first non-nil value
      # found on the wrapped item, or nil if there is none. Anything else is
      # passed on to the default handling (NoMethodError).
      def method_missing(method, *args, &block)
        aliases = ALIASES[method]
        return super unless aliases

        [method, *aliases].each { |name|
          begin
            res = @item.send(name)
            return res if res
          rescue NoMethodError
            # the wrapped item doesn't know this name; try the next alias
          end
        }

        nil
      end

      # Keeps respond_to? in sync with method_missing.
      def respond_to_missing?(method, include_private = false)
        ALIASES.has_key?(method) || super
      end

    end

  end

end
@@ -0,0 +1,65 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'open-uri'
28
+ require 'uri'
29
+
30
+ require 'rubygems'
31
+ require 'hpricot'
32
+
33
module RSS2Mail

  # Stateless helpers, callable as RSS2Mail::Util.<method>.
  module Util

    extend self

    # MIME types that identify a feed in a <link> element.
    FEED_REGEXP = %r{\Aapplication/(?:atom|rss)\+xml\z}i

    # Finds the feed advertised by the HTML page at +url+ through a
    # <link rel="alternate"> element with a feed MIME type.
    #
    # Returns the feed URL as a string (a relative href is resolved against
    # the page's <base href>, or against +url+ if there is none). Returns
    # +url+ itself if nothing is found and +or_self+ is true, nil otherwise.
    # Blank URLs and 'about:blank' are never fetched.
    #
    # cf. <http://www.rssboard.org/rss-autodiscovery>
    def discover_feed(url, or_self = false)
      default = or_self ? url : nil
      return default if url.nil? || url.empty? || url == 'about:blank'

      # Go through URI rather than Kernel#open, which would treat a string
      # starting with "|" as a command to run.
      doc = Hpricot(URI.parse(url).open)

      feed_element = doc.search('//link[@rel="alternate"]').find { |link|
        link[:type] =~ FEED_REGEXP
      }

      feed_href = feed_element && feed_element[:href]
      return default unless feed_href

      return feed_href if feed_href =~ URI.regexp(%w[http https])

      base      = doc.at('base')
      base_href = (base && base[:href]) || url

      URI.join(base_href, feed_href).to_s
    end

  end

end
@@ -0,0 +1,27 @@
1
module RSS2Mail

  # The library version, broken down into its numeric parts.
  module Version

    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # The version as [major, minor, tiny].
    def self.to_a
      [MAJOR, MINOR, TINY]
    end

    # The version as a dotted string.
    def self.to_s
      to_a * '.'
    end

  end

  VERSION = Version.to_s

end
data/lib/rss2mail.rb ADDED
@@ -0,0 +1,30 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # rss2mail -- Send RSS feeds as e-mail #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'rss2mail/feed'
28
+
29
# Namespace of the rss2mail library; its contents are defined by the files
# required above.
module RSS2Mail; end