rss2mail 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,41 @@
1
+ = rss2mail - Send RSS feeds as e-mail
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to rss2mail version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ Sends new entries for configured RSS feeds as e-mail to any number of
11
+ recipients. See the <tt>example/</tt> directory for a sample configuration file.
12
+
13
+
14
+ == LINKS
15
+
16
+ <b></b>
17
+ Documentation:: <http://rss2mail.rubyforge.org/>
18
+ Source code:: <http://github.com/blackwinter/rss2mail>
19
+ Rubyforge project:: <http://rubyforge.org/projects/rss2mail>
20
+
21
+
22
+ == AUTHORS
23
+
24
+ * Jens Wille <mailto:ww@blackwinter.de>
25
+
26
+
27
+ == LICENSE AND COPYRIGHT
28
+
29
+ Copyright (C) 2007-2008 Jens Wille
30
+
31
+ rss2mail is free software: you can redistribute it and/or modify it under
32
+ the terms of the GNU General Public License as published by the Free Software
33
+ Foundation, either version 3 of the License, or (at your option) any later
34
+ version.
35
+
36
+ rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY
37
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
38
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
39
+
40
+ You should have received a copy of the GNU General Public License along with
41
+ rss2mail. If not, see <http://www.gnu.org/licenses/>.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Rake build configuration for rss2mail. All tasks are provided by the 'hen'
# gem, which is configured through the hash returned by the Hen.lay! block.

# Resolve the version file relative to this Rakefile instead of relying on
# the current directory being part of the load path (it no longer is on
# Ruby >= 1.9.2).
require File.expand_path('lib/rss2mail/version', File.dirname(__FILE__))

begin
  require 'hen'

  Hen.lay! {{
    :rubyforge => {
      :project  => %q{rss2mail},
      :package  => %q{rss2mail},
      :rdoc_dir => nil
    },

    :gem => {
      :version      => RSS2Mail::VERSION,
      :summary      => %q{Send RSS feeds as e-mail},
      :homepage     => %q{http://rss2mail.rubyforge.org/},
      :files        => FileList['lib/**/*.rb', 'bin/*'].to_a,
      :extra_files  => FileList['[A-Z]*', 'example/*'].to_a,
      :dependencies => %w[simple-rss hpricot unidecode ruby-nuggets]
    }
  }}
rescue LoadError
  abort "Please install the 'hen' gem first."
end

### Place your custom Rake tasks here.
data/bin/rss2mail ADDED
@@ -0,0 +1,104 @@
1
+ #! /usr/bin/ruby
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # rss2mail -- Send RSS feeds as e-mail #
7
+ # #
8
+ # Copyright (C) 2007-2008 Jens Wille #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <ww@blackwinter.de> #
12
+ # #
13
+ # rss2mail is free software; you can redistribute it and/or modify it under #
14
+ # the terms of the GNU General Public License as published by the Free #
15
+ # Software Foundation; either version 3 of the License, or (at your option) #
16
+ # any later version. #
17
+ # #
18
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'optparse'
30
+ require 'yaml'
31
+
32
# Installation root (the parent of this script's directory). The library,
# the ERB templates and the default feeds file are all looked up below it.
base = File.join(File.dirname(__FILE__), '..')
$: << File.join(base, 'lib')

require 'rss2mail'

USAGE = "Usage: #{$0} [options] <target>"
abort USAGE if ARGV.empty?

options = {
  :files   => nil,
  :reload  => false,
  :verbose => false
}

OptionParser.new { |opts|
  opts.banner = USAGE
  opts.separator ''

  opts.on('-d', '--directory DIRECTORY', 'Process all feeds in directory') { |d|
    abort "Not a directory: #{d}" unless File.directory?(d)
    abort "Can't read directory: #{d}" unless File.readable?(d)

    options[:files] = Dir[File.join(d, '*.yaml')]
  }

  opts.on('-r', '--reload', 'Reload feeds') {
    options[:reload] = true
  }

  opts.on('-v', '--verbose', 'Be verbose') {
    options[:verbose] = true
  }
}.parse!

# The first remaining argument names the target (a top-level key of the
# feeds file) whose feeds are to be processed.
if target = ARGV.shift
  target = target.to_sym
else
  abort "No feeds target given\n#{USAGE}"
end

# Lazily loads templates/<name>.erb on first access and caches the content.
# A missing template yields nil and is not cached.
templates = Hash.new { |h, k|
  t = File.join(base, 'templates', "#{k}.erb")

  begin
    h[k] = File.read(t)
  rescue Errno::ENOENT
    # silently ignore
  end
}

# Either all *.yaml files of the directory given with -d, or the default
# feeds.yaml below the installation root.
feeds_files = options.delete(:files) || [File.join(base, 'feeds.yaml')]
feeds_files.each { |feeds_file|
  feeds = begin
    YAML.load_file(feeds_file)
  rescue Errno::ENOENT
    warn "Feeds file not found: #{feeds_file}"
    next
  end

  # An empty or malformed file does not load as a Hash; skip it instead of
  # failing with a NoMethodError on the target lookup below.
  unless feeds.is_a?(Hash)
    warn "Feeds file is empty or malformed: #{feeds_file}"
    next
  end

  unless target_feeds = feeds[target]
    warn "Feeds target not found: #{target} (in #{feeds_file})"
    next
  end

  target_feeds.each { |feed|
    RSS2Mail::Feed.new(feed, options).deliver(templates) unless feed[:skip]
  }

  # write updated feed information
  File.open(feeds_file, 'w') { |file|
    YAML.dump(feeds, file)
  }
}
@@ -0,0 +1,30 @@
1
+ ---
2
+ :"twice-a-day":
3
+ - :url: http://feeds.feedburner.com/Bildblog
4
+ :to: my.secret@e.mail
5
+ :title: BILDblog
6
+ - :url: http://newsfeed.zeit.de/
7
+ :to: my.secret@e.mail
8
+ :title: DIE ZEIT
9
+ - :url: http://www.spiegel.de/schlagzeilen/rss/index.xml
10
+ :to: my.secret@e.mail
11
+ :title: SPIEGEL ONLINE
12
+ :hourly:
13
+ - :url: http://www.heise.de/newsticker/heise-atom.xml
14
+ :to: my.secret@e.mail
15
+ :title: heise online News
16
+ :body: heisetext
17
+ :body_encoding: ISO-8859-1
18
+ - :url: http://log.netbib.de/feed/
19
+ :to: my.secret@e.mail
20
+ :title: netbib weblog
21
+ :daily:
22
+ - :url: http://aktuell.de.selfhtml.org/weblog/rss-feed
23
+ :to: my.secret@e.mail
24
+ :title: SELFHTML Aktuell Weblog
25
+ - :url: http://rss.slashdot.org/Slashdot/slashdotLinux
26
+ :to: my.secret@e.mail
27
+ :title: "Slashdot: Linux"
28
+ - :url: http://rss.slashdot.org/Slashdot/slashdotBookReviews
29
+ :to: my.secret@e.mail
30
+ :title: "Slashdot: Book Reviews"
@@ -0,0 +1,225 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'open-uri'
28
+ require 'erb'
29
+
30
+ require 'rubygems'
31
+ require 'hpricot'
32
+ require 'unidecode'
33
+ require 'nuggets/util/i18n'
34
+ require 'nuggets/string/evaluate'
35
+
36
+ require 'rss2mail/rss'
37
+
38
module RSS2Mail

  # A single configured feed: fetches it, parses it, and sends every item
  # that has not been sent before as an e-mail through /usr/bin/mail.
  #
  # The feed configuration is a Hash with the required keys <tt>:url</tt>,
  # <tt>:to</tt> and <tt>:title</tt>. The Hash is also used to record state
  # between runs (<tt>:etag</tt>, <tt>:mtime</tt>, <tt>:updated</tt>,
  # <tt>:sent</tt>) and is modified in place.
  class Feed

    # Replacements applied to item titles before they are used as subject.
    SUBSTITUTIONS = {
      '–'     => '--',
      '«'     => '<<',
      '&amp;' => '&'
    }

    # Matches any of the SUBSTITUTIONS keys.
    SUBSTITUTIONS_RE = Regexp.union(*SUBSTITUTIONS.keys)

    # HTML tags that survive when an item's body is fetched from its link.
    TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4]

    attr_reader :feed, :verbose, :reload, :simple, :updated, :content, :rss

    # Creates a new feed from the configuration Hash +feed+. Recognized
    # +options+ are <tt>:verbose</tt> and <tt>:reload</tt>.
    #
    # Raises TypeError if +feed+ is not a Hash, and ArgumentError if any of
    # the required keys is missing.
    def initialize(feed, options = {})
      raise TypeError, "Hash expected, got #{feed.class}" unless feed.is_a?(Hash)

      @feed    = feed
      @simple  = feed[:simple]
      @updated = feed[:updated]

      @verbose = options[:verbose]
      @reload  = options[:reload]

      missing = [:url, :to, :title].reject { |key| feed.has_key?(key) }

      raise ArgumentError, "feed incomplete: #{missing.join(', ')} missing" unless missing.empty?
    end

    # Fetches and parses the feed, then mails each remaining item using the
    # template found in +templates+ under the content type's subtype (e.g.
    # "html" for "text/html"). Returns the number of items sent, or nil if
    # there was nothing to send.
    #
    # NOTE: The local variable names inside the item loop are visible to the
    # ERB template and to the mail command through +binding+.
    def deliver(templates)
      unless get && parse
        warn "[#{feed[:title]}] Nothing to send" if verbose
        return
      end

      if rss.items.empty?
        warn "[#{feed[:title]}] No new items" if verbose
        return
      end

      to = [*feed[:to]]
      if to.empty?
        warn "[#{feed[:title]}] No one to send to" if verbose
        return
      end

      feed_title   = feed[:title]
      content_type = feed[:content_type] || 'text/html'
      encoding     = feed[:encoding]     || 'UTF-8'

      feed[:sent] ||= []

      content_type_header = "Content-type: #{content_type}; charset=#{encoding}"

      unless template = templates[content_type[/\/(.*)/, 1]]
        warn "[#{feed[:title]}] Template not found: #{content_type}" if verbose
        return
      end

      # The escaped "#{subject}" is left in the string on purpose; it is
      # filled in per item by String#evaluate below.
      # NOTE(review): the feed title and the recipients are placed into this
      # shell command unescaped -- they are assumed to come from a trusted
      # configuration file.
      cmd = [
        '/usr/bin/mail',
        '-e',
        "-a '#{content_type_header}'",
        "-a 'From: rss2mail@blackwinter.de'",
        "-s '[#{feed_title}] \#{subject}'",
        *to
      ].join(' ')

      sent = 0

      rss.items.each { |item|
        title       = item.title
        link        = item.link
        description = item.description
        date        = item.date
        author      = item.author

        if description && feed[:unescape_html]
          description.gsub!(/&lt;/, '<')
          description.gsub!(/&gt;/, '>')
        end

        # Optionally fetch the linked page (or only the element selected by
        # +tag+) as body, drop all but the whitelisted tags, and unlink
        # anchors whose href does not start with "http:".
        if tag = feed[:body]
          body = case tag
            when true then open(link).read
            else Hpricot(open(link)).at(tag).to_s
          end.gsub(/<\/?(.*?)>/) { |m|
            # an empty tag ("<>") has no name and is dropped
            name = $1.split.first
            m if name && TAGS_TO_KEEP.include?(name.downcase)
          }.gsub(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')

          if body_encoding = feed[:body_encoding]
            body = to_utf8(body, body_encoding)
          end
        end

        subject = title ? clean_subject(title) : 'NO TITLE'

        # String#evaluate is provided by nuggets/string/evaluate; presumably
        # it interpolates the string in the given binding -- confirm there.
        _cmd = cmd.evaluate(binding)

        begin
          IO.popen(_cmd, 'w') { |mail| mail.puts ERB.new(template).result(binding) }
          feed[:sent] << link
          sent += 1
        rescue Errno::EPIPE => err
          warn "[#{feed[:title]}] Error while sending mail (#{err.class}): #{_cmd}"
        end
      }

      # only keep the last 100 entries
      feed[:sent].slice!(0...-100)

      warn "[#{feed[:title]}] #{sent} items sent" if verbose
      sent
    end

    private

    # Fetches the feed content. Unless +reload+ is set, the request is made
    # conditional on the stored ETag or modification time. The validators
    # returned by the server are stored in the feed configuration. Returns
    # the content, or nil if nothing was fetched.
    def get(reload = self.reload)
      conditions = {}

      if reload
        @content = nil
      elsif etag = feed[:etag]
        conditions['If-None-Match'] = etag
      elsif mtime = feed[:mtime]
        conditions['If-Modified-Since'] = mtime
      end

      begin
        open(feed[:url], conditions) { |uri|
          if etag = uri.meta['etag']
            feed[:etag] = etag
          elsif mtime = uri.last_modified
            feed[:mtime] = mtime.rfc822
          else
            feed[:updated] = Time.now
          end

          @content ||= uri.read
        }
      rescue OpenURI::HTTPError
        warn "[#{feed[:title]}] Feed not found or unchanged" if verbose
      rescue Timeout::Error, Errno::ETIMEDOUT, Errno::ECONNRESET => err
        warn "[#{feed[:title]}] Error while getting feed: #{err} (#{err.class})"
      end

      @content
    end

    # Parses the fetched content. Unless +reload+ is set, items are removed
    # that are not newer than the stored update time or, for items without
    # a usable date, whose link is already recorded as sent. Returns the
    # parsed feed, or nil if there is no content or it could not be parsed.
    def parse(reload = self.reload)
      @rss = nil if reload

      if content
        @rss ||= begin
          RSS2Mail::RSS.new(content, simple)
        rescue SimpleRSSError => err
          warn "[#{feed[:title]}] Error while parsing feed: #{err} (#{err.class})"
          nil
        end
      end

      if content && @rss
        sent = feed[:sent]

        unless reload
          @rss.items.delete_if { |item|
            if updated && date = item.date
              date <= updated
            else
              sent && sent.include?(item.link)
            end
          }
        end
      else
        warn "[#{feed[:title]}] Nothing to parse" if verbose
      end

      @rss
    end

    # Turns an item title into an ASCII-only string that is safe to place
    # inside the single-quoted subject argument of the mail command.
    def clean_subject(string)
      string.
        replace_diacritics.
        gsub(SUBSTITUTIONS_RE) { |m| SUBSTITUTIONS[m] }.
        to_ascii.
        gsub(/'/, "'\\\\''")
    end

    # Converts +string+ from the encoding +from+ to UTF-8. Uses
    # String#encode where available and falls back to Iconv otherwise.
    def to_utf8(string, from)
      if string.respond_to?(:encode)
        string.encode('UTF-8', from)
      else
        require 'iconv'
        Iconv.conv('UTF-8', from, string)
      end
    end

  end

end
@@ -0,0 +1,95 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'rss'
28
+
29
+ require 'rubygems'
30
+ require 'simple-rss'
31
+
32
module RSS2Mail

  # Parses feed content, either with the standard library's RSS parser or
  # with SimpleRSS, and exposes the feed's items through a uniform
  # interface (see Item).
  class RSS

    attr_reader :content, :rss

    # Parses +content+. If +simple+ is true, SimpleRSS is used right away;
    # otherwise it only serves as a fallback (see #parse).
    def initialize(content, simple = false)
      @content = content
      @simple  = simple

      @rss = simple ? simple_parse : parse
    end

    # Whether SimpleRSS was requested explicitly.
    def simple?
      @simple
    end

    # The feed's items, each wrapped in an Item. The list is built once and
    # cached.
    def items
      @items ||= rss.items.map { |item| Item.new(item) }
    end

    # Parses the content with the standard RSS parser (without validation),
    # falling back to SimpleRSS if that returns nothing.
    def parse
      ::RSS::Parser.parse(content, false) || simple_parse
    end

    # Parses the content with SimpleRSS.
    def simple_parse
      SimpleRSS.parse(content)
    end

    # Wraps a parsed feed item and answers a fixed set of accessors (the
    # keys of ALIASES), trying the alternative names listed there whenever
    # the wrapped item does not provide the primary one.
    class Item

      # Accessor name => alternative names to try on the wrapped item.
      ALIASES = {
        :title       => %w[],
        :link        => %w[],
        :description => %w[summary content],
        :date        => %w[pubDate updated],
        :author      => %w[dc_creator]
      }

      def initialize(item)
        @item = item
      end

      # Answers the accessors named in ALIASES with the first non-nil value
      # the wrapped item returns for the name or one of its aliases, or nil
      # if there is none. Everything else is passed on to the default
      # handling.
      def method_missing(method, *args, &block)
        aliases = ALIASES[method]
        return super unless aliases

        [method, *aliases].each { |name|
          begin
            res = @item.send(name)
            return res if res
          rescue NoMethodError
            # the wrapped item does not know this name; try the next one
          end
        }

        nil
      end

      # Keeps respond_to? in line with method_missing.
      def respond_to_missing?(method, include_private = false)
        ALIASES.has_key?(method) || super
      end

    end

  end

end
@@ -0,0 +1,65 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of rss2mail, the RSS to e-mail forwarder. #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'open-uri'
28
+ require 'uri'
29
+
30
+ require 'rubygems'
31
+ require 'hpricot'
32
+
33
module RSS2Mail

  # Helper functions; callable directly on the module.
  module Util

    extend self

    # Content types that identify a feed in a <link> element.
    FEED_REGEXP = %r{\Aapplication/(?:atom|rss)\+xml\z}i

    # Tries to find the feed advertised by the page at +url+, i.e. the href
    # of the first <tt><link rel="alternate"></tt> element whose type
    # matches FEED_REGEXP. A relative href is resolved against the page's
    # <tt><base href></tt>, or against +url+ if there is none.
    #
    # Returns the feed's URL as a String. If +url+ is nil, empty or
    # "about:blank", or if no feed is found, returns +url+ itself when
    # +or_self+ is true and nil otherwise.
    #
    # cf. <http://www.rssboard.org/rss-autodiscovery>
    def discover_feed(url, or_self = false)
      default = or_self ? url : nil
      return default if url.nil? || url.empty? || url == 'about:blank'

      doc = Hpricot(open(url))

      feed_element = doc.search('//link[@rel="alternate"]').find { |link|
        link[:type] =~ FEED_REGEXP
      }

      feed_href = feed_element && feed_element[:href]
      return default unless feed_href

      # Only an href that *starts* with a scheme is absolute; one that
      # merely contains a URL (e.g. in its query string) still needs to be
      # resolved.
      return feed_href if feed_href =~ %r{\Ahttps?://}i

      base = doc.at('base')
      base_href = base && base[:href] || url

      URI.join(base_href, feed_href).to_s
    end

  end

end
@@ -0,0 +1,27 @@
1
module RSS2Mail

  # The version of rss2mail, split into its three numeric components.
  module Version

    MAJOR = 0
    MINOR = 0
    TINY  = 1

    class << self

      # Returns array representation.
      def to_a
        [MAJOR, MINOR, TINY]
      end

      # Short-cut for version string.
      def to_s
        to_a.join('.')
      end

    end

  end

  # The version as a dotted string; frozen so that it can't be modified by
  # accident.
  VERSION = Version.to_s.freeze

end
data/lib/rss2mail.rb ADDED
@@ -0,0 +1,30 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # rss2mail -- Send RSS feeds as e-mail #
5
+ # #
6
+ # Copyright (C) 2007-2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <ww@blackwinter.de> #
10
+ # #
11
+ # rss2mail is free software; you can redistribute it and/or modify it under #
12
+ # the terms of the GNU General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'rss2mail/feed'
28
+
29
# Top-level namespace of the rss2mail library. Nothing is defined here.
module RSS2Mail; end