blackwinter-rss2mail 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +676 -0
- data/ChangeLog +5 -0
- data/README +41 -0
- data/Rakefile +26 -0
- data/bin/rss2mail +104 -0
- data/example/feeds.yaml +30 -0
- data/lib/rss2mail.rb +30 -0
- data/lib/rss2mail/feed.rb +225 -0
- data/lib/rss2mail/rss.rb +95 -0
- data/lib/rss2mail/util.rb +65 -0
- data/lib/rss2mail/version.rb +27 -0
- metadata +108 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
= rss2mail - Send RSS feeds as e-mail
|
2
|
+
|
3
|
+
== VERSION
|
4
|
+
|
5
|
+
This documentation refers to rss2mail version 0.0.1
|
6
|
+
|
7
|
+
|
8
|
+
== DESCRIPTION
|
9
|
+
|
10
|
+
Sends new entries for configured RSS feeds as e-mail to any number of
|
11
|
+
recipients. See <tt>example/</tt> directory for a sample configuration file.
|
12
|
+
|
13
|
+
|
14
|
+
== LINKS
|
15
|
+
|
16
|
+
<b></b>
|
17
|
+
Documentation:: <http://rss2mail.rubyforge.org/>
|
18
|
+
Source code:: <http://github.com/blackwinter/rss2mail>
|
19
|
+
Rubyforge project:: <http://rubyforge.org/projects/rss2mail>
|
20
|
+
|
21
|
+
|
22
|
+
== AUTHORS
|
23
|
+
|
24
|
+
* Jens Wille <mailto:ww@blackwinter.de>
|
25
|
+
|
26
|
+
|
27
|
+
== LICENSE AND COPYRIGHT
|
28
|
+
|
29
|
+
Copyright (C) 2007-2008 Jens Wille
|
30
|
+
|
31
|
+
rss2mail is free software: you can redistribute it and/or modify it under
|
32
|
+
the terms of the GNU General Public License as published by the Free Software
|
33
|
+
Foundation, either version 3 of the License, or (at your option) any later
|
34
|
+
version.
|
35
|
+
|
36
|
+
rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY
|
37
|
+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
38
|
+
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
39
|
+
|
40
|
+
You should have received a copy of the GNU General Public License along with
|
41
|
+
rss2mail. If not, see <http://www.gnu.org/licenses/>.
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Load the version constant relative to this Rakefile. A bare
# 'lib/rss2mail/version' only resolves while the current directory is on the
# load path, which is no longer the case from Ruby 1.9.2 onwards.
require File.expand_path('lib/rss2mail/version', File.dirname(__FILE__))

begin
  require 'hen'

  # Hand the project description to Hen. The block returns the configuration
  # hash, hence the doubled braces (block + hash literal).
  Hen.lay! {{
    :rubyforge => {
      :project  => %q{rss2mail},
      :package  => %q{rss2mail},
      :rdoc_dir => nil
    },

    :gem => {
      :version      => RSS2Mail::VERSION,
      :summary      => %q{Send RSS feeds as e-mail},
      :homepage     => %q{http://rss2mail.rubyforge.org/},
      :files        => FileList['lib/**/*.rb', 'bin/*'].to_a,
      :extra_files  => FileList['[A-Z]*', 'example/*'].to_a,
      :dependencies => %w[simple-rss hpricot unidecode ruby-nuggets]
    }
  }}
rescue LoadError
  abort "Please install the 'hen' gem first."
end
|
25
|
+
|
26
|
+
### Place your custom Rake tasks here.
|
data/bin/rss2mail
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# rss2mail -- Send RSS feeds as e-mail #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2007-2008 Jens Wille #
|
9
|
+
# #
|
10
|
+
# Authors: #
|
11
|
+
# Jens Wille <ww@blackwinter.de> #
|
12
|
+
# #
|
13
|
+
# rss2mail is free software; you can redistribute it and/or modify it under #
|
14
|
+
# the terms of the GNU General Public License as published by the Free #
|
15
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
16
|
+
# any later version. #
|
17
|
+
# #
|
18
|
+
# rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
|
19
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
20
|
+
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
21
|
+
# details. #
|
22
|
+
# #
|
23
|
+
# You should have received a copy of the GNU General Public License along #
|
24
|
+
# with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
+
# #
|
26
|
+
###############################################################################
|
27
|
+
#++
|
28
|
+
|
29
|
+
require 'optparse'
|
30
|
+
require 'yaml'
|
31
|
+
|
32
|
+
base = File.join(File.dirname(__FILE__), '..')
|
33
|
+
$: << File.join(base, 'lib')
|
34
|
+
|
35
|
+
require 'rss2mail'
|
36
|
+
|
37
|
+
USAGE = "Usage: #{$0} [options] <target>"
abort USAGE if ARGV.empty?

# Defaults; :files stays nil unless a directory is given with -d.
options = {
  :files   => nil,
  :reload  => false,
  :verbose => false
}

OptionParser.new { |opts|
  opts.banner = USAGE
  opts.separator ''

  opts.on('-d', '--directory DIRECTORY', 'Process all feeds in directory') { |d|
    abort "Not a directory: #{d}"      unless File.directory?(d)
    abort "Can't read directory: #{d}" unless File.readable?(d)

    options[:files] = Dir[File.join(d, '*.yaml')]
  }

  opts.on('-r', '--reload', 'Reload feeds') {
    options[:reload] = true
  }

  opts.on('-v', '--verbose', 'Be verbose') {
    options[:verbose] = true
  }
}.parse!

# The first remaining argument names the group of feeds to process; feeds
# files key their groups by symbol.
if target = ARGV.shift
  target = target.to_sym
else
  abort "No feeds target given\n#{USAGE}"
end

# Lazily read "<base>/templates/<name>.erb" on first access. A missing
# template is not cached and yields nil, which the feed reports itself.
templates = Hash.new { |h, k|
  t = File.join(base, 'templates', "#{k}.erb")

  begin
    h[k] = File.read(t)
  rescue Errno::ENOENT
    # silently ignore
  end
}

feeds_files = options.delete(:files) || [File.join(base, 'feeds.yaml')]
feeds_files.each { |feeds_file|
  feeds = begin
    # Feeds files are the user's own configuration and are written back by
    # this very script; they contain Symbol keys and Time values, which the
    # safe-by-default YAML.load_file of Psych 4 refuses. Use the explicit
    # unsafe loader where it exists and the classic one otherwise.
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(feeds_file)
    else
      YAML.load_file(feeds_file)
    end
  rescue Errno::ENOENT
    warn "Feeds file not found: #{feeds_file}"
    next
  end

  # An empty file loads as false/nil and a malformed one may load as a
  # scalar or list; neither can be indexed by target.
  unless feeds.is_a?(Hash)
    warn "Feeds file is empty or malformed: #{feeds_file}"
    next
  end

  unless target_feeds = feeds[target]
    warn "Feeds target not found: #{target} (in #{feeds_file})"
    next
  end

  target_feeds.each { |feed|
    RSS2Mail::Feed.new(feed, options).deliver(templates) unless feed[:skip]
  }

  # write updated feed information
  File.open(feeds_file, 'w') { |file|
    YAML.dump(feeds, file)
  }
}
|
data/example/feeds.yaml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
---
|
2
|
+
:"twice-a-day":
|
3
|
+
- :url: http://feeds.feedburner.com/Bildblog
|
4
|
+
:to: my.secret@e.mail
|
5
|
+
:title: BILDblog
|
6
|
+
- :url: http://newsfeed.zeit.de/
|
7
|
+
:to: my.secret@e.mail
|
8
|
+
:title: DIE ZEIT
|
9
|
+
- :url: http://www.spiegel.de/schlagzeilen/rss/index.xml
|
10
|
+
:to: my.secret@e.mail
|
11
|
+
:title: SPIEGEL ONLINE
|
12
|
+
:hourly:
|
13
|
+
- :url: http://www.heise.de/newsticker/heise-atom.xml
|
14
|
+
:to: my.secret@e.mail
|
15
|
+
:title: heise online News
|
16
|
+
:body: heisetext
|
17
|
+
:body_encoding: ISO-8859-1
|
18
|
+
- :url: http://log.netbib.de/feed/
|
19
|
+
:to: my.secret@e.mail
|
20
|
+
:title: netbib weblog
|
21
|
+
:daily:
|
22
|
+
- :url: http://aktuell.de.selfhtml.org/weblog/rss-feed
|
23
|
+
:to: my.secret@e.mail
|
24
|
+
:title: SELFHTML Aktuell Weblog
|
25
|
+
- :url: http://rss.slashdot.org/Slashdot/slashdotLinux
|
26
|
+
:to: my.secret@e.mail
|
27
|
+
:title: "Slashdot: Linux"
|
28
|
+
- :url: http://rss.slashdot.org/Slashdot/slashdotBookReviews
|
29
|
+
:to: my.secret@e.mail
|
30
|
+
:title: "Slashdot: Book Reviews"
|
data/lib/rss2mail.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# rss2mail -- Send RSS feeds as e-mail #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2008 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <ww@blackwinter.de> #
|
10
|
+
# #
|
11
|
+
# rss2mail is free software; you can redistribute it and/or modify it under #
|
12
|
+
# the terms of the GNU General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
19
|
+
# details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU General Public License along #
|
22
|
+
# with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'rss2mail/feed'
|
28
|
+
|
29
|
+
module RSS2Mail
|
30
|
+
end
|
@@ -0,0 +1,225 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# A component of rss2mail, the RSS to e-mail forwarder. #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2007-2008 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <ww@blackwinter.de> #
|
10
|
+
# #
|
11
|
+
# rss2mail is free software; you can redistribute it and/or modify it under #
|
12
|
+
# the terms of the GNU General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# rss2mail is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
19
|
+
# details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU General Public License along #
|
22
|
+
# with rss2mail. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'open-uri'
|
28
|
+
require 'erb'
|
29
|
+
|
30
|
+
require 'rubygems'
|
31
|
+
require 'hpricot'
|
32
|
+
require 'unidecode'
|
33
|
+
require 'nuggets/util/i18n'
|
34
|
+
require 'nuggets/string/evaluate'
|
35
|
+
|
36
|
+
require 'rss2mail/rss'
|
37
|
+
|
38
|
+
module RSS2Mail
|
39
|
+
|
40
|
+
class Feed

  # Character sequences replaced by ASCII look-alikes when a subject line is
  # cleaned up (see clean_subject).
  # NOTE(review): the last key was a plain '&' mapped to itself, i.e. a no-op;
  # it is assumed to be the HTML entity that lost its encoding -- confirm
  # against the upstream source.
  SUBSTITUTIONS = {
    '–'     => '--',
    '«'     => '<<',
    '&amp;' => '&'
  }

  # Matches any key of SUBSTITUTIONS. This used to be a %r{...} literal that
  # contained the text "Regexp.union(...)" instead of its result, so no
  # substitution ever took place.
  SUBSTITUTIONS_RE = Regexp.union(*SUBSTITUTIONS.keys)

  # HTML tags that survive when an item's linked page is used as mail body.
  TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4]

  attr_reader :feed, :verbose, :reload, :simple, :updated, :content, :rss

  # Wraps one feed configuration.
  #
  # feed::    Hash with at least the keys :url, :to and :title. The hash is
  #           kept by reference and updated in place (:etag, :mtime,
  #           :updated, :sent) while the feed is fetched and delivered.
  # options:: Hash; :verbose and :reload are read.
  #
  # Raises TypeError if +feed+ is not a Hash and ArgumentError if one of the
  # required keys is missing.
  def initialize(feed, options = {})
    raise TypeError, "Hash expected, got #{feed.class}" unless feed.is_a?(Hash)

    @feed    = feed
    @simple  = feed[:simple]
    @updated = feed[:updated]

    @verbose = options[:verbose]
    @reload  = options[:reload]

    required = [:url, :to, :title]
    required.delete_if { |i| feed.has_key?(i) }

    raise ArgumentError, "feed incomplete: #{required.join(', ')} missing" unless required.empty?
  end

  # Fetches and parses the feed and pipes one mail per remaining item to
  # /usr/bin/mail.
  #
  # templates:: Hash-like object mapping a content sub-type (the part after
  #             the slash of the content type, e.g. "html") to ERB source.
  #
  # Returns the number of items sent, or nil when there was nothing to do
  # (fetch/parse failed, no items, no recipients or no template).
  #
  # The names of the local variables below are part of the contract: both
  # the command string and the ERB template are evaluated against +binding+
  # and may refer to any of them.
  def deliver(templates)
    unless get && parse
      warn "[#{feed[:title]}] Nothing to send" if verbose
      return
    end

    if rss.items.empty?
      warn "[#{feed[:title]}] No new items" if verbose
      return
    end

    to = [*feed[:to]]
    if to.empty?
      warn "[#{feed[:title]}] No one to send to" if verbose
      return
    end

    feed_title   = feed[:title]
    content_type = feed[:content_type] || 'text/html'
    encoding     = feed[:encoding]     || 'UTF-8'

    feed[:sent] ||= []

    content_type_header = "Content-type: #{content_type}; charset=#{encoding}"

    unless template = templates[content_type[/\/(.*)/, 1]]
      warn "[#{feed[:title]}] Template not found: #{content_type}" if verbose
      return
    end

    # The escaped \#{subject} is left in the string on purpose; it is filled
    # in per item by String#evaluate further down.
    cmd = [
      '/usr/bin/mail',
      '-e',
      "-a '#{content_type_header}'",
      "-a 'From: rss2mail@blackwinter.de'",
      "-s '[#{feed_title}] \#{subject}'",
      *to
    ].join(' ')

    sent = 0

    rss.items.each { |item|
      title       = item.title
      link        = item.link
      description = item.description
      date        = item.date
      author      = item.author

      # NOTE(review): these patterns were plain '<' / '>' replaced by
      # themselves (no-ops); given the option name they are assumed to be
      # the entities that lost their encoding -- confirm against upstream.
      if description && feed[:unescape_html]
        description.gsub!(/&lt;/, '<')
        description.gsub!(/&gt;/, '>')
      end

      if tag = feed[:body]
        # Use the linked page (or the element selected by +tag+) as body,
        # drop every tag that is not whitelisted and unlink anchors whose
        # target is not an absolute http URL.
        body = case tag
          when true then open_url(link).read
          else Hpricot(open_url(link)).at(tag).to_s
        end.gsub(/<\/?(.*?)>/) { |m|
          # to_s guards against an empty tag ("<>"), where there is no name.
          m if TAGS_TO_KEEP.include?($1.split.first.to_s.downcase)
        }.gsub(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')

        if body_encoding = feed[:body_encoding]
          body = to_utf8(body, body_encoding)
        end
      end

      subject = title ? clean_subject(title) : 'NO TITLE'

      _cmd = cmd.evaluate(binding)

      begin
        IO.popen(_cmd, 'w') { |mail| mail.puts ERB.new(template).result(binding) }
        feed[:sent] << link
        sent += 1
      rescue Errno::EPIPE => err
        warn "[#{feed[:title]}] Error while sending mail (#{err.class}): #{_cmd}"
      end
    }

    # only keep the last 100 entries
    feed[:sent].slice!(0...-100)

    warn "[#{feed[:title]}] #{sent} items sent" if verbose
    sent
  end

  private

  # Downloads the feed into @content and returns it (nil if nothing was
  # retrieved). Unless +reload+ is set, a stored ETag or modification time
  # is sent along so that an unchanged feed is not transferred again.
  #
  # The default is written as self.reload: a bare "reload = reload" refers
  # to the (still nil) parameter itself on current Rubies, which silently
  # disabled reloading.
  def get(reload = self.reload)
    if reload
      @content = nil
      conditions = {}
    else
      conditions = case
        when etag = feed[:etag]   then { 'If-None-Match' => etag }
        when mtime = feed[:mtime] then { 'If-Modified-Since' => mtime }
        else {}
      end
    end

    begin
      open_url(feed[:url], conditions) { |uri|
        # Remember whichever validator the server offers for the next run.
        case
          when etag = uri.meta['etag']    then feed[:etag]    = etag
          when mtime = uri.last_modified  then feed[:mtime]   = mtime.rfc822
          else                                 feed[:updated] = Time.now
        end

        @content ||= uri.read
      }
    rescue OpenURI::HTTPError
      warn "[#{feed[:title]}] Feed not found or unchanged" if verbose
    rescue Timeout::Error, Errno::ETIMEDOUT, Errno::ECONNRESET => err
      warn "[#{feed[:title]}] Error while getting feed: #{err} (#{err.class})"
    end

    @content
  end

  # Parses @content into @rss and, unless +reload+ is set, removes the items
  # that were already handled: those dated at or before +updated+, or, for
  # items without a date (or when +updated+ is unset), those whose link is
  # listed in feed[:sent]. Returns @rss (nil if there is nothing to parse or
  # parsing failed).
  def parse(reload = self.reload)
    @rss = nil if reload

    if content
      @rss ||= begin
        RSS2Mail::RSS.new(content, simple)
      rescue SimpleRSSError => err
        # warn returns nil, so @rss stays unset on a parse error.
        warn "[#{feed[:title]}] Error while parsing feed: #{err} (#{err.class})"
      end
    end

    if content && @rss
      sent = feed[:sent]

      unless reload
        @rss.items.delete_if { |item|
          if updated && date = item.date
            date <= updated
          else
            sent && sent.include?(item.link)
          end
        }
      end
    else
      warn "[#{feed[:title]}] Nothing to parse" if verbose
    end

    @rss
  end

  # Turns an item title into something that is safe inside the
  # single-quoted -s argument of the mail command: diacritics and the
  # SUBSTITUTIONS are replaced, the rest is transliterated to ASCII and
  # every single quote is rewritten as '\'' (close quote, escaped quote,
  # reopen quote).
  def clean_subject(string)
    string.
      replace_diacritics.
      gsub(SUBSTITUTIONS_RE) { |m| SUBSTITUTIONS[m] }.
      to_ascii.
      gsub(/'/, "'\\\\''")
  end

  # Opens +url+ through open-uri and passes +args+ and the block on.
  #
  # Item links come from the downloaded feed, i.e. from an untrusted source.
  # Handing them to Kernel#open would run a command for strings starting
  # with "|" and read local files for plain paths, so only URIs that
  # open-uri itself can open are accepted.
  #
  # Raises URI::InvalidURIError for unparsable input and ArgumentError for
  # anything that is not a fetchable URL.
  def open_url(url, *args, &block)
    uri = URI.parse(url.to_s)
    raise ArgumentError, "not a fetchable URL: #{url}" unless uri.respond_to?(:open)

    uri.open(*args, &block)
  end

  # Re-encodes +string+, whose bytes are in encoding +from+, as UTF-8.
  # Uses String#encode where available and falls back to Iconv (loaded on
  # demand) on Rubies that predate it.
  def to_utf8(string, from)
    if string.respond_to?(:encode)
      string.dup.force_encoding(from).encode('UTF-8')
    else
      require 'iconv'
      Iconv.conv('UTF-8', from, string)
    end
  end

end
|
224
|
+
|
225
|
+
end
|