rss2mail 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +1 -1
- data/bin/rss2mail +11 -9
- data/example/feeds.yaml +5 -0
- data/lib/rss2mail/feed.rb +60 -71
- data/lib/rss2mail/rss.rb +109 -20
- data/lib/rss2mail/util.rb +1 -1
- data/lib/rss2mail/version.rb +1 -1
- data/lib/rss2mail.rb +1 -1
- data/templates/html.erb +12 -0
- data/templates/plain.erb +8 -0
- metadata +6 -4
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to rss2mail version 0.0.
|
5
|
+
This documentation refers to rss2mail version 0.0.2
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -26,7 +26,7 @@ Rubyforge project:: <http://rubyforge.org/projects/rss2mail>
|
|
26
26
|
|
27
27
|
== LICENSE AND COPYRIGHT
|
28
28
|
|
29
|
-
Copyright (C) 2007-
|
29
|
+
Copyright (C) 2007-2009 Jens Wille
|
30
30
|
|
31
31
|
rss2mail is free software: you can redistribute it and/or modify it under
|
32
32
|
the terms of the GNU General Public License as published by the Free Software
|
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ begin
|
|
15
15
|
:summary => %q{Send RSS feeds as e-mail},
|
16
16
|
:homepage => %q{http://rss2mail.rubyforge.org/},
|
17
17
|
:files => FileList['lib/**/*.rb', 'bin/*'].to_a,
|
18
|
-
:extra_files => FileList['[A-Z]*', 'example/*'].to_a,
|
18
|
+
:extra_files => FileList['[A-Z]*', 'templates/*', 'example/*'].to_a,
|
19
19
|
:dependencies => %w[simple-rss hpricot unidecode ruby-nuggets]
|
20
20
|
}
|
21
21
|
}}
|
data/bin/rss2mail
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# #
|
6
6
|
# rss2mail -- Send RSS feeds as e-mail #
|
7
7
|
# #
|
8
|
-
# Copyright (C) 2007-
|
8
|
+
# Copyright (C) 2007-2009 Jens Wille #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <ww@blackwinter.de> #
|
@@ -40,7 +40,8 @@ abort USAGE if ARGV.empty?
|
|
40
40
|
options = {
|
41
41
|
:files => nil,
|
42
42
|
:reload => false,
|
43
|
-
:verbose => false
|
43
|
+
:verbose => false,
|
44
|
+
:debug => false
|
44
45
|
}
|
45
46
|
|
46
47
|
OptionParser.new { |opts|
|
@@ -61,6 +62,10 @@ OptionParser.new { |opts|
|
|
61
62
|
opts.on('-v', '--verbose', 'Be verbose') {
|
62
63
|
options[:verbose] = true
|
63
64
|
}
|
65
|
+
|
66
|
+
opts.on('-D', '--debug', "Print debug output and don't send any mails") {
|
67
|
+
options[:debug] = true
|
68
|
+
}
|
64
69
|
}.parse!
|
65
70
|
|
66
71
|
if target = ARGV.shift
|
@@ -70,16 +75,15 @@ else
|
|
70
75
|
end
|
71
76
|
|
72
77
|
templates = Hash.new { |h, k|
|
73
|
-
|
74
|
-
|
75
|
-
begin
|
76
|
-
h[k] = File.read(t)
|
78
|
+
h[k] = begin
|
79
|
+
File.read(File.join(base, 'templates', "#{k}.erb"))
|
77
80
|
rescue Errno::ENOENT
|
78
81
|
# silently ignore
|
79
82
|
end
|
80
83
|
}
|
81
84
|
|
82
85
|
feeds_files = options.delete(:files) || [File.join(base, 'feeds.yaml')]
|
86
|
+
|
83
87
|
feeds_files.each { |feeds_file|
|
84
88
|
feeds = begin
|
85
89
|
YAML.load_file(feeds_file)
|
@@ -98,7 +102,5 @@ feeds_files.each { |feeds_file|
|
|
98
102
|
}
|
99
103
|
|
100
104
|
# write updated feed information
|
101
|
-
File.open(feeds_file, 'w') { |file|
|
102
|
-
YAML.dump(feeds, file)
|
103
|
-
}
|
105
|
+
File.open(feeds_file, 'w') { |file| YAML.dump(feeds, file) } unless options[:debug]
|
104
106
|
}
|
data/example/feeds.yaml
CHANGED
data/lib/rss2mail/feed.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# #
|
4
4
|
# A component of rss2mail, the RSS to e-mail forwarder. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2009 Jens Wille #
|
7
7
|
# #
|
8
8
|
# Authors: #
|
9
9
|
# Jens Wille <ww@blackwinter.de> #
|
@@ -28,9 +28,6 @@ require 'open-uri'
|
|
28
28
|
require 'erb'
|
29
29
|
|
30
30
|
require 'rubygems'
|
31
|
-
require 'hpricot'
|
32
|
-
require 'unidecode'
|
33
|
-
require 'nuggets/util/i18n'
|
34
31
|
require 'nuggets/string/evaluate'
|
35
32
|
|
36
33
|
require 'rss2mail/rss'
|
@@ -39,17 +36,9 @@ module RSS2Mail
|
|
39
36
|
|
40
37
|
class Feed
|
41
38
|
|
42
|
-
|
43
|
-
'–' => '--',
|
44
|
-
'«' => '<<',
|
45
|
-
'&amp;' => '&'
|
46
|
-
}
|
39
|
+
HOST = ENV['HOSTNAME'] || ENV['HOST'] || %x{hostname}.chomp
|
47
40
|
|
48
|
-
|
49
|
-
|
50
|
-
TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4]
|
51
|
-
|
52
|
-
attr_reader :feed, :verbose, :reload, :simple, :updated, :content, :rss
|
41
|
+
attr_reader :feed, :reload, :verbose, :debug, :simple, :updated, :content, :rss
|
53
42
|
|
54
43
|
def initialize(feed, options = {})
|
55
44
|
raise TypeError, "Hash expected, got #{feed.class}" unless feed.is_a?(Hash)
|
@@ -58,33 +47,34 @@ module RSS2Mail
|
|
58
47
|
@simple = feed[:simple]
|
59
48
|
@updated = feed[:updated]
|
60
49
|
|
61
|
-
@verbose = options[:verbose]
|
62
50
|
@reload = options[:reload]
|
51
|
+
@verbose = options[:verbose]
|
52
|
+
@debug = options[:debug]
|
63
53
|
|
64
54
|
required = [:url, :to, :title]
|
65
55
|
required.delete_if { |i| feed.has_key?(i) }
|
66
56
|
|
67
|
-
raise ArgumentError, "
|
57
|
+
raise ArgumentError, "Feed incomplete: #{required.join(', ')} missing" unless required.empty?
|
68
58
|
end
|
69
59
|
|
70
60
|
def deliver(templates)
|
71
|
-
|
72
|
-
|
61
|
+
to = [*feed[:to]]
|
62
|
+
|
63
|
+
if to.empty?
|
64
|
+
log 'No one to send to'
|
73
65
|
return
|
74
66
|
end
|
75
67
|
|
76
|
-
|
77
|
-
|
68
|
+
unless get && parse
|
69
|
+
log 'Nothing to send'
|
78
70
|
return
|
79
71
|
end
|
80
72
|
|
81
|
-
|
82
|
-
|
83
|
-
warn "[#{feed[:title]}] No one to send to" if verbose
|
73
|
+
if rss.items.empty?
|
74
|
+
log 'No new items'
|
84
75
|
return
|
85
76
|
end
|
86
77
|
|
87
|
-
feed_title = feed[:title]
|
88
78
|
content_type = feed[:content_type] || 'text/html'
|
89
79
|
encoding = feed[:encoding] || 'UTF-8'
|
90
80
|
|
@@ -93,7 +83,7 @@ module RSS2Mail
|
|
93
83
|
content_type_header = "Content-type: #{content_type}; charset=#{encoding}"
|
94
84
|
|
95
85
|
unless template = templates[content_type[/\/(.*)/, 1]]
|
96
|
-
|
86
|
+
log "Template not found: #{content_type}"
|
97
87
|
return
|
98
88
|
end
|
99
89
|
|
@@ -101,8 +91,8 @@ module RSS2Mail
|
|
101
91
|
'/usr/bin/mail',
|
102
92
|
'-e',
|
103
93
|
"-a '#{content_type_header}'",
|
104
|
-
"-a 'From: rss2mail
|
105
|
-
"-s '[#{
|
94
|
+
"-a 'From: rss2mail@#{HOST}'",
|
95
|
+
"-s '[#{feed[:title]}] \#{subject}'",
|
106
96
|
*to
|
107
97
|
].join(' ')
|
108
98
|
|
@@ -111,45 +101,25 @@ module RSS2Mail
|
|
111
101
|
rss.items.each { |item|
|
112
102
|
title = item.title
|
113
103
|
link = item.link
|
114
|
-
description = item.description
|
104
|
+
description = item.description(feed[:unescape_html])
|
115
105
|
date = item.date
|
116
106
|
author = item.author
|
107
|
+
body = item.body(feed[:body])
|
108
|
+
subject = item.subject
|
117
109
|
|
118
|
-
|
119
|
-
|
120
|
-
description.gsub!(/&gt;/, '>')
|
121
|
-
end
|
122
|
-
|
123
|
-
if tag = feed[:body]
|
124
|
-
body = case tag
|
125
|
-
when true: open(link).read
|
126
|
-
else Hpricot(open(link)).at(tag).to_s
|
127
|
-
end.gsub(/<\/?(.*?)>/) { |m|
|
128
|
-
m if TAGS_TO_KEEP.include?($1.split.first.downcase)
|
129
|
-
}.gsub(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')
|
110
|
+
log "#{title} / #{date} [#{author}]", debug
|
111
|
+
log "<#{link}>", debug
|
130
112
|
|
131
|
-
|
132
|
-
body = Iconv.conv('UTF-8', body_encoding, body)
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
subject = title ? clean_subject(title) : 'NO TITLE'
|
137
|
-
|
138
|
-
_cmd = cmd.evaluate(binding)
|
139
|
-
|
140
|
-
begin
|
141
|
-
IO.popen(_cmd, 'w') { |mail| mail.puts ERB.new(template).result(binding) }
|
113
|
+
send_mail(cmd.evaluate(binding), ERB.new(template).result(binding)) {
|
142
114
|
feed[:sent] << link
|
143
115
|
sent += 1
|
144
|
-
|
145
|
-
warn "[#{feed[:title]}] Error while sending mail (#{err.class}): #{_cmd}"
|
146
|
-
end
|
116
|
+
}
|
147
117
|
}
|
148
118
|
|
149
119
|
# only keep the last 100 entries
|
150
120
|
feed[:sent].slice!(0...-100)
|
151
121
|
|
152
|
-
|
122
|
+
log "#{sent} items sent"
|
153
123
|
sent
|
154
124
|
end
|
155
125
|
|
@@ -161,26 +131,30 @@ module RSS2Mail
|
|
161
131
|
conditions = {}
|
162
132
|
else
|
163
133
|
conditions = case
|
164
|
-
when etag = feed[:etag]
|
165
|
-
when mtime = feed[:mtime]
|
166
|
-
else
|
134
|
+
when etag = feed[:etag] then { 'If-None-Match' => etag }
|
135
|
+
when mtime = feed[:mtime] then { 'If-Modified-Since' => mtime }
|
136
|
+
else {}
|
167
137
|
end
|
168
138
|
end
|
169
139
|
|
140
|
+
log conditions.inspect, debug
|
141
|
+
|
170
142
|
begin
|
171
143
|
open(feed[:url], conditions) { |uri|
|
172
144
|
case
|
173
|
-
when etag = uri.meta['etag']
|
174
|
-
when mtime = uri.last_modified
|
175
|
-
else
|
145
|
+
when etag = uri.meta['etag'] then feed[:etag] = etag
|
146
|
+
when mtime = uri.last_modified then feed[:mtime] = mtime.rfc822
|
147
|
+
else feed[:updated] = Time.now
|
176
148
|
end
|
177
149
|
|
178
150
|
@content ||= uri.read
|
179
151
|
}
|
152
|
+
|
153
|
+
log feed.values_at(:etag, :mtime, :updated).inspect, debug
|
180
154
|
rescue OpenURI::HTTPError
|
181
|
-
|
155
|
+
log 'Feed not found or unchanged'
|
182
156
|
rescue Timeout::Error, Errno::ETIMEDOUT, Errno::ECONNRESET => err
|
183
|
-
|
157
|
+
error err, 'while getting feed'
|
184
158
|
end
|
185
159
|
|
186
160
|
@content
|
@@ -192,7 +166,7 @@ module RSS2Mail
|
|
192
166
|
if content && @rss ||= begin
|
193
167
|
RSS2Mail::RSS.new(content, simple)
|
194
168
|
rescue SimpleRSSError => err
|
195
|
-
|
169
|
+
error err, 'while parsing feed'
|
196
170
|
end
|
197
171
|
sent = feed[:sent]
|
198
172
|
|
@@ -206,18 +180,33 @@ module RSS2Mail
|
|
206
180
|
}
|
207
181
|
end
|
208
182
|
else
|
209
|
-
|
183
|
+
log 'Nothing to parse'
|
210
184
|
end
|
211
185
|
|
212
186
|
@rss
|
213
187
|
end
|
214
188
|
|
215
|
-
def
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
189
|
+
def send_mail(cmd, body)
|
190
|
+
return if debug
|
191
|
+
|
192
|
+
IO.popen(cmd, 'w') { |mail| mail.puts body }
|
193
|
+
yield if block_given?
|
194
|
+
rescue Errno::EPIPE => err
|
195
|
+
error err, 'while sending mail', cmd
|
196
|
+
end
|
197
|
+
|
198
|
+
def log(msg, verbose = verbose)
|
199
|
+
warn "[#{feed[:title]}] #{msg}" if verbose
|
200
|
+
end
|
201
|
+
|
202
|
+
def error(err = nil, occasion = nil, extra = nil)
|
203
|
+
msg = 'Error'
|
204
|
+
|
205
|
+
msg << " #{occasion}" if occasion
|
206
|
+
msg << ": #{err} (#{err.class})" if err
|
207
|
+
msg << " [#{extra}]" if extra
|
208
|
+
|
209
|
+
log msg, true
|
221
210
|
end
|
222
211
|
|
223
212
|
end
|
data/lib/rss2mail/rss.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
#--
|
2
4
|
###############################################################################
|
3
5
|
# #
|
4
6
|
# A component of rss2mail, the RSS to e-mail forwarder. #
|
5
7
|
# #
|
6
|
-
# Copyright (C) 2007-
|
8
|
+
# Copyright (C) 2007-2009 Jens Wille #
|
7
9
|
# #
|
8
10
|
# Authors: #
|
9
11
|
# Jens Wille <ww@blackwinter.de> #
|
@@ -24,15 +26,34 @@
|
|
24
26
|
###############################################################################
|
25
27
|
#++
|
26
28
|
|
29
|
+
require 'open-uri'
|
27
30
|
require 'rss'
|
28
31
|
|
29
32
|
require 'rubygems'
|
30
33
|
require 'simple-rss'
|
34
|
+
require 'unidecode'
|
35
|
+
require 'nuggets/util/i18n'
|
36
|
+
|
37
|
+
begin
|
38
|
+
require 'hpricot'
|
39
|
+
rescue LoadError => err
|
40
|
+
warn err
|
41
|
+
end
|
31
42
|
|
32
43
|
module RSS2Mail
|
33
44
|
|
34
45
|
class RSS
|
35
46
|
|
47
|
+
SUBSTITUTIONS = {
|
48
|
+
'–' => '--',
|
49
|
+
'«' => '<<',
|
50
|
+
'&amp;' => '&'
|
51
|
+
}
|
52
|
+
|
53
|
+
SUBSTITUTIONS_RE = Regexp.union(*SUBSTITUTIONS.keys)
|
54
|
+
|
55
|
+
TAGS_TO_KEEP = %w[a p br h1 h2 h3 h4]
|
56
|
+
|
36
57
|
attr_reader :content, :rss
|
37
58
|
|
38
59
|
def initialize(content, simple = false)
|
@@ -60,32 +81,100 @@ module RSS2Mail
|
|
60
81
|
|
61
82
|
class Item
|
62
83
|
|
63
|
-
ALIASES = {
|
64
|
-
:title => %w[],
|
65
|
-
:link => %w[],
|
66
|
-
:description => %w[summary content],
|
67
|
-
:date => %w[pubDate updated],
|
68
|
-
:author => %w[dc_creator]
|
69
|
-
}
|
70
|
-
|
71
84
|
def initialize(item)
|
72
85
|
@item = item
|
73
86
|
end
|
74
87
|
|
75
|
-
def
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
88
|
+
def title
|
89
|
+
@title ||= value_for(:title, :content)
|
90
|
+
end
|
91
|
+
|
92
|
+
def link
|
93
|
+
@link ||= value_for(:link, :href)
|
94
|
+
end
|
95
|
+
|
96
|
+
def description(unescape_html = false)
|
97
|
+
@description ||= get_description(unescape_html)
|
98
|
+
end
|
99
|
+
|
100
|
+
def date
|
101
|
+
@date ||= value_for({ :date => %w[pubDate updated dc_date] }, :content) { |field, value|
|
102
|
+
field == 'updated' && value.respond_to?(:content) ? Time.at(value.content.to_i) : value
|
103
|
+
}
|
104
|
+
end
|
105
|
+
|
106
|
+
def author
|
107
|
+
@author ||= value_for({ :author => %w[contributor dc_creator] }, %w[name content])
|
108
|
+
end
|
109
|
+
|
110
|
+
def body(tag = nil, encoding = nil)
|
111
|
+
@body ||= get_body(tag, encoding)
|
112
|
+
end
|
113
|
+
|
114
|
+
def subject
|
115
|
+
@subject ||= title ? clean_subject(title) : 'NO TITLE'
|
116
|
+
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
def value_for(field, methods = nil, &block)
|
121
|
+
value = get_value_for(field, &block)
|
122
|
+
|
123
|
+
if methods
|
124
|
+
[*methods].each { |method|
|
125
|
+
break unless value.respond_to?(method)
|
126
|
+
value = value.send(method)
|
83
127
|
}
|
128
|
+
end
|
129
|
+
|
130
|
+
value.respond_to?(:strip) ? value.strip : value
|
131
|
+
end
|
132
|
+
|
133
|
+
def get_value_for(fields, &block)
|
134
|
+
fields = fields.is_a?(Hash) ? fields.to_a.flatten : [*fields]
|
135
|
+
|
136
|
+
fields.each { |field|
|
137
|
+
begin
|
138
|
+
value = @item.send(field)
|
139
|
+
value = block[field, value] if block
|
140
|
+
return value if value
|
141
|
+
rescue NoMethodError
|
142
|
+
end
|
143
|
+
}
|
144
|
+
|
145
|
+
nil
|
146
|
+
end
|
84
147
|
|
85
|
-
|
86
|
-
|
87
|
-
|
148
|
+
def get_description(unescape_html)
|
149
|
+
description = value_for({ :description => %w[summary content] }, :content)
|
150
|
+
|
151
|
+
if description && unescape_html
|
152
|
+
description.gsub!(/&lt;/, '<')
|
153
|
+
description.gsub!(/&gt;/, '>')
|
154
|
+
end
|
155
|
+
|
156
|
+
description
|
157
|
+
end
|
158
|
+
|
159
|
+
def get_body(tag, encoding)
|
160
|
+
body = case tag
|
161
|
+
when nil then return
|
162
|
+
when true then open(link).read
|
163
|
+
else defined?(Hpricot) ? Hpricot(open(link)).at(tag).to_s : open(link).read
|
88
164
|
end
|
165
|
+
|
166
|
+
body.gsub!(/<\/?(.*?)>/) { |m| m if TAGS_TO_KEEP.include?($1.split.first.downcase) }
|
167
|
+
body.gsub!(/<a\s+href=['"](?!http:).*?>(.*?)<\/a>/mi, '\1')
|
168
|
+
|
169
|
+
encoding ? Iconv.conv('UTF-8', encoding, body) : body
|
170
|
+
end
|
171
|
+
|
172
|
+
def clean_subject(string)
|
173
|
+
string.
|
174
|
+
replace_diacritics.
|
175
|
+
gsub(SUBSTITUTIONS_RE) { |m| SUBSTITUTIONS[m] }.
|
176
|
+
to_ascii.
|
177
|
+
gsub(/'/, "'\\\\''")
|
89
178
|
end
|
90
179
|
|
91
180
|
end
|
data/lib/rss2mail/util.rb
CHANGED
data/lib/rss2mail/version.rb
CHANGED
data/lib/rss2mail.rb
CHANGED
data/templates/html.erb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<%= title %><% if date %> / <%= date %><% end %><% if author %> [<%= author %>]<% end %><br />
|
4
|
+
<% if description %><br />
|
5
|
+
<%= description %><br />
|
6
|
+
<% end %><br />
|
7
|
+
<% if body %><br />
|
8
|
+
<%= body %><br />
|
9
|
+
<% end %><br />
|
10
|
+
&lt;<%= link %>&gt;
|
11
|
+
</body>
|
12
|
+
</html>
|
data/templates/plain.erb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rss2mail
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-03-02 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -73,17 +73,19 @@ files:
|
|
73
73
|
- COPYING
|
74
74
|
- ChangeLog
|
75
75
|
- README
|
76
|
+
- templates/plain.erb
|
77
|
+
- templates/html.erb
|
76
78
|
- example/feeds.yaml
|
77
79
|
has_rdoc: true
|
78
80
|
homepage: http://rss2mail.rubyforge.org/
|
79
81
|
post_install_message:
|
80
82
|
rdoc_options:
|
81
83
|
- --line-numbers
|
84
|
+
- --main
|
85
|
+
- README
|
82
86
|
- --inline-source
|
83
87
|
- --title
|
84
88
|
- rss2mail Application documentation
|
85
|
-
- --main
|
86
|
-
- README
|
87
89
|
- --charset
|
88
90
|
- UTF-8
|
89
91
|
- --all
|