feed2email 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/README.md +124 -59
- data/bin/f2e +9 -0
- data/bin/feed2email +4 -6
- data/bin/feed2email-migrate-feedlist +36 -0
- data/lib/feed2email.rb +16 -11
- data/lib/feed2email/cli.rb +158 -0
- data/lib/feed2email/config.rb +8 -3
- data/lib/feed2email/configurable.rb +7 -0
- data/lib/feed2email/feed.rb +87 -99
- data/lib/feed2email/feed_autodiscoverer.rb +55 -0
- data/lib/feed2email/feed_history.rb +63 -9
- data/lib/feed2email/feed_list.rb +147 -0
- data/lib/feed2email/lazy_smtp_connection.rb +6 -5
- data/lib/feed2email/loggable.rb +7 -0
- data/lib/feed2email/mail.rb +23 -39
- data/lib/feed2email/redirection_checker.rb +38 -0
- data/lib/feed2email/version.rb +1 -1
- metadata +61 -15
- data/TODO.md +0 -16
- data/lib/feed2email/feed_data_file.rb +0 -65
- data/lib/feed2email/feed_meta.rb +0 -28
- data/lib/feed2email/feeds.rb +0 -77
data/lib/feed2email/config.rb
CHANGED
@@ -14,7 +14,12 @@ module Feed2Email
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def [](option)
|
17
|
-
|
17
|
+
config[option] # delegate
|
18
|
+
end
|
19
|
+
|
20
|
+
def smtp_configured?
|
21
|
+
config['smtp_host'] && config['smtp_port'] && config['smtp_user'] &&
|
22
|
+
config['smtp_pass']
|
18
23
|
end
|
19
24
|
|
20
25
|
private
|
@@ -81,8 +86,8 @@ module Feed2Email
|
|
81
86
|
File.read(path)
|
82
87
|
end
|
83
88
|
|
84
|
-
def
|
85
|
-
@
|
89
|
+
def config
|
90
|
+
@config ||= defaults.merge(data)
|
86
91
|
end
|
87
92
|
|
88
93
|
def defaults
|
data/lib/feed2email/feed.rb
CHANGED
@@ -1,53 +1,24 @@
|
|
1
1
|
require 'feedzirra'
|
2
|
-
require 'forwardable'
|
3
|
-
require 'net/http'
|
4
2
|
require 'open-uri'
|
5
3
|
require 'stringio'
|
6
|
-
require 'uri'
|
7
4
|
require 'zlib'
|
5
|
+
require 'feed2email/configurable'
|
8
6
|
require 'feed2email/core_ext'
|
9
7
|
require 'feed2email/entry'
|
10
8
|
require 'feed2email/feed_history'
|
11
|
-
require 'feed2email/
|
12
|
-
require 'feed2email/
|
9
|
+
require 'feed2email/loggable'
|
10
|
+
require 'feed2email/redirection_checker'
|
13
11
|
require 'feed2email/version'
|
14
12
|
|
15
13
|
module Feed2Email
|
16
14
|
class Feed
|
17
|
-
|
15
|
+
include Configurable
|
16
|
+
include Loggable
|
18
17
|
|
19
|
-
|
18
|
+
attr_reader :meta
|
20
19
|
|
21
|
-
def
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.smtp_connection
|
26
|
-
Feed2Email.smtp_connection # delegate
|
27
|
-
end
|
28
|
-
|
29
|
-
logger.debug 'Loading feed subscriptions...'
|
30
|
-
@feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
|
31
|
-
logger.info "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
|
32
|
-
|
33
|
-
def self.process_all
|
34
|
-
begin
|
35
|
-
feed_uris.each_with_index do |uri, i|
|
36
|
-
feed = new(uri)
|
37
|
-
feed.process
|
38
|
-
feed_uris[i] = feed.uri # persist possible permanent redirect
|
39
|
-
end
|
40
|
-
ensure
|
41
|
-
smtp_connection.finalize
|
42
|
-
end
|
43
|
-
|
44
|
-
feed_uris.sync
|
45
|
-
end
|
46
|
-
|
47
|
-
attr_reader :uri
|
48
|
-
|
49
|
-
def initialize(uri)
|
50
|
-
@uri = uri
|
20
|
+
def initialize(meta)
|
21
|
+
@meta = meta
|
51
22
|
end
|
52
23
|
|
53
24
|
def process
|
@@ -56,23 +27,39 @@ module Feed2Email
|
|
56
27
|
return unless fetch_and_parse_feed
|
57
28
|
|
58
29
|
if entries.any?
|
59
|
-
process_entries
|
30
|
+
processed = process_entries
|
60
31
|
history.sync
|
61
|
-
meta.sync
|
62
32
|
else
|
33
|
+
processed = true
|
63
34
|
logger.warn 'Feed does not have entries'
|
64
35
|
end
|
36
|
+
|
37
|
+
processed
|
65
38
|
end
|
66
39
|
|
67
40
|
private
|
68
41
|
|
42
|
+
def apply_send_delay
|
43
|
+
return if config['send_delay'] == 0
|
44
|
+
|
45
|
+
return if last_email_sent_at.nil?
|
46
|
+
|
47
|
+
secs_since_last_email = Time.now - last_email_sent_at
|
48
|
+
secs_to_sleep = config['send_delay'] - secs_since_last_email
|
49
|
+
|
50
|
+
return if secs_to_sleep <= 0
|
51
|
+
|
52
|
+
logger.debug "Sleeping for #{secs_to_sleep} seconds..."
|
53
|
+
sleep(secs_to_sleep)
|
54
|
+
end
|
55
|
+
|
69
56
|
def fetch_feed
|
70
57
|
logger.debug 'Fetching feed...'
|
71
58
|
|
72
59
|
begin
|
73
|
-
|
60
|
+
cache_feed = !permanently_redirected?
|
74
61
|
|
75
|
-
open(uri, fetch_feed_options) do |f|
|
62
|
+
open(uri, fetch_feed_options(cache_feed)) do |f|
|
76
63
|
if f.meta['last-modified'] || meta.has_key?(:last_modified)
|
77
64
|
meta[:last_modified] = f.meta['last-modified']
|
78
65
|
end
|
@@ -83,31 +70,28 @@ module Feed2Email
|
|
83
70
|
|
84
71
|
return decode_content(f.read, f.meta['content-encoding'])
|
85
72
|
end
|
86
|
-
rescue
|
87
|
-
if e.message == '304 Not Modified'
|
73
|
+
rescue => e
|
74
|
+
if e.is_a?(OpenURI::HTTPError) && e.message == '304 Not Modified'
|
88
75
|
logger.info 'Feed not modified; skipping...'
|
89
|
-
|
76
|
+
else
|
77
|
+
logger.error 'Failed to fetch feed'
|
78
|
+
log_exception(e)
|
90
79
|
end
|
91
80
|
|
92
|
-
raise
|
93
|
-
rescue => e
|
94
|
-
logger.error 'Failed to fetch feed'
|
95
|
-
log_exception(e)
|
96
81
|
return false
|
97
82
|
end
|
98
83
|
end
|
99
84
|
|
100
|
-
def
|
101
|
-
|
102
|
-
http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
|
103
|
-
http.use_ssl = (parsed_uri.scheme == 'https')
|
104
|
-
response = http.head(parsed_uri.request_uri)
|
85
|
+
def permanently_redirected?
|
86
|
+
checker = RedirectionChecker.new(uri)
|
105
87
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
88
|
+
return false unless checker.permanently_redirected?
|
89
|
+
|
90
|
+
self.uri = checker.location
|
91
|
+
logger.warn 'Got permanently redirected!'
|
92
|
+
logger.warn "Updated feed location to #{checker.location}"
|
93
|
+
|
94
|
+
true
|
111
95
|
end
|
112
96
|
|
113
97
|
def decode_content(data, content_encoding)
|
@@ -125,18 +109,20 @@ module Feed2Email
|
|
125
109
|
xml
|
126
110
|
end
|
127
111
|
|
128
|
-
def fetch_feed_options
|
112
|
+
def fetch_feed_options(cache_feed)
|
129
113
|
options = {
|
130
114
|
'User-Agent' => "feed2email/#{VERSION}",
|
131
115
|
'Accept-Encoding' => 'gzip, deflate',
|
132
116
|
}
|
133
117
|
|
134
|
-
if
|
135
|
-
|
136
|
-
|
118
|
+
if cache_feed
|
119
|
+
if meta[:last_modified]
|
120
|
+
options['If-Modified-Since'] = meta[:last_modified]
|
121
|
+
end
|
137
122
|
|
138
|
-
|
139
|
-
|
123
|
+
if meta[:etag]
|
124
|
+
options['If-None-Match'] = meta[:etag]
|
125
|
+
end
|
140
126
|
end
|
141
127
|
|
142
128
|
options
|
@@ -162,10 +148,13 @@ module Feed2Email
|
|
162
148
|
@data && @data.respond_to?(:entries)
|
163
149
|
end
|
164
150
|
|
151
|
+
def uri
|
152
|
+
meta[:uri]
|
153
|
+
end
|
154
|
+
|
165
155
|
def uri=(uri)
|
166
156
|
history.uri = uri
|
167
|
-
meta
|
168
|
-
@uri = uri
|
157
|
+
meta[:uri] = uri
|
169
158
|
end
|
170
159
|
|
171
160
|
def entries
|
@@ -174,59 +163,58 @@ module Feed2Email
|
|
174
163
|
}
|
175
164
|
end
|
176
165
|
|
177
|
-
def logger
|
178
|
-
Feed2Email.logger # delegate
|
179
|
-
end
|
180
|
-
|
181
166
|
def max_entries
|
182
167
|
config['max_entries'].to_i
|
183
168
|
end
|
184
169
|
|
185
170
|
def process_entries
|
186
171
|
logger.info "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
|
187
|
-
entries.
|
172
|
+
entries.all? {|e| process_entry(e) } # false if any entry fails
|
188
173
|
end
|
189
174
|
|
190
175
|
def process_entry(entry)
|
191
176
|
logger.info "Processing entry #{entry.uri} ..."
|
192
177
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
if config['send_delay'] > 0
|
199
|
-
logger.debug(
|
200
|
-
"Sleeping for #{'second'.pluralize(config['send_delay'])}")
|
201
|
-
sleep(config['send_delay'])
|
202
|
-
end
|
178
|
+
unless history.any?
|
179
|
+
logger.debug 'Skipping new feed entry...'
|
180
|
+
history << entry.uri
|
181
|
+
return true
|
182
|
+
end
|
203
183
|
|
204
|
-
|
184
|
+
if history.include?(entry.uri)
|
185
|
+
logger.debug 'Skipping old entry...'
|
186
|
+
return true
|
187
|
+
end
|
205
188
|
|
206
|
-
|
207
|
-
entry.send_mail
|
208
|
-
rescue => e
|
209
|
-
log_exception(e)
|
210
|
-
end
|
189
|
+
apply_send_delay
|
211
190
|
|
212
|
-
|
213
|
-
history << entry.uri
|
214
|
-
end
|
191
|
+
logger.debug 'Sending new entry...'
|
215
192
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
193
|
+
begin
|
194
|
+
mail_sent = entry.send_mail
|
195
|
+
rescue => e
|
196
|
+
log_exception(e)
|
197
|
+
return false
|
198
|
+
end
|
199
|
+
|
200
|
+
if mail_sent
|
201
|
+
self.last_email_sent_at = Time.now
|
220
202
|
history << entry.uri
|
221
203
|
end
|
204
|
+
|
205
|
+
mail_sent
|
222
206
|
end
|
223
207
|
|
224
208
|
def history
|
225
209
|
@history ||= FeedHistory.new(uri)
|
226
210
|
end
|
227
211
|
|
228
|
-
def
|
229
|
-
@
|
212
|
+
def last_email_sent_at
|
213
|
+
@last_email_sent_at
|
214
|
+
end
|
215
|
+
|
216
|
+
def last_email_sent_at=(time)
|
217
|
+
@last_email_sent_at = time
|
230
218
|
end
|
231
219
|
|
232
220
|
def log_exception(error)
|
@@ -234,9 +222,9 @@ module Feed2Email
|
|
234
222
|
error.backtrace.each {|line| logger.debug line }
|
235
223
|
end
|
236
224
|
|
237
|
-
|
238
|
-
|
239
|
-
|
225
|
+
def title
|
226
|
+
data.title # delegate
|
227
|
+
end
|
240
228
|
|
241
229
|
def data; @data end
|
242
230
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module Feed2Email
|
6
|
+
class FeedAutodiscoverer
|
7
|
+
def initialize(uri)
|
8
|
+
@uri = uri
|
9
|
+
end
|
10
|
+
|
11
|
+
def content_type; @content_type end
|
12
|
+
|
13
|
+
def feeds
|
14
|
+
return @feeds if @feeds
|
15
|
+
fetch
|
16
|
+
@feeds = discoverable? ? discover : []
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def data; @data end
|
22
|
+
|
23
|
+
def discover
|
24
|
+
head = Nokogiri::HTML(data).at_css('head')
|
25
|
+
|
26
|
+
if base = head.at_css('base[href]')
|
27
|
+
base_uri = base['href']
|
28
|
+
else
|
29
|
+
base_uri = uri
|
30
|
+
end
|
31
|
+
|
32
|
+
head.css('link[rel=alternate]').select {|link|
|
33
|
+
link['href'] && link['type'] =~ /\Aapplication\/(rss|atom)\+xml\z/
|
34
|
+
}.map do |link|
|
35
|
+
if link['href'] =~ %r{\Ahttps?://} # absolute
|
36
|
+
uri = link['href']
|
37
|
+
else
|
38
|
+
uri = URI.join(base_uri, link['href']).to_s # relative
|
39
|
+
end
|
40
|
+
|
41
|
+
{ uri: uri, content_type: link['type'], title: link['title'] }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def discoverable?
|
46
|
+
content_type == 'text/html'
|
47
|
+
end
|
48
|
+
|
49
|
+
def fetch
|
50
|
+
@data, @content_type = open(uri) {|f| [f.read, f.content_type] }
|
51
|
+
end
|
52
|
+
|
53
|
+
def uri; @uri end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,28 +1,82 @@
|
|
1
|
-
require '
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'yaml'
|
2
3
|
|
3
4
|
module Feed2Email
|
4
|
-
class FeedHistory
|
5
|
-
def
|
6
|
-
|
7
|
-
|
5
|
+
class FeedHistory
|
6
|
+
def initialize(uri)
|
7
|
+
@uri = uri
|
8
|
+
@dirty = false
|
8
9
|
end
|
9
10
|
|
10
11
|
def any?
|
11
12
|
@old_feed ||= File.exist?(path)
|
12
13
|
end
|
13
14
|
|
15
|
+
def path
|
16
|
+
File.join(CONFIG_DIR, filename)
|
17
|
+
end
|
18
|
+
|
14
19
|
def include?(entry_uri)
|
15
20
|
data.include?(entry_uri) # delegate
|
16
21
|
end
|
17
22
|
|
23
|
+
def sync
|
24
|
+
open(path, 'w') {|f| f.write(to_yaml) } if dirty
|
25
|
+
end
|
26
|
+
|
27
|
+
def uri=(new_uri)
|
28
|
+
return if new_uri == uri
|
29
|
+
|
30
|
+
data # load data if not already loaded
|
31
|
+
remove_file
|
32
|
+
mark_dirty
|
33
|
+
@uri = new_uri
|
34
|
+
end
|
35
|
+
|
36
|
+
def <<(entry_uri)
|
37
|
+
mark_dirty
|
38
|
+
data << entry_uri
|
39
|
+
end
|
40
|
+
|
18
41
|
private
|
19
42
|
|
20
|
-
def
|
21
|
-
|
43
|
+
def data
|
44
|
+
@data ||= load_data
|
45
|
+
end
|
46
|
+
|
47
|
+
def dirty; @dirty end
|
48
|
+
|
49
|
+
def filename
|
50
|
+
"history-#{filename_suffix}.yml"
|
51
|
+
end
|
52
|
+
|
53
|
+
def filename_suffix
|
54
|
+
Digest::MD5.hexdigest(uri)
|
22
55
|
end
|
23
56
|
|
24
|
-
def
|
25
|
-
|
57
|
+
def load_data
|
58
|
+
begin
|
59
|
+
@data = YAML.load(open(path))
|
60
|
+
rescue Errno::ENOENT
|
61
|
+
@data = []
|
62
|
+
end
|
26
63
|
end
|
64
|
+
|
65
|
+
def mark_dirty
|
66
|
+
@dirty = true
|
67
|
+
end
|
68
|
+
|
69
|
+
def remove_file
|
70
|
+
begin
|
71
|
+
File.unlink(path)
|
72
|
+
rescue Errno::ENOENT
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_yaml
|
77
|
+
data.to_yaml # delegate
|
78
|
+
end
|
79
|
+
|
80
|
+
def uri; @uri end
|
27
81
|
end
|
28
82
|
end
|