feed2email 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/README.md +124 -59
- data/bin/f2e +9 -0
- data/bin/feed2email +4 -6
- data/bin/feed2email-migrate-feedlist +36 -0
- data/lib/feed2email.rb +16 -11
- data/lib/feed2email/cli.rb +158 -0
- data/lib/feed2email/config.rb +8 -3
- data/lib/feed2email/configurable.rb +7 -0
- data/lib/feed2email/feed.rb +87 -99
- data/lib/feed2email/feed_autodiscoverer.rb +55 -0
- data/lib/feed2email/feed_history.rb +63 -9
- data/lib/feed2email/feed_list.rb +147 -0
- data/lib/feed2email/lazy_smtp_connection.rb +6 -5
- data/lib/feed2email/loggable.rb +7 -0
- data/lib/feed2email/mail.rb +23 -39
- data/lib/feed2email/redirection_checker.rb +38 -0
- data/lib/feed2email/version.rb +1 -1
- metadata +61 -15
- data/TODO.md +0 -16
- data/lib/feed2email/feed_data_file.rb +0 -65
- data/lib/feed2email/feed_meta.rb +0 -28
- data/lib/feed2email/feeds.rb +0 -77
data/lib/feed2email/config.rb
CHANGED
@@ -14,7 +14,12 @@ module Feed2Email
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def [](option)
|
17
|
-
|
17
|
+
config[option] # delegate
|
18
|
+
end
|
19
|
+
|
20
|
+
def smtp_configured?
|
21
|
+
config['smtp_host'] && config['smtp_port'] && config['smtp_user'] &&
|
22
|
+
config['smtp_pass']
|
18
23
|
end
|
19
24
|
|
20
25
|
private
|
@@ -81,8 +86,8 @@ module Feed2Email
|
|
81
86
|
File.read(path)
|
82
87
|
end
|
83
88
|
|
84
|
-
def
|
85
|
-
@
|
89
|
+
def config
|
90
|
+
@config ||= defaults.merge(data)
|
86
91
|
end
|
87
92
|
|
88
93
|
def defaults
|
data/lib/feed2email/feed.rb
CHANGED
@@ -1,53 +1,24 @@
|
|
1
1
|
require 'feedzirra'
|
2
|
-
require 'forwardable'
|
3
|
-
require 'net/http'
|
4
2
|
require 'open-uri'
|
5
3
|
require 'stringio'
|
6
|
-
require 'uri'
|
7
4
|
require 'zlib'
|
5
|
+
require 'feed2email/configurable'
|
8
6
|
require 'feed2email/core_ext'
|
9
7
|
require 'feed2email/entry'
|
10
8
|
require 'feed2email/feed_history'
|
11
|
-
require 'feed2email/
|
12
|
-
require 'feed2email/
|
9
|
+
require 'feed2email/loggable'
|
10
|
+
require 'feed2email/redirection_checker'
|
13
11
|
require 'feed2email/version'
|
14
12
|
|
15
13
|
module Feed2Email
|
16
14
|
class Feed
|
17
|
-
|
15
|
+
include Configurable
|
16
|
+
include Loggable
|
18
17
|
|
19
|
-
|
18
|
+
attr_reader :meta
|
20
19
|
|
21
|
-
def
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.smtp_connection
|
26
|
-
Feed2Email.smtp_connection # delegate
|
27
|
-
end
|
28
|
-
|
29
|
-
logger.debug 'Loading feed subscriptions...'
|
30
|
-
@feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
|
31
|
-
logger.info "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
|
32
|
-
|
33
|
-
def self.process_all
|
34
|
-
begin
|
35
|
-
feed_uris.each_with_index do |uri, i|
|
36
|
-
feed = new(uri)
|
37
|
-
feed.process
|
38
|
-
feed_uris[i] = feed.uri # persist possible permanent redirect
|
39
|
-
end
|
40
|
-
ensure
|
41
|
-
smtp_connection.finalize
|
42
|
-
end
|
43
|
-
|
44
|
-
feed_uris.sync
|
45
|
-
end
|
46
|
-
|
47
|
-
attr_reader :uri
|
48
|
-
|
49
|
-
def initialize(uri)
|
50
|
-
@uri = uri
|
20
|
+
def initialize(meta)
|
21
|
+
@meta = meta
|
51
22
|
end
|
52
23
|
|
53
24
|
def process
|
@@ -56,23 +27,39 @@ module Feed2Email
|
|
56
27
|
return unless fetch_and_parse_feed
|
57
28
|
|
58
29
|
if entries.any?
|
59
|
-
process_entries
|
30
|
+
processed = process_entries
|
60
31
|
history.sync
|
61
|
-
meta.sync
|
62
32
|
else
|
33
|
+
processed = true
|
63
34
|
logger.warn 'Feed does not have entries'
|
64
35
|
end
|
36
|
+
|
37
|
+
processed
|
65
38
|
end
|
66
39
|
|
67
40
|
private
|
68
41
|
|
42
|
+
def apply_send_delay
|
43
|
+
return if config['send_delay'] == 0
|
44
|
+
|
45
|
+
return if last_email_sent_at.nil?
|
46
|
+
|
47
|
+
secs_since_last_email = Time.now - last_email_sent_at
|
48
|
+
secs_to_sleep = config['send_delay'] - secs_since_last_email
|
49
|
+
|
50
|
+
return if secs_to_sleep <= 0
|
51
|
+
|
52
|
+
logger.debug "Sleeping for #{secs_to_sleep} seconds..."
|
53
|
+
sleep(secs_to_sleep)
|
54
|
+
end
|
55
|
+
|
69
56
|
def fetch_feed
|
70
57
|
logger.debug 'Fetching feed...'
|
71
58
|
|
72
59
|
begin
|
73
|
-
|
60
|
+
cache_feed = !permanently_redirected?
|
74
61
|
|
75
|
-
open(uri, fetch_feed_options) do |f|
|
62
|
+
open(uri, fetch_feed_options(cache_feed)) do |f|
|
76
63
|
if f.meta['last-modified'] || meta.has_key?(:last_modified)
|
77
64
|
meta[:last_modified] = f.meta['last-modified']
|
78
65
|
end
|
@@ -83,31 +70,28 @@ module Feed2Email
|
|
83
70
|
|
84
71
|
return decode_content(f.read, f.meta['content-encoding'])
|
85
72
|
end
|
86
|
-
rescue
|
87
|
-
if e.message == '304 Not Modified'
|
73
|
+
rescue => e
|
74
|
+
if e.is_a?(OpenURI::HTTPError) && e.message == '304 Not Modified'
|
88
75
|
logger.info 'Feed not modified; skipping...'
|
89
|
-
|
76
|
+
else
|
77
|
+
logger.error 'Failed to fetch feed'
|
78
|
+
log_exception(e)
|
90
79
|
end
|
91
80
|
|
92
|
-
raise
|
93
|
-
rescue => e
|
94
|
-
logger.error 'Failed to fetch feed'
|
95
|
-
log_exception(e)
|
96
81
|
return false
|
97
82
|
end
|
98
83
|
end
|
99
84
|
|
100
|
-
def
|
101
|
-
|
102
|
-
http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
|
103
|
-
http.use_ssl = (parsed_uri.scheme == 'https')
|
104
|
-
response = http.head(parsed_uri.request_uri)
|
85
|
+
def permanently_redirected?
|
86
|
+
checker = RedirectionChecker.new(uri)
|
105
87
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
88
|
+
return false unless checker.permanently_redirected?
|
89
|
+
|
90
|
+
self.uri = checker.location
|
91
|
+
logger.warn 'Got permanently redirected!'
|
92
|
+
logger.warn "Updated feed location to #{checker.location}"
|
93
|
+
|
94
|
+
true
|
111
95
|
end
|
112
96
|
|
113
97
|
def decode_content(data, content_encoding)
|
@@ -125,18 +109,20 @@ module Feed2Email
|
|
125
109
|
xml
|
126
110
|
end
|
127
111
|
|
128
|
-
def fetch_feed_options
|
112
|
+
def fetch_feed_options(cache_feed)
|
129
113
|
options = {
|
130
114
|
'User-Agent' => "feed2email/#{VERSION}",
|
131
115
|
'Accept-Encoding' => 'gzip, deflate',
|
132
116
|
}
|
133
117
|
|
134
|
-
if
|
135
|
-
|
136
|
-
|
118
|
+
if cache_feed
|
119
|
+
if meta[:last_modified]
|
120
|
+
options['If-Modified-Since'] = meta[:last_modified]
|
121
|
+
end
|
137
122
|
|
138
|
-
|
139
|
-
|
123
|
+
if meta[:etag]
|
124
|
+
options['If-None-Match'] = meta[:etag]
|
125
|
+
end
|
140
126
|
end
|
141
127
|
|
142
128
|
options
|
@@ -162,10 +148,13 @@ module Feed2Email
|
|
162
148
|
@data && @data.respond_to?(:entries)
|
163
149
|
end
|
164
150
|
|
151
|
+
def uri
|
152
|
+
meta[:uri]
|
153
|
+
end
|
154
|
+
|
165
155
|
def uri=(uri)
|
166
156
|
history.uri = uri
|
167
|
-
meta
|
168
|
-
@uri = uri
|
157
|
+
meta[:uri] = uri
|
169
158
|
end
|
170
159
|
|
171
160
|
def entries
|
@@ -174,59 +163,58 @@ module Feed2Email
|
|
174
163
|
}
|
175
164
|
end
|
176
165
|
|
177
|
-
def logger
|
178
|
-
Feed2Email.logger # delegate
|
179
|
-
end
|
180
|
-
|
181
166
|
def max_entries
|
182
167
|
config['max_entries'].to_i
|
183
168
|
end
|
184
169
|
|
185
170
|
def process_entries
|
186
171
|
logger.info "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
|
187
|
-
entries.
|
172
|
+
entries.all? {|e| process_entry(e) } # false if any entry fails
|
188
173
|
end
|
189
174
|
|
190
175
|
def process_entry(entry)
|
191
176
|
logger.info "Processing entry #{entry.uri} ..."
|
192
177
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
if config['send_delay'] > 0
|
199
|
-
logger.debug(
|
200
|
-
"Sleeping for #{'second'.pluralize(config['send_delay'])}")
|
201
|
-
sleep(config['send_delay'])
|
202
|
-
end
|
178
|
+
unless history.any?
|
179
|
+
logger.debug 'Skipping new feed entry...'
|
180
|
+
history << entry.uri
|
181
|
+
return true
|
182
|
+
end
|
203
183
|
|
204
|
-
|
184
|
+
if history.include?(entry.uri)
|
185
|
+
logger.debug 'Skipping old entry...'
|
186
|
+
return true
|
187
|
+
end
|
205
188
|
|
206
|
-
|
207
|
-
entry.send_mail
|
208
|
-
rescue => e
|
209
|
-
log_exception(e)
|
210
|
-
end
|
189
|
+
apply_send_delay
|
211
190
|
|
212
|
-
|
213
|
-
history << entry.uri
|
214
|
-
end
|
191
|
+
logger.debug 'Sending new entry...'
|
215
192
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
193
|
+
begin
|
194
|
+
mail_sent = entry.send_mail
|
195
|
+
rescue => e
|
196
|
+
log_exception(e)
|
197
|
+
return false
|
198
|
+
end
|
199
|
+
|
200
|
+
if mail_sent
|
201
|
+
self.last_email_sent_at = Time.now
|
220
202
|
history << entry.uri
|
221
203
|
end
|
204
|
+
|
205
|
+
mail_sent
|
222
206
|
end
|
223
207
|
|
224
208
|
def history
|
225
209
|
@history ||= FeedHistory.new(uri)
|
226
210
|
end
|
227
211
|
|
228
|
-
def
|
229
|
-
@
|
212
|
+
def last_email_sent_at
|
213
|
+
@last_email_sent_at
|
214
|
+
end
|
215
|
+
|
216
|
+
def last_email_sent_at=(time)
|
217
|
+
@last_email_sent_at = time
|
230
218
|
end
|
231
219
|
|
232
220
|
def log_exception(error)
|
@@ -234,9 +222,9 @@ module Feed2Email
|
|
234
222
|
error.backtrace.each {|line| logger.debug line }
|
235
223
|
end
|
236
224
|
|
237
|
-
|
238
|
-
|
239
|
-
|
225
|
+
def title
|
226
|
+
data.title # delegate
|
227
|
+
end
|
240
228
|
|
241
229
|
def data; @data end
|
242
230
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module Feed2Email
|
6
|
+
class FeedAutodiscoverer
|
7
|
+
def initialize(uri)
|
8
|
+
@uri = uri
|
9
|
+
end
|
10
|
+
|
11
|
+
def content_type; @content_type end
|
12
|
+
|
13
|
+
def feeds
|
14
|
+
return @feeds if @feeds
|
15
|
+
fetch
|
16
|
+
@feeds = discoverable? ? discover : []
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def data; @data end
|
22
|
+
|
23
|
+
def discover
|
24
|
+
head = Nokogiri::HTML(data).at_css('head')
|
25
|
+
|
26
|
+
if base = head.at_css('base[href]')
|
27
|
+
base_uri = base['href']
|
28
|
+
else
|
29
|
+
base_uri = uri
|
30
|
+
end
|
31
|
+
|
32
|
+
head.css('link[rel=alternate]').select {|link|
|
33
|
+
link['href'] && link['type'] =~ /\Aapplication\/(rss|atom)\+xml\z/
|
34
|
+
}.map do |link|
|
35
|
+
if link['href'] =~ %r{\Ahttps?://} # absolute
|
36
|
+
uri = link['href']
|
37
|
+
else
|
38
|
+
uri = URI.join(base_uri, link['href']).to_s # relative
|
39
|
+
end
|
40
|
+
|
41
|
+
{ uri: uri, content_type: link['type'], title: link['title'] }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def discoverable?
|
46
|
+
content_type == 'text/html'
|
47
|
+
end
|
48
|
+
|
49
|
+
def fetch
|
50
|
+
@data, @content_type = open(uri) {|f| [f.read, f.content_type] }
|
51
|
+
end
|
52
|
+
|
53
|
+
def uri; @uri end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,28 +1,82 @@
|
|
1
|
-
require '
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'yaml'
|
2
3
|
|
3
4
|
module Feed2Email
|
4
|
-
class FeedHistory
|
5
|
-
def
|
6
|
-
|
7
|
-
|
5
|
+
class FeedHistory
|
6
|
+
def initialize(uri)
|
7
|
+
@uri = uri
|
8
|
+
@dirty = false
|
8
9
|
end
|
9
10
|
|
10
11
|
def any?
|
11
12
|
@old_feed ||= File.exist?(path)
|
12
13
|
end
|
13
14
|
|
15
|
+
def path
|
16
|
+
File.join(CONFIG_DIR, filename)
|
17
|
+
end
|
18
|
+
|
14
19
|
def include?(entry_uri)
|
15
20
|
data.include?(entry_uri) # delegate
|
16
21
|
end
|
17
22
|
|
23
|
+
def sync
|
24
|
+
open(path, 'w') {|f| f.write(to_yaml) } if dirty
|
25
|
+
end
|
26
|
+
|
27
|
+
def uri=(new_uri)
|
28
|
+
return if new_uri == uri
|
29
|
+
|
30
|
+
data # load data if not already loaded
|
31
|
+
remove_file
|
32
|
+
mark_dirty
|
33
|
+
@uri = new_uri
|
34
|
+
end
|
35
|
+
|
36
|
+
def <<(entry_uri)
|
37
|
+
mark_dirty
|
38
|
+
data << entry_uri
|
39
|
+
end
|
40
|
+
|
18
41
|
private
|
19
42
|
|
20
|
-
def
|
21
|
-
|
43
|
+
def data
|
44
|
+
@data ||= load_data
|
45
|
+
end
|
46
|
+
|
47
|
+
def dirty; @dirty end
|
48
|
+
|
49
|
+
def filename
|
50
|
+
"history-#{filename_suffix}.yml"
|
51
|
+
end
|
52
|
+
|
53
|
+
def filename_suffix
|
54
|
+
Digest::MD5.hexdigest(uri)
|
22
55
|
end
|
23
56
|
|
24
|
-
def
|
25
|
-
|
57
|
+
def load_data
|
58
|
+
begin
|
59
|
+
@data = YAML.load(open(path))
|
60
|
+
rescue Errno::ENOENT
|
61
|
+
@data = []
|
62
|
+
end
|
26
63
|
end
|
64
|
+
|
65
|
+
def mark_dirty
|
66
|
+
@dirty = true
|
67
|
+
end
|
68
|
+
|
69
|
+
def remove_file
|
70
|
+
begin
|
71
|
+
File.unlink(path)
|
72
|
+
rescue Errno::ENOENT
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def to_yaml
|
77
|
+
data.to_yaml # delegate
|
78
|
+
end
|
79
|
+
|
80
|
+
def uri; @uri end
|
27
81
|
end
|
28
82
|
end
|