feed2email 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,12 @@ module Feed2Email
14
14
  end
15
15
 
16
16
  def [](option)
17
- merged_config[option] # delegate
17
+ config[option] # delegate
18
+ end
19
+
20
+ def smtp_configured?
21
+ config['smtp_host'] && config['smtp_port'] && config['smtp_user'] &&
22
+ config['smtp_pass']
18
23
  end
19
24
 
20
25
  private
@@ -81,8 +86,8 @@ module Feed2Email
81
86
  File.read(path)
82
87
  end
83
88
 
84
- def merged_config
85
- @merged_config ||= defaults.merge(data)
89
+ def config
90
+ @config ||= defaults.merge(data)
86
91
  end
87
92
 
88
93
  def defaults
@@ -0,0 +1,7 @@
1
+ module Feed2Email
2
+ module Configurable
3
+ def config
4
+ Feed2Email.config # delegate
5
+ end
6
+ end
7
+ end
@@ -1,53 +1,24 @@
1
1
  require 'feedzirra'
2
- require 'forwardable'
3
- require 'net/http'
4
2
  require 'open-uri'
5
3
  require 'stringio'
6
- require 'uri'
7
4
  require 'zlib'
5
+ require 'feed2email/configurable'
8
6
  require 'feed2email/core_ext'
9
7
  require 'feed2email/entry'
10
8
  require 'feed2email/feed_history'
11
- require 'feed2email/feed_meta'
12
- require 'feed2email/feeds'
9
+ require 'feed2email/loggable'
10
+ require 'feed2email/redirection_checker'
13
11
  require 'feed2email/version'
14
12
 
15
13
  module Feed2Email
16
14
  class Feed
17
- extend Forwardable
15
+ include Configurable
16
+ include Loggable
18
17
 
19
- def self.feed_uris; @feed_uris end
18
+ attr_reader :meta
20
19
 
21
- def self.logger
22
- Feed2Email.logger # delegate
23
- end
24
-
25
- def self.smtp_connection
26
- Feed2Email.smtp_connection # delegate
27
- end
28
-
29
- logger.debug 'Loading feed subscriptions...'
30
- @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
31
- logger.info "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
32
-
33
- def self.process_all
34
- begin
35
- feed_uris.each_with_index do |uri, i|
36
- feed = new(uri)
37
- feed.process
38
- feed_uris[i] = feed.uri # persist possible permanent redirect
39
- end
40
- ensure
41
- smtp_connection.finalize
42
- end
43
-
44
- feed_uris.sync
45
- end
46
-
47
- attr_reader :uri
48
-
49
- def initialize(uri)
50
- @uri = uri
20
+ def initialize(meta)
21
+ @meta = meta
51
22
  end
52
23
 
53
24
  def process
@@ -56,23 +27,39 @@ module Feed2Email
56
27
  return unless fetch_and_parse_feed
57
28
 
58
29
  if entries.any?
59
- process_entries
30
+ processed = process_entries
60
31
  history.sync
61
- meta.sync
62
32
  else
33
+ processed = true
63
34
  logger.warn 'Feed does not have entries'
64
35
  end
36
+
37
+ processed
65
38
  end
66
39
 
67
40
  private
68
41
 
42
+ def apply_send_delay
43
+ return if config['send_delay'] == 0
44
+
45
+ return if last_email_sent_at.nil?
46
+
47
+ secs_since_last_email = Time.now - last_email_sent_at
48
+ secs_to_sleep = config['send_delay'] - secs_since_last_email
49
+
50
+ return if secs_to_sleep <= 0
51
+
52
+ logger.debug "Sleeping for #{secs_to_sleep} seconds..."
53
+ sleep(secs_to_sleep)
54
+ end
55
+
69
56
  def fetch_feed
70
57
  logger.debug 'Fetching feed...'
71
58
 
72
59
  begin
73
- handle_permanent_redirection
60
+ cache_feed = !permanently_redirected?
74
61
 
75
- open(uri, fetch_feed_options) do |f|
62
+ open(uri, fetch_feed_options(cache_feed)) do |f|
76
63
  if f.meta['last-modified'] || meta.has_key?(:last_modified)
77
64
  meta[:last_modified] = f.meta['last-modified']
78
65
  end
@@ -83,31 +70,28 @@ module Feed2Email
83
70
 
84
71
  return decode_content(f.read, f.meta['content-encoding'])
85
72
  end
86
- rescue OpenURI::HTTPError => e
87
- if e.message == '304 Not Modified'
73
+ rescue => e
74
+ if e.is_a?(OpenURI::HTTPError) && e.message == '304 Not Modified'
88
75
  logger.info 'Feed not modified; skipping...'
89
- return false
76
+ else
77
+ logger.error 'Failed to fetch feed'
78
+ log_exception(e)
90
79
  end
91
80
 
92
- raise
93
- rescue => e
94
- logger.error 'Failed to fetch feed'
95
- log_exception(e)
96
81
  return false
97
82
  end
98
83
  end
99
84
 
100
- def handle_permanent_redirection
101
- parsed_uri = URI.parse(uri)
102
- http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
103
- http.use_ssl = (parsed_uri.scheme == 'https')
104
- response = http.head(parsed_uri.request_uri)
85
+ def permanently_redirected?
86
+ checker = RedirectionChecker.new(uri)
105
87
 
106
- if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
107
- self.uri = response['location']
108
- logger.warn(
109
- "Got permanently redirected! Updated feed location to #{uri}")
110
- end
88
+ return false unless checker.permanently_redirected?
89
+
90
+ self.uri = checker.location
91
+ logger.warn 'Got permanently redirected!'
92
+ logger.warn "Updated feed location to #{checker.location}"
93
+
94
+ true
111
95
  end
112
96
 
113
97
  def decode_content(data, content_encoding)
@@ -125,18 +109,20 @@ module Feed2Email
125
109
  xml
126
110
  end
127
111
 
128
- def fetch_feed_options
112
+ def fetch_feed_options(cache_feed)
129
113
  options = {
130
114
  'User-Agent' => "feed2email/#{VERSION}",
131
115
  'Accept-Encoding' => 'gzip, deflate',
132
116
  }
133
117
 
134
- if meta[:last_modified]
135
- options['If-Modified-Since'] = meta[:last_modified]
136
- end
118
+ if cache_feed
119
+ if meta[:last_modified]
120
+ options['If-Modified-Since'] = meta[:last_modified]
121
+ end
137
122
 
138
- if meta[:etag]
139
- options['If-None-Match'] = meta[:etag]
123
+ if meta[:etag]
124
+ options['If-None-Match'] = meta[:etag]
125
+ end
140
126
  end
141
127
 
142
128
  options
@@ -162,10 +148,13 @@ module Feed2Email
162
148
  @data && @data.respond_to?(:entries)
163
149
  end
164
150
 
151
+ def uri
152
+ meta[:uri]
153
+ end
154
+
165
155
  def uri=(uri)
166
156
  history.uri = uri
167
- meta.uri = uri
168
- @uri = uri
157
+ meta[:uri] = uri
169
158
  end
170
159
 
171
160
  def entries
@@ -174,59 +163,58 @@ module Feed2Email
174
163
  }
175
164
  end
176
165
 
177
- def logger
178
- Feed2Email.logger # delegate
179
- end
180
-
181
166
  def max_entries
182
167
  config['max_entries'].to_i
183
168
  end
184
169
 
185
170
  def process_entries
186
171
  logger.info "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
187
- entries.each {|entry| process_entry(entry) }
172
+ entries.all? {|e| process_entry(e) } # false if any entry fails
188
173
  end
189
174
 
190
175
  def process_entry(entry)
191
176
  logger.info "Processing entry #{entry.uri} ..."
192
177
 
193
- if history.any?
194
- if history.include?(entry.uri)
195
- logger.debug 'Skipping old entry...'
196
- else
197
- # Sleep between entry processing to avoid Net::SMTPServerBusy errors
198
- if config['send_delay'] > 0
199
- logger.debug(
200
- "Sleeping for #{'second'.pluralize(config['send_delay'])}")
201
- sleep(config['send_delay'])
202
- end
178
+ unless history.any?
179
+ logger.debug 'Skipping new feed entry...'
180
+ history << entry.uri
181
+ return true
182
+ end
203
183
 
204
- logger.debug 'Sending new entry...'
184
+ if history.include?(entry.uri)
185
+ logger.debug 'Skipping old entry...'
186
+ return true
187
+ end
205
188
 
206
- begin
207
- entry.send_mail
208
- rescue => e
209
- log_exception(e)
210
- end
189
+ apply_send_delay
211
190
 
212
- if e.nil? # no errors
213
- history << entry.uri
214
- end
191
+ logger.debug 'Sending new entry...'
215
192
 
216
- e = nil
217
- end
218
- else
219
- logger.debug 'Skipping new feed entry...'
193
+ begin
194
+ mail_sent = entry.send_mail
195
+ rescue => e
196
+ log_exception(e)
197
+ return false
198
+ end
199
+
200
+ if mail_sent
201
+ self.last_email_sent_at = Time.now
220
202
  history << entry.uri
221
203
  end
204
+
205
+ mail_sent
222
206
  end
223
207
 
224
208
  def history
225
209
  @history ||= FeedHistory.new(uri)
226
210
  end
227
211
 
228
- def meta
229
- @meta ||= FeedMeta.new(uri)
212
+ def last_email_sent_at
213
+ @last_email_sent_at
214
+ end
215
+
216
+ def last_email_sent_at=(time)
217
+ @last_email_sent_at = time
230
218
  end
231
219
 
232
220
  def log_exception(error)
@@ -234,9 +222,9 @@ module Feed2Email
234
222
  error.backtrace.each {|line| logger.debug line }
235
223
  end
236
224
 
237
- def_delegator :data, :title, :title
238
-
239
- def_delegator :Feed2Email, :config, :config
225
+ def title
226
+ data.title # delegate
227
+ end
240
228
 
241
229
  def data; @data end
242
230
  end
@@ -0,0 +1,55 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ module Feed2Email
6
+ class FeedAutodiscoverer
7
+ def initialize(uri)
8
+ @uri = uri
9
+ end
10
+
11
+ def content_type; @content_type end
12
+
13
+ def feeds
14
+ return @feeds if @feeds
15
+ fetch
16
+ @feeds = discoverable? ? discover : []
17
+ end
18
+
19
+ private
20
+
21
+ def data; @data end
22
+
23
+ def discover
24
+ head = Nokogiri::HTML(data).at_css('head')
25
+
26
+ if base = head.at_css('base[href]')
27
+ base_uri = base['href']
28
+ else
29
+ base_uri = uri
30
+ end
31
+
32
+ head.css('link[rel=alternate]').select {|link|
33
+ link['href'] && link['type'] =~ /\Aapplication\/(rss|atom)\+xml\z/
34
+ }.map do |link|
35
+ if link['href'] =~ %r{\Ahttps?://} # absolute
36
+ uri = link['href']
37
+ else
38
+ uri = URI.join(base_uri, link['href']).to_s # relative
39
+ end
40
+
41
+ { uri: uri, content_type: link['type'], title: link['title'] }
42
+ end
43
+ end
44
+
45
+ def discoverable?
46
+ content_type == 'text/html'
47
+ end
48
+
49
+ def fetch
50
+ @data, @content_type = open(uri) {|f| [f.read, f.content_type] }
51
+ end
52
+
53
+ def uri; @uri end
54
+ end
55
+ end
@@ -1,28 +1,82 @@
1
- require 'feed2email/feed_data_file'
1
+ require 'digest/md5'
2
+ require 'yaml'
2
3
 
3
4
  module Feed2Email
4
- class FeedHistory < FeedDataFile
5
- def <<(entry_uri)
6
- mark_dirty
7
- data << entry_uri
5
+ class FeedHistory
6
+ def initialize(uri)
7
+ @uri = uri
8
+ @dirty = false
8
9
  end
9
10
 
10
11
  def any?
11
12
  @old_feed ||= File.exist?(path)
12
13
  end
13
14
 
15
+ def path
16
+ File.join(CONFIG_DIR, filename)
17
+ end
18
+
14
19
  def include?(entry_uri)
15
20
  data.include?(entry_uri) # delegate
16
21
  end
17
22
 
23
+ def sync
24
+ open(path, 'w') {|f| f.write(to_yaml) } if dirty
25
+ end
26
+
27
+ def uri=(new_uri)
28
+ return if new_uri == uri
29
+
30
+ data # load data if not already loaded
31
+ remove_file
32
+ mark_dirty
33
+ @uri = new_uri
34
+ end
35
+
36
+ def <<(entry_uri)
37
+ mark_dirty
38
+ data << entry_uri
39
+ end
40
+
18
41
  private
19
42
 
20
- def data_type
21
- Array
43
+ def data
44
+ @data ||= load_data
45
+ end
46
+
47
+ def dirty; @dirty end
48
+
49
+ def filename
50
+ "history-#{filename_suffix}.yml"
51
+ end
52
+
53
+ def filename_suffix
54
+ Digest::MD5.hexdigest(uri)
22
55
  end
23
56
 
24
- def filename_prefix
25
- 'history'
57
+ def load_data
58
+ begin
59
+ @data = YAML.load(open(path))
60
+ rescue Errno::ENOENT
61
+ @data = []
62
+ end
26
63
  end
64
+
65
+ def mark_dirty
66
+ @dirty = true
67
+ end
68
+
69
+ def remove_file
70
+ begin
71
+ File.unlink(path)
72
+ rescue Errno::ENOENT
73
+ end
74
+ end
75
+
76
+ def to_yaml
77
+ data.to_yaml # delegate
78
+ end
79
+
80
+ def uri; @uri end
27
81
  end
28
82
  end