feed2email 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,12 @@ module Feed2Email
14
14
  end
15
15
 
16
16
  def [](option)
17
- merged_config[option] # delegate
17
+ config[option] # delegate
18
+ end
19
+
20
+ def smtp_configured?
21
+ config['smtp_host'] && config['smtp_port'] && config['smtp_user'] &&
22
+ config['smtp_pass']
18
23
  end
19
24
 
20
25
  private
@@ -81,8 +86,8 @@ module Feed2Email
81
86
  File.read(path)
82
87
  end
83
88
 
84
- def merged_config
85
- @merged_config ||= defaults.merge(data)
89
+ def config
90
+ @config ||= defaults.merge(data)
86
91
  end
87
92
 
88
93
  def defaults
@@ -0,0 +1,7 @@
1
+ module Feed2Email
2
+ module Configurable
3
+ def config
4
+ Feed2Email.config # delegate
5
+ end
6
+ end
7
+ end
@@ -1,53 +1,24 @@
1
1
  require 'feedzirra'
2
- require 'forwardable'
3
- require 'net/http'
4
2
  require 'open-uri'
5
3
  require 'stringio'
6
- require 'uri'
7
4
  require 'zlib'
5
+ require 'feed2email/configurable'
8
6
  require 'feed2email/core_ext'
9
7
  require 'feed2email/entry'
10
8
  require 'feed2email/feed_history'
11
- require 'feed2email/feed_meta'
12
- require 'feed2email/feeds'
9
+ require 'feed2email/loggable'
10
+ require 'feed2email/redirection_checker'
13
11
  require 'feed2email/version'
14
12
 
15
13
  module Feed2Email
16
14
  class Feed
17
- extend Forwardable
15
+ include Configurable
16
+ include Loggable
18
17
 
19
- def self.feed_uris; @feed_uris end
18
+ attr_reader :meta
20
19
 
21
- def self.logger
22
- Feed2Email.logger # delegate
23
- end
24
-
25
- def self.smtp_connection
26
- Feed2Email.smtp_connection # delegate
27
- end
28
-
29
- logger.debug 'Loading feed subscriptions...'
30
- @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
31
- logger.info "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
32
-
33
- def self.process_all
34
- begin
35
- feed_uris.each_with_index do |uri, i|
36
- feed = new(uri)
37
- feed.process
38
- feed_uris[i] = feed.uri # persist possible permanent redirect
39
- end
40
- ensure
41
- smtp_connection.finalize
42
- end
43
-
44
- feed_uris.sync
45
- end
46
-
47
- attr_reader :uri
48
-
49
- def initialize(uri)
50
- @uri = uri
20
+ def initialize(meta)
21
+ @meta = meta
51
22
  end
52
23
 
53
24
  def process
@@ -56,23 +27,39 @@ module Feed2Email
56
27
  return unless fetch_and_parse_feed
57
28
 
58
29
  if entries.any?
59
- process_entries
30
+ processed = process_entries
60
31
  history.sync
61
- meta.sync
62
32
  else
33
+ processed = true
63
34
  logger.warn 'Feed does not have entries'
64
35
  end
36
+
37
+ processed
65
38
  end
66
39
 
67
40
  private
68
41
 
42
+ def apply_send_delay
43
+ return if config['send_delay'] == 0
44
+
45
+ return if last_email_sent_at.nil?
46
+
47
+ secs_since_last_email = Time.now - last_email_sent_at
48
+ secs_to_sleep = config['send_delay'] - secs_since_last_email
49
+
50
+ return if secs_to_sleep <= 0
51
+
52
+ logger.debug "Sleeping for #{secs_to_sleep} seconds..."
53
+ sleep(secs_to_sleep)
54
+ end
55
+
69
56
  def fetch_feed
70
57
  logger.debug 'Fetching feed...'
71
58
 
72
59
  begin
73
- handle_permanent_redirection
60
+ cache_feed = !permanently_redirected?
74
61
 
75
- open(uri, fetch_feed_options) do |f|
62
+ open(uri, fetch_feed_options(cache_feed)) do |f|
76
63
  if f.meta['last-modified'] || meta.has_key?(:last_modified)
77
64
  meta[:last_modified] = f.meta['last-modified']
78
65
  end
@@ -83,31 +70,28 @@ module Feed2Email
83
70
 
84
71
  return decode_content(f.read, f.meta['content-encoding'])
85
72
  end
86
- rescue OpenURI::HTTPError => e
87
- if e.message == '304 Not Modified'
73
+ rescue => e
74
+ if e.is_a?(OpenURI::HTTPError) && e.message == '304 Not Modified'
88
75
  logger.info 'Feed not modified; skipping...'
89
- return false
76
+ else
77
+ logger.error 'Failed to fetch feed'
78
+ log_exception(e)
90
79
  end
91
80
 
92
- raise
93
- rescue => e
94
- logger.error 'Failed to fetch feed'
95
- log_exception(e)
96
81
  return false
97
82
  end
98
83
  end
99
84
 
100
- def handle_permanent_redirection
101
- parsed_uri = URI.parse(uri)
102
- http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
103
- http.use_ssl = (parsed_uri.scheme == 'https')
104
- response = http.head(parsed_uri.request_uri)
85
+ def permanently_redirected?
86
+ checker = RedirectionChecker.new(uri)
105
87
 
106
- if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
107
- self.uri = response['location']
108
- logger.warn(
109
- "Got permanently redirected! Updated feed location to #{uri}")
110
- end
88
+ return false unless checker.permanently_redirected?
89
+
90
+ self.uri = checker.location
91
+ logger.warn 'Got permanently redirected!'
92
+ logger.warn "Updated feed location to #{checker.location}"
93
+
94
+ true
111
95
  end
112
96
 
113
97
  def decode_content(data, content_encoding)
@@ -125,18 +109,20 @@ module Feed2Email
125
109
  xml
126
110
  end
127
111
 
128
- def fetch_feed_options
112
+ def fetch_feed_options(cache_feed)
129
113
  options = {
130
114
  'User-Agent' => "feed2email/#{VERSION}",
131
115
  'Accept-Encoding' => 'gzip, deflate',
132
116
  }
133
117
 
134
- if meta[:last_modified]
135
- options['If-Modified-Since'] = meta[:last_modified]
136
- end
118
+ if cache_feed
119
+ if meta[:last_modified]
120
+ options['If-Modified-Since'] = meta[:last_modified]
121
+ end
137
122
 
138
- if meta[:etag]
139
- options['If-None-Match'] = meta[:etag]
123
+ if meta[:etag]
124
+ options['If-None-Match'] = meta[:etag]
125
+ end
140
126
  end
141
127
 
142
128
  options
@@ -162,10 +148,13 @@ module Feed2Email
162
148
  @data && @data.respond_to?(:entries)
163
149
  end
164
150
 
151
+ def uri
152
+ meta[:uri]
153
+ end
154
+
165
155
  def uri=(uri)
166
156
  history.uri = uri
167
- meta.uri = uri
168
- @uri = uri
157
+ meta[:uri] = uri
169
158
  end
170
159
 
171
160
  def entries
@@ -174,59 +163,58 @@ module Feed2Email
174
163
  }
175
164
  end
176
165
 
177
- def logger
178
- Feed2Email.logger # delegate
179
- end
180
-
181
166
  def max_entries
182
167
  config['max_entries'].to_i
183
168
  end
184
169
 
185
170
  def process_entries
186
171
  logger.info "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
187
- entries.each {|entry| process_entry(entry) }
172
+ entries.all? {|e| process_entry(e) } # false if any entry fails
188
173
  end
189
174
 
190
175
  def process_entry(entry)
191
176
  logger.info "Processing entry #{entry.uri} ..."
192
177
 
193
- if history.any?
194
- if history.include?(entry.uri)
195
- logger.debug 'Skipping old entry...'
196
- else
197
- # Sleep between entry processing to avoid Net::SMTPServerBusy errors
198
- if config['send_delay'] > 0
199
- logger.debug(
200
- "Sleeping for #{'second'.pluralize(config['send_delay'])}")
201
- sleep(config['send_delay'])
202
- end
178
+ unless history.any?
179
+ logger.debug 'Skipping new feed entry...'
180
+ history << entry.uri
181
+ return true
182
+ end
203
183
 
204
- logger.debug 'Sending new entry...'
184
+ if history.include?(entry.uri)
185
+ logger.debug 'Skipping old entry...'
186
+ return true
187
+ end
205
188
 
206
- begin
207
- entry.send_mail
208
- rescue => e
209
- log_exception(e)
210
- end
189
+ apply_send_delay
211
190
 
212
- if e.nil? # no errors
213
- history << entry.uri
214
- end
191
+ logger.debug 'Sending new entry...'
215
192
 
216
- e = nil
217
- end
218
- else
219
- logger.debug 'Skipping new feed entry...'
193
+ begin
194
+ mail_sent = entry.send_mail
195
+ rescue => e
196
+ log_exception(e)
197
+ return false
198
+ end
199
+
200
+ if mail_sent
201
+ self.last_email_sent_at = Time.now
220
202
  history << entry.uri
221
203
  end
204
+
205
+ mail_sent
222
206
  end
223
207
 
224
208
  def history
225
209
  @history ||= FeedHistory.new(uri)
226
210
  end
227
211
 
228
- def meta
229
- @meta ||= FeedMeta.new(uri)
212
+ def last_email_sent_at
213
+ @last_email_sent_at
214
+ end
215
+
216
+ def last_email_sent_at=(time)
217
+ @last_email_sent_at = time
230
218
  end
231
219
 
232
220
  def log_exception(error)
@@ -234,9 +222,9 @@ module Feed2Email
234
222
  error.backtrace.each {|line| logger.debug line }
235
223
  end
236
224
 
237
- def_delegator :data, :title, :title
238
-
239
- def_delegator :Feed2Email, :config, :config
225
+ def title
226
+ data.title # delegate
227
+ end
240
228
 
241
229
  def data; @data end
242
230
  end
@@ -0,0 +1,55 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ module Feed2Email
6
+ class FeedAutodiscoverer
7
+ def initialize(uri)
8
+ @uri = uri
9
+ end
10
+
11
+ def content_type; @content_type end
12
+
13
+ def feeds
14
+ return @feeds if @feeds
15
+ fetch
16
+ @feeds = discoverable? ? discover : []
17
+ end
18
+
19
+ private
20
+
21
+ def data; @data end
22
+
23
+ def discover
24
+ head = Nokogiri::HTML(data).at_css('head')
25
+
26
+ if base = head.at_css('base[href]')
27
+ base_uri = base['href']
28
+ else
29
+ base_uri = uri
30
+ end
31
+
32
+ head.css('link[rel=alternate]').select {|link|
33
+ link['href'] && link['type'] =~ /\Aapplication\/(rss|atom)\+xml\z/
34
+ }.map do |link|
35
+ if link['href'] =~ %r{\Ahttps?://} # absolute
36
+ uri = link['href']
37
+ else
38
+ uri = URI.join(base_uri, link['href']).to_s # relative
39
+ end
40
+
41
+ { uri: uri, content_type: link['type'], title: link['title'] }
42
+ end
43
+ end
44
+
45
+ def discoverable?
46
+ content_type == 'text/html'
47
+ end
48
+
49
+ def fetch
50
+ @data, @content_type = open(uri) {|f| [f.read, f.content_type] }
51
+ end
52
+
53
+ def uri; @uri end
54
+ end
55
+ end
@@ -1,28 +1,82 @@
1
- require 'feed2email/feed_data_file'
1
+ require 'digest/md5'
2
+ require 'yaml'
2
3
 
3
4
  module Feed2Email
4
- class FeedHistory < FeedDataFile
5
- def <<(entry_uri)
6
- mark_dirty
7
- data << entry_uri
5
+ class FeedHistory
6
+ def initialize(uri)
7
+ @uri = uri
8
+ @dirty = false
8
9
  end
9
10
 
10
11
  def any?
11
12
  @old_feed ||= File.exist?(path)
12
13
  end
13
14
 
15
+ def path
16
+ File.join(CONFIG_DIR, filename)
17
+ end
18
+
14
19
  def include?(entry_uri)
15
20
  data.include?(entry_uri) # delegate
16
21
  end
17
22
 
23
+ def sync
24
+ open(path, 'w') {|f| f.write(to_yaml) } if dirty
25
+ end
26
+
27
+ def uri=(new_uri)
28
+ return if new_uri == uri
29
+
30
+ data # load data if not already loaded
31
+ remove_file
32
+ mark_dirty
33
+ @uri = new_uri
34
+ end
35
+
36
+ def <<(entry_uri)
37
+ mark_dirty
38
+ data << entry_uri
39
+ end
40
+
18
41
  private
19
42
 
20
- def data_type
21
- Array
43
+ def data
44
+ @data ||= load_data
45
+ end
46
+
47
+ def dirty; @dirty end
48
+
49
+ def filename
50
+ "history-#{filename_suffix}.yml"
51
+ end
52
+
53
+ def filename_suffix
54
+ Digest::MD5.hexdigest(uri)
22
55
  end
23
56
 
24
- def filename_prefix
25
- 'history'
57
+ def load_data
58
+ begin
59
+ @data = YAML.load(open(path))
60
+ rescue Errno::ENOENT
61
+ @data = []
62
+ end
26
63
  end
64
+
65
+ def mark_dirty
66
+ @dirty = true
67
+ end
68
+
69
+ def remove_file
70
+ begin
71
+ File.unlink(path)
72
+ rescue Errno::ENOENT
73
+ end
74
+ end
75
+
76
+ def to_yaml
77
+ data.to_yaml # delegate
78
+ end
79
+
80
+ def uri; @uri end
27
81
  end
28
82
  end