feed2email 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +1,173 @@
1
+ require 'feedzirra'
2
+ require 'forwardable'
3
+ require 'net/http'
4
+ require 'open-uri'
5
+ require 'stringio'
6
+ require 'uri'
7
+ require 'zlib'
8
+ require 'feed2email/core_ext'
9
+ require 'feed2email/entry'
10
+ require 'feed2email/feed_history'
11
+ require 'feed2email/feed_meta'
12
+ require 'feed2email/feeds'
13
+ require 'feed2email/version'
14
+
1
15
  module Feed2Email
2
16
  class Feed
3
- FEEDS_FILE = File.join(CONFIG_DIR, 'feeds.yml')
4
- HISTORY_FILE = File.join(CONFIG_DIR, 'history.yml')
17
+ extend Forwardable
18
+
19
+ class << self
20
+ extend Forwardable
5
21
 
6
- def self.log(*args)
7
- Feed2Email::Logger.instance.log(*args)
22
+ def_delegators :Feed2Email, :config, :log
8
23
  end
9
24
 
10
25
  def self.process_all
11
- Feed2Email::Config.instance.read!
12
-
13
- log :debug, 'Loading feed subscriptions...'
14
- feed_uris = YAML.load(open(FEEDS_FILE)) rescue nil
15
-
16
- if !feed_uris.is_a? Array
17
- $stderr.puts "Error: missing or invalid feeds file #{FEEDS_FILE}"
18
- exit 4
26
+ feed_uris.each_with_index do |uri, i|
27
+ feed = new(uri)
28
+ feed.process
29
+ feed_uris[i] = feed.uri # persist possible permanent redirect
19
30
  end
20
31
 
21
- log :info, "Subscribed to #{n = feed_uris.size} feed#{n == 1 ? '' : 's'}"
22
-
23
- log :debug, 'Loading history...'
24
- @@history = YAML.load(open(HISTORY_FILE)) rescue {}
32
+ feed_uris.sync
33
+ end
25
34
 
26
- feed_uris.each do |uri|
27
- log :info, "Found feed #{uri}"
28
- Feed.new(uri).process
29
- end
35
+ def self.feed_uris
36
+ return @feed_uris if @feed_uris
30
37
 
31
- log :debug, 'Writing history...'
32
- open(HISTORY_FILE, 'w') {|f| f.write(@@history.to_yaml) }
38
+ log :debug, 'Loading feed subscriptions...'
39
+ @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
40
+ log :info, "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
41
+ @feed_uris
33
42
  end
34
43
 
44
+ attr_reader :uri
45
+
35
46
  def initialize(uri)
36
47
  @uri = uri
37
48
  end
38
49
 
39
50
  def process
40
- if fetched?
41
- log :debug, 'Feed is fetched'
51
+ log :info, "Processing feed #{uri} ..."
42
52
 
43
- if entries.any?
44
- log :info,
45
- "Processing #{n = entries.size} entr#{n == 1 ? 'y' : 'ies'}..."
46
- process_entries
47
- else
48
- log :warn, 'Feed does not have entries'
49
- end
53
+ return unless fetch_and_parse_feed
54
+
55
+ if entries.any?
56
+ process_entries
57
+ history.sync
58
+ meta.sync
50
59
  else
51
- log :error, 'Feed could not be fetched'
60
+ log :warn, 'Feed does not have entries'
52
61
  end
53
62
  end
54
63
 
55
64
  private
56
65
 
57
- def config
58
- Feed2Email::Config.instance.config
59
- end
66
+ def fetch_feed
67
+ log :debug, 'Fetching feed...'
68
+
69
+ begin
70
+ handle_permanent_redirection
71
+
72
+ open(uri, fetch_feed_options) do |f|
73
+ if f.meta['last-modified'] || meta.has_key?(:last_modified)
74
+ meta[:last_modified] = f.meta['last-modified']
75
+ end
60
76
 
61
- def data
62
- if @data.nil?
63
- log :debug, 'Fetching and parsing feed...'
77
+ if f.meta['etag'] || meta.has_key?(:etag)
78
+ meta[:etag] = f.meta['etag']
79
+ end
64
80
 
65
- begin
66
- @data = Feedzirra::Feed.fetch_and_parse(@uri,
67
- :user_agent => "feed2email/#{VERSION}",
68
- :compress => true
69
- )
70
- rescue => e
71
- log :error, "#{e.class}: #{e.message.strip}"
72
- e.backtrace.each {|line| log :debug, line }
81
+ return decode_content(f.read, f.meta['content-encoding'])
73
82
  end
83
+ rescue OpenURI::HTTPError => e
84
+ if e.message == '304 Not Modified'
85
+ log :info, 'Feed not modified; skipping...'
86
+ return false
87
+ end
88
+
89
+ raise
90
+ rescue => e
91
+ log :error, 'Failed to fetch feed'
92
+ log_exception(e)
93
+ return false
74
94
  end
95
+ end
96
+
97
+ def handle_permanent_redirection
98
+ parsed_uri = URI.parse(uri)
99
+ http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
100
+ http.use_ssl = (parsed_uri.scheme == 'https')
101
+ response = http.head(parsed_uri.request_uri)
75
102
 
76
- @data
103
+ if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
104
+ self.uri = response['location']
105
+ log :warn, "Got permanently redirected! Updated feed location to #{uri}"
106
+ end
77
107
  end
78
108
 
79
- def entries
80
- @entries ||= data.entries[0..max_entries - 1].map {|entry_data|
81
- Entry.new(entry_data, @uri, title)
109
+ def decode_content(data, content_encoding)
110
+ case content_encoding
111
+ when 'gzip'
112
+ gz = Zlib::GzipReader.new(StringIO.new(data))
113
+ xml = gz.read
114
+ gz.close
115
+ when 'deflate'
116
+ xml = Zlib::Inflate.inflate(data)
117
+ else
118
+ xml = data
119
+ end
120
+
121
+ xml
122
+ end
123
+
124
+ def fetch_feed_options
125
+ options = {
126
+ 'User-Agent' => "feed2email/#{VERSION}",
127
+ 'Accept-Encoding' => 'gzip, deflate',
82
128
  }
129
+
130
+ if meta[:last_modified]
131
+ options['If-Modified-Since'] = meta[:last_modified]
132
+ end
133
+
134
+ if meta[:etag]
135
+ options['If-None-Match'] = meta[:etag]
136
+ end
137
+
138
+ options
139
+ end
140
+
141
+ def parse_feed(xml_data)
142
+ log :debug, 'Parsing feed...'
143
+
144
+ begin
145
+ Feedzirra::Feed.parse(xml_data)
146
+ rescue => e
147
+ log :error, 'Failed to parse feed'
148
+ log_exception(e)
149
+ return false
150
+ end
151
+ end
152
+
153
+ def fetch_and_parse_feed
154
+ if xml_data = fetch_feed
155
+ @data = parse_feed(xml_data)
156
+ end
157
+
158
+ @data && @data.respond_to?(:entries)
159
+ end
160
+
161
+ def uri=(uri)
162
+ history.uri = uri
163
+ meta.uri = uri
164
+ @uri = uri
83
165
  end
84
166
 
85
- def fetched?
86
- data.respond_to?(:entries)
167
+ def entries
168
+ @entries ||= data.entries.first(max_entries).map {|entry_data|
169
+ Entry.new(entry_data, uri, title)
170
+ }
87
171
  end
88
172
 
89
173
  def log(*args)
@@ -91,50 +175,65 @@ module Feed2Email
91
175
  end
92
176
 
93
177
  def max_entries
94
- (config['max_entries'] || 20).to_i
178
+ config['max_entries'].to_i
95
179
  end
96
180
 
97
181
  def process_entries
98
- entries.each do |entry|
99
- log :info, "Found entry #{entry.uri}"
100
-
101
- if seen_before?
102
- if seen_entries.include?(entry.uri)
103
- log :debug, 'Skipping seen entry...'
104
- else
105
- log :debug, 'Processing new entry...'
106
-
107
- begin
108
- entry.process
109
- rescue => e
110
- log :error, "#{e.class}: #{e.message.strip}"
111
- e.backtrace.each {|line| log :debug, line }
112
- end
113
-
114
- seen_entries << entry.uri if e.nil? # record in history if no errors
115
- e = nil
116
- end
182
+ log :info, "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
183
+ entries.each {|entry| process_entry(entry) }
184
+ end
185
+
186
+ def process_entry(entry)
187
+ log :info, "Processing entry #{entry.uri} ..."
188
+
189
+ if history.any?
190
+ if history.include?(entry.uri)
191
+ log :debug, 'Skipping old entry...'
117
192
  else
118
- log :debug, 'Skipping new entry...'
119
- seen_entries << entry.uri # record in history
193
+ # Sleep between entry processing to avoid Net::SMTPServerBusy errors
194
+ if config['send_delay'] > 0
195
+ log :debug,
196
+ "Sleeping for #{'second'.pluralize(config['send_delay'])}"
197
+ sleep(config['send_delay'])
198
+ end
199
+
200
+ log :debug, 'Sending new entry...'
201
+
202
+ begin
203
+ entry.send_mail
204
+ rescue => e
205
+ log_exception(e)
206
+ end
207
+
208
+ if e.nil? # no errors
209
+ history << entry.uri
210
+ end
211
+
212
+ e = nil
120
213
  end
214
+ else
215
+ log :debug, 'Skipping new feed entry...'
216
+ history << entry.uri
121
217
  end
122
218
  end
123
219
 
124
- def seen_before?
125
- if @seen_before.nil?
126
- @seen_before = !@@history[@uri].nil?
127
- end
128
-
129
- @seen_before
220
+ def history
221
+ @history ||= FeedHistory.new(uri)
130
222
  end
131
223
 
132
- def seen_entries
133
- @@history[@uri] ||= []
224
+ def meta
225
+ @meta ||= FeedMeta.new(uri)
134
226
  end
135
227
 
136
- def title
137
- data.title
228
+ def log_exception(error)
229
+ log :error, "#{error.class}: #{error.message.strip}"
230
+ error.backtrace.each {|line| log :debug, line }
138
231
  end
232
+
233
+ def_delegator :data, :title, :title
234
+
235
+ def_delegator :Feed2Email, :config, :config
236
+
237
+ def data; @data end
139
238
  end
140
239
  end
@@ -0,0 +1,65 @@
1
+ require 'digest/md5'
2
+ require 'yaml'
3
+
4
+ module Feed2Email
5
+ class FeedDataFile
6
+ def initialize(uri)
7
+ @uri = uri
8
+ @dirty = false
9
+ end
10
+
11
+ def uri=(new_uri)
12
+ return if new_uri == uri
13
+
14
+ data # load data if not already loaded
15
+ remove_file
16
+ mark_dirty
17
+ @uri = new_uri
18
+ end
19
+
20
+ def sync
21
+ open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
22
+ end
23
+
24
+ private
25
+
26
+ def load_data
27
+ begin
28
+ @data = YAML.load(open(path))
29
+ rescue Errno::ENOENT
30
+ @data = data_type.new
31
+ end
32
+ end
33
+
34
+ def path
35
+ File.join(CONFIG_DIR, filename)
36
+ end
37
+
38
+ def filename
39
+ "#{filename_prefix}-#{filename_suffix}.yml"
40
+ end
41
+
42
+ def filename_suffix
43
+ Digest::MD5.hexdigest(uri)
44
+ end
45
+
46
+ def data
47
+ @data ||= load_data
48
+ end
49
+
50
+ def mark_dirty
51
+ @dirty = true
52
+ end
53
+
54
+ def remove_file
55
+ begin
56
+ File.unlink(path)
57
+ rescue Errno::ENOENT
58
+ end
59
+ end
60
+
61
+ def dirty; @dirty end
62
+
63
+ def uri; @uri end
64
+ end
65
+ end
@@ -0,0 +1,28 @@
1
+ require 'feed2email/feed_data_file'
2
+
3
+ module Feed2Email
4
+ class FeedHistory < FeedDataFile
5
+ def <<(entry_uri)
6
+ mark_dirty
7
+ data << entry_uri
8
+ end
9
+
10
+ def any?
11
+ @old_feed ||= File.exist?(path)
12
+ end
13
+
14
+ def include?(entry_uri)
15
+ data.include?(entry_uri) # delegate
16
+ end
17
+
18
+ private
19
+
20
+ def data_type
21
+ Array
22
+ end
23
+
24
+ def filename_prefix
25
+ 'history'
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,28 @@
1
+ require 'feed2email/feed_data_file'
2
+
3
+ module Feed2Email
4
+ class FeedMeta < FeedDataFile
5
+ def [](key)
6
+ data[key]
7
+ end
8
+
9
+ def []=(key, value)
10
+ mark_dirty if data[key] != value
11
+ data[key] = value
12
+ end
13
+
14
+ def has_key?(key)
15
+ data.has_key?(key)
16
+ end
17
+
18
+ private
19
+
20
+ def data_type
21
+ Hash
22
+ end
23
+
24
+ def filename_prefix
25
+ 'meta'
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,77 @@
1
+ require 'forwardable'
2
+ require 'yaml'
3
+
4
+ module Feed2Email
5
+ class Feeds
6
+ extend Forwardable
7
+
8
+ class MissingFeedsError < StandardError; end
9
+ class InvalidFeedsSyntaxError < StandardError; end
10
+ class InvalidFeedsDataTypeError < StandardError; end
11
+
12
+ def initialize(path)
13
+ @path = path
14
+ @dirty = false
15
+ check
16
+ end
17
+
18
+ def_delegators :data, :size, :each_with_index
19
+
20
+ def []=(index, uri)
21
+ mark_dirty if data[index] != uri
22
+ data[index] = uri
23
+ end
24
+
25
+ def sync
26
+ open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
27
+ end
28
+
29
+ private
30
+
31
+ def check
32
+ check_existence
33
+ check_syntax
34
+ check_data_type
35
+ end
36
+
37
+ def check_existence
38
+ if !File.exist?(path)
39
+ raise MissingFeedsError, "Missing feeds file #{path}"
40
+ end
41
+ end
42
+
43
+ def check_syntax
44
+ begin
45
+ load_yaml
46
+ rescue Psych::SyntaxError
47
+ raise InvalidFeedsSyntaxError,
48
+ "Invalid YAML syntax for feeds file #{path}"
49
+ end
50
+ end
51
+
52
+ def check_data_type
53
+ if !data.is_a?(Array)
54
+ raise InvalidFeedsDataTypeError,
55
+ "Invalid data type (not an Array) for feeds file #{path}"
56
+ end
57
+ end
58
+
59
+ def load_yaml
60
+ @data = YAML.load(read_file)
61
+ end
62
+
63
+ def read_file
64
+ File.read(path)
65
+ end
66
+
67
+ def mark_dirty
68
+ @dirty = true
69
+ end
70
+
71
+ def path; @path end
72
+
73
+ def data; @data end
74
+
75
+ def dirty; @dirty end
76
+ end
77
+ end
@@ -1,41 +1,42 @@
1
+ require 'logger'
2
+
1
3
  module Feed2Email
2
4
  class Logger
3
- include Singleton
5
+ def initialize(log_path, log_level)
6
+ @log_path = log_path
7
+ @log_level = log_level
8
+ end
4
9
 
5
10
  def log(severity, message)
6
- logger.add(::Logger.const_get(severity.upcase), message) if log?
11
+ logger.add(::Logger.const_get(severity.upcase), message)
7
12
  end
8
13
 
9
14
  private
10
15
 
11
- def config
12
- Feed2Email::Config.instance.config
13
- end
14
-
15
- def log?
16
- log_path != false
16
+ def log_to
17
+ if log_path == true
18
+ $stdout
19
+ elsif log_path # truthy but not true (a path)
20
+ File.expand_path(log_path)
21
+ end
17
22
  end
18
23
 
19
- def log_level
20
- config['log_level'] || 'info'
21
- end
24
+ def logger
25
+ return @logger if @logger
22
26
 
23
- def log_path
24
- config['log_path']
25
- end
27
+ @logger = ::Logger.new(log_to)
26
28
 
27
- def log_to
28
- if log_path.nil? || log_path == true
29
- STDOUT
29
+ if log_level
30
+ @logger.level = ::Logger.const_get(log_level.upcase)
30
31
  else
31
- File.expand_path(log_path)
32
+ @logger.level = ::Logger::INFO
32
33
  end
33
- end
34
34
 
35
- def logger
36
- @logger ||= ::Logger.new(log_to)
37
- @logger.level = ::Logger.const_get(log_level.upcase)
38
35
  @logger
39
36
  end
37
+
38
+ def log_path; @log_path end
39
+
40
+ def log_level; @log_level end
40
41
  end
41
42
  end