feed2email 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,89 +1,173 @@
1
+ require 'feedzirra'
2
+ require 'forwardable'
3
+ require 'net/http'
4
+ require 'open-uri'
5
+ require 'stringio'
6
+ require 'uri'
7
+ require 'zlib'
8
+ require 'feed2email/core_ext'
9
+ require 'feed2email/entry'
10
+ require 'feed2email/feed_history'
11
+ require 'feed2email/feed_meta'
12
+ require 'feed2email/feeds'
13
+ require 'feed2email/version'
14
+
1
15
  module Feed2Email
2
16
  class Feed
3
- FEEDS_FILE = File.join(CONFIG_DIR, 'feeds.yml')
4
- HISTORY_FILE = File.join(CONFIG_DIR, 'history.yml')
17
+ extend Forwardable
18
+
19
+ class << self
20
+ extend Forwardable
5
21
 
6
- def self.log(*args)
7
- Feed2Email::Logger.instance.log(*args)
22
+ def_delegators :Feed2Email, :config, :log
8
23
  end
9
24
 
10
25
  def self.process_all
11
- Feed2Email::Config.instance.read!
12
-
13
- log :debug, 'Loading feed subscriptions...'
14
- feed_uris = YAML.load(open(FEEDS_FILE)) rescue nil
15
-
16
- if !feed_uris.is_a? Array
17
- $stderr.puts "Error: missing or invalid feeds file #{FEEDS_FILE}"
18
- exit 4
26
+ feed_uris.each_with_index do |uri, i|
27
+ feed = new(uri)
28
+ feed.process
29
+ feed_uris[i] = feed.uri # persist possible permanent redirect
19
30
  end
20
31
 
21
- log :info, "Subscribed to #{n = feed_uris.size} feed#{n == 1 ? '' : 's'}"
22
-
23
- log :debug, 'Loading history...'
24
- @@history = YAML.load(open(HISTORY_FILE)) rescue {}
32
+ feed_uris.sync
33
+ end
25
34
 
26
- feed_uris.each do |uri|
27
- log :info, "Found feed #{uri}"
28
- Feed.new(uri).process
29
- end
35
+ def self.feed_uris
36
+ return @feed_uris if @feed_uris
30
37
 
31
- log :debug, 'Writing history...'
32
- open(HISTORY_FILE, 'w') {|f| f.write(@@history.to_yaml) }
38
+ log :debug, 'Loading feed subscriptions...'
39
+ @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
40
+ log :info, "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
41
+ @feed_uris
33
42
  end
34
43
 
44
+ attr_reader :uri
45
+
35
46
  def initialize(uri)
36
47
  @uri = uri
37
48
  end
38
49
 
39
50
  def process
40
- if fetched?
41
- log :debug, 'Feed is fetched'
51
+ log :info, "Processing feed #{uri} ..."
42
52
 
43
- if entries.any?
44
- log :info,
45
- "Processing #{n = entries.size} entr#{n == 1 ? 'y' : 'ies'}..."
46
- process_entries
47
- else
48
- log :warn, 'Feed does not have entries'
49
- end
53
+ return unless fetch_and_parse_feed
54
+
55
+ if entries.any?
56
+ process_entries
57
+ history.sync
58
+ meta.sync
50
59
  else
51
- log :error, 'Feed could not be fetched'
60
+ log :warn, 'Feed does not have entries'
52
61
  end
53
62
  end
54
63
 
55
64
  private
56
65
 
57
- def config
58
- Feed2Email::Config.instance.config
59
- end
66
+ def fetch_feed
67
+ log :debug, 'Fetching feed...'
68
+
69
+ begin
70
+ handle_permanent_redirection
71
+
72
+ open(uri, fetch_feed_options) do |f|
73
+ if f.meta['last-modified'] || meta.has_key?(:last_modified)
74
+ meta[:last_modified] = f.meta['last-modified']
75
+ end
60
76
 
61
- def data
62
- if @data.nil?
63
- log :debug, 'Fetching and parsing feed...'
77
+ if f.meta['etag'] || meta.has_key?(:etag)
78
+ meta[:etag] = f.meta['etag']
79
+ end
64
80
 
65
- begin
66
- @data = Feedzirra::Feed.fetch_and_parse(@uri,
67
- :user_agent => "feed2email/#{VERSION}",
68
- :compress => true
69
- )
70
- rescue => e
71
- log :error, "#{e.class}: #{e.message.strip}"
72
- e.backtrace.each {|line| log :debug, line }
81
+ return decode_content(f.read, f.meta['content-encoding'])
73
82
  end
83
+ rescue OpenURI::HTTPError => e
84
+ if e.message == '304 Not Modified'
85
+ log :info, 'Feed not modified; skipping...'
86
+ return false
87
+ end
88
+
89
+ raise
90
+ rescue => e
91
+ log :error, 'Failed to fetch feed'
92
+ log_exception(e)
93
+ return false
74
94
  end
95
+ end
96
+
97
+ def handle_permanent_redirection
98
+ parsed_uri = URI.parse(uri)
99
+ http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
100
+ http.use_ssl = (parsed_uri.scheme == 'https')
101
+ response = http.head(parsed_uri.request_uri)
75
102
 
76
- @data
103
+ if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
104
+ self.uri = response['location']
105
+ log :warn, "Got permanently redirected! Updated feed location to #{uri}"
106
+ end
77
107
  end
78
108
 
79
- def entries
80
- @entries ||= data.entries[0..max_entries - 1].map {|entry_data|
81
- Entry.new(entry_data, @uri, title)
109
# Decodes an HTTP response body according to its Content-Encoding header.
#
# data             - String holding the raw response body.
# content_encoding - String value of the Content-Encoding header, or nil.
#
# The header value is matched case-insensitively and ignoring surrounding
# whitespace; the legacy "x-gzip" token is handled like "gzip". A missing or
# unrecognized encoding returns data unchanged.
#
# Returns the decoded body as a String.
# Raises a Zlib::Error subclass if the body is corrupt.
def decode_content(data, content_encoding)
  case content_encoding.to_s.strip.downcase
  when 'gzip', 'x-gzip'
    gz = Zlib::GzipReader.new(StringIO.new(data))

    begin
      gz.read
    ensure
      gz.close # release the reader even if the stream is truncated
    end
  when 'deflate'
    begin
      Zlib::Inflate.inflate(data)
    rescue Zlib::DataError
      # Some servers send a raw DEFLATE stream without the zlib wrapper;
      # negative window bits tell zlib not to expect a header.
      inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)

      begin
        inflater.inflate(data)
      ensure
        inflater.close
      end
    end
  else
    data
  end
end
123
+
124
# Builds the request headers used when fetching the feed.
#
# Always sends the User-Agent and Accept-Encoding headers. Adds the
# conditional-request headers If-Modified-Since / If-None-Match when the
# feed's meta data holds a :last_modified / :etag value.
#
# Returns a Hash mapping header names to values.
def fetch_feed_options
  headers = {}
  headers['User-Agent'] = "feed2email/#{VERSION}"
  headers['Accept-Encoding'] = 'gzip, deflate'

  last_modified = meta[:last_modified]
  headers['If-Modified-Since'] = last_modified if last_modified

  etag = meta[:etag]
  headers['If-None-Match'] = etag if etag

  headers
end
140
+
141
# Parses fetched feed XML with Feedzirra.
#
# xml_data - String with the feed document.
#
# Returns whatever Feedzirra::Feed.parse returns, or false if parsing raised
# (the error is logged in that case).
def parse_feed(xml_data)
  log :debug, 'Parsing feed...'

  parsed =
    begin
      Feedzirra::Feed.parse(xml_data)
    rescue => error
      log :error, 'Failed to parse feed'
      log_exception(error)
      false
    end

  parsed
end
152
+
153
# Fetches the feed and, when something was fetched, parses it into @data.
#
# Returns true when @data responds to #entries, false when it does not, and
# the falsy @data itself (nil or false) when nothing usable was stored.
def fetch_and_parse_feed
  raw = fetch_feed
  @data = parse_feed(raw) if raw

  return @data unless @data

  @data.respond_to?(:entries)
end
160
+
161
# Changes the feed location, forwarding the new URI to the history and meta
# objects (in that order) before storing it on the feed itself.
def uri=(uri)
  @uri = uri.tap do |location|
    history.uri = location
    meta.uri = location
  end
end
84
166
 
85
- def fetched?
86
- data.respond_to?(:entries)
167
# Wraps the first max_entries entries of the parsed feed in Entry objects.
# The resulting list is memoized in @entries.
#
# Returns an Array of Entry.
def entries
  return @entries if @entries

  newest = data.entries.first(max_entries)

  @entries = newest.map do |entry_data|
    Entry.new(entry_data, uri, title)
  end
end
88
172
 
89
173
  def log(*args)
@@ -91,50 +175,65 @@ module Feed2Email
91
175
  end
92
176
 
93
177
  def max_entries
94
- (config['max_entries'] || 20).to_i
178
+ config['max_entries'].to_i
95
179
  end
96
180
 
97
181
  def process_entries
98
- entries.each do |entry|
99
- log :info, "Found entry #{entry.uri}"
100
-
101
- if seen_before?
102
- if seen_entries.include?(entry.uri)
103
- log :debug, 'Skipping seen entry...'
104
- else
105
- log :debug, 'Processing new entry...'
106
-
107
- begin
108
- entry.process
109
- rescue => e
110
- log :error, "#{e.class}: #{e.message.strip}"
111
- e.backtrace.each {|line| log :debug, line }
112
- end
113
-
114
- seen_entries << entry.uri if e.nil? # record in history if no errors
115
- e = nil
116
- end
182
+ log :info, "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
183
+ entries.each {|entry| process_entry(entry) }
184
+ end
185
+
186
+ def process_entry(entry)
187
+ log :info, "Processing entry #{entry.uri} ..."
188
+
189
+ if history.any?
190
+ if history.include?(entry.uri)
191
+ log :debug, 'Skipping old entry...'
117
192
  else
118
- log :debug, 'Skipping new entry...'
119
- seen_entries << entry.uri # record in history
193
+ # Sleep between entry processing to avoid Net::SMTPServerBusy errors
194
+ if config['send_delay'] > 0
195
+ log :debug,
196
+ "Sleeping for #{'second'.pluralize(config['send_delay'])}"
197
+ sleep(config['send_delay'])
198
+ end
199
+
200
+ log :debug, 'Sending new entry...'
201
+
202
+ begin
203
+ entry.send_mail
204
+ rescue => e
205
+ log_exception(e)
206
+ end
207
+
208
+ if e.nil? # no errors
209
+ history << entry.uri
210
+ end
211
+
212
+ e = nil
120
213
  end
214
+ else
215
+ log :debug, 'Skipping new feed entry...'
216
+ history << entry.uri
121
217
  end
122
218
  end
123
219
 
124
- def seen_before?
125
- if @seen_before.nil?
126
- @seen_before = !@@history[@uri].nil?
127
- end
128
-
129
- @seen_before
220
+ def history
221
+ @history ||= FeedHistory.new(uri)
130
222
  end
131
223
 
132
- def seen_entries
133
- @@history[@uri] ||= []
224
+ def meta
225
+ @meta ||= FeedMeta.new(uri)
134
226
  end
135
227
 
136
- def title
137
- data.title
228
# Logs an exception: class and stripped message at :error level, followed by
# one :debug line per backtrace frame.
#
# error - The Exception to report; its backtrace is iterated, so it is
#         expected to have one.
def log_exception(error)
  summary = "#{error.class}: #{error.message.strip}"
  log(:error, summary)

  error.backtrace.each do |frame|
    log(:debug, frame)
  end
end
232
+
233
+ def_delegator :data, :title, :title
234
+
235
+ def_delegator :Feed2Email, :config, :config
236
+
237
+ def data; @data end
139
238
  end
140
239
  end
@@ -0,0 +1,65 @@
1
+ require 'digest/md5'
2
+ require 'yaml'
3
+
4
module Feed2Email
  # Base class for per-feed YAML data files kept under CONFIG_DIR.
  #
  # The file name is "<prefix>-<MD5 hex digest of the feed URI>.yml". Data is
  # loaded lazily on first access and written back by #sync only after it has
  # been marked dirty.
  #
  # Subclasses must provide two private methods:
  #
  #   data_type       - Class instantiated (without arguments) when there is
  #                     no stored data.
  #   filename_prefix - String prefix of the data file name.
  class FeedDataFile
    # uri - String URI of the feed the data belongs to.
    def initialize(uri)
      @uri = uri
      @dirty = false
    end

    # Moves the data to a new feed URI. The current data is loaded, the file
    # for the old URI is deleted and the data is marked dirty so that the
    # next #sync writes it under the new name. Does nothing when the URI is
    # unchanged.
    def uri=(new_uri)
      return if new_uri == uri

      data # load data from the old file before it is removed
      remove_file
      mark_dirty
      @uri = new_uri
    end

    # Writes the data to its file as YAML, but only if it was marked dirty.
    def sync
      File.open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
    end

    private

    # Reads the data file into @data. A missing file, or a file without any
    # YAML content (which loads as a falsy value), yields a fresh data_type
    # instance so that callers always get a usable container.
    def load_data
      # Block form closes the file handle as soon as parsing is done
      loaded = File.open(path) {|f| YAML.load(f) }
      @data = loaded || data_type.new
    rescue Errno::ENOENT
      @data = data_type.new
    end

    def path
      File.join(CONFIG_DIR, filename)
    end

    def filename
      "#{filename_prefix}-#{filename_suffix}.yml"
    end

    def filename_suffix
      Digest::MD5.hexdigest(uri)
    end

    def data
      @data ||= load_data
    end

    def mark_dirty
      @dirty = true
    end

    # Deletes the data file; a file that does not exist is not an error.
    def remove_file
      File.unlink(path)
    rescue Errno::ENOENT
    end

    def dirty; @dirty end

    def uri; @uri end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'feed2email/feed_data_file'
2
+
3
module Feed2Email
  # List of entry URIs already seen for a feed, stored in a "history-*.yml"
  # data file.
  class FeedHistory < FeedDataFile
    # Records an entry URI and marks the history as needing to be written.
    def <<(entry_uri)
      mark_dirty
      data << entry_uri
    end

    # Tells whether a history file existed for the feed when first asked.
    #
    # The answer is computed once and kept, including a false answer, so it
    # keeps describing the feed as it was found rather than flipping once the
    # file gets written or moved.
    def any?
      @old_feed = File.exist?(path) unless defined?(@old_feed)
      @old_feed
    end

    # Tells whether the entry URI is already part of the history.
    def include?(entry_uri)
      data.include?(entry_uri) # delegate
    end

    # Moves the history to a new feed URI.
    #
    # The parent implementation deletes the file of the old URI, and the file
    # for the new URI does not exist until the next sync. Without capturing
    # the answer of #any? first, a known feed that got redirected would look
    # brand new afterwards.
    def uri=(new_uri)
      any? # remember the answer for the current location
      super
    end

    private

    def data_type
      Array
    end

    def filename_prefix
      'history'
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'feed2email/feed_data_file'
2
+
3
module Feed2Email
  # Key/value meta data of a feed, stored in a "meta-*.yml" data file.
  class FeedMeta < FeedDataFile
    # Returns the value stored under key.
    def [](key)
      data[key]
    end

    # Stores value under key; the data is flagged as dirty only when the
    # value differs from the one currently stored.
    def []=(key, value)
      mark_dirty unless data[key] == value
      data[key] = value
    end

    # Tells whether key is present in the meta data.
    def has_key?(key)
      data.key?(key)
    end

    private

    def data_type; Hash end

    def filename_prefix; 'meta' end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'forwardable'
2
+ require 'yaml'
3
+
4
module Feed2Email
  # The list of subscribed feed URIs, backed by a YAML file holding an Array.
  #
  # The file is validated on construction. Changes made through #[]= are kept
  # in memory until #sync is called.
  class Feeds
    extend Forwardable

    class MissingFeedsError < StandardError; end
    class InvalidFeedsSyntaxError < StandardError; end
    class InvalidFeedsDataTypeError < StandardError; end

    # path - String path of the feeds file.
    #
    # Raises MissingFeedsError if the file does not exist.
    # Raises InvalidFeedsSyntaxError if the file is not valid YAML.
    # Raises InvalidFeedsDataTypeError if the YAML document is not an Array.
    def initialize(path)
      @path = path
      @dirty = false
      check
    end

    def_delegators :data, :size, :each_with_index

    # Replaces the URI at index; the list is flagged as dirty only when the
    # URI actually changes.
    def []=(index, uri)
      mark_dirty if data[index] != uri
      data[index] = uri
    end

    # Writes the list back to the feeds file as YAML if it was changed.
    def sync
      # File.open rather than Kernel#open: the path is supplied by the caller
      # and Kernel#open would run a command for a path starting with "|".
      File.open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
    end

    private

    def check
      check_existence
      check_syntax
      check_data_type
    end

    def check_existence
      if !File.exist?(path)
        raise MissingFeedsError, "Missing feeds file #{path}"
      end
    end

    # Loads the file as a side effect, so @data is set once this passes.
    def check_syntax
      begin
        load_yaml
      rescue Psych::SyntaxError
        raise InvalidFeedsSyntaxError,
          "Invalid YAML syntax for feeds file #{path}"
      end
    end

    def check_data_type
      if !data.is_a?(Array)
        raise InvalidFeedsDataTypeError,
          "Invalid data type (not an Array) for feeds file #{path}"
      end
    end

    def load_yaml
      @data = YAML.load(read_file)
    end

    def read_file
      File.read(path)
    end

    def mark_dirty
      @dirty = true
    end

    def path; @path end

    def data; @data end

    def dirty; @dirty end
  end
end
@@ -1,41 +1,42 @@
1
+ require 'logger'
2
+
1
3
  module Feed2Email
2
4
  class Logger
3
- include Singleton
5
+ def initialize(log_path, log_level)
6
+ @log_path = log_path
7
+ @log_level = log_level
8
+ end
4
9
 
5
10
  def log(severity, message)
6
- logger.add(::Logger.const_get(severity.upcase), message) if log?
11
+ logger.add(::Logger.const_get(severity.upcase), message)
7
12
  end
8
13
 
9
14
  private
10
15
 
11
- def config
12
- Feed2Email::Config.instance.config
13
- end
14
-
15
- def log?
16
- log_path != false
16
+ def log_to
17
+ if log_path == true
18
+ $stdout
19
+ elsif log_path # truthy but not true (a path)
20
+ File.expand_path(log_path)
21
+ end
17
22
  end
18
23
 
19
- def log_level
20
- config['log_level'] || 'info'
21
- end
24
+ def logger
25
+ return @logger if @logger
22
26
 
23
- def log_path
24
- config['log_path']
25
- end
27
+ @logger = ::Logger.new(log_to)
26
28
 
27
- def log_to
28
- if log_path.nil? || log_path == true
29
- STDOUT
29
+ if log_level
30
+ @logger.level = ::Logger.const_get(log_level.upcase)
30
31
  else
31
- File.expand_path(log_path)
32
+ @logger.level = ::Logger::INFO
32
33
  end
33
- end
34
34
 
35
- def logger
36
- @logger ||= ::Logger.new(log_to)
37
- @logger.level = ::Logger.const_get(log_level.upcase)
38
35
  @logger
39
36
  end
37
+
38
+ def log_path; @log_path end
39
+
40
+ def log_level; @log_level end
40
41
  end
41
42
  end