feed2email 0.5.0 → 0.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +48 -20
- data/TODO.md +13 -0
- data/bin/feed2email +1 -0
- data/bin/feed2email-migrate-history +29 -0
- data/lib/feed2email/config.rb +92 -17
- data/lib/feed2email/core_ext.rb +12 -0
- data/lib/feed2email/entry.rb +13 -8
- data/lib/feed2email/feed.rb +183 -84
- data/lib/feed2email/feed_data_file.rb +65 -0
- data/lib/feed2email/feed_history.rb +28 -0
- data/lib/feed2email/feed_meta.rb +28 -0
- data/lib/feed2email/feeds.rb +77 -0
- data/lib/feed2email/logger.rb +23 -22
- data/lib/feed2email/mail.rb +24 -47
- data/lib/feed2email/version.rb +1 -1
- data/lib/feed2email.rb +15 -14
- metadata +127 -27
- data/.gitignore +0 -4
- data/Gemfile +0 -3
- data/Gemfile.lock +0 -39
- data/Rakefile +0 -1
- data/feed2email.gemspec +0 -27
data/lib/feed2email/feed.rb
CHANGED
@@ -1,89 +1,173 @@
+require 'feedzirra'
+require 'forwardable'
+require 'net/http'
+require 'open-uri'
+require 'stringio'
+require 'uri'
+require 'zlib'
+require 'feed2email/core_ext'
+require 'feed2email/entry'
+require 'feed2email/feed_history'
+require 'feed2email/feed_meta'
+require 'feed2email/feeds'
+require 'feed2email/version'
+
 module Feed2Email
   class Feed
-
-
+    extend Forwardable
+
+    class << self
+      extend Forwardable

-
-      Feed2Email::Logger.instance.log(*args)
+      def_delegators :Feed2Email, :config, :log
     end

     def self.process_all
-
-
-
-
-
-      if !feed_uris.is_a? Array
-        $stderr.puts "Error: missing or invalid feeds file #{FEEDS_FILE}"
-        exit 4
+      feed_uris.each_with_index do |uri, i|
+        feed = new(uri)
+        feed.process
+        feed_uris[i] = feed.uri # persist possible permanent redirect
       end

-
-
-      log :debug, 'Loading history...'
-      @@history = YAML.load(open(HISTORY_FILE)) rescue {}
+      feed_uris.sync
+    end

-
-
-        Feed.new(uri).process
-      end
+    def self.feed_uris
+      return @feed_uris if @feed_uris

-      log :debug, '
-
+      log :debug, 'Loading feed subscriptions...'
+      @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
+      log :info, "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
+      @feed_uris
     end

+    attr_reader :uri
+
     def initialize(uri)
       @uri = uri
     end

     def process
-
-      log :debug, 'Feed is fetched'
+      log :info, "Processing feed #{uri} ..."

-
-
-
-
-
-
-      end
+      return unless fetch_and_parse_feed
+
+      if entries.any?
+        process_entries
+        history.sync
+        meta.sync
       else
-        log :
+        log :warn, 'Feed does not have entries'
       end
     end

     private

-    def
-
-
+    def fetch_feed
+      log :debug, 'Fetching feed...'
+
+      begin
+        handle_permanent_redirection
+
+        open(uri, fetch_feed_options) do |f|
+          if f.meta['last-modified'] || meta.has_key?(:last_modified)
+            meta[:last_modified] = f.meta['last-modified']
+          end

-
-
-
+          if f.meta['etag'] || meta.has_key?(:etag)
+            meta[:etag] = f.meta['etag']
+          end

-
-      @data = Feedzirra::Feed.fetch_and_parse(@uri,
-        :user_agent => "feed2email/#{VERSION}",
-        :compress => true
-      )
-    rescue => e
-      log :error, "#{e.class}: #{e.message.strip}"
-      e.backtrace.each {|line| log :debug, line }
+          return decode_content(f.read, f.meta['content-encoding'])
         end
+      rescue OpenURI::HTTPError => e
+        if e.message == '304 Not Modified'
+          log :info, 'Feed not modified; skipping...'
+          return false
+        end
+
+        raise
+      rescue => e
+        log :error, 'Failed to fetch feed'
+        log_exception(e)
+        return false
       end
+    end
+
+    def handle_permanent_redirection
+      parsed_uri = URI.parse(uri)
+      http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
+      http.use_ssl = (parsed_uri.scheme == 'https')
+      response = http.head(parsed_uri.request_uri)

-
+      if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
+        self.uri = response['location']
+        log :warn, "Got permanently redirected! Updated feed location to #{uri}"
+      end
     end

-    def
-
-
+    def decode_content(data, content_encoding)
+      case content_encoding
+      when 'gzip'
+        gz = Zlib::GzipReader.new(StringIO.new(data))
+        xml = gz.read
+        gz.close
+      when 'deflate'
+        xml = Zlib::Inflate.inflate(data)
+      else
+        xml = data
+      end
+
+      xml
+    end
+
+    def fetch_feed_options
+      options = {
+        'User-Agent' => "feed2email/#{VERSION}",
+        'Accept-Encoding' => 'gzip, deflate',
       }
+
+      if meta[:last_modified]
+        options['If-Modified-Since'] = meta[:last_modified]
+      end
+
+      if meta[:etag]
+        options['If-None-Match'] = meta[:etag]
+      end
+
+      options
+    end
+
+    def parse_feed(xml_data)
+      log :debug, 'Parsing feed...'
+
+      begin
+        Feedzirra::Feed.parse(xml_data)
+      rescue => e
+        log :error, 'Failed to parse feed'
+        log_exception(e)
+        return false
+      end
+    end
+
+    def fetch_and_parse_feed
+      if xml_data = fetch_feed
+        @data = parse_feed(xml_data)
+      end
+
+      @data && @data.respond_to?(:entries)
+    end
+
+    def uri=(uri)
+      history.uri = uri
+      meta.uri = uri
+      @uri = uri
     end

-    def
-      data.
+    def entries
+      @entries ||= data.entries.first(max_entries).map {|entry_data|
+        Entry.new(entry_data, uri, title)
+      }
     end

     def log(*args)
@@ -91,50 +175,65 @@ module Feed2Email
     end

     def max_entries
-
+      config['max_entries'].to_i
     end

     def process_entries
-      entries.
-
-
-
-
-
-
-
-
-
-          entry.process
-        rescue => e
-          log :error, "#{e.class}: #{e.message.strip}"
-          e.backtrace.each {|line| log :debug, line }
-        end
-
-        seen_entries << entry.uri if e.nil? # record in history if no errors
-        e = nil
-      end
+      log :info, "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
+      entries.each {|entry| process_entry(entry) }
+    end
+
+    def process_entry(entry)
+      log :info, "Processing entry #{entry.uri} ..."
+
+      if history.any?
+        if history.include?(entry.uri)
+          log :debug, 'Skipping old entry...'
         else
-
-
+          # Sleep between entry processing to avoid Net::SMTPServerBusy errors
+          if config['send_delay'] > 0
+            log :debug,
+              "Sleeping for #{'second'.pluralize(config['send_delay'])}"
+            sleep(config['send_delay'])
+          end
+
+          log :debug, 'Sending new entry...'
+
+          begin
+            entry.send_mail
+          rescue => e
+            log_exception(e)
+          end
+
+          if e.nil? # no errors
+            history << entry.uri
+          end
+
+          e = nil
         end
+      else
+        log :debug, 'Skipping new feed entry...'
+        history << entry.uri
       end
     end

-    def
-
-      @seen_before = !@@history[@uri].nil?
-    end
-
-      @seen_before
+    def history
+      @history ||= FeedHistory.new(uri)
     end

-    def
-
+    def meta
+      @meta ||= FeedMeta.new(uri)
     end

-    def
-
+    def log_exception(error)
+      log :error, "#{error.class}: #{error.message.strip}"
+      error.backtrace.each {|line| log :debug, line }
     end
+
+    def_delegator :data, :title, :title
+
+    def_delegator :Feed2Email, :config, :config
+
+    def data; @data end
   end
 end
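The rewritten fetch_feed stores each feed's Last-Modified and ETag values in FeedMeta, sends them back as If-Modified-Since / If-None-Match request headers, and treats a 304 response as "nothing new". Below is a minimal standalone sketch of that conditional-GET idea; fetch_if_modified and its return hash are made up for illustration and are not part of the gem.

# Illustrative only: a plain open-uri conditional GET, independent of feed2email.
# Kernel#open here is open-uri's, as the gem itself calls it; on modern Rubies
# you would use URI.open instead.
require 'open-uri'

def fetch_if_modified(url, last_modified: nil, etag: nil)
  headers = { 'Accept-Encoding' => 'gzip, deflate' }
  headers['If-Modified-Since'] = last_modified if last_modified
  headers['If-None-Match'] = etag if etag

  open(url, headers) do |f|
    # Keep whatever validators the server hands back for the next run.
    return { body: f.read,
             last_modified: f.meta['last-modified'],
             etag: f.meta['etag'] }
  end
rescue OpenURI::HTTPError => e
  return nil if e.message == '304 Not Modified' # unchanged since last fetch
  raise
end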
data/lib/feed2email/feed_data_file.rb
ADDED
@@ -0,0 +1,65 @@
+require 'digest/md5'
+require 'yaml'
+
+module Feed2Email
+  class FeedDataFile
+    def initialize(uri)
+      @uri = uri
+      @dirty = false
+    end
+
+    def uri=(new_uri)
+      return if new_uri == uri
+
+      data # load data if not already loaded
+      remove_file
+      mark_dirty
+      @uri = new_uri
+    end
+
+    def sync
+      open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
+    end
+
+    private
+
+    def load_data
+      begin
+        @data = YAML.load(open(path))
+      rescue Errno::ENOENT
+        @data = data_type.new
+      end
+    end
+
+    def path
+      File.join(CONFIG_DIR, filename)
+    end
+
+    def filename
+      "#{filename_prefix}-#{filename_suffix}.yml"
+    end
+
+    def filename_suffix
+      Digest::MD5.hexdigest(uri)
+    end
+
+    def data
+      @data ||= load_data
+    end
+
+    def mark_dirty
+      @dirty = true
+    end
+
+    def remove_file
+      begin
+        File.unlink(path)
+      rescue Errno::ENOENT
+      end
+    end
+
+    def dirty; @dirty end
+
+    def uri; @uri end
+  end
+end
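FeedDataFile is a small template-method base class: it owns the feed URI, lazy YAML loading, dirty tracking, and the per-feed file path under CONFIG_DIR, while subclasses only declare data_type (the empty value to start from) and filename_prefix (the file name stem). A hypothetical subclass sketch, purely for illustration; FeedStats is not part of the gem and CONFIG_DIR is assumed to be defined as the gem does elsewhere.

# Hypothetical subclass for illustration only.
# It would persist a Hash of per-feed counters, relying on the data_type and
# filename_prefix hooks plus the private data/mark_dirty helpers it inherits.
require 'feed2email/feed_data_file'

module Feed2Email
  class FeedStats < FeedDataFile
    def bump(key)
      mark_dirty
      data[key] = data.fetch(key, 0) + 1
    end

    private

    def data_type
      Hash      # start from an empty Hash when no stats file exists yet
    end

    def filename_prefix
      'stats'   # files are written as stats-<md5-of-feed-uri>.yml
    end
  end
end

# stats = Feed2Email::FeedStats.new('http://example.com/feed.xml')
# stats.bump(:fetches)
# stats.sync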
data/lib/feed2email/feed_history.rb
ADDED
@@ -0,0 +1,28 @@
+require 'feed2email/feed_data_file'
+
+module Feed2Email
+  class FeedHistory < FeedDataFile
+    def <<(entry_uri)
+      mark_dirty
+      data << entry_uri
+    end
+
+    def any?
+      @old_feed ||= File.exist?(path)
+    end
+
+    def include?(entry_uri)
+      data.include?(entry_uri) # delegate
+    end
+
+    private
+
+    def data_type
+      Array
+    end
+
+    def filename_prefix
+      'history'
+    end
+  end
+end
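FeedHistory replaces the old single history.yml hash with one YAML array of already-mailed entry URIs per feed, keyed by the MD5 of the feed URI. A rough usage sketch; CONFIG_DIR must already point at an existing directory, and requiring 'feed2email' is assumed to set that up as the gem does elsewhere.

# Rough usage sketch under the CONFIG_DIR assumption above.
require 'feed2email'
require 'feed2email/feed_history'

history = Feed2Email::FeedHistory.new('http://example.com/feed.xml')
history.any?                               # true only if a history file already exists
unless history.include?('http://example.com/posts/1')
  history << 'http://example.com/posts/1'  # appends the URI and marks the data dirty
end
history.sync                               # writes history-<md5-of-feed-uri>.yml when dirty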
data/lib/feed2email/feed_meta.rb
ADDED
@@ -0,0 +1,28 @@
+require 'feed2email/feed_data_file'
+
+module Feed2Email
+  class FeedMeta < FeedDataFile
+    def [](key)
+      data[key]
+    end
+
+    def []=(key, value)
+      mark_dirty if data[key] != value
+      data[key] = value
+    end
+
+    def has_key?(key)
+      data.has_key?(key)
+    end
+
+    private
+
+    def data_type
+      Hash
+    end
+
+    def filename_prefix
+      'meta'
+    end
+  end
+end
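FeedMeta holds the per-feed HTTP cache validators that fetch_feed reads and writes, and it only rewrites its file when a value actually changes, which is what makes the conditional GET persist across runs. A rough usage sketch, under the same CONFIG_DIR assumption as above.

require 'feed2email/feed_meta'

meta = Feed2Email::FeedMeta.new('http://example.com/feed.xml')
meta[:etag]                                            # nil on the first run
meta[:last_modified] = 'Sat, 01 Mar 2014 10:00:00 GMT' # []= marks the data dirty on change
meta[:etag] = '"abc123"'
meta.has_key?(:etag)                                   # true
meta.sync                                              # writes meta-<md5-of-feed-uri>.yml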
data/lib/feed2email/feeds.rb
ADDED
@@ -0,0 +1,77 @@
+require 'forwardable'
+require 'yaml'
+
+module Feed2Email
+  class Feeds
+    extend Forwardable
+
+    class MissingFeedsError < StandardError; end
+    class InvalidFeedsSyntaxError < StandardError; end
+    class InvalidFeedsDataTypeError < StandardError; end
+
+    def initialize(path)
+      @path = path
+      @dirty = false
+      check
+    end
+
+    def_delegators :data, :size, :each_with_index
+
+    def []=(index, uri)
+      mark_dirty if data[index] != uri
+      data[index] = uri
+    end
+
+    def sync
+      open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
+    end
+
+    private
+
+    def check
+      check_existence
+      check_syntax
+      check_data_type
+    end
+
+    def check_existence
+      if !File.exist?(path)
+        raise MissingFeedsError, "Missing feeds file #{path}"
+      end
+    end
+
+    def check_syntax
+      begin
+        load_yaml
+      rescue Psych::SyntaxError
+        raise InvalidFeedsSyntaxError,
+          "Invalid YAML syntax for feeds file #{path}"
+      end
+    end
+
+    def check_data_type
+      if !data.is_a?(Array)
+        raise InvalidFeedsDataTypeError,
+          "Invalid data type (not an Array) for feeds file #{path}"
+      end
+    end
+
+    def load_yaml
+      @data = YAML.load(read_file)
+    end
+
+    def read_file
+      File.read(path)
+    end
+
+    def mark_dirty
+      @dirty = true
+    end
+
+    def path; @path end
+
+    def data; @data end
+
+    def dirty; @dirty end
+  end
+end
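Feeds wraps the subscriptions file and validates it up front (a missing file, broken YAML, or a non-Array payload each raise a dedicated error), then rewrites the file only when an entry changed, for example after a permanent redirect. A rough usage sketch; the path below is illustrative (the gem builds it from CONFIG_DIR), and the file must be a plain YAML array of feed URIs.

# feeds.yml is expected to look like:
#   - http://example.com/feed.xml
#   - https://blog.example.org/atom.xml
require 'feed2email/feeds'

feeds = Feed2Email::Feeds.new(File.expand_path('~/.feed2email/feeds.yml'))
puts "#{feeds.size} subscriptions"

feeds.each_with_index do |uri, i|
  feeds[i] = uri.sub(%r{\Ahttp://}, 'https://') # []= marks the list dirty only on a real change
end

feeds.sync # rewrites feeds.yml only when something changed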
data/lib/feed2email/logger.rb
CHANGED
@@ -1,41 +1,42 @@
+require 'logger'
+
 module Feed2Email
   class Logger
-
+    def initialize(log_path, log_level)
+      @log_path = log_path
+      @log_level = log_level
+    end

     def log(severity, message)
-      logger.add(::Logger.const_get(severity.upcase), message)
+      logger.add(::Logger.const_get(severity.upcase), message)
     end

     private

-    def
-
-
-
-
-
+    def log_to
+      if log_path == true
+        $stdout
+      elsif log_path # truthy but not true (a path)
+        File.expand_path(log_path)
+      end
     end

-    def
-
-    end
+    def logger
+      return @logger if @logger

-
-      config['log_path']
-    end
+      @logger = ::Logger.new(log_to)

-
-
-        STDOUT
+      if log_level
+        @logger.level = ::Logger.const_get(log_level.upcase)
       else
-
+        @logger.level = ::Logger::INFO
       end
-    end

-    def logger
-      @logger ||= ::Logger.new(log_to)
-      @logger.level = ::Logger.const_get(log_level.upcase)
       @logger
     end
+
+    def log_path; @log_path end
+
+    def log_level; @log_level end
   end
 end
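The logger is no longer reached through a singleton instance (the old feed.rb called Feed2Email::Logger.instance); it now takes log_path (true for standard output, a file path, or nil/false for no logging) and log_level explicitly, falling back to INFO when no level is given. A rough usage sketch of the reworked class.

require 'feed2email/logger'

stdout_logger = Feed2Email::Logger.new(true, 'debug') # log_path == true means $stdout
stdout_logger.log(:info,  'Processing feed http://example.com/feed.xml ...')
stdout_logger.log(:debug, 'Fetching feed...')

file_logger = Feed2Email::Logger.new('~/feed2email.log', nil) # nil level falls back to INFO
file_logger.log(:warn, 'Feed does not have entries')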