feed2email 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +48 -20
- data/TODO.md +13 -0
- data/bin/feed2email +1 -0
- data/bin/feed2email-migrate-history +29 -0
- data/lib/feed2email/config.rb +92 -17
- data/lib/feed2email/core_ext.rb +12 -0
- data/lib/feed2email/entry.rb +13 -8
- data/lib/feed2email/feed.rb +183 -84
- data/lib/feed2email/feed_data_file.rb +65 -0
- data/lib/feed2email/feed_history.rb +28 -0
- data/lib/feed2email/feed_meta.rb +28 -0
- data/lib/feed2email/feeds.rb +77 -0
- data/lib/feed2email/logger.rb +23 -22
- data/lib/feed2email/mail.rb +24 -47
- data/lib/feed2email/version.rb +1 -1
- data/lib/feed2email.rb +15 -14
- metadata +127 -27
- data/.gitignore +0 -4
- data/Gemfile +0 -3
- data/Gemfile.lock +0 -39
- data/Rakefile +0 -1
- data/feed2email.gemspec +0 -27
data/lib/feed2email/feed.rb
CHANGED
@@ -1,89 +1,173 @@
+require 'feedzirra'
+require 'forwardable'
+require 'net/http'
+require 'open-uri'
+require 'stringio'
+require 'uri'
+require 'zlib'
+require 'feed2email/core_ext'
+require 'feed2email/entry'
+require 'feed2email/feed_history'
+require 'feed2email/feed_meta'
+require 'feed2email/feeds'
+require 'feed2email/version'
+
 module Feed2Email
   class Feed
-
-
+    extend Forwardable
+
+    class << self
+      extend Forwardable

-
-      Feed2Email::Logger.instance.log(*args)
+      def_delegators :Feed2Email, :config, :log
     end

     def self.process_all
-
-
-
-
-
-      if !feed_uris.is_a? Array
-        $stderr.puts "Error: missing or invalid feeds file #{FEEDS_FILE}"
-        exit 4
+      feed_uris.each_with_index do |uri, i|
+        feed = new(uri)
+        feed.process
+        feed_uris[i] = feed.uri # persist possible permanent redirect
       end

-
-
-      log :debug, 'Loading history...'
-      @@history = YAML.load(open(HISTORY_FILE)) rescue {}
+      feed_uris.sync
+    end

-
-
-      Feed.new(uri).process
-    end
+    def self.feed_uris
+      return @feed_uris if @feed_uris

-      log :debug, '
-
+      log :debug, 'Loading feed subscriptions...'
+      @feed_uris = Feeds.new(File.join(CONFIG_DIR, 'feeds.yml'))
+      log :info, "Subscribed to #{'feed'.pluralize(feed_uris.size)}"
+      @feed_uris
     end

+    attr_reader :uri
+
     def initialize(uri)
       @uri = uri
     end

     def process
-
-      log :debug, 'Feed is fetched'
+      log :info, "Processing feed #{uri} ..."

-
-
-
-
-
-
-      end
+      return unless fetch_and_parse_feed
+
+      if entries.any?
+        process_entries
+        history.sync
+        meta.sync
       else
-        log :
+        log :warn, 'Feed does not have entries'
       end
     end

     private

-    def
-
-
+    def fetch_feed
+      log :debug, 'Fetching feed...'
+
+      begin
+        handle_permanent_redirection
+
+        open(uri, fetch_feed_options) do |f|
+          if f.meta['last-modified'] || meta.has_key?(:last_modified)
+            meta[:last_modified] = f.meta['last-modified']
+          end

-
-
-
+          if f.meta['etag'] || meta.has_key?(:etag)
+            meta[:etag] = f.meta['etag']
+          end

-
-      @data = Feedzirra::Feed.fetch_and_parse(@uri,
-        :user_agent => "feed2email/#{VERSION}",
-        :compress => true
-      )
-    rescue => e
-      log :error, "#{e.class}: #{e.message.strip}"
-      e.backtrace.each {|line| log :debug, line }
+          return decode_content(f.read, f.meta['content-encoding'])
         end
+      rescue OpenURI::HTTPError => e
+        if e.message == '304 Not Modified'
+          log :info, 'Feed not modified; skipping...'
+          return false
+        end
+
+        raise
+      rescue => e
+        log :error, 'Failed to fetch feed'
+        log_exception(e)
+        return false
       end
+    end
+
+    def handle_permanent_redirection
+      parsed_uri = URI.parse(uri)
+      http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
+      http.use_ssl = (parsed_uri.scheme == 'https')
+      response = http.head(parsed_uri.request_uri)

-
+      if response.code == '301' && response['location'] =~ %r{\Ahttps?://}
+        self.uri = response['location']
+        log :warn, "Got permanently redirected! Updated feed location to #{uri}"
+      end
     end

-    def
-
-
+    def decode_content(data, content_encoding)
+      case content_encoding
+      when 'gzip'
+        gz = Zlib::GzipReader.new(StringIO.new(data))
+        xml = gz.read
+        gz.close
+      when 'deflate'
+        xml = Zlib::Inflate.inflate(data)
+      else
+        xml = data
+      end
+
+      xml
+    end
+
+    def fetch_feed_options
+      options = {
+        'User-Agent' => "feed2email/#{VERSION}",
+        'Accept-Encoding' => 'gzip, deflate',
       }
+
+      if meta[:last_modified]
+        options['If-Modified-Since'] = meta[:last_modified]
+      end
+
+      if meta[:etag]
+        options['If-None-Match'] = meta[:etag]
+      end
+
+      options
+    end
+
+    def parse_feed(xml_data)
+      log :debug, 'Parsing feed...'
+
+      begin
+        Feedzirra::Feed.parse(xml_data)
+      rescue => e
+        log :error, 'Failed to parse feed'
+        log_exception(e)
+        return false
+      end
+    end
+
+    def fetch_and_parse_feed
+      if xml_data = fetch_feed
+        @data = parse_feed(xml_data)
+      end
+
+      @data && @data.respond_to?(:entries)
+    end
+
+    def uri=(uri)
+      history.uri = uri
+      meta.uri = uri
+      @uri = uri
     end

-    def
-      data.
+    def entries
+      @entries ||= data.entries.first(max_entries).map {|entry_data|
+        Entry.new(entry_data, uri, title)
+      }
     end

     def log(*args)
@@ -91,50 +175,65 @@ module Feed2Email
     end

     def max_entries
-
+      config['max_entries'].to_i
     end

     def process_entries
-      entries.
-
-
-
-
-
-
-
-
-
-        entry.process
-      rescue => e
-        log :error, "#{e.class}: #{e.message.strip}"
-        e.backtrace.each {|line| log :debug, line }
-      end
-
-      seen_entries << entry.uri if e.nil? # record in history if no errors
-      e = nil
-      end
+      log :info, "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
+      entries.each {|entry| process_entry(entry) }
+    end
+
+    def process_entry(entry)
+      log :info, "Processing entry #{entry.uri} ..."
+
+      if history.any?
+        if history.include?(entry.uri)
+          log :debug, 'Skipping old entry...'
         else
-
-
+          # Sleep between entry processing to avoid Net::SMTPServerBusy errors
+          if config['send_delay'] > 0
+            log :debug,
+              "Sleeping for #{'second'.pluralize(config['send_delay'])}"
+            sleep(config['send_delay'])
+          end
+
+          log :debug, 'Sending new entry...'
+
+          begin
+            entry.send_mail
+          rescue => e
+            log_exception(e)
+          end
+
+          if e.nil? # no errors
+            history << entry.uri
+          end
+
+          e = nil
         end
+      else
+        log :debug, 'Skipping new feed entry...'
+        history << entry.uri
       end
     end

-    def
-
-      @seen_before = !@@history[@uri].nil?
-    end
-
-      @seen_before
+    def history
+      @history ||= FeedHistory.new(uri)
     end

-    def
-
+    def meta
+      @meta ||= FeedMeta.new(uri)
     end

-    def
-
+    def log_exception(error)
+      log :error, "#{error.class}: #{error.message.strip}"
+      error.backtrace.each {|line| log :debug, line }
     end
+
+    def_delegator :data, :title, :title
+
+    def_delegator :Feed2Email, :config, :config
+
+    def data; @data end
   end
 end
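
The rewrite above drops Feedzirra's fetch_and_parse in favour of a plain open-uri request: the Last-Modified and ETag values are cached per feed (via FeedMeta) and sent back as If-Modified-Since / If-None-Match, a 304 response short-circuits processing, and gzip/deflate bodies are decoded by hand before Feedzirra is asked only to parse the XML. A condensed standalone sketch of that conditional-GET pattern follows; the method name and arguments are illustrative rather than part of the gem, and it assumes a Ruby where open-uri still hooks Kernel#open, as the diff itself does:

    require 'open-uri'
    require 'stringio'
    require 'zlib'

    # Fetch a URL with conditional-GET headers; return nil on 304 Not Modified,
    # otherwise the (gzip/deflate-decoded) response body.
    def fetch_if_modified(url, last_modified = nil, etag = nil)
      headers = { 'Accept-Encoding' => 'gzip, deflate' }
      headers['If-Modified-Since'] = last_modified if last_modified
      headers['If-None-Match']     = etag if etag

      open(url, headers) do |f|
        body = f.read
        case f.meta['content-encoding']
        when 'gzip'    then Zlib::GzipReader.new(StringIO.new(body)).read
        when 'deflate' then Zlib::Inflate.inflate(body)
        else body
        end
      end
    rescue OpenURI::HTTPError => e
      raise unless e.message == '304 Not Modified'
      nil # nothing changed since the cached headers; nothing to parse
    end
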
data/lib/feed2email/feed_data_file.rb
ADDED
@@ -0,0 +1,65 @@
+require 'digest/md5'
+require 'yaml'
+
+module Feed2Email
+  class FeedDataFile
+    def initialize(uri)
+      @uri = uri
+      @dirty = false
+    end
+
+    def uri=(new_uri)
+      return if new_uri == uri
+
+      data # load data if not already loaded
+      remove_file
+      mark_dirty
+      @uri = new_uri
+    end
+
+    def sync
+      open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
+    end
+
+    private
+
+    def load_data
+      begin
+        @data = YAML.load(open(path))
+      rescue Errno::ENOENT
+        @data = data_type.new
+      end
+    end
+
+    def path
+      File.join(CONFIG_DIR, filename)
+    end
+
+    def filename
+      "#{filename_prefix}-#{filename_suffix}.yml"
+    end
+
+    def filename_suffix
+      Digest::MD5.hexdigest(uri)
+    end
+
+    def data
+      @data ||= load_data
+    end
+
+    def mark_dirty
+      @dirty = true
+    end
+
+    def remove_file
+      begin
+        File.unlink(path)
+      rescue Errno::ENOENT
+      end
+    end
+
+    def dirty; @dirty end
+
+    def uri; @uri end
+  end
+end
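
FeedDataFile is the shared base for the per-feed state files: it lazily loads a YAML file from CONFIG_DIR named after an MD5 digest of the feed URI, writes it back only when something marked it dirty, and when the feed URI changes it removes the old file so the next sync writes under the new name. Subclasses only supply data_type and filename_prefix, as FeedHistory and FeedMeta do below. A hypothetical third subclass, purely to illustrate the contract (FeedNotes is not part of the gem, and it relies on the gem's CONFIG_DIR constant already being defined):

    require 'feed2email/feed_data_file'

    module Feed2Email
      # Illustrative subclass: would persist a list of free-form notes per feed
      # in notes-<md5 of feed URI>.yml under CONFIG_DIR.
      class FeedNotes < FeedDataFile
        def <<(note)
          mark_dirty
          data << note
        end

        private

        def data_type
          Array # start from an empty Array when the file does not exist yet
        end

        def filename_prefix
          'notes'
        end
      end
    end
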
data/lib/feed2email/feed_history.rb
ADDED
@@ -0,0 +1,28 @@
+require 'feed2email/feed_data_file'
+
+module Feed2Email
+  class FeedHistory < FeedDataFile
+    def <<(entry_uri)
+      mark_dirty
+      data << entry_uri
+    end
+
+    def any?
+      @old_feed ||= File.exist?(path)
+    end
+
+    def include?(entry_uri)
+      data.include?(entry_uri) # delegate
+    end
+
+    private
+
+    def data_type
+      Array
+    end
+
+    def filename_prefix
+      'history'
+    end
+  end
+end
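
FeedHistory keeps the list of already-mailed entry URIs for one feed; any? doubles as a "have we seen this feed before" check, since it only tests whether the history file exists, which is how Feed#process_entry decides to record a brand-new feed's entries without mailing them. A small usage sketch with made-up URIs (it assumes the gem's CONFIG_DIR is already set up):

    require 'feed2email/feed_history'

    history = Feed2Email::FeedHistory.new('http://example.com/feed.atom')

    unless history.include?('http://example.com/posts/42')
      history << 'http://example.com/posts/42' # marks the history dirty
    end

    history.sync # persists to history-<md5 of feed URI>.yml under CONFIG_DIR
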
data/lib/feed2email/feed_meta.rb
ADDED
@@ -0,0 +1,28 @@
+require 'feed2email/feed_data_file'
+
+module Feed2Email
+  class FeedMeta < FeedDataFile
+    def [](key)
+      data[key]
+    end
+
+    def []=(key, value)
+      mark_dirty if data[key] != value
+      data[key] = value
+    end
+
+    def has_key?(key)
+      data.has_key?(key)
+    end
+
+    private
+
+    def data_type
+      Hash
+    end
+
+    def filename_prefix
+      'meta'
+    end
+  end
+end
data/lib/feed2email/feeds.rb
ADDED
@@ -0,0 +1,77 @@
+require 'forwardable'
+require 'yaml'
+
+module Feed2Email
+  class Feeds
+    extend Forwardable
+
+    class MissingFeedsError < StandardError; end
+    class InvalidFeedsSyntaxError < StandardError; end
+    class InvalidFeedsDataTypeError < StandardError; end
+
+    def initialize(path)
+      @path = path
+      @dirty = false
+      check
+    end
+
+    def_delegators :data, :size, :each_with_index
+
+    def []=(index, uri)
+      mark_dirty if data[index] != uri
+      data[index] = uri
+    end
+
+    def sync
+      open(path, 'w') {|f| f.write(data.to_yaml) } if dirty
+    end
+
+    private
+
+    def check
+      check_existence
+      check_syntax
+      check_data_type
+    end
+
+    def check_existence
+      if !File.exist?(path)
+        raise MissingFeedsError, "Missing feeds file #{path}"
+      end
+    end
+
+    def check_syntax
+      begin
+        load_yaml
+      rescue Psych::SyntaxError
+        raise InvalidFeedsSyntaxError,
+          "Invalid YAML syntax for feeds file #{path}"
+      end
+    end
+
+    def check_data_type
+      if !data.is_a?(Array)
+        raise InvalidFeedsDataTypeError,
+          "Invalid data type (not an Array) for feeds file #{path}"
+      end
+    end
+
+    def load_yaml
+      @data = YAML.load(read_file)
+    end
+
+    def read_file
+      File.read(path)
+    end
+
+    def mark_dirty
+      @dirty = true
+    end
+
+    def path; @path end
+
+    def data; @data end
+
+    def dirty; @dirty end
+  end
+end
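
Feeds wraps the subscription list that 0.5.0 read ad hoc inside Feed.process_all: it now raises if feeds.yml is missing, is not valid YAML, or does not parse to an Array, and like the other data files it only rewrites the file when an entry was actually reassigned (which process_all does after a permanent redirect). A usage sketch with made-up values; the real location is whatever CONFIG_DIR resolves to, so the '~/.feed2email' path here is only an example:

    require 'feed2email/feeds'

    # feeds.yml must be a plain YAML array of feed URIs, e.g.:
    #   - http://example.com/feed.atom
    #   - http://blog.example.org/rss
    feeds = Feed2Email::Feeds.new(File.expand_path('~/.feed2email/feeds.yml'))

    # Rewrite any URI that points at a host known to have moved (illustrative).
    feeds.each_with_index do |uri, i|
      feeds[i] = uri.sub(%r{\Ahttp://old\.example\.com}, 'http://new.example.com')
    end

    feeds.sync # rewrites the file only if some URI actually changed
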
data/lib/feed2email/logger.rb
CHANGED
@@ -1,41 +1,42 @@
+require 'logger'
+
 module Feed2Email
   class Logger
-
+    def initialize(log_path, log_level)
+      @log_path = log_path
+      @log_level = log_level
+    end

     def log(severity, message)
-      logger.add(::Logger.const_get(severity.upcase), message)
+      logger.add(::Logger.const_get(severity.upcase), message)
     end

     private

-    def
-
-
-
-
-
+    def log_to
+      if log_path == true
+        $stdout
+      elsif log_path # truthy but not true (a path)
+        File.expand_path(log_path)
+      end
     end

-    def
-
-    end
+    def logger
+      return @logger if @logger

-
-      config['log_path']
-    end
+      @logger = ::Logger.new(log_to)

-
-
-      STDOUT
+      if log_level
+        @logger.level = ::Logger.const_get(log_level.upcase)
       else
-
+        @logger.level = ::Logger::INFO
       end
-    end

-    def logger
-      @logger ||= ::Logger.new(log_to)
-      @logger.level = ::Logger.const_get(log_level.upcase)
       @logger
     end
+
+    def log_path; @log_path end
+
+    def log_level; @log_level end
   end
 end
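
The logger no longer reaches into the global config: its destination and level are injected through the constructor. A log_path of true means standard output, any other truthy value is treated as a file path, and a nil log_level falls back to INFO. A brief usage sketch; the file path and messages are illustrative:

    require 'feed2email/logger'

    # Log to standard output at DEBUG level.
    console = Feed2Email::Logger.new(true, 'debug')
    console.log(:debug, 'Fetching feed...')

    # Log to a file at the default INFO level (log_level is nil).
    file_log = Feed2Email::Logger.new('~/.feed2email/feed2email.log', nil)
    file_log.log(:warn, 'Feed does not have entries')
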