feed2email 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -10
- data/README.md +138 -86
- data/bin/f2e +6 -4
- data/bin/feed2email +6 -4
- data/bin/feed2email-migrate +23 -0
- data/lib/feed2email.rb +6 -9
- data/lib/feed2email/cli.rb +112 -75
- data/lib/feed2email/config.rb +72 -52
- data/lib/feed2email/core_ext.rb +10 -0
- data/lib/feed2email/database.rb +75 -0
- data/lib/feed2email/entry.rb +149 -15
- data/lib/feed2email/feed.rb +111 -130
- data/lib/feed2email/feed_autodiscoverer.rb +8 -10
- data/lib/feed2email/migrate/convert_feeds_migration.rb +29 -0
- data/lib/feed2email/migrate/feeds_import_migration.rb +42 -0
- data/lib/feed2email/migrate/history_import_migration.rb +42 -0
- data/lib/feed2email/migrate/migration.rb +42 -0
- data/lib/feed2email/migrate/split_history_migration.rb +32 -0
- data/lib/feed2email/open-uri.rb +7 -0
- data/lib/feed2email/opml_exporter.rb +109 -0
- data/lib/feed2email/opml_importer.rb +52 -0
- data/lib/feed2email/redirection_checker.rb +2 -2
- data/lib/feed2email/smtp_connection.rb +59 -0
- data/lib/feed2email/version.rb +1 -1
- metadata +55 -30
- data/bin/feed2email-migrate-feedlist +0 -36
- data/bin/feed2email-migrate-history +0 -29
- data/lib/feed2email/feed_history.rb +0 -82
- data/lib/feed2email/feed_list.rb +0 -147
- data/lib/feed2email/lazy_smtp_connection.rb +0 -35
- data/lib/feed2email/mail.rb +0 -84
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'feed2email'
|
3
|
+
|
4
|
+
module Feed2Email
|
5
|
+
def self.database
|
6
|
+
@database
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.database=(database)
|
10
|
+
@database = database
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.database_path
|
14
|
+
root.join('feed2email.db').to_s
|
15
|
+
end
|
16
|
+
|
17
|
+
class Database
|
18
|
+
def self.setup
|
19
|
+
return if Feed2Email.database
|
20
|
+
|
21
|
+
Feed2Email.database = new(
|
22
|
+
adapter: 'sqlite',
|
23
|
+
database: Feed2Email.database_path,
|
24
|
+
loggers: [Feed2Email.logger],
|
25
|
+
sql_log_level: :debug
|
26
|
+
)
|
27
|
+
Feed2Email.database.setup
|
28
|
+
end
|
29
|
+
|
30
|
+
def initialize(connect_options)
|
31
|
+
@connect_options = connect_options
|
32
|
+
end
|
33
|
+
|
34
|
+
def setup
|
35
|
+
unless connection
|
36
|
+
setup_connection
|
37
|
+
setup_schema
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def connect_options; @connect_options end
|
44
|
+
|
45
|
+
def connection; @connection end
|
46
|
+
|
47
|
+
def path; connect_options[:database] end
|
48
|
+
|
49
|
+
def setup_connection
|
50
|
+
@connection = Sequel::Model.db = Sequel.connect(connect_options)
|
51
|
+
end
|
52
|
+
|
53
|
+
def setup_schema
|
54
|
+
connection.create_table? :feeds do
|
55
|
+
primary_key :id
|
56
|
+
String :uri, null: false, unique: true
|
57
|
+
TrueClass :enabled, null: false, default: true
|
58
|
+
String :etag
|
59
|
+
String :last_modified
|
60
|
+
Time :last_processed_at
|
61
|
+
Time :created_at
|
62
|
+
Time :updated_at
|
63
|
+
end
|
64
|
+
|
65
|
+
connection.create_table? :entries do
|
66
|
+
primary_key :id
|
67
|
+
foreign_key :feed_id, :feeds, null: false, index: true,
|
68
|
+
on_delete: :cascade
|
69
|
+
String :uri, null: false, unique: true
|
70
|
+
Time :created_at
|
71
|
+
Time :updated_at
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/feed2email/entry.rb
CHANGED
@@ -1,41 +1,175 @@
|
|
1
|
-
require '
|
1
|
+
require 'mail'
|
2
|
+
require 'sequel'
|
3
|
+
require 'uri'
|
4
|
+
require 'feed2email'
|
5
|
+
require 'feed2email/configurable'
|
6
|
+
require 'feed2email/core_ext'
|
7
|
+
require 'feed2email/database'
|
8
|
+
require 'feed2email/loggable'
|
9
|
+
require 'feed2email/version'
|
2
10
|
|
3
11
|
module Feed2Email
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
12
|
+
Database.setup
|
13
|
+
|
14
|
+
if config['send_method'] == 'smtp'
|
15
|
+
require 'feed2email/smtp_connection'
|
16
|
+
SMTPConnection.setup
|
17
|
+
end
|
18
|
+
|
19
|
+
class Entry < Sequel::Model(:entries)
|
20
|
+
plugin :timestamps
|
21
|
+
|
22
|
+
many_to_one :feed
|
23
|
+
|
24
|
+
class << self
|
25
|
+
attr_accessor :last_email_sent_at
|
26
|
+
end
|
27
|
+
|
28
|
+
include Configurable
|
29
|
+
include Loggable
|
30
|
+
|
31
|
+
attr_accessor :data
|
32
|
+
attr_accessor :feed_data
|
33
|
+
attr_accessor :feed_uri
|
34
|
+
|
35
|
+
def process
|
36
|
+
unless feed.old?
|
37
|
+
logger.debug 'Skipping new feed entry...'
|
38
|
+
save # record as seen
|
39
|
+
return true
|
40
|
+
end
|
41
|
+
|
42
|
+
if old?
|
43
|
+
logger.debug 'Skipping old entry...'
|
44
|
+
return true
|
45
|
+
end
|
46
|
+
|
47
|
+
return send_mail
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def apply_send_delay
|
53
|
+
return if config['send_delay'] == 0 || config['send_method'] == 'file'
|
54
|
+
|
55
|
+
return if last_email_sent_at.nil?
|
56
|
+
|
57
|
+
secs_since_last_email = Time.now - last_email_sent_at
|
58
|
+
secs_to_sleep = config['send_delay'] - secs_since_last_email
|
59
|
+
|
60
|
+
return if secs_to_sleep <= 0
|
61
|
+
|
62
|
+
logger.debug "Sleeping for #{secs_to_sleep} seconds..."
|
63
|
+
sleep(secs_to_sleep)
|
64
|
+
end
|
65
|
+
|
66
|
+
def author; data.author end
|
67
|
+
|
68
|
+
def body_html
|
69
|
+
%{
|
70
|
+
<html>
|
71
|
+
<body>
|
72
|
+
<h1><a href="%{uri}">%{title}</a></h1>
|
73
|
+
%{content}
|
74
|
+
<p>%{published}</p>
|
75
|
+
<p><a href="%{uri}">%{uri}</a></p>
|
76
|
+
<p>--<br>
|
77
|
+
Sent by <a href="https://github.com/agorf/feed2email">feed2email
|
78
|
+
#{VERSION}</a> at #{Time.now}</p>
|
79
|
+
</body>
|
80
|
+
</html>
|
81
|
+
}.gsub(/^\s+/, '') % {
|
82
|
+
content: content,
|
83
|
+
published: published_line,
|
84
|
+
title: title.strip_html,
|
85
|
+
uri: uri.escape_html,
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def body_text
|
90
|
+
body_html.to_markdown
|
9
91
|
end
|
10
92
|
|
11
|
-
def
|
12
|
-
|
93
|
+
def build_mail
|
94
|
+
Mail.new.tap do |m|
|
95
|
+
m.from = %{"#{feed_title}" <#{config['sender']}>}
|
96
|
+
m.to = config['recipient']
|
97
|
+
m.subject = title.strip_html
|
98
|
+
m.html_part = build_mail_part('text/html', body_html)
|
99
|
+
m.text_part = build_mail_part('text/plain', body_text)
|
100
|
+
|
101
|
+
m.delivery_method(*delivery_method_params)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def build_mail_part(content_type, body)
|
106
|
+
part = Mail::Part.new
|
107
|
+
part.content_type = "#{content_type}; charset=UTF-8"
|
108
|
+
part.body = body
|
109
|
+
part
|
13
110
|
end
|
14
111
|
|
15
112
|
def content
|
16
|
-
|
113
|
+
data.content || data.summary
|
17
114
|
end
|
18
115
|
|
19
|
-
def
|
20
|
-
|
116
|
+
def delivery_method_params
|
117
|
+
case config['send_method']
|
118
|
+
when 'file'
|
119
|
+
[:file, location: config['mail_path']]
|
120
|
+
when 'sendmail'
|
121
|
+
[:sendmail, location: config['sendmail_path']]
|
122
|
+
when 'smtp'
|
123
|
+
[:smtp_connection, connection: Feed2Email.smtp_connection]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def feed_title; feed_data.title end
|
128
|
+
|
129
|
+
def last_email_sent_at; Entry.last_email_sent_at end
|
130
|
+
|
131
|
+
def last_email_sent_at=(time)
|
132
|
+
Entry.last_email_sent_at = time
|
133
|
+
end
|
134
|
+
|
135
|
+
def old?
|
136
|
+
feed.entries_dataset.where(uri: uri).any?
|
137
|
+
end
|
138
|
+
|
139
|
+
def published; data.published end
|
140
|
+
|
141
|
+
def published_line
|
142
|
+
return nil unless author || published
|
143
|
+
text = 'Published'
|
144
|
+
text << " by #{author}" if author
|
145
|
+
text << " at #{published}" if published
|
146
|
+
text
|
21
147
|
end
|
22
148
|
|
23
149
|
def send_mail
|
24
|
-
|
150
|
+
apply_send_delay
|
151
|
+
|
152
|
+
logger.debug 'Sending new entry...'
|
153
|
+
|
154
|
+
if build_mail.deliver!
|
155
|
+
self.last_email_sent_at = Time.now
|
156
|
+
save # record as seen
|
157
|
+
return true
|
158
|
+
end
|
25
159
|
end
|
26
160
|
|
27
161
|
def title
|
28
|
-
|
162
|
+
data.title.strip
|
29
163
|
end
|
30
164
|
|
31
165
|
def uri
|
32
166
|
return @uri if @uri
|
33
167
|
|
34
|
-
@uri =
|
168
|
+
@uri = data.url
|
35
169
|
|
36
170
|
# Make relative entry URL absolute by prepending feed URL
|
37
171
|
if @uri && @uri.start_with?('/')
|
38
|
-
@uri =
|
172
|
+
@uri = URI.join(feed_uri[%r{https?://[^/]+}], @uri)
|
39
173
|
end
|
40
174
|
|
41
175
|
@uri
|
data/lib/feed2email/feed.rb
CHANGED
@@ -1,72 +1,102 @@
|
|
1
1
|
require 'feedzirra'
|
2
|
-
require '
|
2
|
+
require 'sequel'
|
3
3
|
require 'stringio'
|
4
4
|
require 'zlib'
|
5
|
+
require 'feed2email'
|
6
|
+
require 'feed2email/config'
|
5
7
|
require 'feed2email/configurable'
|
6
8
|
require 'feed2email/core_ext'
|
9
|
+
require 'feed2email/database'
|
7
10
|
require 'feed2email/entry'
|
8
|
-
require 'feed2email/feed_history'
|
9
11
|
require 'feed2email/loggable'
|
12
|
+
require 'feed2email/open-uri'
|
10
13
|
require 'feed2email/redirection_checker'
|
11
14
|
require 'feed2email/version'
|
12
15
|
|
13
16
|
module Feed2Email
|
14
|
-
|
17
|
+
Database.setup
|
18
|
+
|
19
|
+
class Feed < Sequel::Model(:feeds)
|
20
|
+
plugin :dirty
|
21
|
+
plugin :timestamps
|
22
|
+
|
23
|
+
one_to_many :entries
|
24
|
+
|
25
|
+
subset(:enabled, enabled: true)
|
26
|
+
|
27
|
+
def_dataset_method(:by_smallest_id) { order(:id) }
|
28
|
+
|
15
29
|
include Configurable
|
16
30
|
include Loggable
|
17
31
|
|
18
|
-
|
19
|
-
|
20
|
-
def initialize(meta)
|
21
|
-
@meta = meta
|
22
|
-
end
|
32
|
+
def old?; last_processed_at end
|
23
33
|
|
24
34
|
def process
|
25
35
|
logger.info "Processing feed #{uri} ..."
|
26
36
|
|
27
|
-
return unless
|
37
|
+
return false unless fetch_and_parse
|
38
|
+
|
39
|
+
if processable?
|
40
|
+
# Reset feed caching parameters unless all entries were processed. This
|
41
|
+
# makes sure the feed will be fetched on next processing.
|
42
|
+
unless process_entries
|
43
|
+
self.last_modified = initial_value(:last_modified)
|
44
|
+
self.etag = initial_value(:etag)
|
45
|
+
end
|
28
46
|
|
29
|
-
|
30
|
-
|
31
|
-
|
47
|
+
self.last_processed_at = Time.now
|
48
|
+
|
49
|
+
save(changed: true)
|
32
50
|
else
|
33
|
-
processed = true
|
34
51
|
logger.warn 'Feed does not have entries'
|
35
52
|
end
|
53
|
+
end
|
36
54
|
|
37
|
-
|
55
|
+
def to_s
|
56
|
+
parts = [id.to_s.rjust(3)] # align right 1-999
|
57
|
+
parts << "\e[31mDISABLED\e[0m" unless enabled
|
58
|
+
parts << uri
|
59
|
+
parts.join(' ')
|
38
60
|
end
|
39
61
|
|
40
|
-
|
62
|
+
def toggle
|
63
|
+
update(enabled: !enabled)
|
64
|
+
end
|
41
65
|
|
42
|
-
def
|
43
|
-
|
66
|
+
def uncache
|
67
|
+
!cached? || update(last_modified: nil, etag: nil)
|
68
|
+
end
|
44
69
|
|
45
|
-
|
70
|
+
private
|
46
71
|
|
47
|
-
|
48
|
-
|
72
|
+
def cached?
|
73
|
+
last_modified || etag
|
74
|
+
end
|
49
75
|
|
50
|
-
|
76
|
+
def decode_content(data, content_encoding)
|
77
|
+
case content_encoding
|
78
|
+
when 'gzip'
|
79
|
+
gz = Zlib::GzipReader.new(StringIO.new(data))
|
80
|
+
xml = gz.read
|
81
|
+
gz.close
|
82
|
+
when 'deflate'
|
83
|
+
xml = Zlib::Inflate.inflate(data)
|
84
|
+
else
|
85
|
+
xml = data
|
86
|
+
end
|
51
87
|
|
52
|
-
|
53
|
-
sleep(secs_to_sleep)
|
88
|
+
xml
|
54
89
|
end
|
55
90
|
|
56
|
-
def
|
91
|
+
def fetch
|
57
92
|
logger.debug 'Fetching feed...'
|
58
93
|
|
59
94
|
begin
|
60
|
-
|
61
|
-
|
62
|
-
open(uri, fetch_feed_options(cache_feed)) do |f|
|
63
|
-
if f.meta['last-modified'] || meta.has_key?(:last_modified)
|
64
|
-
meta[:last_modified] = f.meta['last-modified']
|
65
|
-
end
|
95
|
+
open(uri, fetch_options) do |f|
|
96
|
+
handle_redirection if uri != f.base_uri.to_s
|
66
97
|
|
67
|
-
|
68
|
-
|
69
|
-
end
|
98
|
+
self.last_modified = f.meta['last-modified']
|
99
|
+
self.etag = f.meta['etag']
|
70
100
|
|
71
101
|
return decode_content(f.read, f.meta['content-encoding'])
|
72
102
|
end
|
@@ -82,53 +112,48 @@ module Feed2Email
|
|
82
112
|
end
|
83
113
|
end
|
84
114
|
|
85
|
-
def
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
self.uri = checker.location
|
91
|
-
logger.warn 'Got permanently redirected!'
|
92
|
-
logger.warn "Updated feed location to #{checker.location}"
|
93
|
-
|
94
|
-
true
|
95
|
-
end
|
96
|
-
|
97
|
-
def decode_content(data, content_encoding)
|
98
|
-
case content_encoding
|
99
|
-
when 'gzip'
|
100
|
-
gz = Zlib::GzipReader.new(StringIO.new(data))
|
101
|
-
xml = gz.read
|
102
|
-
gz.close
|
103
|
-
when 'deflate'
|
104
|
-
xml = Zlib::Inflate.inflate(data)
|
105
|
-
else
|
106
|
-
xml = data
|
115
|
+
def fetch_and_parse
|
116
|
+
if xml_data = fetch
|
117
|
+
@parsed_feed = parse(xml_data)
|
118
|
+
@parsed_feed && @parsed_feed.respond_to?(:entries)
|
107
119
|
end
|
108
|
-
|
109
|
-
xml
|
110
120
|
end
|
111
121
|
|
112
|
-
def
|
122
|
+
def fetch_options
|
113
123
|
options = {
|
114
124
|
'User-Agent' => "feed2email/#{VERSION}",
|
115
125
|
'Accept-Encoding' => 'gzip, deflate',
|
116
126
|
}
|
117
127
|
|
118
|
-
|
119
|
-
if
|
120
|
-
options['If-Modified-Since'] =
|
128
|
+
unless permanently_redirected?
|
129
|
+
if last_modified
|
130
|
+
options['If-Modified-Since'] = last_modified
|
121
131
|
end
|
122
132
|
|
123
|
-
if
|
124
|
-
options['If-None-Match'] =
|
133
|
+
if etag
|
134
|
+
options['If-None-Match'] = etag
|
125
135
|
end
|
126
136
|
end
|
127
137
|
|
128
138
|
options
|
129
139
|
end
|
130
140
|
|
131
|
-
def
|
141
|
+
def handle_redirection
|
142
|
+
checker = RedirectionChecker.new(uri)
|
143
|
+
|
144
|
+
if checker.permanently_redirected?
|
145
|
+
logger.warn 'Got permanently redirected!'
|
146
|
+
self.uri = checker.location
|
147
|
+
logger.warn "Updated feed location to #{checker.location}"
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def log_exception(error)
|
152
|
+
logger.error "#{error.class}: #{error.message.strip}"
|
153
|
+
error.backtrace.each {|line| logger.debug line }
|
154
|
+
end
|
155
|
+
|
156
|
+
def parse(xml_data)
|
132
157
|
logger.debug 'Parsing feed...'
|
133
158
|
|
134
159
|
begin
|
@@ -140,92 +165,48 @@ module Feed2Email
|
|
140
165
|
end
|
141
166
|
end
|
142
167
|
|
143
|
-
def
|
144
|
-
|
145
|
-
@data = parse_feed(xml_data)
|
146
|
-
end
|
147
|
-
|
148
|
-
@data && @data.respond_to?(:entries)
|
168
|
+
def parsed_entries
|
169
|
+
parsed_feed.entries
|
149
170
|
end
|
150
171
|
|
151
|
-
def
|
152
|
-
meta[:uri]
|
153
|
-
end
|
154
|
-
|
155
|
-
def uri=(uri)
|
156
|
-
history.uri = uri
|
157
|
-
meta[:uri] = uri
|
158
|
-
end
|
172
|
+
def parsed_feed; @parsed_feed end
|
159
173
|
|
160
|
-
def
|
161
|
-
|
162
|
-
Entry.new(entry_data, uri, title)
|
163
|
-
}
|
164
|
-
end
|
165
|
-
|
166
|
-
def max_entries
|
167
|
-
config['max_entries'].to_i
|
174
|
+
def permanently_redirected?
|
175
|
+
column_changed?(:uri)
|
168
176
|
end
|
169
177
|
|
170
178
|
def process_entries
|
171
|
-
|
172
|
-
|
173
|
-
end
|
174
|
-
|
175
|
-
def process_entry(entry)
|
176
|
-
logger.info "Processing entry #{entry.uri} ..."
|
177
|
-
|
178
|
-
unless history.any?
|
179
|
-
logger.debug 'Skipping new feed entry...'
|
180
|
-
history << entry.uri
|
181
|
-
return true
|
182
|
-
end
|
179
|
+
total = processable_entries.size
|
180
|
+
processed = true
|
183
181
|
|
184
|
-
|
185
|
-
logger.
|
186
|
-
|
182
|
+
processable_entries.each_with_index do |parsed_entry, i|
|
183
|
+
logger.info "Processing entry #{i + 1}/#{total} #{parsed_entry.url} ..."
|
184
|
+
processed = false unless process_entry(parsed_entry)
|
187
185
|
end
|
188
186
|
|
189
|
-
|
187
|
+
processed
|
188
|
+
end
|
190
189
|
|
191
|
-
|
190
|
+
def process_entry(parsed_entry)
|
191
|
+
entry = Entry.new(feed_id: id, uri: parsed_entry.url)
|
192
|
+
entry.data = parsed_entry
|
193
|
+
entry.feed_data = parsed_feed
|
194
|
+
entry.feed_uri = uri
|
192
195
|
|
193
196
|
begin
|
194
|
-
|
197
|
+
return entry.process
|
195
198
|
rescue => e
|
196
199
|
log_exception(e)
|
197
200
|
return false
|
198
201
|
end
|
199
|
-
|
200
|
-
if mail_sent
|
201
|
-
self.last_email_sent_at = Time.now
|
202
|
-
history << entry.uri
|
203
|
-
end
|
204
|
-
|
205
|
-
mail_sent
|
206
202
|
end
|
207
203
|
|
208
|
-
def
|
209
|
-
|
204
|
+
def processable?
|
205
|
+
processable_entries.size > 0
|
210
206
|
end
|
211
207
|
|
212
|
-
def
|
213
|
-
|
208
|
+
def processable_entries
|
209
|
+
parsed_entries.first(config['max_entries'])
|
214
210
|
end
|
215
|
-
|
216
|
-
def last_email_sent_at=(time)
|
217
|
-
@last_email_sent_at = time
|
218
|
-
end
|
219
|
-
|
220
|
-
def log_exception(error)
|
221
|
-
logger.error "#{error.class}: #{error.message.strip}"
|
222
|
-
error.backtrace.each {|line| logger.debug line }
|
223
|
-
end
|
224
|
-
|
225
|
-
def title
|
226
|
-
data.title # delegate
|
227
|
-
end
|
228
|
-
|
229
|
-
def data; @data end
|
230
211
|
end
|
231
212
|
end
|