feed2email 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -10
- data/README.md +138 -86
- data/bin/f2e +6 -4
- data/bin/feed2email +6 -4
- data/bin/feed2email-migrate +23 -0
- data/lib/feed2email.rb +6 -9
- data/lib/feed2email/cli.rb +112 -75
- data/lib/feed2email/config.rb +72 -52
- data/lib/feed2email/core_ext.rb +10 -0
- data/lib/feed2email/database.rb +75 -0
- data/lib/feed2email/entry.rb +149 -15
- data/lib/feed2email/feed.rb +111 -130
- data/lib/feed2email/feed_autodiscoverer.rb +8 -10
- data/lib/feed2email/migrate/convert_feeds_migration.rb +29 -0
- data/lib/feed2email/migrate/feeds_import_migration.rb +42 -0
- data/lib/feed2email/migrate/history_import_migration.rb +42 -0
- data/lib/feed2email/migrate/migration.rb +42 -0
- data/lib/feed2email/migrate/split_history_migration.rb +32 -0
- data/lib/feed2email/open-uri.rb +7 -0
- data/lib/feed2email/opml_exporter.rb +109 -0
- data/lib/feed2email/opml_importer.rb +52 -0
- data/lib/feed2email/redirection_checker.rb +2 -2
- data/lib/feed2email/smtp_connection.rb +59 -0
- data/lib/feed2email/version.rb +1 -1
- metadata +55 -30
- data/bin/feed2email-migrate-feedlist +0 -36
- data/bin/feed2email-migrate-history +0 -29
- data/lib/feed2email/feed_history.rb +0 -82
- data/lib/feed2email/feed_list.rb +0 -147
- data/lib/feed2email/lazy_smtp_connection.rb +0 -35
- data/lib/feed2email/mail.rb +0 -84
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'feed2email'
|
3
|
+
|
4
|
+
module Feed2Email
  # Returns the process-wide Database instance, or nil when none has been
  # registered yet.
  def self.database
    @database
  end

  # Registers the process-wide Database instance.
  def self.database=(database)
    @database = database
  end

  # Returns the path of the SQLite database file (feed2email.db under
  # Feed2Email.root) as a String.
  def self.database_path
    root.join('feed2email.db').to_s
  end

  # Owns the Sequel connection and creates the feeds/entries tables.
  class Database
    # Connects to the SQLite database, creates missing tables and registers
    # the instance as Feed2Email.database. Does nothing when a database is
    # already registered.
    #
    # The instance is registered only after the connection and schema have
    # been set up, so a failed attempt does not leave a half-initialised
    # object behind that would turn every later call into a no-op.
    def self.setup
      return if Feed2Email.database

      database = new(
        adapter:       'sqlite',
        database:      Feed2Email.database_path,
        loggers:       [Feed2Email.logger],
        sql_log_level: :debug
      )
      database.setup

      Feed2Email.database = database
    end

    # connect_options - Hash passed to Sequel.connect.
    def initialize(connect_options)
      @connect_options = connect_options
    end

    # Opens the connection and creates the schema, unless this instance is
    # already connected.
    def setup
      return if connection

      setup_connection
      setup_schema
    end

    private

    def connect_options; @connect_options end

    def connection; @connection end

    def path; connect_options[:database] end

    # Connects and makes the connection the default one for Sequel models.
    def setup_connection
      @connection = Sequel::Model.db = Sequel.connect(connect_options)
    end

    # Creates the feeds and entries tables when they do not exist yet.
    def setup_schema
      connection.create_table? :feeds do
        primary_key :id
        String :uri, null: false, unique: true
        TrueClass :enabled, null: false, default: true
        String :etag
        String :last_modified
        Time :last_processed_at
        Time :created_at
        Time :updated_at
      end

      connection.create_table? :entries do
        primary_key :id
        # Entries are deleted together with the feed they belong to.
        foreign_key :feed_id, :feeds, null: false, index: true,
                                      on_delete: :cascade
        String :uri, null: false, unique: true
        Time :created_at
        Time :updated_at
      end
    end
  end
end
|
data/lib/feed2email/entry.rb
CHANGED
@@ -1,41 +1,175 @@
|
|
1
|
-
require '
|
1
|
+
require 'mail'
|
2
|
+
require 'sequel'
|
3
|
+
require 'uri'
|
4
|
+
require 'feed2email'
|
5
|
+
require 'feed2email/configurable'
|
6
|
+
require 'feed2email/core_ext'
|
7
|
+
require 'feed2email/database'
|
8
|
+
require 'feed2email/loggable'
|
9
|
+
require 'feed2email/version'
|
2
10
|
|
3
11
|
module Feed2Email
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
12
|
+
# Set up the database before the Entry model below is defined.
Database.setup
|
13
|
+
|
14
|
+
# The SMTP connection code is required and set up only when the configured
# send method is 'smtp'.
if config['send_method'] == 'smtp'
|
15
|
+
require 'feed2email/smtp_connection'
|
16
|
+
SMTPConnection.setup
|
17
|
+
end
|
18
|
+
|
19
|
+
class Entry < Sequel::Model(:entries)
|
20
|
+
plugin :timestamps
|
21
|
+
|
22
|
+
many_to_one :feed
|
23
|
+
|
24
|
+
class << self
|
25
|
+
attr_accessor :last_email_sent_at
|
26
|
+
end
|
27
|
+
|
28
|
+
include Configurable
|
29
|
+
include Loggable
|
30
|
+
|
31
|
+
attr_accessor :data
|
32
|
+
attr_accessor :feed_data
|
33
|
+
attr_accessor :feed_uri
|
34
|
+
|
35
|
+
# Processes the entry.
#
# - Feed never processed before: the entry is only saved (recorded as seen).
# - Entry already recorded for the feed: nothing is done.
# - Otherwise the entry is mailed.
#
# Returns true in the first two cases, else the result of send_mail.
def process
  if !feed.old?
    logger.debug 'Skipping new feed entry...'
    save # record as seen
    true
  elsif old?
    logger.debug 'Skipping old entry...'
    true
  else
    send_mail
  end
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# Sleeps for whatever is left of the configured 'send_delay' since the last
# email was sent. Does nothing when the delay is 0, when the send method is
# 'file', or when no email has been sent yet.
def apply_send_delay
  delay = config['send_delay']
  return if delay == 0 || config['send_method'] == 'file'

  previous = last_email_sent_at
  return if previous.nil?

  elapsed = Time.now - previous
  remaining = delay - elapsed
  return if remaining <= 0

  logger.debug "Sleeping for #{remaining} seconds..."
  sleep(remaining)
end
|
65
|
+
|
66
|
+
# Author taken from the parsed entry data.
def author
  data.author
end
|
67
|
+
|
68
|
+
# Builds the HTML body of the email: linked title, entry content, the
# "Published ..." line, the entry link and a feed2email footer.
def body_html
  # Leading whitespace of every template line is removed before the
  # placeholders are filled in, so the substituted values are not altered.
  template = %{
    <html>
    <body>
    <h1><a href="%{uri}">%{title}</a></h1>
    %{content}
    <p>%{published}</p>
    <p><a href="%{uri}">%{uri}</a></p>
    <p>--<br>
    Sent by <a href="https://github.com/agorf/feed2email">feed2email
    #{VERSION}</a> at #{Time.now}</p>
    </body>
    </html>
  }.gsub(/^\s+/, '')

  values = {
    content:   content,
    published: published_line,
    title:     title.strip_html,
    uri:       uri.escape_html,
  }

  template % values
end
|
88
|
+
|
89
|
+
# Plain-text body of the email: the HTML body converted with to_markdown.
def body_text
  html = body_html
  html.to_markdown
end
|
10
92
|
|
11
|
-
def
|
12
|
-
|
93
|
+
# Builds the Mail message for this entry (sender named after the feed title,
# HTML and plain-text parts) and sets its delivery method from the
# configuration.
def build_mail
  mail = Mail.new

  mail.from      = %{"#{feed_title}" <#{config['sender']}>}
  mail.to        = config['recipient']
  mail.subject   = title.strip_html
  mail.html_part = build_mail_part('text/html', body_html)
  mail.text_part = build_mail_part('text/plain', body_text)

  mail.delivery_method(*delivery_method_params)

  mail
end
|
104
|
+
|
105
|
+
# Returns a Mail::Part with the given body and content type; the charset is
# always declared as UTF-8.
def build_mail_part(content_type, body)
  Mail::Part.new.tap do |mail_part|
    mail_part.content_type = "#{content_type}; charset=UTF-8"
    mail_part.body = body
  end
end
|
14
111
|
|
15
112
|
# Entry content, falling back to the entry summary when there is none.
def content
  full = data.content
  full ? full : data.summary
end
|
18
115
|
|
19
|
-
def
|
20
|
-
|
116
|
+
# Arguments for Mail#delivery_method matching the configured 'send_method'
# ('file', 'sendmail' or 'smtp'). Returns nil for any other value.
def delivery_method_params
  send_method = config['send_method']

  if send_method == 'file'
    [:file, location: config['mail_path']]
  elsif send_method == 'sendmail'
    [:sendmail, location: config['sendmail_path']]
  elsif send_method == 'smtp'
    [:smtp_connection, connection: Feed2Email.smtp_connection]
  end
end
|
126
|
+
|
127
|
+
# Title of the feed, taken from the parsed feed data.
def feed_title
  feed_data.title
end
|
128
|
+
|
129
|
+
# Time the last email was sent; stored on the Entry class, so it is shared
# by all entries.
def last_email_sent_at
  Entry.last_email_sent_at
end
|
130
|
+
|
131
|
+
# Stores the time the last email was sent on the Entry class.
def last_email_sent_at=(time); Entry.last_email_sent_at = time end
|
134
|
+
|
135
|
+
# Whether the feed already has a recorded entry with this entry's URI.
def old?
  recorded = feed.entries_dataset.where(uri: uri)
  recorded.any?
end
|
138
|
+
|
139
|
+
# Publication time taken from the parsed entry data.
def published
  data.published
end
|
140
|
+
|
141
|
+
# Builds the "Published by <author> at <time>" line shown in the email body.
#
# Either part is left out when its value is missing. Returns nil when both
# are missing.
def published_line
  return nil unless author || published

  # Collect the parts and join them instead of appending to a string
  # literal, which raises when string literals are frozen.
  parts = ['Published']
  parts << "by #{author}" if author
  parts << "at #{published}" if published
  parts.join(' ')
end
|
22
148
|
|
23
149
|
# Mails the entry after applying the send delay. On delivery the send time
# is remembered, the entry is saved (recorded as seen) and true is returned;
# otherwise nil is returned.
def send_mail
  apply_send_delay

  logger.debug 'Sending new entry...'

  return unless build_mail.deliver!

  self.last_email_sent_at = Time.now
  save # record as seen
  true
end
|
26
160
|
|
27
161
|
# Entry title with surrounding whitespace removed.
#
# Returns an empty String when the parsed entry has no title, instead of
# raising NoMethodError on nil.
def title
  data.title.to_s.strip
end
|
30
164
|
|
31
165
|
def uri
|
32
166
|
return @uri if @uri
|
33
167
|
|
34
|
-
@uri =
|
168
|
+
@uri = data.url
|
35
169
|
|
36
170
|
# Make relative entry URL absolute by prepending feed URL
|
37
171
|
if @uri && @uri.start_with?('/')
|
38
|
-
@uri =
|
172
|
+
@uri = URI.join(feed_uri[%r{https?://[^/]+}], @uri)
|
39
173
|
end
|
40
174
|
|
41
175
|
@uri
|
data/lib/feed2email/feed.rb
CHANGED
@@ -1,72 +1,102 @@
|
|
1
1
|
require 'feedzirra'
|
2
|
-
require '
|
2
|
+
require 'sequel'
|
3
3
|
require 'stringio'
|
4
4
|
require 'zlib'
|
5
|
+
require 'feed2email'
|
6
|
+
require 'feed2email/config'
|
5
7
|
require 'feed2email/configurable'
|
6
8
|
require 'feed2email/core_ext'
|
9
|
+
require 'feed2email/database'
|
7
10
|
require 'feed2email/entry'
|
8
|
-
require 'feed2email/feed_history'
|
9
11
|
require 'feed2email/loggable'
|
12
|
+
require 'feed2email/open-uri'
|
10
13
|
require 'feed2email/redirection_checker'
|
11
14
|
require 'feed2email/version'
|
12
15
|
|
13
16
|
module Feed2Email
|
14
|
-
|
17
|
+
Database.setup
|
18
|
+
|
19
|
+
class Feed < Sequel::Model(:feeds)
|
20
|
+
plugin :dirty
|
21
|
+
plugin :timestamps
|
22
|
+
|
23
|
+
one_to_many :entries
|
24
|
+
|
25
|
+
subset(:enabled, enabled: true)
|
26
|
+
|
27
|
+
def_dataset_method(:by_smallest_id) { order(:id) }
|
28
|
+
|
15
29
|
include Configurable
|
16
30
|
include Loggable
|
17
31
|
|
18
|
-
|
19
|
-
|
20
|
-
def initialize(meta)
|
21
|
-
@meta = meta
|
22
|
-
end
|
32
|
+
# Truthy when the feed has been processed before: returns the
# last_processed_at value itself.
def old?
  last_processed_at
end
|
23
33
|
|
24
34
|
# Fetches, parses and processes the feed.
#
# Returns false when the feed could not be fetched and parsed. When the feed
# has no processable entries a warning is logged. Otherwise the entries are
# processed, last_processed_at is updated and the feed is saved.
def process
  logger.info "Processing feed #{uri} ..."

  return false unless fetch_and_parse

  return logger.warn('Feed does not have entries') unless processable?

  # Reset feed caching parameters unless all entries were processed. This
  # makes sure the feed will be fetched on next processing.
  all_processed = process_entries
  unless all_processed
    self.last_modified = initial_value(:last_modified)
    self.etag = initial_value(:etag)
  end

  self.last_processed_at = Time.now

  save(changed: true)
end
|
36
54
|
|
37
|
-
|
55
|
+
# One-line description of the feed: right-aligned id, a red DISABLED marker
# when the feed is not enabled, and the feed URI.
def to_s
  marker = enabled ? [] : ["\e[31mDISABLED\e[0m"]
  padded_id = id.to_s.rjust(3) # align right 1-999

  [padded_id, *marker, uri].join(' ')
end
|
39
61
|
|
40
|
-
|
62
|
+
# Flips the enabled flag through update.
def toggle
  flipped = !enabled
  update(enabled: flipped)
end
|
41
65
|
|
42
|
-
def
|
43
|
-
|
66
|
+
# Clears the stored last_modified and etag values. Returns true without
# touching the record when neither is set.
def uncache
  return true unless cached?

  update(last_modified: nil, etag: nil)
end
|
44
69
|
|
45
|
-
|
70
|
+
private
|
46
71
|
|
47
|
-
|
48
|
-
|
72
|
+
# Truthy when a last_modified or etag value is stored for the feed.
def cached?
  modified = last_modified
  modified ? modified : etag
end
|
49
75
|
|
50
|
-
|
76
|
+
# Decodes a fetched response body according to its Content-Encoding.
#
# data             - String with the raw response body.
# content_encoding - 'gzip', 'deflate', or anything else (including nil) for
#                    a body that is returned as is.
#
# Returns the decoded String. Zlib errors raised for malformed data are not
# rescued here.
def decode_content(data, content_encoding)
  case content_encoding
  when 'gzip'
    # The block form closes the reader even when reading raises.
    Zlib::GzipReader.wrap(StringIO.new(data), &:read)
  when 'deflate'
    Zlib::Inflate.inflate(data)
  else
    data
  end
end
|
55
90
|
|
56
|
-
def
|
91
|
+
def fetch
|
57
92
|
logger.debug 'Fetching feed...'
|
58
93
|
|
59
94
|
begin
|
60
|
-
|
61
|
-
|
62
|
-
open(uri, fetch_feed_options(cache_feed)) do |f|
|
63
|
-
if f.meta['last-modified'] || meta.has_key?(:last_modified)
|
64
|
-
meta[:last_modified] = f.meta['last-modified']
|
65
|
-
end
|
95
|
+
open(uri, fetch_options) do |f|
|
96
|
+
handle_redirection if uri != f.base_uri.to_s
|
66
97
|
|
67
|
-
|
68
|
-
|
69
|
-
end
|
98
|
+
self.last_modified = f.meta['last-modified']
|
99
|
+
self.etag = f.meta['etag']
|
70
100
|
|
71
101
|
return decode_content(f.read, f.meta['content-encoding'])
|
72
102
|
end
|
@@ -82,53 +112,48 @@ module Feed2Email
|
|
82
112
|
end
|
83
113
|
end
|
84
114
|
|
85
|
-
def
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
self.uri = checker.location
|
91
|
-
logger.warn 'Got permanently redirected!'
|
92
|
-
logger.warn "Updated feed location to #{checker.location}"
|
93
|
-
|
94
|
-
true
|
95
|
-
end
|
96
|
-
|
97
|
-
def decode_content(data, content_encoding)
|
98
|
-
case content_encoding
|
99
|
-
when 'gzip'
|
100
|
-
gz = Zlib::GzipReader.new(StringIO.new(data))
|
101
|
-
xml = gz.read
|
102
|
-
gz.close
|
103
|
-
when 'deflate'
|
104
|
-
xml = Zlib::Inflate.inflate(data)
|
105
|
-
else
|
106
|
-
xml = data
|
115
|
+
# Fetches the feed and parses it into @parsed_feed.
#
# Returns a truthy value only when data was fetched and the parsed result
# responds to #entries; returns nil when nothing was fetched.
def fetch_and_parse
  xml_data = fetch
  return unless xml_data

  @parsed_feed = parse(xml_data)
  @parsed_feed && @parsed_feed.respond_to?(:entries)
end
|
111
121
|
|
112
|
-
def
|
122
|
+
# Request headers used to fetch the feed. The conditional headers
# (If-Modified-Since / If-None-Match) are added from the stored values,
# except when the feed got permanently redirected.
def fetch_options
  headers = {
    'User-Agent' => "feed2email/#{VERSION}",
    'Accept-Encoding' => 'gzip, deflate',
  }

  return headers if permanently_redirected?

  headers['If-Modified-Since'] = last_modified if last_modified
  headers['If-None-Match'] = etag if etag

  headers
end
|
130
140
|
|
131
|
-
def
|
141
|
+
# Checks the feed URI with RedirectionChecker; when the redirection is
# permanent, the feed URI is replaced with the new location and both facts
# are logged as warnings.
def handle_redirection
  checker = RedirectionChecker.new(uri)
  return unless checker.permanently_redirected?

  logger.warn 'Got permanently redirected!'
  self.uri = checker.location
  logger.warn "Updated feed location to #{checker.location}"
end
|
150
|
+
|
151
|
+
# Logs the class and message of the error at error level and every
# backtrace line at debug level.
def log_exception(error)
  summary = "#{error.class}: #{error.message.strip}"
  logger.error summary

  error.backtrace.each do |frame|
    logger.debug frame
  end
end
|
155
|
+
|
156
|
+
def parse(xml_data)
|
132
157
|
logger.debug 'Parsing feed...'
|
133
158
|
|
134
159
|
begin
|
@@ -140,92 +165,48 @@ module Feed2Email
|
|
140
165
|
end
|
141
166
|
end
|
142
167
|
|
143
|
-
def
|
144
|
-
|
145
|
-
@data = parse_feed(xml_data)
|
146
|
-
end
|
147
|
-
|
148
|
-
@data && @data.respond_to?(:entries)
|
168
|
+
# Entries of the parsed feed.
def parsed_entries; parsed_feed.entries end
|
150
171
|
|
151
|
-
def
|
152
|
-
meta[:uri]
|
153
|
-
end
|
154
|
-
|
155
|
-
def uri=(uri)
|
156
|
-
history.uri = uri
|
157
|
-
meta[:uri] = uri
|
158
|
-
end
|
172
|
+
# Parsed feed stored by fetch_and_parse (nil before that).
def parsed_feed
  @parsed_feed
end
|
159
173
|
|
160
|
-
def
|
161
|
-
|
162
|
-
Entry.new(entry_data, uri, title)
|
163
|
-
}
|
164
|
-
end
|
165
|
-
|
166
|
-
def max_entries
|
167
|
-
config['max_entries'].to_i
|
174
|
+
# Whether the uri column has an unsaved change; handle_redirection assigns
# a new uri on a permanent redirect.
def permanently_redirected?; column_changed?(:uri) end
|
169
177
|
|
170
178
|
# Processes every processable entry in order, logging its position.
#
# Returns true when all entries were processed successfully (also when
# there are none), false as soon as any of them failed. Every entry is
# attempted regardless of earlier failures.
def process_entries
  total = processable_entries.size
  all_processed = true

  processable_entries.each.with_index(1) do |parsed_entry, position|
    logger.info "Processing entry #{position}/#{total} #{parsed_entry.url} ..."
    all_processed = false unless process_entry(parsed_entry)
  end

  all_processed
end
|
190
189
|
|
191
|
-
|
190
|
+
# Wraps a parsed entry in an Entry record for this feed and processes it.
#
# Returns the result of Entry#process, or false when processing raised (the
# exception is logged, not propagated).
def process_entry(parsed_entry)
  entry = Entry.new(feed_id: id, uri: parsed_entry.url).tap do |record|
    record.data = parsed_entry
    record.feed_data = parsed_feed
    record.feed_uri = uri
  end

  # Only the processing itself is guarded; building the record is not.
  begin
    entry.process
  rescue => error
    log_exception(error)
    false
  end
end
|
207
203
|
|
208
|
-
def
|
209
|
-
|
204
|
+
# Whether there is at least one entry to process.
def processable?
  !processable_entries.empty?
end
|
211
207
|
|
212
|
-
def
|
213
|
-
|
208
|
+
# The first config['max_entries'] parsed entries.
def processable_entries
  entries = parsed_entries
  entries.first(config['max_entries'])
end
|
215
|
-
|
216
|
-
def last_email_sent_at=(time)
|
217
|
-
@last_email_sent_at = time
|
218
|
-
end
|
219
|
-
|
220
|
-
def log_exception(error)
|
221
|
-
logger.error "#{error.class}: #{error.message.strip}"
|
222
|
-
error.backtrace.each {|line| logger.debug line }
|
223
|
-
end
|
224
|
-
|
225
|
-
def title
|
226
|
-
data.title # delegate
|
227
|
-
end
|
228
|
-
|
229
|
-
def data; @data end
|
230
211
|
end
|
231
212
|
end
|