feed2email 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,75 @@
1
+ require 'sequel'
2
+ require 'feed2email'
3
+
4
# Holds the process-wide database handle for feed2email and defines the
# Database class that opens the connection and creates the schema.
module Feed2Email
  class << self
    # The registered Database instance (nil until one is assigned).
    attr_accessor :database
  end

  # Path of the database file: 'feed2email.db' joined onto +root+, as a String.
  def self.database_path
    root.join('feed2email.db').to_s
  end

  # Wraps the connect options, the connection and the table definitions.
  class Database
    # Builds a Database with the SQLite connect options, registers it as
    # Feed2Email.database and sets it up. Returns nil without doing anything
    # when a database is already registered.
    def self.setup
      return if Feed2Email.database

      options = {
        adapter: 'sqlite',
        database: Feed2Email.database_path,
        loggers: [Feed2Email.logger],
        sql_log_level: :debug
      }

      Feed2Email.database = new(options)
      Feed2Email.database.setup
    end

    # connect_options - Hash handed to Sequel.connect when the connection is
    #                   set up.
    def initialize(connect_options)
      @connect_options = connect_options
    end

    # Opens the connection and creates the tables, unless a connection
    # already exists (in which case nil is returned).
    def setup
      return if connection

      setup_connection
      setup_schema
    end

    private

    def connect_options
      @connect_options
    end

    def connection
      @connection
    end

    def path
      connect_options[:database]
    end

    # Connects through Sequel and makes the connection the default database
    # of Sequel::Model.
    def setup_connection
      @connection = Sequel::Model.db = Sequel.connect(connect_options)
    end

    # Creates the feeds table, then the entries table (create_table? is used,
    # so existing tables are left alone).
    def setup_schema
      create_feeds_table
      create_entries_table
    end

    def create_feeds_table
      connection.create_table? :feeds do
        primary_key :id
        String :uri, null: false, unique: true
        TrueClass :enabled, null: false, default: true
        String :etag
        String :last_modified
        Time :last_processed_at
        Time :created_at
        Time :updated_at
      end
    end

    def create_entries_table
      connection.create_table? :entries do
        primary_key :id
        foreign_key :feed_id, :feeds, null: false, index: true,
                                      on_delete: :cascade
        String :uri, null: false, unique: true
        Time :created_at
        Time :updated_at
      end
    end
  end
end
@@ -1,41 +1,175 @@
1
- require 'feed2email/mail'
1
+ require 'mail'
2
+ require 'sequel'
3
+ require 'uri'
4
+ require 'feed2email'
5
+ require 'feed2email/configurable'
6
+ require 'feed2email/core_ext'
7
+ require 'feed2email/database'
8
+ require 'feed2email/loggable'
9
+ require 'feed2email/version'
2
10
 
3
11
  module Feed2Email
4
- class Entry
5
- def initialize(data, feed_uri, feed_title)
6
- @data = data
7
- @feed_uri = feed_uri
8
- @feed_title = feed_title
12
+ Database.setup
13
+
14
+ if config['send_method'] == 'smtp'
15
+ require 'feed2email/smtp_connection'
16
+ SMTPConnection.setup
17
+ end
18
+
19
+ class Entry < Sequel::Model(:entries)
20
+ plugin :timestamps
21
+
22
+ many_to_one :feed
23
+
24
+ class << self
25
+ attr_accessor :last_email_sent_at
26
+ end
27
+
28
+ include Configurable
29
+ include Loggable
30
+
31
+ attr_accessor :data
32
+ attr_accessor :feed_data
33
+ attr_accessor :feed_uri
34
+
35
# Processes this entry and reports the outcome.
#
# * feed.old? is falsy: the entry is saved (recorded as seen) without
#   sending anything, and true is returned.
# * the entry itself is old: nothing is done and true is returned.
# * otherwise: the result of send_mail is returned.
def process
  if feed.old?
    return send_mail unless old?

    logger.debug 'Skipping old entry...'
  else
    logger.debug 'Skipping new feed entry...'
    save # record as seen
  end

  true
end
49
+
50
+ private
51
+
52
# Sleeps for whatever remains of config['send_delay'] seconds since the last
# email was sent.
#
# Returns early (nil) when the delay is 0, when the send method is 'file',
# when no email has been sent yet, or when the delay has already elapsed.
def apply_send_delay
  delay = config['send_delay']
  return if delay == 0 || config['send_method'] == 'file'

  sent_at = last_email_sent_at
  return if sent_at.nil?

  secs_to_sleep = delay - (Time.now - sent_at)
  return if secs_to_sleep <= 0

  logger.debug "Sleeping for #{secs_to_sleep} seconds..."
  sleep(secs_to_sleep)
end
65
+
66
# Author of the entry, delegated to the parsed entry data.
def author
  data.author
end
67
+
68
# Builds the HTML body of the email for this entry.
#
# The template is stripped of leading whitespace on every line before the
# named references (%{uri}, %{title}, %{content}, %{published}) are filled
# in, so the inserted values themselves are never altered by the stripping.
def body_html
  template = %{
    <html>
    <body>
    <h1><a href="%{uri}">%{title}</a></h1>
    %{content}
    <p>%{published}</p>
    <p><a href="%{uri}">%{uri}</a></p>
    <p>--<br>
    Sent by <a href="https://github.com/agorf/feed2email">feed2email
    #{VERSION}</a> at #{Time.now}</p>
    </body>
    </html>
  }.gsub(/^\s+/, '')

  values = {
    content: content,
    published: published_line,
    title: title.strip_html,
    uri: uri.escape_html,
  }

  template % values
end
88
+
89
# Plain-text body: the HTML body run through to_markdown.
def body_text
  html = body_html
  html.to_markdown
end
10
92
 
11
- def author
12
- @data.author
93
# Assembles the email for this entry: sender (shown under the feed title),
# recipient, subject, an HTML part and a plain-text part, and the delivery
# method chosen by delivery_method_params. Returns the mail object.
def build_mail
  mail = Mail.new

  mail.from = "\"#{feed_title}\" <#{config['sender']}>"
  mail.to = config['recipient']
  mail.subject = title.strip_html
  mail.html_part = build_mail_part('text/html', body_html)
  mail.text_part = build_mail_part('text/plain', body_text)

  mail.delivery_method(*delivery_method_params)

  mail
end
104
+
105
# Creates a Mail::Part with the given body.
#
# content_type - MIME type String; "; charset=UTF-8" is appended to it.
# body         - content of the part.
def build_mail_part(content_type, body)
  Mail::Part.new.tap do |part|
    part.content_type = "#{content_type}; charset=UTF-8"
    part.body = body
  end
end
14
111
 
15
112
  def content
16
- @data.content || @data.summary
113
+ data.content || data.summary
17
114
  end
18
115
 
19
- def published
20
- @data.published
116
# Arguments for the mail's delivery_method call, picked by
# config['send_method'] ('file', 'sendmail' or 'smtp'). Returns nil for any
# other value.
def delivery_method_params
  send_method = config['send_method']

  if send_method == 'file'
    [:file, { location: config['mail_path'] }]
  elsif send_method == 'sendmail'
    [:sendmail, { location: config['sendmail_path'] }]
  elsif send_method == 'smtp'
    [:smtp_connection, { connection: Feed2Email.smtp_connection }]
  end
end
126
+
127
# Title of the feed, delegated to the parsed feed data.
def feed_title
  feed_data.title
end
128
+
129
# Reads the timestamp kept on the Entry class (shared by all entries).
def last_email_sent_at
  Entry.last_email_sent_at
end
130
+
131
# Stores the timestamp on the Entry class (shared by all entries).
#
# time - value to record as the moment the last email was sent.
def last_email_sent_at=(time)
  Entry.last_email_sent_at = time
end
134
+
135
# Whether the feed's entries dataset already holds an entry with this URI.
def old?
  matching = feed.entries_dataset.where(uri: uri)
  matching.any?
end
138
+
139
# Publication time of the entry, delegated to the parsed entry data.
def published
  data.published
end
140
+
141
# Builds the "Published by <author> at <time>" line shown in the email body.
#
# Either half is left out when its value is missing; returns nil when both
# the author and the publication time are missing.
#
# The line is assembled from parts instead of appending to a string literal,
# so it keeps working when string literals are frozen.
def published_line
  by = author
  at = published
  return nil unless by || at

  parts = ['Published']
  parts << "by #{by}" if by
  parts << "at #{at}" if at
  parts.join(' ')
end
22
148
 
23
149
  def send_mail
24
- Mail.new(self, @feed_title).send
150
+ apply_send_delay
151
+
152
+ logger.debug 'Sending new entry...'
153
+
154
+ if build_mail.deliver!
155
+ self.last_email_sent_at = Time.now
156
+ save # record as seen
157
+ return true
158
+ end
25
159
  end
26
160
 
27
161
  def title
28
- @data.title.strip
162
+ data.title.strip
29
163
  end
30
164
 
31
165
  def uri
32
166
  return @uri if @uri
33
167
 
34
- @uri = @data.url
168
+ @uri = data.url
35
169
 
36
170
  # Make relative entry URL absolute by prepending feed URL
37
171
  if @uri && @uri.start_with?('/')
38
- @uri = @feed_uri[%r{https?://[^/]+}] + @uri
172
+ @uri = URI.join(feed_uri[%r{https?://[^/]+}], @uri)
39
173
  end
40
174
 
41
175
  @uri
@@ -1,72 +1,102 @@
1
1
  require 'feedzirra'
2
- require 'open-uri'
2
+ require 'sequel'
3
3
  require 'stringio'
4
4
  require 'zlib'
5
+ require 'feed2email'
6
+ require 'feed2email/config'
5
7
  require 'feed2email/configurable'
6
8
  require 'feed2email/core_ext'
9
+ require 'feed2email/database'
7
10
  require 'feed2email/entry'
8
- require 'feed2email/feed_history'
9
11
  require 'feed2email/loggable'
12
+ require 'feed2email/open-uri'
10
13
  require 'feed2email/redirection_checker'
11
14
  require 'feed2email/version'
12
15
 
13
16
  module Feed2Email
14
- class Feed
17
+ Database.setup
18
+
19
+ class Feed < Sequel::Model(:feeds)
20
+ plugin :dirty
21
+ plugin :timestamps
22
+
23
+ one_to_many :entries
24
+
25
+ subset(:enabled, enabled: true)
26
+
27
+ def_dataset_method(:by_smallest_id) { order(:id) }
28
+
15
29
  include Configurable
16
30
  include Loggable
17
31
 
18
- attr_reader :meta
19
-
20
- def initialize(meta)
21
- @meta = meta
22
- end
32
# Truthy once the feed has a last_processed_at value; returns that value.
def old?
  last_processed_at
end
23
33
 
24
34
  def process
25
35
  logger.info "Processing feed #{uri} ..."
26
36
 
27
- return unless fetch_and_parse_feed
37
+ return false unless fetch_and_parse
38
+
39
+ if processable?
40
+ # Reset feed caching parameters unless all entries were processed. This
41
+ # makes sure the feed will be fetched on next processing.
42
+ unless process_entries
43
+ self.last_modified = initial_value(:last_modified)
44
+ self.etag = initial_value(:etag)
45
+ end
28
46
 
29
- if entries.any?
30
- processed = process_entries
31
- history.sync
47
+ self.last_processed_at = Time.now
48
+
49
+ save(changed: true)
32
50
  else
33
- processed = true
34
51
  logger.warn 'Feed does not have entries'
35
52
  end
53
+ end
36
54
 
37
- processed
55
# One-line description of the feed: the id right-aligned to 3 characters, a
# red "DISABLED" marker when the feed is not enabled, then the URI.
def to_s
  padded_id = id.to_s.rjust(3) # align right 1-999
  marker = enabled ? '' : " \e[31mDISABLED\e[0m"

  "#{padded_id}#{marker} #{uri}"
end
39
61
 
40
- private
62
# Flips the enabled flag through update and returns update's result.
def toggle
  flipped = !enabled
  update(enabled: flipped)
end
41
65
 
42
- def apply_send_delay
43
- return if config['send_delay'] == 0
66
# Clears the caching parameters (last_modified and etag) through update.
# Returns true straight away when the feed has none; otherwise returns the
# result of update.
def uncache
  return true unless cached?

  update(last_modified: nil, etag: nil)
end
44
69
 
45
- return if last_email_sent_at.nil?
70
+ private
46
71
 
47
- secs_since_last_email = Time.now - last_email_sent_at
48
- secs_to_sleep = config['send_delay'] - secs_since_last_email
72
# Truthy when the feed has a last_modified or an etag value; returns the
# first of the two that is set.
def cached?
  modified = last_modified
  modified || etag
end
49
75
 
50
- return if secs_to_sleep <= 0
76
# Decodes a fetched response body according to its Content-Encoding.
#
# data             - String holding the raw response body.
# content_encoding - value of the Content-Encoding header; 'gzip' and
#                    'deflate' are decoded, anything else (including nil)
#                    returns data unchanged.
#
# Returns the decoded String. Zlib errors raised for malformed data
# propagate to the caller.
def decode_content(data, content_encoding)
  case content_encoding
  when 'gzip'
    gz = Zlib::GzipReader.new(StringIO.new(data))

    begin
      gz.read
    ensure
      gz.close # release the reader even when reading raises
    end
  when 'deflate'
    Zlib::Inflate.inflate(data)
  else
    data
  end
end
55
90
 
56
- def fetch_feed
91
+ def fetch
57
92
  logger.debug 'Fetching feed...'
58
93
 
59
94
  begin
60
- cache_feed = !permanently_redirected?
61
-
62
- open(uri, fetch_feed_options(cache_feed)) do |f|
63
- if f.meta['last-modified'] || meta.has_key?(:last_modified)
64
- meta[:last_modified] = f.meta['last-modified']
65
- end
95
+ open(uri, fetch_options) do |f|
96
+ handle_redirection if uri != f.base_uri.to_s
66
97
 
67
- if f.meta['etag'] || meta.has_key?(:etag)
68
- meta[:etag] = f.meta['etag']
69
- end
98
+ self.last_modified = f.meta['last-modified']
99
+ self.etag = f.meta['etag']
70
100
 
71
101
  return decode_content(f.read, f.meta['content-encoding'])
72
102
  end
@@ -82,53 +112,48 @@ module Feed2Email
82
112
  end
83
113
  end
84
114
 
85
- def permanently_redirected?
86
- checker = RedirectionChecker.new(uri)
87
-
88
- return false unless checker.permanently_redirected?
89
-
90
- self.uri = checker.location
91
- logger.warn 'Got permanently redirected!'
92
- logger.warn "Updated feed location to #{checker.location}"
93
-
94
- true
95
- end
96
-
97
- def decode_content(data, content_encoding)
98
- case content_encoding
99
- when 'gzip'
100
- gz = Zlib::GzipReader.new(StringIO.new(data))
101
- xml = gz.read
102
- gz.close
103
- when 'deflate'
104
- xml = Zlib::Inflate.inflate(data)
105
- else
106
- xml = data
115
# Fetches the feed and parses it into @parsed_feed.
#
# Returns nil when nothing was fetched; otherwise a truthy value only when
# the parsed feed exists and responds to entries.
def fetch_and_parse
  xml_data = fetch
  return unless xml_data

  @parsed_feed = parse(xml_data)
  @parsed_feed && @parsed_feed.respond_to?(:entries)
end
111
121
 
112
- def fetch_feed_options(cache_feed)
122
# Request headers used when fetching the feed.
#
# Always carries User-Agent and Accept-Encoding. The conditional headers
# (If-Modified-Since, If-None-Match) are added from last_modified and etag
# when those are set, unless the feed got permanently redirected.
def fetch_options
  headers = {
    'User-Agent' => "feed2email/#{VERSION}",
    'Accept-Encoding' => 'gzip, deflate',
  }

  return headers if permanently_redirected?

  headers['If-Modified-Since'] = last_modified if last_modified
  headers['If-None-Match'] = etag if etag

  headers
end
130
140
 
131
- def parse_feed(xml_data)
141
# Asks a RedirectionChecker about the current URI and, when it reports a
# permanent redirection, switches the feed's uri to the new location and
# logs both facts as warnings. Does nothing otherwise.
def handle_redirection
  checker = RedirectionChecker.new(uri)
  return unless checker.permanently_redirected?

  logger.warn 'Got permanently redirected!'
  self.uri = checker.location
  logger.warn "Updated feed location to #{checker.location}"
end
150
+
151
# Logs an exception: class and stripped message at error level, then each
# backtrace line at debug level.
#
# error - the exception to log. Its backtrace is nil when the exception was
#         never raised; that case is treated as an empty backtrace so the
#         logging itself cannot fail.
def log_exception(error)
  logger.error "#{error.class}: #{error.message.strip}"
  (error.backtrace || []).each { |line| logger.debug line }
end
155
+
156
+ def parse(xml_data)
132
157
  logger.debug 'Parsing feed...'
133
158
 
134
159
  begin
@@ -140,92 +165,48 @@ module Feed2Email
140
165
  end
141
166
  end
142
167
 
143
- def fetch_and_parse_feed
144
- if xml_data = fetch_feed
145
- @data = parse_feed(xml_data)
146
- end
147
-
148
- @data && @data.respond_to?(:entries)
168
# Entries of the parsed feed.
def parsed_entries
  feed = parsed_feed
  feed.entries
end
150
171
 
151
- def uri
152
- meta[:uri]
153
- end
154
-
155
- def uri=(uri)
156
- history.uri = uri
157
- meta[:uri] = uri
158
- end
172
# The parsed feed stored in @parsed_feed (nil until one is assigned).
def parsed_feed
  @parsed_feed
end
159
173
 
160
- def entries
161
- @entries ||= data.entries.first(max_entries).map {|entry_data|
162
- Entry.new(entry_data, uri, title)
163
- }
164
- end
165
-
166
- def max_entries
167
- config['max_entries'].to_i
174
# Whether the uri column has changed on this record; a changed uri is how a
# permanent redirection shows up here.
def permanently_redirected?
  uri_changed = column_changed?(:uri)
  uri_changed
end
169
177
 
170
178
  def process_entries
171
- logger.info "Processing #{'entry'.pluralize(entries.size, 'entries')}..."
172
- entries.all? {|e| process_entry(e) } # false if any entry fails
173
- end
174
-
175
- def process_entry(entry)
176
- logger.info "Processing entry #{entry.uri} ..."
177
-
178
- unless history.any?
179
- logger.debug 'Skipping new feed entry...'
180
- history << entry.uri
181
- return true
182
- end
179
+ total = processable_entries.size
180
+ processed = true
183
181
 
184
- if history.include?(entry.uri)
185
- logger.debug 'Skipping old entry...'
186
- return true
182
+ processable_entries.each_with_index do |parsed_entry, i|
183
+ logger.info "Processing entry #{i + 1}/#{total} #{parsed_entry.url} ..."
184
+ processed = false unless process_entry(parsed_entry)
187
185
  end
188
186
 
189
- apply_send_delay
187
+ processed
188
+ end
190
189
 
191
- logger.debug 'Sending new entry...'
190
# Builds an Entry for the parsed entry and processes it.
#
# parsed_entry - parsed feed entry; its url becomes the Entry's uri.
#
# Returns the result of Entry#process, or false when processing raises (the
# exception is logged, not re-raised).
def process_entry(parsed_entry)
  entry = Entry.new(feed_id: id, uri: parsed_entry.url).tap do |e|
    e.data = parsed_entry
    e.feed_data = parsed_feed
    e.feed_uri = uri
  end

  begin
    entry.process
  rescue => error
    log_exception(error)
    false
  end
end
207
203
 
208
- def history
209
- @history ||= FeedHistory.new(uri)
204
# Whether there is at least one processable entry.
def processable?
  !processable_entries.empty?
end
211
207
 
212
- def last_email_sent_at
213
- @last_email_sent_at
208
# The first config['max_entries'] entries of the parsed feed.
def processable_entries
  limit = config['max_entries']
  parsed_entries.first(limit)
end
215
-
216
- def last_email_sent_at=(time)
217
- @last_email_sent_at = time
218
- end
219
-
220
- def log_exception(error)
221
- logger.error "#{error.class}: #{error.message.strip}"
222
- error.backtrace.each {|line| logger.debug line }
223
- end
224
-
225
- def title
226
- data.title # delegate
227
- end
228
-
229
- def data; @data end
230
211
  end
231
212
  end