feed2email 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bf67c31096e64a3b8e790b968fbd7fecd5384c78
4
- data.tar.gz: 0a7567b5e3b8aa17aa9977d9a5485fa5d6fec138
3
+ metadata.gz: d2edf473e3d4740e4593c3c15cf7e2bb3c106815
4
+ data.tar.gz: 063706f3316c3f7610fa9b361adeca02dc2ecfe3
5
5
  SHA512:
6
- metadata.gz: e187b0d14d5687d410b9860b6ab5cbeed2fe5d38764d95da23f460a198f607547cc8b35e5faae8b65a74c2d0add7259ab37cfec13607ff9c3de0643d0264eda6
7
- data.tar.gz: 4d735364a8a9f84d6a66de1c3a7f622056517ec72dd04f648c2a9fad9966ea188f44dea1c5222f9b1c3ff7f64ae64f81f66970e5b4733272ec8b5e0c10f65907
6
+ metadata.gz: a2780f0f6836d9071e8021bbe206c3c369a0a516d6c38f5678448dea229d00a332e206b4d8ce734569d5b933ed518b80edcb5ea2c9136e127ba33bd1fde75b5a
7
+ data.tar.gz: bcc5b42264fccb8ff5e54479120c6c50d67f188ddd343b1d894bb7b810e923e9b3c1f6a4693875ee6cae387250d465f0965f4d2be5ed5cb4e5e1b7c6c662fe92
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 0.4.0
2
+
3
+ * Major rewrite to keep history of processed (seen) entries
4
+ * Handle feed fetching/parsing errors
5
+ * Limit the number of entries to process per feed
6
+ * Fix prepending of feed URI to path entry permalinks
7
+
1
8
  ### 0.3.0
2
9
 
3
10
  * Add logging
data/Gemfile.lock CHANGED
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  activesupport (3.1.12)
12
12
  multi_json (~> 1.0)
13
- builder (3.2.0)
13
+ builder (3.2.2)
14
14
  curb (0.7.18)
15
15
  feedzirra (0.1.3)
16
16
  activesupport (~> 3.1.1)
@@ -22,24 +22,25 @@ GEM
22
22
  rake (>= 0.8.7)
23
23
  rdoc (~> 3.8)
24
24
  sax-machine (~> 0.1.0)
25
- i18n (0.6.4)
26
- json (1.7.7)
25
+ i18n (0.6.5)
26
+ json (1.8.0)
27
27
  loofah (1.2.1)
28
28
  nokogiri (>= 1.4.4)
29
- mail (2.5.3)
30
- i18n (>= 0.4.0)
29
+ mail (2.5.4)
31
30
  mime-types (~> 1.16)
32
31
  treetop (~> 1.4.8)
33
- mime-types (1.22)
34
- multi_json (1.7.2)
35
- nokogiri (1.5.9)
32
+ mime-types (1.24)
33
+ mini_portile (0.5.1)
34
+ multi_json (1.7.9)
35
+ nokogiri (1.6.0)
36
+ mini_portile (~> 0.5.0)
36
37
  polyglot (0.3.3)
37
- rake (10.0.4)
38
+ rake (10.1.0)
38
39
  rdoc (3.12.2)
39
40
  json (~> 1.4)
40
41
  sax-machine (0.1.0)
41
42
  nokogiri (> 0.0.0)
42
- treetop (1.4.12)
43
+ treetop (1.4.15)
43
44
  polyglot
44
45
  polyglot (>= 0.3.1)
45
46
 
data/README.md CHANGED
@@ -45,6 +45,8 @@ pair is separated with a colon: `foo: bar`
45
45
  avoid SMTP server throttling errors (default is `10`; use `0` to disable)
46
46
  * `log_path` (optional) is the _absolute_ path to the log file (default is
47
47
  `true` which logs to standard output; use `false` to disable)
48
+ * `max_entries` (optional) is the maximum number of entries to process per feed
49
+ (default is `20`; use `0` for unlimited)
48
50
 
49
51
  ### SMTP
50
52
 
@@ -99,13 +101,12 @@ immediately. During dry run mode:
99
101
 
100
102
  * No feeds are fetched and, thus, no email is sent (existing feed entries are
101
103
  considered already seen)
102
- * `~/.feed2email/state.yml` is created containing the timestamp of when each
103
- feed was last fetched
104
+ * `~/.feed2email/history.yml` is created containing processed (seen) entries per
105
+ feed
104
106
 
105
107
  If you want to receive existing entries from a specific feed, you can manually
106
- alter the timestamp for that feed in `state.yml` to a value in the past. Next
107
- time you run feed2email, all entries published past that timestamp will be sent
108
- with email.
108
+ delete them from `history.yml`. Next time feed2email runs, they will be
109
+ processed (sent as email).
109
110
 
110
111
  You can use [cron][] to run feed2email automatically e.g. once every hour.
111
112
 
@@ -1,14 +1,9 @@
1
1
  module Feed2Email
2
2
  class Entry
3
- attr_reader :feed
4
-
5
- def self.process(data, feed)
6
- Entry.new(data, feed).process
7
- end
8
-
9
- def initialize(data, feed)
3
+ def initialize(data, feed_uri, feed_title)
10
4
  @data = data
11
- @feed = feed
5
+ @feed_uri = feed_uri
6
+ @feed_title = feed_title
12
7
  end
13
8
 
14
9
  def author
@@ -20,14 +15,7 @@ module Feed2Email
20
15
  end
21
16
 
22
17
  def process
23
- log :debug, "Processing entry #{uri} ..."
24
-
25
- if send?
26
- log :debug, 'Sending email...'
27
- to_mail.send
28
- else
29
- log :debug, 'Entry should not be sent; skipping...'
30
- end
18
+ Mail.new(self, @feed_title).send
31
19
  end
32
20
 
33
21
  def title
@@ -36,49 +24,12 @@ module Feed2Email
36
24
 
37
25
  def uri
38
26
  @uri ||= begin
39
- if @data.url[0] == '/'
40
- @feed.uri.chomp('/') + @data.url
27
+ if @data.url[0] == '/' # invalid entry URL is a path
28
+ @feed_uri[%r{https?://[^/]+}] + @data.url # prepend feed URI
41
29
  else
42
30
  @data.url
43
31
  end
44
32
  end
45
33
  end
46
-
47
- private
48
-
49
- def log(*args)
50
- Feed2Email::Logger.instance.log(*args)
51
- end
52
-
53
- def published_at
54
- @data.published
55
- end
56
-
57
- def send?
58
- if published_at
59
- log :debug, 'Entry has publication timestamp'
60
-
61
- if published_at.past? # respect entries published in the future
62
- log :debug, 'Entry published in the past'
63
-
64
- if published_at > @feed.fetch_time
65
- log :debug, 'Entry not seen before'
66
- return true
67
- else
68
- log :debug, 'Entry seen before'
69
- end
70
- else
71
- log :warn, "Entry #{uri} published in the future"
72
- end
73
- else
74
- log :warn, "Entry #{uri} does not have publication timestamp"
75
- end
76
-
77
- false
78
- end
79
-
80
- def to_mail
81
- Mail.new(self)
82
- end
83
34
  end
84
35
  end
@@ -1,20 +1,12 @@
1
1
  module Feed2Email
2
2
  class Feed
3
3
  FEEDS_FILE = File.join(CONFIG_DIR, 'feeds.yml')
4
- STATE_FILE = File.join(CONFIG_DIR, 'state.yml')
4
+ HISTORY_FILE = File.join(CONFIG_DIR, 'history.yml')
5
5
 
6
6
  def self.log(*args)
7
7
  Feed2Email::Logger.instance.log(*args)
8
8
  end
9
9
 
10
- def self.pluralize(n, singular, plural)
11
- "#{n} #{n == 1 ? singular : plural}"
12
- end
13
-
14
- def self.process(uri)
15
- Feed.new(uri).process
16
- end
17
-
18
10
  def self.process_all
19
11
  Feed2Email::Config.instance.read!
20
12
 
@@ -26,111 +18,121 @@ module Feed2Email
26
18
  exit 4
27
19
  end
28
20
 
29
- log :info, "Subscribed to #{pluralize(feed_uris.size, 'feed', 'feeds')}"
21
+ log :info, "Subscribed to #{n = feed_uris.size} feed#{n == 1 ? '' : 's'}"
30
22
 
31
- log :debug, 'Loading fetch times...'
32
- @@fetch_times = YAML.load(open(STATE_FILE)) rescue {}
23
+ log :debug, 'Loading history...'
24
+ @@history = YAML.load(open(HISTORY_FILE)) rescue {}
33
25
 
34
- feed_uris.each {|uri| Feed.process(uri) }
26
+ feed_uris.each do |uri|
27
+ log :info, "Found feed #{uri}"
28
+ Feed.new(uri).process
29
+ end
35
30
 
36
- log :debug, 'Writing fetch times...'
37
- open(STATE_FILE, 'w') {|f| f.write(@@fetch_times.to_yaml) }
31
+ log :debug, 'Writing history...'
32
+ open(HISTORY_FILE, 'w') {|f| f.write(@@history.to_yaml) }
38
33
  end
39
34
 
40
- attr_reader :uri
41
-
42
35
  def initialize(uri)
43
36
  @uri = uri
44
37
  end
45
38
 
46
- def fetch_time
47
- @@fetch_times[@uri]
48
- end
49
-
50
- def pluralize(*args)
51
- Feed2Email::Feed.pluralize(*args) # delegate
52
- end
53
-
54
39
  def process
55
- log :info, "Processing feed #{@uri} ..."
56
-
57
- if seen_before?
58
- log :debug, 'Feed seen before'
59
-
60
- if fetched?
61
- log :debug, 'Feed is fetched'
62
-
63
- if have_entries?
64
- log :info, "Processing #{pluralize(entries.size, 'entry', 'entries')}..."
40
+ if fetched?
41
+ log :debug, 'Feed is fetched'
65
42
 
66
- begin
67
- process_entries
68
- rescue => e
69
- log :error, "#{e.class}: #{e.message.strip}"
70
- end
71
- else
72
- log :warn, 'Feed does not have entries'
73
- end
43
+ if entries.any?
44
+ log :info,
45
+ "Processing #{n = entries.size} entr#{n == 1 ? 'y' : 'ies'}..."
46
+ process_entries
74
47
  else
75
- log :error, 'Feed could not be fetched'
48
+ log :warn, 'Feed does not have entries'
76
49
  end
77
50
  else
78
- log :info, 'Feed not seen before; skipping...'
51
+ log :error, 'Feed could not be fetched'
79
52
  end
80
-
81
- if e.nil? && (!seen_before? || fetched?)
82
- log :debug, 'Syncing fetch time...'
83
- sync_fetch_time
84
- end
85
- end
86
-
87
- def title
88
- data.title
89
53
  end
90
54
 
91
55
  private
92
56
 
57
+ def config
58
+ Feed2Email::Config.instance.config
59
+ end
60
+
93
61
  def data
94
62
  if @data.nil?
95
63
  log :debug, 'Fetching and parsing feed...'
96
- @data = Feedzirra::Feed.fetch_and_parse(@uri,
97
- :user_agent => "feed2email/#{VERSION}",
98
- :compress => true
99
- )
100
- @fetched_at = Time.now
64
+
65
+ begin
66
+ @data = Feedzirra::Feed.fetch_and_parse(@uri,
67
+ :user_agent => "feed2email/#{VERSION}",
68
+ :compress => true
69
+ )
70
+ rescue => e
71
+ log :error, "#{e.class}: #{e.message.strip}"
72
+ end
101
73
  end
102
74
 
103
75
  @data
104
76
  end
105
77
 
106
78
  def entries
107
- data.entries
79
+ @entries ||= data.entries[0..max_entries - 1].map {|entry_data|
80
+ Entry.new(entry_data, @uri, title)
81
+ }
108
82
  end
109
83
 
110
84
  def fetched?
111
85
  data.respond_to?(:entries)
112
86
  end
113
87
 
114
- def have_entries?
115
- entries.any?
116
- end
117
-
118
88
  def log(*args)
119
89
  Feed2Email::Feed.log(*args) # delegate
120
90
  end
121
91
 
92
+ def max_entries
93
+ (config['max_entries'] || 20).to_i
94
+ end
95
+
122
96
  def process_entries
123
- entries.each do |entry_data|
124
- Entry.process(entry_data, self)
97
+ entries.each do |entry|
98
+ log :info, "Found entry #{entry.uri}"
99
+
100
+ if seen_before?
101
+ if seen_entries.include?(entry.uri)
102
+ log :debug, 'Skipping seen entry...'
103
+ else
104
+ log :debug, 'Processing new entry...'
105
+
106
+ begin
107
+ entry.process
108
+ rescue => e
109
+ log :error, "#{e.class}: #{e.message.strip}"
110
+ end
111
+
112
+ seen_entries << entry.uri if e.nil? # record in history if no errors
113
+ e = nil
114
+ end
115
+ else
116
+ log :debug, 'Skipping new entry...'
117
+ seen_entries << entry.uri # record in history
118
+ end
125
119
  end
126
120
  end
127
121
 
128
122
  def seen_before?
129
- fetch_time.is_a?(Time)
123
+ if @seen_before.nil?
124
+ @seen_before = !@@history[@uri].nil?
125
+ end
126
+
127
+ @seen_before
128
+ end
129
+
130
+ def seen_entries
131
+ @@history[@uri] ||= []
130
132
  end
131
133
 
132
- def sync_fetch_time
133
- @@fetch_times[@uri] = @fetched_at || Time.now
134
+ def title
135
+ data.title
134
136
  end
135
137
  end
136
138
  end
@@ -24,7 +24,7 @@ module Feed2Email
24
24
  if log_path.nil? || log_path == true
25
25
  STDOUT
26
26
  else
27
- log_path
27
+ File.expand_path(log_path)
28
28
  end
29
29
  end
30
30
 
@@ -1,7 +1,8 @@
1
1
  module Feed2Email
2
2
  class Mail
3
- def initialize(entry)
3
+ def initialize(entry, feed_title)
4
4
  @entry = entry
5
+ @feed_title = feed_title
5
6
  end
6
7
 
7
8
  def send
@@ -42,7 +43,7 @@ module Feed2Email
42
43
 
43
44
  def from
44
45
  from_data = {
45
- :name => @entry.feed.title,
46
+ :name => @feed_title,
46
47
  :email => from_address,
47
48
  }
48
49
  '"%{name}" <%{email}>' % from_data
@@ -1,3 +1,3 @@
1
1
  module Feed2Email
2
- VERSION = '0.3.0'
2
+ VERSION = '0.4.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed2email
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aggelos Orfanakos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-25 00:00:00.000000000 Z
11
+ date: 2013-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: feedzirra