feed2email 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bf67c31096e64a3b8e790b968fbd7fecd5384c78
4
- data.tar.gz: 0a7567b5e3b8aa17aa9977d9a5485fa5d6fec138
3
+ metadata.gz: d2edf473e3d4740e4593c3c15cf7e2bb3c106815
4
+ data.tar.gz: 063706f3316c3f7610fa9b361adeca02dc2ecfe3
5
5
  SHA512:
6
- metadata.gz: e187b0d14d5687d410b9860b6ab5cbeed2fe5d38764d95da23f460a198f607547cc8b35e5faae8b65a74c2d0add7259ab37cfec13607ff9c3de0643d0264eda6
7
- data.tar.gz: 4d735364a8a9f84d6a66de1c3a7f622056517ec72dd04f648c2a9fad9966ea188f44dea1c5222f9b1c3ff7f64ae64f81f66970e5b4733272ec8b5e0c10f65907
6
+ metadata.gz: a2780f0f6836d9071e8021bbe206c3c369a0a516d6c38f5678448dea229d00a332e206b4d8ce734569d5b933ed518b80edcb5ea2c9136e127ba33bd1fde75b5a
7
+ data.tar.gz: bcc5b42264fccb8ff5e54479120c6c50d67f188ddd343b1d894bb7b810e923e9b3c1f6a4693875ee6cae387250d465f0965f4d2be5ed5cb4e5e1b7c6c662fe92
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ### 0.4.0
2
+
3
+ * Major rewrite to keep history of processed (seen) entries
4
+ * Handle feed fetching/parsing errors
5
+ * Limit the number of entries to process per feed
6
+ * Fix prepending of feed URI to path entry permalinks
7
+
1
8
  ### 0.3.0
2
9
 
3
10
  * Add logging
data/Gemfile.lock CHANGED
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  activesupport (3.1.12)
12
12
  multi_json (~> 1.0)
13
- builder (3.2.0)
13
+ builder (3.2.2)
14
14
  curb (0.7.18)
15
15
  feedzirra (0.1.3)
16
16
  activesupport (~> 3.1.1)
@@ -22,24 +22,25 @@ GEM
22
22
  rake (>= 0.8.7)
23
23
  rdoc (~> 3.8)
24
24
  sax-machine (~> 0.1.0)
25
- i18n (0.6.4)
26
- json (1.7.7)
25
+ i18n (0.6.5)
26
+ json (1.8.0)
27
27
  loofah (1.2.1)
28
28
  nokogiri (>= 1.4.4)
29
- mail (2.5.3)
30
- i18n (>= 0.4.0)
29
+ mail (2.5.4)
31
30
  mime-types (~> 1.16)
32
31
  treetop (~> 1.4.8)
33
- mime-types (1.22)
34
- multi_json (1.7.2)
35
- nokogiri (1.5.9)
32
+ mime-types (1.24)
33
+ mini_portile (0.5.1)
34
+ multi_json (1.7.9)
35
+ nokogiri (1.6.0)
36
+ mini_portile (~> 0.5.0)
36
37
  polyglot (0.3.3)
37
- rake (10.0.4)
38
+ rake (10.1.0)
38
39
  rdoc (3.12.2)
39
40
  json (~> 1.4)
40
41
  sax-machine (0.1.0)
41
42
  nokogiri (> 0.0.0)
42
- treetop (1.4.12)
43
+ treetop (1.4.15)
43
44
  polyglot
44
45
  polyglot (>= 0.3.1)
45
46
 
data/README.md CHANGED
@@ -45,6 +45,8 @@ pair is separated with a colon: `foo: bar`
45
45
  avoid SMTP server throttling errors (default is `10`; use `0` to disable)
46
46
  * `log_path` (optional) is the _absolute_ path to the log file (default is
47
47
  `true` which logs to standard output; use `false` to disable)
48
+ * `max_entries` (optional) is the maximum number of entries to process per feed
49
+ (default is `20`; use `0` for unlimited)
48
50
 
49
51
  ### SMTP
50
52
 
@@ -99,13 +101,12 @@ immediately. During dry run mode:
99
101
 
100
102
  * No feeds are fetched and, thus, no email is sent (existing feed entries are
101
103
  considered already seen)
102
- * `~/.feed2email/state.yml` is created containing the timestamp of when each
103
- feed was last fetched
104
+ * `~/.feed2email/history.yml` is created containing processed (seen) entries per
105
+ feed
104
106
 
105
107
  If you want to receive existing entries from a specific feed, you can manually
106
- alter the timestamp for that feed in `state.yml` to a value in the past. Next
107
- time you run feed2email, all entries published past that timestamp will be sent
108
- with email.
108
+ delete them from `history.yml`. Next time feed2email runs, they will be
109
+ processed (sent as email).
109
110
 
110
111
  You can use [cron][] to run feed2email automatically e.g. once every hour.
111
112
 
@@ -1,14 +1,9 @@
1
1
  module Feed2Email
2
2
  class Entry
3
- attr_reader :feed
4
-
5
- def self.process(data, feed)
6
- Entry.new(data, feed).process
7
- end
8
-
9
- def initialize(data, feed)
3
+ def initialize(data, feed_uri, feed_title)
10
4
  @data = data
11
- @feed = feed
5
+ @feed_uri = feed_uri
6
+ @feed_title = feed_title
12
7
  end
13
8
 
14
9
  def author
@@ -20,14 +15,7 @@ module Feed2Email
20
15
  end
21
16
 
22
17
  def process
23
- log :debug, "Processing entry #{uri} ..."
24
-
25
- if send?
26
- log :debug, 'Sending email...'
27
- to_mail.send
28
- else
29
- log :debug, 'Entry should not be sent; skipping...'
30
- end
18
+ Mail.new(self, @feed_title).send
31
19
  end
32
20
 
33
21
  def title
@@ -36,49 +24,12 @@ module Feed2Email
36
24
 
37
25
  def uri
38
26
  @uri ||= begin
39
- if @data.url[0] == '/'
40
- @feed.uri.chomp('/') + @data.url
27
+ if @data.url[0] == '/' # invalid entry URL is a path
28
+ @feed_uri[%r{https?://[^/]+}] + @data.url # prepend feed URI
41
29
  else
42
30
  @data.url
43
31
  end
44
32
  end
45
33
  end
46
-
47
- private
48
-
49
- def log(*args)
50
- Feed2Email::Logger.instance.log(*args)
51
- end
52
-
53
- def published_at
54
- @data.published
55
- end
56
-
57
- def send?
58
- if published_at
59
- log :debug, 'Entry has publication timestamp'
60
-
61
- if published_at.past? # respect entries published in the future
62
- log :debug, 'Entry published in the past'
63
-
64
- if published_at > @feed.fetch_time
65
- log :debug, 'Entry not seen before'
66
- return true
67
- else
68
- log :debug, 'Entry seen before'
69
- end
70
- else
71
- log :warn, "Entry #{uri} published in the future"
72
- end
73
- else
74
- log :warn, "Entry #{uri} does not have publication timestamp"
75
- end
76
-
77
- false
78
- end
79
-
80
- def to_mail
81
- Mail.new(self)
82
- end
83
34
  end
84
35
  end
@@ -1,20 +1,12 @@
1
1
  module Feed2Email
2
2
  class Feed
3
3
  FEEDS_FILE = File.join(CONFIG_DIR, 'feeds.yml')
4
- STATE_FILE = File.join(CONFIG_DIR, 'state.yml')
4
+ HISTORY_FILE = File.join(CONFIG_DIR, 'history.yml')
5
5
 
6
6
  def self.log(*args)
7
7
  Feed2Email::Logger.instance.log(*args)
8
8
  end
9
9
 
10
- def self.pluralize(n, singular, plural)
11
- "#{n} #{n == 1 ? singular : plural}"
12
- end
13
-
14
- def self.process(uri)
15
- Feed.new(uri).process
16
- end
17
-
18
10
  def self.process_all
19
11
  Feed2Email::Config.instance.read!
20
12
 
@@ -26,111 +18,121 @@ module Feed2Email
26
18
  exit 4
27
19
  end
28
20
 
29
- log :info, "Subscribed to #{pluralize(feed_uris.size, 'feed', 'feeds')}"
21
+ log :info, "Subscribed to #{n = feed_uris.size} feed#{n == 1 ? '' : 's'}"
30
22
 
31
- log :debug, 'Loading fetch times...'
32
- @@fetch_times = YAML.load(open(STATE_FILE)) rescue {}
23
+ log :debug, 'Loading history...'
24
+ @@history = YAML.load(open(HISTORY_FILE)) rescue {}
33
25
 
34
- feed_uris.each {|uri| Feed.process(uri) }
26
+ feed_uris.each do |uri|
27
+ log :info, "Found feed #{uri}"
28
+ Feed.new(uri).process
29
+ end
35
30
 
36
- log :debug, 'Writing fetch times...'
37
- open(STATE_FILE, 'w') {|f| f.write(@@fetch_times.to_yaml) }
31
+ log :debug, 'Writing history...'
32
+ open(HISTORY_FILE, 'w') {|f| f.write(@@history.to_yaml) }
38
33
  end
39
34
 
40
- attr_reader :uri
41
-
42
35
  def initialize(uri)
43
36
  @uri = uri
44
37
  end
45
38
 
46
- def fetch_time
47
- @@fetch_times[@uri]
48
- end
49
-
50
- def pluralize(*args)
51
- Feed2Email::Feed.pluralize(*args) # delegate
52
- end
53
-
54
39
  def process
55
- log :info, "Processing feed #{@uri} ..."
56
-
57
- if seen_before?
58
- log :debug, 'Feed seen before'
59
-
60
- if fetched?
61
- log :debug, 'Feed is fetched'
62
-
63
- if have_entries?
64
- log :info, "Processing #{pluralize(entries.size, 'entry', 'entries')}..."
40
+ if fetched?
41
+ log :debug, 'Feed is fetched'
65
42
 
66
- begin
67
- process_entries
68
- rescue => e
69
- log :error, "#{e.class}: #{e.message.strip}"
70
- end
71
- else
72
- log :warn, 'Feed does not have entries'
73
- end
43
+ if entries.any?
44
+ log :info,
45
+ "Processing #{n = entries.size} entr#{n == 1 ? 'y' : 'ies'}..."
46
+ process_entries
74
47
  else
75
- log :error, 'Feed could not be fetched'
48
+ log :warn, 'Feed does not have entries'
76
49
  end
77
50
  else
78
- log :info, 'Feed not seen before; skipping...'
51
+ log :error, 'Feed could not be fetched'
79
52
  end
80
-
81
- if e.nil? && (!seen_before? || fetched?)
82
- log :debug, 'Syncing fetch time...'
83
- sync_fetch_time
84
- end
85
- end
86
-
87
- def title
88
- data.title
89
53
  end
90
54
 
91
55
  private
92
56
 
57
+ def config
58
+ Feed2Email::Config.instance.config
59
+ end
60
+
93
61
  def data
94
62
  if @data.nil?
95
63
  log :debug, 'Fetching and parsing feed...'
96
- @data = Feedzirra::Feed.fetch_and_parse(@uri,
97
- :user_agent => "feed2email/#{VERSION}",
98
- :compress => true
99
- )
100
- @fetched_at = Time.now
64
+
65
+ begin
66
+ @data = Feedzirra::Feed.fetch_and_parse(@uri,
67
+ :user_agent => "feed2email/#{VERSION}",
68
+ :compress => true
69
+ )
70
+ rescue => e
71
+ log :error, "#{e.class}: #{e.message.strip}"
72
+ end
101
73
  end
102
74
 
103
75
  @data
104
76
  end
105
77
 
106
78
  def entries
107
- data.entries
79
+ @entries ||= data.entries[0..max_entries - 1].map {|entry_data|
80
+ Entry.new(entry_data, @uri, title)
81
+ }
108
82
  end
109
83
 
110
84
  def fetched?
111
85
  data.respond_to?(:entries)
112
86
  end
113
87
 
114
- def have_entries?
115
- entries.any?
116
- end
117
-
118
88
  def log(*args)
119
89
  Feed2Email::Feed.log(*args) # delegate
120
90
  end
121
91
 
92
+ def max_entries
93
+ (config['max_entries'] || 20).to_i
94
+ end
95
+
122
96
  def process_entries
123
- entries.each do |entry_data|
124
- Entry.process(entry_data, self)
97
+ entries.each do |entry|
98
+ log :info, "Found entry #{entry.uri}"
99
+
100
+ if seen_before?
101
+ if seen_entries.include?(entry.uri)
102
+ log :debug, 'Skipping seen entry...'
103
+ else
104
+ log :debug, 'Processing new entry...'
105
+
106
+ begin
107
+ entry.process
108
+ rescue => e
109
+ log :error, "#{e.class}: #{e.message.strip}"
110
+ end
111
+
112
+ seen_entries << entry.uri if e.nil? # record in history if no errors
113
+ e = nil
114
+ end
115
+ else
116
+ log :debug, 'Skipping new entry...'
117
+ seen_entries << entry.uri # record in history
118
+ end
125
119
  end
126
120
  end
127
121
 
128
122
  def seen_before?
129
- fetch_time.is_a?(Time)
123
+ if @seen_before.nil?
124
+ @seen_before = !@@history[@uri].nil?
125
+ end
126
+
127
+ @seen_before
128
+ end
129
+
130
+ def seen_entries
131
+ @@history[@uri] ||= []
130
132
  end
131
133
 
132
- def sync_fetch_time
133
- @@fetch_times[@uri] = @fetched_at || Time.now
134
+ def title
135
+ data.title
134
136
  end
135
137
  end
136
138
  end
@@ -24,7 +24,7 @@ module Feed2Email
24
24
  if log_path.nil? || log_path == true
25
25
  STDOUT
26
26
  else
27
- log_path
27
+ File.expand_path(log_path)
28
28
  end
29
29
  end
30
30
 
@@ -1,7 +1,8 @@
1
1
  module Feed2Email
2
2
  class Mail
3
- def initialize(entry)
3
+ def initialize(entry, feed_title)
4
4
  @entry = entry
5
+ @feed_title = feed_title
5
6
  end
6
7
 
7
8
  def send
@@ -42,7 +43,7 @@ module Feed2Email
42
43
 
43
44
  def from
44
45
  from_data = {
45
- :name => @entry.feed.title,
46
+ :name => @feed_title,
46
47
  :email => from_address,
47
48
  }
48
49
  '"%{name}" <%{email}>' % from_data
@@ -1,3 +1,3 @@
1
1
  module Feed2Email
2
- VERSION = '0.3.0'
2
+ VERSION = '0.4.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed2email
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aggelos Orfanakos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-25 00:00:00.000000000 Z
11
+ date: 2013-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: feedzirra