pluto 0.8.9 → 0.8.10
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/pluto.rb +1 -0
- data/lib/pluto/fetcher.rb +19 -3
- data/lib/pluto/refresher.rb +5 -2
- data/lib/pluto/schema.rb +2 -1
- data/lib/pluto/version.rb +1 -1
- metadata +20 -20
data/lib/pluto.rb
CHANGED
data/lib/pluto/fetcher.rb
CHANGED
@@ -98,10 +98,10 @@ class Fetcher
|
|
98
98
|
|
99
99
|
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
100
100
|
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
101
|
+
puts "no change; request returns not modified (304); skipping parsing feed"
|
101
102
|
return nil # no updates available; nothing to do
|
102
103
|
end
|
103
104
|
|
104
|
-
|
105
105
|
feed_fetched = Time.now
|
106
106
|
|
107
107
|
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
@@ -113,13 +113,13 @@ class Fetcher
|
|
113
113
|
http_etag: nil,
|
114
114
|
http_last_modified: nil,
|
115
115
|
body: nil,
|
116
|
+
md5: nil,
|
116
117
|
fetched: feed_fetched
|
117
118
|
}
|
118
119
|
feed_rec.update_attributes!( feed_attribs )
|
119
120
|
return nil # sorry; no feed for parsing available
|
120
121
|
end
|
121
|
-
|
122
|
-
|
122
|
+
|
123
123
|
puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
|
124
124
|
|
125
125
|
feed_xml = response.body
|
@@ -137,12 +137,28 @@ class Fetcher
|
|
137
137
|
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
138
138
|
feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
|
139
139
|
logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
|
140
|
+
|
141
|
+
## check for md5 hash for response.body
|
142
|
+
|
143
|
+
last_feed_md5 = feed_rec.md5
|
144
|
+
feed_md5 = Digest::MD5.hexdigest( feed_xml )
|
145
|
+
|
146
|
+
if last_feed_md5 && last_feed_md5 == feed_md5
|
147
|
+
# not all servers handle conditional gets, so while not much can be
|
148
|
+
# done about the bandwidth, but if the response body is identical
|
149
|
+
# the downstream processing (parsing, caching, ...) can be avoided.
|
150
|
+
# - thanks to planet mars -fido.rb for the idea, cheers.
|
140
151
|
|
152
|
+
puts "no change; md5 digests match; skipping parsing feed"
|
153
|
+
return nil # no updates available; nothing to do
|
154
|
+
end
|
155
|
+
|
141
156
|
feed_attribs = {
|
142
157
|
http_code: response.code.to_i,
|
143
158
|
http_etag: response.header[ 'etag' ],
|
144
159
|
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
145
160
|
body: feed_xml,
|
161
|
+
md5: feed_md5,
|
146
162
|
fetched: feed_fetched
|
147
163
|
}
|
148
164
|
|
data/lib/pluto/refresher.rb
CHANGED
@@ -25,11 +25,14 @@ class Refresher
|
|
25
25
|
Action.create!( title: 'update feeds' )
|
26
26
|
|
27
27
|
feeds_fetched = Time.now
|
28
|
-
|
28
|
+
|
29
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
30
|
+
|
31
|
+
Site.order(:id).each do |site|
|
29
32
|
site.update_attributes!( fetched: feeds_fetched )
|
30
33
|
end
|
31
34
|
|
32
|
-
Feed.all.each do |feed|
|
35
|
+
Feed.order(:id).each do |feed|
|
33
36
|
update_feed_worker( feed )
|
34
37
|
end
|
35
38
|
end
|
data/lib/pluto/schema.rb
CHANGED
@@ -62,7 +62,8 @@ class CreateDb < ActiveRecord::Migration
|
|
62
62
|
## note: save last-modified header as text (not datetime) - pass through as is
|
63
63
|
t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
|
64
64
|
|
65
|
-
t.text :body # last http response body (complete feed!)
|
65
|
+
t.string :md5 # md5 hash of body
|
66
|
+
t.text :body # last http response body (complete feed!)
|
66
67
|
|
67
68
|
t.datetime :fetched # last fetched/checked date
|
68
69
|
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.9
|
4
|
+
version: 0.8.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-10-
|
12
|
+
date: 2013-10-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pakman
|
16
|
-
requirement: &
|
16
|
+
requirement: &74547830 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *74547830
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &74547550 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.4.1
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *74547550
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: logutils
|
38
|
-
requirement: &
|
38
|
+
requirement: &74547300 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.6'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *74547300
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: feedutils
|
49
|
-
requirement: &
|
49
|
+
requirement: &74547000 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.3.2
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *74547000
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: props
|
60
|
-
requirement: &
|
60
|
+
requirement: &74546740 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.2
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *74546740
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: textutils
|
71
|
-
requirement: &
|
71
|
+
requirement: &74546440 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 0.6.8
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *74546440
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: gli
|
82
|
-
requirement: &
|
82
|
+
requirement: &74546190 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: 2.5.6
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *74546190
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rdoc
|
93
|
-
requirement: &
|
93
|
+
requirement: &74545900 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ~>
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '3.10'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *74545900
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: hoe
|
104
|
-
requirement: &
|
104
|
+
requirement: &74545650 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ~>
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '3.3'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *74545650
|
113
113
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
114
114
|
Web Feeds)
|
115
115
|
email: feedreader@googlegroups.com
|