pluto 0.8.9 → 0.8.10

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pluto.rb CHANGED
@@ -13,6 +13,7 @@ require 'optparse'
13
13
  require 'fileutils'
14
14
  require 'logger'
15
15
  require 'date'
16
+ require 'digest/md5'
16
17
 
17
18
 
18
19
  # 3rd party ruby gems/libs
data/lib/pluto/fetcher.rb CHANGED
@@ -98,10 +98,10 @@ class Fetcher
98
98
 
99
99
  if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
100
100
  puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
101
+ puts "no change; request returns not modified (304); skipping parsing feed"
101
102
  return nil # no updates available; nothing to do
102
103
  end
103
104
 
104
-
105
105
  feed_fetched = Time.now
106
106
 
107
107
  if response.code != '200' # note Net::HTTP response.code is a string in ruby
@@ -113,13 +113,13 @@ class Fetcher
113
113
  http_etag: nil,
114
114
  http_last_modified: nil,
115
115
  body: nil,
116
+ md5: nil,
116
117
  fetched: feed_fetched
117
118
  }
118
119
  feed_rec.update_attributes!( feed_attribs )
119
120
  return nil # sorry; no feed for parsing available
120
121
  end
121
-
122
-
122
+
123
123
  puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
124
124
 
125
125
  feed_xml = response.body
@@ -137,12 +137,28 @@ class Fetcher
137
137
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
138
138
  feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
139
139
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
140
+
141
+ ## check for md5 hash for response.body
142
+
143
+ last_feed_md5 = feed_rec.md5
144
+ feed_md5 = Digest::MD5.hexdigest( feed_xml )
145
+
146
+ if last_feed_md5 && last_feed_md5 == feed_md5
147
+ # not all servers handle conditional gets, so while not much can be
148
+ # done about the bandwidth, if the response body is identical
149
+ # the downstream processing (parsing, caching, ...) can be avoided.
150
+ # - thanks to planet mars -fido.rb for the idea, cheers.
140
151
 
152
+ puts "no change; md5 digests match; skipping parsing feed"
153
+ return nil # no updates available; nothing to do
154
+ end
155
+
141
156
  feed_attribs = {
142
157
  http_code: response.code.to_i,
143
158
  http_etag: response.header[ 'etag' ],
144
159
  http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
145
160
  body: feed_xml,
161
+ md5: feed_md5,
146
162
  fetched: feed_fetched
147
163
  }
148
164
 
@@ -25,11 +25,14 @@ class Refresher
25
25
  Action.create!( title: 'update feeds' )
26
26
 
27
27
  feeds_fetched = Time.now
28
- Site.all.each do |site|
28
+
29
+ #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
30
+
31
+ Site.order(:id).each do |site|
29
32
  site.update_attributes!( fetched: feeds_fetched )
30
33
  end
31
34
 
32
- Feed.all.each do |feed|
35
+ Feed.order(:id).each do |feed|
33
36
  update_feed_worker( feed )
34
37
  end
35
38
  end
data/lib/pluto/schema.rb CHANGED
@@ -62,7 +62,8 @@ class CreateDb < ActiveRecord::Migration
62
62
  ## note: save last-modified header as text (not datetime) - pass through as is
63
63
  t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
64
64
 
65
- t.text :body # last http response body (complete feed!)
65
+ t.string :md5 # md5 hash of body
66
+ t.text :body # last http response body (complete feed!)
66
67
 
67
68
  t.datetime :fetched # last fetched/checked date
68
69
 
data/lib/pluto/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Pluto
3
- VERSION = '0.8.9'
3
+ VERSION = '0.8.10'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.9
4
+ version: 0.8.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-05 00:00:00.000000000 Z
12
+ date: 2013-10-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pakman
16
- requirement: &77270490 !ruby/object:Gem::Requirement
16
+ requirement: &74547830 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *77270490
24
+ version_requirements: *74547830
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: fetcher
27
- requirement: &77270080 !ruby/object:Gem::Requirement
27
+ requirement: &74547550 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.4.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *77270080
35
+ version_requirements: *74547550
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: logutils
38
- requirement: &77269570 !ruby/object:Gem::Requirement
38
+ requirement: &74547300 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.6'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *77269570
46
+ version_requirements: *74547300
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: feedutils
49
- requirement: &77268950 !ruby/object:Gem::Requirement
49
+ requirement: &74547000 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.3.2
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *77268950
57
+ version_requirements: *74547000
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: props
60
- requirement: &77268430 !ruby/object:Gem::Requirement
60
+ requirement: &74546740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.2
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *77268430
68
+ version_requirements: *74546740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: textutils
71
- requirement: &77268060 !ruby/object:Gem::Requirement
71
+ requirement: &74546440 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 0.6.8
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *77268060
79
+ version_requirements: *74546440
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: gli
82
- requirement: &77267780 !ruby/object:Gem::Requirement
82
+ requirement: &74546190 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 2.5.6
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *77267780
90
+ version_requirements: *74546190
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rdoc
93
- requirement: &77267490 !ruby/object:Gem::Requirement
93
+ requirement: &74545900 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ~>
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3.10'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *77267490
101
+ version_requirements: *74545900
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: hoe
104
- requirement: &77267190 !ruby/object:Gem::Requirement
104
+ requirement: &74545650 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ~>
@@ -109,7 +109,7 @@ dependencies:
109
109
  version: '3.3'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *77267190
112
+ version_requirements: *74545650
113
113
  description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
114
114
  Web Feeds)
115
115
  email: feedreader@googlegroups.com