pluto 0.8.9 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/pluto.rb CHANGED
@@ -13,6 +13,7 @@ require 'optparse'
13
13
  require 'fileutils'
14
14
  require 'logger'
15
15
  require 'date'
16
+ require 'digest/md5'
16
17
 
17
18
 
18
19
  # 3rd party ruby gems/libs
data/lib/pluto/fetcher.rb CHANGED
@@ -98,10 +98,10 @@ class Fetcher
98
98
 
99
99
  if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
100
100
  puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
101
+ puts "no change; request returns not modified (304); skipping parsing feed"
101
102
  return nil # no updates available; nothing to do
102
103
  end
103
104
 
104
-
105
105
  feed_fetched = Time.now
106
106
 
107
107
  if response.code != '200' # note Net::HTTP response.code is a string in ruby
@@ -113,13 +113,13 @@ class Fetcher
113
113
  http_etag: nil,
114
114
  http_last_modified: nil,
115
115
  body: nil,
116
+ md5: nil,
116
117
  fetched: feed_fetched
117
118
  }
118
119
  feed_rec.update_attributes!( feed_attribs )
119
120
  return nil # sorry; no feed for parsing available
120
121
  end
121
-
122
-
122
+
123
123
  puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
124
124
 
125
125
  feed_xml = response.body
@@ -137,12 +137,28 @@ class Fetcher
137
137
  # - note: force_encoding will NOT change the chars; it only changes the assumed encoding w/o translation
138
138
  feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
139
139
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"
140
+
141
+ ## compare md5 hash of response.body against the last stored hash
142
+
143
+ last_feed_md5 = feed_rec.md5
144
+ feed_md5 = Digest::MD5.hexdigest( feed_xml )
145
+
146
+ if last_feed_md5 && last_feed_md5 == feed_md5
147
+ # not all servers handle conditional gets; while not much can be
148
+ # done about the bandwidth, if the response body is identical
149
+ # the downstream processing (parsing, caching, ...) can be avoided.
150
+ # - thanks to planet mars -fido.rb for the idea, cheers.
140
151
 
152
+ puts "no change; md5 digests match; skipping parsing feed"
153
+ return nil # no updates available; nothing to do
154
+ end
155
+
141
156
  feed_attribs = {
142
157
  http_code: response.code.to_i,
143
158
  http_etag: response.header[ 'etag' ],
144
159
  http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
145
160
  body: feed_xml,
161
+ md5: feed_md5,
146
162
  fetched: feed_fetched
147
163
  }
148
164
 
@@ -25,11 +25,14 @@ class Refresher
25
25
  Action.create!( title: 'update feeds' )
26
26
 
27
27
  feeds_fetched = Time.now
28
- Site.all.each do |site|
28
+
29
+ #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
30
+
31
+ Site.order(:id).each do |site|
29
32
  site.update_attributes!( fetched: feeds_fetched )
30
33
  end
31
34
 
32
- Feed.all.each do |feed|
35
+ Feed.order(:id).each do |feed|
33
36
  update_feed_worker( feed )
34
37
  end
35
38
  end
data/lib/pluto/schema.rb CHANGED
@@ -62,7 +62,8 @@ class CreateDb < ActiveRecord::Migration
62
62
  ## note: save last-modified header as text (not datetime) - pass through as is
63
63
  t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
64
64
 
65
- t.text :body # last http response body (complete feed!)
65
+ t.string :md5 # md5 hash of body
66
+ t.text :body # last http response body (complete feed!)
66
67
 
67
68
  t.datetime :fetched # last fetched/checked date
68
69
 
data/lib/pluto/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Pluto
3
- VERSION = '0.8.9'
3
+ VERSION = '0.8.10'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.9
4
+ version: 0.8.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-05 00:00:00.000000000 Z
12
+ date: 2013-10-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pakman
16
- requirement: &77270490 !ruby/object:Gem::Requirement
16
+ requirement: &74547830 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *77270490
24
+ version_requirements: *74547830
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: fetcher
27
- requirement: &77270080 !ruby/object:Gem::Requirement
27
+ requirement: &74547550 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.4.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *77270080
35
+ version_requirements: *74547550
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: logutils
38
- requirement: &77269570 !ruby/object:Gem::Requirement
38
+ requirement: &74547300 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.6'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *77269570
46
+ version_requirements: *74547300
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: feedutils
49
- requirement: &77268950 !ruby/object:Gem::Requirement
49
+ requirement: &74547000 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.3.2
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *77268950
57
+ version_requirements: *74547000
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: props
60
- requirement: &77268430 !ruby/object:Gem::Requirement
60
+ requirement: &74546740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.2
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *77268430
68
+ version_requirements: *74546740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: textutils
71
- requirement: &77268060 !ruby/object:Gem::Requirement
71
+ requirement: &74546440 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 0.6.8
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *77268060
79
+ version_requirements: *74546440
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: gli
82
- requirement: &77267780 !ruby/object:Gem::Requirement
82
+ requirement: &74546190 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 2.5.6
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *77267780
90
+ version_requirements: *74546190
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rdoc
93
- requirement: &77267490 !ruby/object:Gem::Requirement
93
+ requirement: &74545900 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ~>
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3.10'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *77267490
101
+ version_requirements: *74545900
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: hoe
104
- requirement: &77267190 !ruby/object:Gem::Requirement
104
+ requirement: &74545650 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ~>
@@ -109,7 +109,7 @@ dependencies:
109
109
  version: '3.3'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *77267190
112
+ version_requirements: *74545650
113
113
  description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
114
114
  Web Feeds)
115
115
  email: feedreader@googlegroups.com