pluto 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -5
- data/lib/pluto/fetcher.rb +96 -15
- data/lib/pluto/refresher.rb +34 -12
- data/lib/pluto/schema.rb +22 -2
- data/lib/pluto/subscriber.rb +5 -3
- data/lib/pluto/updater.rb +10 -0
- data/lib/pluto/version.rb +1 -1
- metadata +21 -21
data/README.md
CHANGED
@@ -76,8 +76,8 @@ COMMAND OPTIONS
|
|
76
76
|
-n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
|
77
77
|
|
78
78
|
EXAMPLE
|
79
|
-
pluto build ruby.yml
|
80
|
-
pluto build ruby.yml --template news
|
79
|
+
pluto build ruby.ini
|
80
|
+
pluto build ruby.ini --template news
|
81
81
|
pluto b ruby
|
82
82
|
pluto b ruby -t news
|
83
83
|
pluto b # will use pluto.ini|pluto.yml|planet.ini|planet.yml if present
|
@@ -127,7 +127,7 @@ SYNOPSIS
|
|
127
127
|
pluto [global options] update FILE
|
128
128
|
|
129
129
|
EXAMPLE
|
130
|
-
pluto update ruby.yml
|
130
|
+
pluto update ruby.ini
|
131
131
|
pluto u ruby
|
132
132
|
~~~
|
133
133
|
|
@@ -148,8 +148,8 @@ COMMAND OPTIONS
|
|
148
148
|
-n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
|
149
149
|
|
150
150
|
EXAMPLE
|
151
|
-
pluto merge ruby.yml
|
152
|
-
pluto merge ruby.yml --template news
|
151
|
+
pluto merge ruby.ini
|
152
|
+
pluto merge ruby.ini --template news
|
153
153
|
pluto m ruby
|
154
154
|
pluto m ruby -t news
|
155
155
|
~~~
|
data/lib/pluto/fetcher.rb
CHANGED
@@ -77,11 +77,6 @@ class Fetcher
|
|
77
77
|
feed_url = feed_rec.feed_url
|
78
78
|
feed_key = feed_rec.key
|
79
79
|
|
80
|
-
### todo/fix:
|
81
|
-
## add if available http_etag machinery for smarter updates
|
82
|
-
## and http_last_modified headers
|
83
|
-
## and brute force body_old == body_new etc.
|
84
|
-
|
85
80
|
### todo/fix: normalize/unifiy feed_url
|
86
81
|
## - same in fetcher - use shared utitlity method or similar
|
87
82
|
|
@@ -91,15 +86,6 @@ class Fetcher
|
|
91
86
|
'last-modified' => feed_rec.http_last_modified
|
92
87
|
}
|
93
88
|
|
94
|
-
### fix bug in fetcher - do NOT use request_uri use uri.to
|
95
|
-
## - add request_uri entry to (e.g. w/o host etc.)
|
96
|
-
## - remove code here once fixed in fetcher
|
97
|
-
@worker.cache[ URI.parse( feed_url ).request_uri ] = {
|
98
|
-
'etag' => feed_rec.http_etag,
|
99
|
-
'last-modified' => feed_rec.http_last_modified
|
100
|
-
}
|
101
|
-
|
102
|
-
|
103
89
|
response = @worker.get( feed_url )
|
104
90
|
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
105
91
|
|
@@ -125,6 +111,10 @@ class Fetcher
|
|
125
111
|
fetched: feed_fetched
|
126
112
|
}
|
127
113
|
feed_rec.update_attributes!( feed_attribs )
|
114
|
+
|
115
|
+
## add log error activity -- in future add to error log - better - why? why not?
|
116
|
+
Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
|
117
|
+
|
128
118
|
return nil # sorry; no feed for parsing available
|
129
119
|
end
|
130
120
|
|
@@ -191,7 +181,98 @@ class Fetcher
|
|
191
181
|
# or throw exception
|
192
182
|
feed = FeedUtils::Parser.parse( feed_xml )
|
193
183
|
end
|
194
|
-
|
184
|
+
|
185
|
+
|
186
|
+
def site_by_rec_if_modified( site_rec ) # try smart http update; will update db records
|
187
|
+
site_url = site_rec.url
|
188
|
+
site_key = site_rec.key
|
189
|
+
|
190
|
+
### todo/fix: normalize/unifiy feed_url
|
191
|
+
## - same in fetcher - use shared utitlity method or similar
|
192
|
+
|
193
|
+
@worker.use_cache = true
|
194
|
+
@worker.cache[ site_url ] = {
|
195
|
+
'etag' => site_rec.http_etag,
|
196
|
+
'last-modified' => site_rec.http_last_modified
|
197
|
+
}
|
198
|
+
|
199
|
+
response = @worker.get( site_url )
|
200
|
+
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
201
|
+
|
202
|
+
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
203
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
204
|
+
puts "no change; request returns not modified (304); skipping parsing site config"
|
205
|
+
return nil # no updates available; nothing to do
|
206
|
+
end
|
207
|
+
|
208
|
+
site_fetched = Time.now
|
209
|
+
|
210
|
+
if response.code != '200' # note Net::HTTP response.code is a string in ruby
|
211
|
+
|
212
|
+
puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
213
|
+
|
214
|
+
site_attribs = {
|
215
|
+
http_code: response.code.to_i,
|
216
|
+
http_server: response.header[ 'server' ],
|
217
|
+
http_etag: nil,
|
218
|
+
http_last_modified: nil,
|
219
|
+
body: nil,
|
220
|
+
md5: nil,
|
221
|
+
fetched: feed_fetched
|
222
|
+
}
|
223
|
+
site_rec.update_attributes!( site_attribs )
|
224
|
+
|
225
|
+
## add log error activity -- in future add to error log - better - why? why not?
|
226
|
+
Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
|
227
|
+
|
228
|
+
return nil # sorry; no feed for parsing available
|
229
|
+
end
|
230
|
+
|
231
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
232
|
+
|
233
|
+
site_text = response.body
|
234
|
+
|
235
|
+
###
|
236
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
237
|
+
# will mostly be ASCII
|
238
|
+
# - try to change encoding to UTF-8 ourselves
|
239
|
+
logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
|
240
|
+
|
241
|
+
#####
|
242
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
243
|
+
|
244
|
+
## NB:
|
245
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
246
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
247
|
+
site_text = site_text.force_encoding( Encoding::UTF_8 )
|
248
|
+
logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"
|
249
|
+
|
250
|
+
site_attribs = {
|
251
|
+
http_code: response.code.to_i,
|
252
|
+
http_server: response.header[ 'server' ],
|
253
|
+
http_etag: response.header[ 'etag' ],
|
254
|
+
http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
|
255
|
+
fetched: site_fetched
|
256
|
+
}
|
257
|
+
|
258
|
+
## if debug?
|
259
|
+
puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
|
260
|
+
puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
|
261
|
+
puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
|
262
|
+
## end
|
263
|
+
|
264
|
+
site_rec.update_attributes!( site_attribs )
|
265
|
+
|
266
|
+
## logger.debug "site_text:"
|
267
|
+
## logger.debug site_text[ 0..300 ] # get first 300 chars
|
268
|
+
|
269
|
+
|
270
|
+
puts "Before parsing site config >#{site_key}<..."
|
271
|
+
|
272
|
+
# assume ini format for now
|
273
|
+
site_config = INI.load( site_text )
|
274
|
+
end
|
275
|
+
|
195
276
|
end # class Fetcher
|
196
277
|
|
197
278
|
end # module Pluto
|
data/lib/pluto/refresher.rb
CHANGED
@@ -14,7 +14,7 @@ class Refresher
|
|
14
14
|
def debug?() @debug || false; end
|
15
15
|
|
16
16
|
|
17
|
-
def
|
17
|
+
def update_sites( opts={} ) # update all site configs
|
18
18
|
if debug?
|
19
19
|
## turn on logging for sql too
|
20
20
|
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
@@ -22,16 +22,28 @@ class Refresher
|
|
22
22
|
end
|
23
23
|
|
24
24
|
# -- log update activity
|
25
|
-
Activity.create!( text: 'update feeds' )
|
26
|
-
|
27
|
-
feeds_fetched = Time.now
|
28
|
-
|
25
|
+
Activity.create!( text: 'update sites' )
|
26
|
+
|
29
27
|
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
30
|
-
|
28
|
+
|
31
29
|
Site.order(:id).each do |site|
|
32
|
-
site.update_attributes!( fetched: feeds_fetched )
|
30
|
+
update_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
def update_feeds( opts={} ) # update all feeds
|
36
|
+
if debug?
|
37
|
+
## turn on logging for sql too
|
38
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
39
|
+
@worker.debug = true # also pass along worker debug flag if set
|
33
40
|
end
|
34
41
|
|
42
|
+
# -- log update activity
|
43
|
+
Activity.create!( text: 'update feeds' )
|
44
|
+
|
45
|
+
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
46
|
+
|
35
47
|
Feed.order(:id).each do |feed|
|
36
48
|
update_feed_worker( feed )
|
37
49
|
end
|
@@ -48,12 +60,7 @@ class Refresher
|
|
48
60
|
# -- log update activity
|
49
61
|
Activity.create!( text: "update feeds >#{site_key}<" )
|
50
62
|
|
51
|
-
#####
|
52
|
-
# -- update fetched timestamps for all sites
|
53
|
-
feeds_fetched = Time.now
|
54
|
-
|
55
63
|
site = Site.find_by_key!( site_key )
|
56
|
-
site.update_attributes!( fetched: feeds_fetched )
|
57
64
|
|
58
65
|
site.feeds.each do |feed|
|
59
66
|
update_feed_worker( feed )
|
@@ -61,7 +68,22 @@ class Refresher
|
|
61
68
|
|
62
69
|
end # method update_feeds
|
63
70
|
|
71
|
+
|
64
72
|
private
|
73
|
+
def update_site_worker( site_rec )
|
74
|
+
site_config = @worker.site_by_rec_if_modified( site_rec )
|
75
|
+
|
76
|
+
# on error or if http-not modified etc. skip update/processing
|
77
|
+
return if site_config.nil?
|
78
|
+
|
79
|
+
subscriber = Subscriber.new
|
80
|
+
subscriber.debug = debug? ? true : false # pass along debug flag
|
81
|
+
|
82
|
+
site_key = site_rec.key
|
83
|
+
subscriber.update_subscriptions_for( site_key, site_config )
|
84
|
+
end
|
85
|
+
|
86
|
+
|
65
87
|
def update_feed_worker( feed_rec )
|
66
88
|
feed = @worker.feed_by_rec_if_modified( feed_rec )
|
67
89
|
|
data/lib/pluto/schema.rb
CHANGED
@@ -2,18 +2,38 @@
|
|
2
2
|
module Pluto
|
3
3
|
|
4
4
|
class CreateDb < ActiveRecord::Migration
|
5
|
-
|
5
|
+
|
6
6
|
def up
|
7
7
|
create_table :sites do |t|
|
8
8
|
t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
|
9
9
|
t.string :key, :null => false # e.g. ruby, js, etc.
|
10
|
-
t.datetime :fetched # last fetched/checked date -- make not null ??
|
11
10
|
|
12
11
|
############
|
13
12
|
# filters (site-wide)
|
14
13
|
t.string :includes # regex
|
15
14
|
t.string :excludes # regex
|
16
15
|
|
16
|
+
######################
|
17
|
+
# for auto-update of feed list/site config
|
18
|
+
|
19
|
+
t.string :url # source url for auto-update (optional)
|
20
|
+
|
21
|
+
## note: make sure to use same fields for update check as feed
|
22
|
+
|
23
|
+
t.datetime :fetched # last fetched/checked date -- make not null ??
|
24
|
+
t.integer :http_code # last http status code e.g. 200,404,etc.
|
25
|
+
t.string :http_etag # last http header etag
|
26
|
+
## note: save last-modified header as text (not datetime) - pass through as is
|
27
|
+
t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
|
28
|
+
t.string :http_server # last http server header if present
|
29
|
+
|
30
|
+
# note: do NOT store body content (that is, text) and md5 digest
|
31
|
+
# use git! and github! commit will be http_etag!!
|
32
|
+
|
33
|
+
t.datetime :fetched # last fetched/checked date
|
34
|
+
|
35
|
+
#############
|
36
|
+
# more fields
|
17
37
|
|
18
38
|
t.timestamps # created_at, updated_at
|
19
39
|
end
|
data/lib/pluto/subscriber.rb
CHANGED
@@ -12,16 +12,17 @@ class Subscriber
|
|
12
12
|
|
13
13
|
def update_subscriptions( config, opts={} )
|
14
14
|
# !!!! -- depreciated API - remove - do NOT use anymore
|
15
|
-
puts "warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key)"
|
15
|
+
puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"
|
16
16
|
update_subscriptions_for( 'planet', config, opts ) # default to planet site_key
|
17
17
|
end
|
18
18
|
|
19
19
|
|
20
20
|
def update_subscriptions_for( site_key, config, opts={} )
|
21
21
|
site_attribs = {
|
22
|
-
title:
|
22
|
+
title: config['title'] || config['name'], # support either title or name
|
23
|
+
url: config['source'] || config['url'] # support source or url for source url for auto-update (optional)
|
23
24
|
}
|
24
|
-
|
25
|
+
|
25
26
|
logger.debug "site_attribs: #{site_attribs.inspect}"
|
26
27
|
|
27
28
|
site_rec = Site.find_by_key( site_key )
|
@@ -49,6 +50,7 @@ class Subscriber
|
|
49
50
|
|
50
51
|
# skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
|
51
52
|
next if ['title','title2','name',
|
53
|
+
'source', 'url',
|
52
54
|
'include','includes','exclude','excludes',
|
53
55
|
'feeds',
|
54
56
|
'planet','defaults'].include?( key )
|
data/lib/pluto/updater.rb
CHANGED
@@ -4,6 +4,16 @@ class Updater
|
|
4
4
|
|
5
5
|
include LogUtils::Logging
|
6
6
|
|
7
|
+
### fix!!!!!: change config to text - yes/no - why? why not??
|
8
|
+
# or pass along struct
|
9
|
+
# - with hash and text and format(e.g. ini/yml) as fields???
|
10
|
+
#
|
11
|
+
# - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
|
12
|
+
## - pass along unparsed text!! - not hash struct
|
13
|
+
# - will get saved in db plus we need to generate md5 hash
|
14
|
+
# - add filename e.g. ruby.ini|ruby.conf|ruby.yml as opt ??
|
15
|
+
# or add config format as opt e.g. ini or yml?
|
16
|
+
|
7
17
|
def initialize( opts, config )
|
8
18
|
@opts = opts
|
9
19
|
@config = config
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.9.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-11-09 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pakman
|
16
|
-
requirement: &
|
16
|
+
requirement: &75633210 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75633210
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &75632900 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.4.1
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75632900
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: logutils
|
38
|
-
requirement: &
|
38
|
+
requirement: &75632580 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.6'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75632580
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: feedutils
|
49
|
-
requirement: &
|
49
|
+
requirement: &75632320 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.4.0
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *75632320
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: props
|
60
|
-
requirement: &
|
60
|
+
requirement: &75632050 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.3
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *75632050
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: textutils
|
71
|
-
requirement: &
|
71
|
+
requirement: &75631720 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0.7'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *75631720
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: activityutils
|
82
|
-
requirement: &
|
82
|
+
requirement: &75631420 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: 0.1.0
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *75631420
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: gli
|
93
|
-
requirement: &
|
93
|
+
requirement: &75631140 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: 2.5.6
|
99
99
|
type: :runtime
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *75631140
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: rdoc
|
104
|
-
requirement: &
|
104
|
+
requirement: &75653690 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ~>
|
@@ -109,10 +109,10 @@ dependencies:
|
|
109
109
|
version: '3.10'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *75653690
|
113
113
|
- !ruby/object:Gem::Dependency
|
114
114
|
name: hoe
|
115
|
-
requirement: &
|
115
|
+
requirement: &75653420 !ruby/object:Gem::Requirement
|
116
116
|
none: false
|
117
117
|
requirements:
|
118
118
|
- - ~>
|
@@ -120,7 +120,7 @@ dependencies:
|
|
120
120
|
version: '3.3'
|
121
121
|
type: :development
|
122
122
|
prerelease: false
|
123
|
-
version_requirements: *
|
123
|
+
version_requirements: *75653420
|
124
124
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
125
125
|
Web Feeds)
|
126
126
|
email: feedreader@googlegroups.com
|