pluto 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -76,8 +76,8 @@ COMMAND OPTIONS
76
76
  -n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
77
77
 
78
78
  EXAMPLE
79
- pluto build ruby.yml
80
- pluto build ruby.yml --template news
79
+ pluto build ruby.ini
80
+ pluto build ruby.ini --template news
81
81
  pluto b ruby
82
82
  pluto b ruby -t news
83
83
  pluto b # will use pluto.ini|pluto.yml|planet.ini|planet.yml if present
@@ -127,7 +127,7 @@ SYNOPSIS
127
127
  pluto [global options] update FILE
128
128
 
129
129
  EXAMPLE
130
- pluto update ruby.yml
130
+ pluto update ruby.ini
131
131
  pluto u ruby
132
132
  ~~~
133
133
 
@@ -148,8 +148,8 @@ COMMAND OPTIONS
148
148
  -n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
149
149
 
150
150
  EXAMPLE
151
- pluto merge ruby.yml
152
- pluto merge ruby.yml --template news
151
+ pluto merge ruby.ini
152
+ pluto merge ruby.ini --template news
153
153
  pluto m ruby
154
154
  pluto m ruby -t news
155
155
  ~~~
data/lib/pluto/fetcher.rb CHANGED
@@ -77,11 +77,6 @@ class Fetcher
77
77
  feed_url = feed_rec.feed_url
78
78
  feed_key = feed_rec.key
79
79
 
80
- ### todo/fix:
81
- ## add if available http_etag machinery for smarter updates
82
- ## and http_last_modified headers
83
- ## and brute force body_old == body_new etc.
84
-
85
80
  ### todo/fix: normalize/unify feed_url
86
81
  ## - same in fetcher - use shared utility method or similar
87
82
 
@@ -91,15 +86,6 @@ class Fetcher
91
86
  'last-modified' => feed_rec.http_last_modified
92
87
  }
93
88
 
94
- ### fix bug in fetcher - do NOT use request_uri use uri.to
95
- ## - add request_uri entry to (e.g. w/o host etc.)
96
- ## - remove code here once fixed in fetcher
97
- @worker.cache[ URI.parse( feed_url ).request_uri ] = {
98
- 'etag' => feed_rec.http_etag,
99
- 'last-modified' => feed_rec.http_last_modified
100
- }
101
-
102
-
103
89
  response = @worker.get( feed_url )
104
90
  @worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
105
91
 
@@ -125,6 +111,10 @@ class Fetcher
125
111
  fetched: feed_fetched
126
112
  }
127
113
  feed_rec.update_attributes!( feed_attribs )
114
+
115
+ ## add log error activity -- in future add to error log - better - why? why not?
116
+ Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
117
+
128
118
  return nil # sorry; no feed for parsing available
129
119
  end
130
120
 
@@ -191,7 +181,98 @@ class Fetcher
191
181
  # or throw exception
192
182
  feed = FeedUtils::Parser.parse( feed_xml )
193
183
  end
194
-
184
+
185
+
186
+ def site_by_rec_if_modified( site_rec ) # try smart http update; will update db records
187
+ site_url = site_rec.url
188
+ site_key = site_rec.key
189
+
190
+ ### todo/fix: normalize/unify feed_url
191
+ ## - same in fetcher - use shared utility method or similar
192
+
193
+ @worker.use_cache = true
194
+ @worker.cache[ site_url ] = {
195
+ 'etag' => site_rec.http_etag,
196
+ 'last-modified' => site_rec.http_last_modified
197
+ }
198
+
199
+ response = @worker.get( site_url )
200
+ @worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
201
+
202
+ if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
203
+ puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
204
+ puts "no change; request returns not modified (304); skipping parsing site config"
205
+ return nil # no updates available; nothing to do
206
+ end
207
+
208
+ site_fetched = Time.now
209
+
210
+ if response.code != '200' # note Net::HTTP response.code is a string in ruby
211
+
212
+ puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
213
+
214
+ site_attribs = {
215
+ http_code: response.code.to_i,
216
+ http_server: response.header[ 'server' ],
217
+ http_etag: nil,
218
+ http_last_modified: nil,
219
+ body: nil,
220
+ md5: nil,
221
+ fetched: feed_fetched
222
+ }
223
+ site_rec.update_attributes!( site_attribs )
224
+
225
+ ## add log error activity -- in future add to error log - better - why? why not?
226
+ Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
227
+
228
+ return nil # sorry; no feed for parsing available
229
+ end
230
+
231
+ puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
232
+
233
+ site_text = response.body
234
+
235
+ ###
236
+ # NB: Net::HTTP will NOT set encoding UTF-8 etc.
237
+ # will mostly be ASCII
238
+ # - try to change encoding to UTF-8 ourselves
239
+ logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
240
+
241
+ #####
242
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
243
+
244
+ ## NB:
245
+ # for now "hardcoded" to utf8 - what else can we do?
246
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
247
+ site_text = site_text.force_encoding( Encoding::UTF_8 )
248
+ logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"
249
+
250
+ site_attribs = {
251
+ http_code: response.code.to_i,
252
+ http_server: response.header[ 'server' ],
253
+ http_etag: response.header[ 'etag' ],
254
+ http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
255
+ fetched: site_fetched
256
+ }
257
+
258
+ ## if debug?
259
+ puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
260
+ puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
261
+ puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
262
+ ## end
263
+
264
+ site_rec.update_attributes!( site_attribs )
265
+
266
+ ## logger.debug "site_text:"
267
+ ## logger.debug site_text[ 0..300 ] # get first 300 chars
268
+
269
+
270
+ puts "Before parsing site config >#{site_key}<..."
271
+
272
+ # assume ini format for now
273
+ site_config = INI.load( site_text )
274
+ end
275
+
195
276
  end # class Fetcher
196
277
 
197
278
  end # module Pluto
@@ -14,7 +14,7 @@ class Refresher
14
14
  def debug?() @debug || false; end
15
15
 
16
16
 
17
- def update_feeds( opts={} ) # update all feeds
17
+ def update_sites( opts={} ) # update all site configs
18
18
  if debug?
19
19
  ## turn on logging for sql too
20
20
  ActiveRecord::Base.logger = Logger.new( STDOUT )
@@ -22,16 +22,28 @@ class Refresher
22
22
  end
23
23
 
24
24
  # -- log update activity
25
- Activity.create!( text: 'update feeds' )
26
-
27
- feeds_fetched = Time.now
28
-
25
+ Activity.create!( text: 'update sites' )
26
+
29
27
  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
30
-
28
+
31
29
  Site.order(:id).each do |site|
32
- site.update_attributes!( fetched: feeds_fetched )
30
+ update_site_worker( site ) if site.url.present? # note: only update if (source) url present
31
+ end
32
+ end
33
+
34
+
35
+ def update_feeds( opts={} ) # update all feeds
36
+ if debug?
37
+ ## turn on logging for sql too
38
+ ActiveRecord::Base.logger = Logger.new( STDOUT )
39
+ @worker.debug = true # also pass along worker debug flag if set
33
40
  end
34
41
 
42
+ # -- log update activity
43
+ Activity.create!( text: 'update feeds' )
44
+
45
+ #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
46
+
35
47
  Feed.order(:id).each do |feed|
36
48
  update_feed_worker( feed )
37
49
  end
@@ -48,12 +60,7 @@ class Refresher
48
60
  # -- log update activity
49
61
  Activity.create!( text: "update feeds >#{site_key}<" )
50
62
 
51
- #####
52
- # -- update fetched timestamps for all sites
53
- feeds_fetched = Time.now
54
-
55
63
  site = Site.find_by_key!( site_key )
56
- site.update_attributes!( fetched: feeds_fetched )
57
64
 
58
65
  site.feeds.each do |feed|
59
66
  update_feed_worker( feed )
@@ -61,7 +68,22 @@ class Refresher
61
68
 
62
69
  end # method update_feeds
63
70
 
71
+
64
72
  private
73
+ def update_site_worker( site_rec )
74
+ site_config = @worker.site_by_rec_if_modified( site_rec )
75
+
76
+ # on error or if http-not modified etc. skip update/processing
77
+ return if site_config.nil?
78
+
79
+ subscriber = Subscriber.new
80
+ subscriber.debug = debug? ? true : false # pass along debug flag
81
+
82
+ site_key = site_rec.key
83
+ subscriber.update_subscriptions_for( site_key, site_config )
84
+ end
85
+
86
+
65
87
  def update_feed_worker( feed_rec )
66
88
  feed = @worker.feed_by_rec_if_modified( feed_rec )
67
89
 
data/lib/pluto/schema.rb CHANGED
@@ -2,18 +2,38 @@
2
2
  module Pluto
3
3
 
4
4
  class CreateDb < ActiveRecord::Migration
5
-
5
+
6
6
  def up
7
7
  create_table :sites do |t|
8
8
  t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
9
9
  t.string :key, :null => false # e.g. ruby, js, etc.
10
- t.datetime :fetched # last fetched/checked date -- make not null ??
11
10
 
12
11
  ############
13
12
  # filters (site-wide)
14
13
  t.string :includes # regex
15
14
  t.string :excludes # regex
16
15
 
16
+ ######################
17
+ # for auto-update of feed list/site config
18
+
19
+ t.string :url # source url for auto-update (optional)
20
+
21
+ ## note: make sure to use same fields for update check as feed
22
+
23
+ t.datetime :fetched # last fetched/checked date -- make not null ??
24
+ t.integer :http_code # last http status code e.g. 200,404,etc.
25
+ t.string :http_etag # last http header etag
26
+ ## note: save last-modified header as text (not datetime) - pass through as is
27
+ t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
28
+ t.string :http_server # last http server header if present
29
+
30
+ # note: do NOT store body content (that is, text) and md5 digest
31
+ # use git! and github! commit will be http_etag!!
32
+
33
+ t.datetime :fetched # last fetched/checked date
34
+
35
+ #############
36
+ # more fields
17
37
 
18
38
  t.timestamps # created_at, updated_at
19
39
  end
@@ -12,16 +12,17 @@ class Subscriber
12
12
 
13
13
  def update_subscriptions( config, opts={} )
14
14
  # !!!! -- depreciated API - remove - do NOT use anymore
15
- puts "warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key)"
15
+ puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"
16
16
  update_subscriptions_for( 'planet', config, opts ) # default to planet site_key
17
17
  end
18
18
 
19
19
 
20
20
  def update_subscriptions_for( site_key, config, opts={} )
21
21
  site_attribs = {
22
- title: config['title'] || config['name'] # support either title or name
22
+ title: config['title'] || config['name'], # support either title or name
23
+ url: config['source'] || config['url'] # support source or url for source url for auto-update (optional)
23
24
  }
24
-
25
+
25
26
  logger.debug "site_attribs: #{site_attribs.inspect}"
26
27
 
27
28
  site_rec = Site.find_by_key( site_key )
@@ -49,6 +50,7 @@ class Subscriber
49
50
 
50
51
  # skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
51
52
  next if ['title','title2','name',
53
+ 'source', 'url',
52
54
  'include','includes','exclude','excludes',
53
55
  'feeds',
54
56
  'planet','defaults'].include?( key )
data/lib/pluto/updater.rb CHANGED
@@ -4,6 +4,16 @@ class Updater
4
4
 
5
5
  include LogUtils::Logging
6
6
 
7
+ ### fix!!!!!: change config to text - yes/no - why? why not??
8
+ # or pass along struct
9
+ # - with hash and text and format(e.g. ini/yml) as fields???
10
+ #
11
+ # - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
12
+ ## - pass along unparsed text!! - not hash struct
13
+ # - will get saved in db plus we need to generate md5 hash
14
+ # - add filename e.g. ruby.ini|ruby.conf|ruby.yml as opt ??
15
+ # or add config format as opt e.g. ini or yml?
16
+
7
17
  def initialize( opts, config )
8
18
  @opts = opts
9
19
  @config = config
data/lib/pluto/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Pluto
3
- VERSION = '0.9.1'
3
+ VERSION = '0.9.2'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pakman
16
- requirement: &81472690 !ruby/object:Gem::Requirement
16
+ requirement: &75633210 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *81472690
24
+ version_requirements: *75633210
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: fetcher
27
- requirement: &81472130 !ruby/object:Gem::Requirement
27
+ requirement: &75632900 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.4.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *81472130
35
+ version_requirements: *75632900
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: logutils
38
- requirement: &81494060 !ruby/object:Gem::Requirement
38
+ requirement: &75632580 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.6'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *81494060
46
+ version_requirements: *75632580
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: feedutils
49
- requirement: &81493690 !ruby/object:Gem::Requirement
49
+ requirement: &75632320 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.4.0
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *81493690
57
+ version_requirements: *75632320
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: props
60
- requirement: &81493150 !ruby/object:Gem::Requirement
60
+ requirement: &75632050 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.3
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *81493150
68
+ version_requirements: *75632050
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: textutils
71
- requirement: &81492890 !ruby/object:Gem::Requirement
71
+ requirement: &75631720 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0.7'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *81492890
79
+ version_requirements: *75631720
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: activityutils
82
- requirement: &81492660 !ruby/object:Gem::Requirement
82
+ requirement: &75631420 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 0.1.0
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *81492660
90
+ version_requirements: *75631420
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: gli
93
- requirement: &81492380 !ruby/object:Gem::Requirement
93
+ requirement: &75631140 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: 2.5.6
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *81492380
101
+ version_requirements: *75631140
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: rdoc
104
- requirement: &81492080 !ruby/object:Gem::Requirement
104
+ requirement: &75653690 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ~>
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '3.10'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *81492080
112
+ version_requirements: *75653690
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: hoe
115
- requirement: &81491610 !ruby/object:Gem::Requirement
115
+ requirement: &75653420 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ~>
@@ -120,7 +120,7 @@ dependencies:
120
120
  version: '3.3'
121
121
  type: :development
122
122
  prerelease: false
123
- version_requirements: *81491610
123
+ version_requirements: *75653420
124
124
  description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
125
125
  Web Feeds)
126
126
  email: feedreader@googlegroups.com