pluto 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -76,8 +76,8 @@ COMMAND OPTIONS
76
76
  -n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
77
77
 
78
78
  EXAMPLE
79
- pluto build ruby.yml
80
- pluto build ruby.yml --template news
79
+ pluto build ruby.ini
80
+ pluto build ruby.ini --template news
81
81
  pluto b ruby
82
82
  pluto b ruby -t news
83
83
  pluto b # will use pluto.ini|pluto.yml|planet.ini|planet.yml if present
@@ -127,7 +127,7 @@ SYNOPSIS
127
127
  pluto [global options] update FILE
128
128
 
129
129
  EXAMPLE
130
- pluto update ruby.yml
130
+ pluto update ruby.ini
131
131
  pluto u ruby
132
132
  ~~~
133
133
 
@@ -148,8 +148,8 @@ COMMAND OPTIONS
148
148
  -n, --dbname=NAME - Database name (default: <PLANET>.db e.g. ruby.db)
149
149
 
150
150
  EXAMPLE
151
- pluto merge ruby.yml
152
- pluto merge ruby.yml --template news
151
+ pluto merge ruby.ini
152
+ pluto merge ruby.ini --template news
153
153
  pluto m ruby
154
154
  pluto m ruby -t news
155
155
  ~~~
data/lib/pluto/fetcher.rb CHANGED
@@ -77,11 +77,6 @@ class Fetcher
77
77
  feed_url = feed_rec.feed_url
78
78
  feed_key = feed_rec.key
79
79
 
80
- ### todo/fix:
81
- ## add if available http_etag machinery for smarter updates
82
- ## and http_last_modified headers
83
- ## and brute force body_old == body_new etc.
84
-
85
80
  ### todo/fix: normalize/unify feed_url
86
81
  ## - same in fetcher - use shared utility method or similar
87
82
 
@@ -91,15 +86,6 @@ class Fetcher
91
86
  'last-modified' => feed_rec.http_last_modified
92
87
  }
93
88
 
94
- ### fix bug in fetcher - do NOT use request_uri use uri.to
95
- ## - add request_uri entry to (e.g. w/o host etc.)
96
- ## - remove code here once fixed in fetcher
97
- @worker.cache[ URI.parse( feed_url ).request_uri ] = {
98
- 'etag' => feed_rec.http_etag,
99
- 'last-modified' => feed_rec.http_last_modified
100
- }
101
-
102
-
103
89
  response = @worker.get( feed_url )
104
90
  @worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
105
91
 
@@ -125,6 +111,10 @@ class Fetcher
125
111
  fetched: feed_fetched
126
112
  }
127
113
  feed_rec.update_attributes!( feed_attribs )
114
+
115
+ ## add log error activity -- in future add to error log - better - why? why not?
116
+ Activity.create!( text: "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}" )
117
+
128
118
  return nil # sorry; no feed for parsing available
129
119
  end
130
120
 
@@ -191,7 +181,98 @@ class Fetcher
191
181
  # or throw exception
192
182
  feed = FeedUtils::Parser.parse( feed_xml )
193
183
  end
194
-
184
+
185
+
186
+ def site_by_rec_if_modified( site_rec ) # try smart http update; will update db records
187
+ site_url = site_rec.url
188
+ site_key = site_rec.key
189
+
190
+ ### todo/fix: normalize/unify feed_url
191
+ ## - same in fetcher - use shared utility method or similar
192
+
193
+ @worker.use_cache = true
194
+ @worker.cache[ site_url ] = {
195
+ 'etag' => site_rec.http_etag,
196
+ 'last-modified' => site_rec.http_last_modified
197
+ }
198
+
199
+ response = @worker.get( site_url )
200
+ @worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
201
+
202
+ if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
203
+ puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
204
+ puts "no change; request returns not modified (304); skipping parsing site config"
205
+ return nil # no updates available; nothing to do
206
+ end
207
+
208
+ site_fetched = Time.now
209
+
210
+ if response.code != '200' # note Net::HTTP response.code is a string in ruby
211
+
212
+ puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
213
+
214
+ site_attribs = {
215
+ http_code: response.code.to_i,
216
+ http_server: response.header[ 'server' ],
217
+ http_etag: nil,
218
+ http_last_modified: nil,
219
+ body: nil,
220
+ md5: nil,
221
+ fetched: feed_fetched
222
+ }
223
+ site_rec.update_attributes!( site_attribs )
224
+
225
+ ## add log error activity -- in future add to error log - better - why? why not?
226
+ Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
227
+
228
+ return nil # sorry; no feed for parsing available
229
+ end
230
+
231
+ puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
232
+
233
+ site_text = response.body
234
+
235
+ ###
236
+ # NB: Net::HTTP will NOT set encoding UTF-8 etc.
237
+ # will mostly be ASCII
238
+ # - try to change encoding to UTF-8 ourselves
239
+ logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
240
+
241
+ #####
242
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
243
+
244
+ ## NB:
245
+ # for now "hardcoded" to utf8 - what else can we do?
246
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
247
+ site_text = site_text.force_encoding( Encoding::UTF_8 )
248
+ logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"
249
+
250
+ site_attribs = {
251
+ http_code: response.code.to_i,
252
+ http_server: response.header[ 'server' ],
253
+ http_etag: response.header[ 'etag' ],
254
+ http_last_modified: response.header[ 'last-modified' ], ## note: last_modified header gets stored as plain text (not datetime)
255
+ fetched: site_fetched
256
+ }
257
+
258
+ ## if debug?
259
+ puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
260
+ puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
261
+ puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
262
+ ## end
263
+
264
+ site_rec.update_attributes!( site_attribs )
265
+
266
+ ## logger.debug "site_text:"
267
+ ## logger.debug site_text[ 0..300 ] # get first 300 chars
268
+
269
+
270
+ puts "Before parsing site config >#{site_key}<..."
271
+
272
+ # assume ini format for now
273
+ site_config = INI.load( site_text )
274
+ end
275
+
195
276
  end # class Fetcher
196
277
 
197
278
  end # module Pluto
@@ -14,7 +14,7 @@ class Refresher
14
14
  def debug?() @debug || false; end
15
15
 
16
16
 
17
- def update_feeds( opts={} ) # update all feeds
17
+ def update_sites( opts={} ) # update all site configs
18
18
  if debug?
19
19
  ## turn on logging for sql too
20
20
  ActiveRecord::Base.logger = Logger.new( STDOUT )
@@ -22,16 +22,28 @@ class Refresher
22
22
  end
23
23
 
24
24
  # -- log update activity
25
- Activity.create!( text: 'update feeds' )
26
-
27
- feeds_fetched = Time.now
28
-
25
+ Activity.create!( text: 'update sites' )
26
+
29
27
  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
30
-
28
+
31
29
  Site.order(:id).each do |site|
32
- site.update_attributes!( fetched: feeds_fetched )
30
+ update_site_worker( site ) if site.url.present? # note: only update if (source) url present
31
+ end
32
+ end
33
+
34
+
35
+ def update_feeds( opts={} ) # update all feeds
36
+ if debug?
37
+ ## turn on logging for sql too
38
+ ActiveRecord::Base.logger = Logger.new( STDOUT )
39
+ @worker.debug = true # also pass along worker debug flag if set
33
40
  end
34
41
 
42
+ # -- log update activity
43
+ Activity.create!( text: 'update feeds' )
44
+
45
+ #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
46
+
35
47
  Feed.order(:id).each do |feed|
36
48
  update_feed_worker( feed )
37
49
  end
@@ -48,12 +60,7 @@ class Refresher
48
60
  # -- log update activity
49
61
  Activity.create!( text: "update feeds >#{site_key}<" )
50
62
 
51
- #####
52
- # -- update fetched timestamps for all sites
53
- feeds_fetched = Time.now
54
-
55
63
  site = Site.find_by_key!( site_key )
56
- site.update_attributes!( fetched: feeds_fetched )
57
64
 
58
65
  site.feeds.each do |feed|
59
66
  update_feed_worker( feed )
@@ -61,7 +68,22 @@ class Refresher
61
68
 
62
69
  end # method update_feeds
63
70
 
71
+
64
72
  private
73
+ def update_site_worker( site_rec )
74
+ site_config = @worker.site_by_rec_if_modified( site_rec )
75
+
76
+ # on error or if http-not modified etc. skip update/processing
77
+ return if site_config.nil?
78
+
79
+ subscriber = Subscriber.new
80
+ subscriber.debug = debug? ? true : false # pass along debug flag
81
+
82
+ site_key = site_rec.key
83
+ subscriber.update_subscriptions_for( site_key, site_config )
84
+ end
85
+
86
+
65
87
  def update_feed_worker( feed_rec )
66
88
  feed = @worker.feed_by_rec_if_modified( feed_rec )
67
89
 
data/lib/pluto/schema.rb CHANGED
@@ -2,18 +2,38 @@
2
2
  module Pluto
3
3
 
4
4
  class CreateDb < ActiveRecord::Migration
5
-
5
+
6
6
  def up
7
7
  create_table :sites do |t|
8
8
  t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
9
9
  t.string :key, :null => false # e.g. ruby, js, etc.
10
- t.datetime :fetched # last fetched/checked date -- make not null ??
11
10
 
12
11
  ############
13
12
  # filters (site-wide)
14
13
  t.string :includes # regex
15
14
  t.string :excludes # regex
16
15
 
16
+ ######################
17
+ # for auto-update of feed list/site config
18
+
19
+ t.string :url # source url for auto-update (optional)
20
+
21
+ ## note: make sure to use same fields for update check as feed
22
+
23
+ t.datetime :fetched # last fetched/checked date -- make not null ??
24
+ t.integer :http_code # last http status code e.g. 200,404,etc.
25
+ t.string :http_etag # last http header etag
26
+ ## note: save last-modified header as text (not datetime) - pass through as is
27
+ t.string :http_last_modified # last http header last-modified - note: save header as plain text!!! pass along in next request as-is
28
+ t.string :http_server # last http server header if present
29
+
30
+ # note: do NOT store body content (that is, text) and md5 digest
31
+ # use git! and github! commit will be http_etag!!
32
+
33
+ t.datetime :fetched # last fetched/checked date
34
+
35
+ #############
36
+ # more fields
17
37
 
18
38
  t.timestamps # created_at, updated_at
19
39
  end
@@ -12,16 +12,17 @@ class Subscriber
12
12
 
13
13
  def update_subscriptions( config, opts={} )
14
14
  # !!!! -- deprecated API - remove - do NOT use anymore
15
- puts "warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key)"
15
+ puts "*** warn - [Pluto::Subscriber] depreciated API -- use update_subscriptions_for( site_key )"
16
16
  update_subscriptions_for( 'planet', config, opts ) # default to planet site_key
17
17
  end
18
18
 
19
19
 
20
20
  def update_subscriptions_for( site_key, config, opts={} )
21
21
  site_attribs = {
22
- title: config['title'] || config['name'] # support either title or name
22
+ title: config['title'] || config['name'], # support either title or name
23
+ url: config['source'] || config['url'] # support source or url for source url for auto-update (optional)
23
24
  }
24
-
25
+
25
26
  logger.debug "site_attribs: #{site_attribs.inspect}"
26
27
 
27
28
  site_rec = Site.find_by_key( site_key )
@@ -49,6 +50,7 @@ class Subscriber
49
50
 
50
51
  # skip "top-level" feed keys e.g. title, etc. or planet planet sections (e.g. planet,defaults)
51
52
  next if ['title','title2','name',
53
+ 'source', 'url',
52
54
  'include','includes','exclude','excludes',
53
55
  'feeds',
54
56
  'planet','defaults'].include?( key )
data/lib/pluto/updater.rb CHANGED
@@ -4,6 +4,16 @@ class Updater
4
4
 
5
5
  include LogUtils::Logging
6
6
 
7
+ ### fix!!!!!: change config to text - yes/no - why? why not??
8
+ # or pass along struct
9
+ # - with hash and text and format(e.g. ini/yml) as fields???
10
+ #
11
+ # - why? - we need to get handle on md5 digest/hash plus on plain text, ideally to store in db
12
+ ## - pass along unparsed text!! - not hash struct
13
+ # - will get saved in db plus we need to generate md5 hash
14
+ # - add filename e.g. ruby.ini|ruby.conf|ruby.yml as opt ??
15
+ # or add config format as opt e.g. ini or yml?
16
+
7
17
  def initialize( opts, config )
8
18
  @opts = opts
9
19
  @config = config
data/lib/pluto/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Pluto
3
- VERSION = '0.9.1'
3
+ VERSION = '0.9.2'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1
4
+ version: 0.9.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pakman
16
- requirement: &81472690 !ruby/object:Gem::Requirement
16
+ requirement: &75633210 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *81472690
24
+ version_requirements: *75633210
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: fetcher
27
- requirement: &81472130 !ruby/object:Gem::Requirement
27
+ requirement: &75632900 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.4.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *81472130
35
+ version_requirements: *75632900
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: logutils
38
- requirement: &81494060 !ruby/object:Gem::Requirement
38
+ requirement: &75632580 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0.6'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *81494060
46
+ version_requirements: *75632580
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: feedutils
49
- requirement: &81493690 !ruby/object:Gem::Requirement
49
+ requirement: &75632320 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.4.0
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *81493690
57
+ version_requirements: *75632320
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: props
60
- requirement: &81493150 !ruby/object:Gem::Requirement
60
+ requirement: &75632050 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.3
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *81493150
68
+ version_requirements: *75632050
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: textutils
71
- requirement: &81492890 !ruby/object:Gem::Requirement
71
+ requirement: &75631720 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0.7'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *81492890
79
+ version_requirements: *75631720
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: activityutils
82
- requirement: &81492660 !ruby/object:Gem::Requirement
82
+ requirement: &75631420 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 0.1.0
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *81492660
90
+ version_requirements: *75631420
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: gli
93
- requirement: &81492380 !ruby/object:Gem::Requirement
93
+ requirement: &75631140 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: 2.5.6
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *81492380
101
+ version_requirements: *75631140
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: rdoc
104
- requirement: &81492080 !ruby/object:Gem::Requirement
104
+ requirement: &75653690 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ~>
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '3.10'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *81492080
112
+ version_requirements: *75653690
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: hoe
115
- requirement: &81491610 !ruby/object:Gem::Requirement
115
+ requirement: &75653420 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ~>
@@ -120,7 +120,7 @@ dependencies:
120
120
  version: '3.3'
121
121
  type: :development
122
122
  prerelease: false
123
- version_requirements: *81491610
123
+ version_requirements: *75653420
124
124
  description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
125
125
  Web Feeds)
126
126
  email: feedreader@googlegroups.com