pluto-update 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 537a23fa41963fdb77dfea36a67d3971673b0ec4
4
- data.tar.gz: ca620b5fb7c8e25e236095576919be7b0d48155a
3
+ metadata.gz: d631c3eb45ac43efb9e2d0186270a5ff6adc0e43
4
+ data.tar.gz: bb4774dcb0d97bc05b3f19f05956f608d94bf18d
5
5
  SHA512:
6
- metadata.gz: e6d7563a0cc2e7b6a3e2081ef2b1c1c6e1e30c7ba115220efa4cbd7f7c1356b85ccb0e42e665a78d6663a92dc9d2aacc8237c456eafa449cf77a7bb93e324a26
7
- data.tar.gz: b3fba603c49a729c325f0935457739a9d73c04d36d58482921999e63888ba8cf4df4f00e54865314c56909fabedfc19affe767a645e747767c537862bf469124
6
+ metadata.gz: 858c5e290c621b417ab5773e3c307daed4a0c0174d00ef88729ac9a235deafc5e0c42027eaaf86220017415b9538be04c6661a7f989fedd5c87c5143c97e2fe9
7
+ data.tar.gz: 228ffdf0626f7455c73bc108bdc4639c5f2ff42d468238d2d6dcd308a8e0df3aead6f0f2cecf0d374a36b378b3babba0353ad10f26d5e8ca81ef4d102117302b
data/README.md CHANGED
@@ -34,12 +34,9 @@ title = Planet Ruby
34
34
  ```
35
35
 
36
36
  For more samples, see [`nytimes.ini`](https://github.com/feedreader/planets/blob/master/nytimes.ini),
37
- [`js.ini`](https://github.com/feedreader/planets/blob/master/js.ini),
38
- [`dart.ini`](https://github.com/feedreader/planets/blob/master/dart.ini),
39
- [`haskell.ini`](https://github.com/feedreader/planets/blob/master/haskell.ini),
40
- [`viennarb.ini`](https://github.com/feedreader/planets/blob/master/viennarb.ini),
41
- [`beer.ini`](https://github.com/feedreader/planets/blob/master/beer.ini),
42
- [`football.ini`](https://github.com/feedreader/planets/blob/master/football.ini).
37
+ [`js.ini`](https://github.com/feedreader/planet-web/blob/master/js.ini),
38
+ [`dart.ini`](https://github.com/feedreader/planet-web/blob/master/dart.ini),
39
+ [`haskell.ini`](https://github.com/feedreader/planets/blob/master/haskell.ini).
43
40
 
44
41
 
45
42
 
data/Rakefile CHANGED
@@ -18,8 +18,9 @@ Hoe.spec 'pluto-update' do
18
18
  self.history_file = 'HISTORY.md'
19
19
 
20
20
  self.extra_deps = [
21
- ['pluto-models', '>= 1.2.2'],
21
+ ['pluto-models', '>= 1.3.2'],
22
22
  ['fetcher', '>= 0.4.4'],
23
+ ['preproc', '>= 0.1.0'],
23
24
  ]
24
25
 
25
26
  self.licenses = ['Public Domain']
@@ -5,7 +5,8 @@ require 'pluto/models'
5
5
 
6
6
 
7
7
  # more 3rd party gems
8
- require 'fetcher'
8
+ require 'fetcher' # fetch (text) documents
9
+ require 'preproc' # include preprocessor
9
10
 
10
11
 
11
12
  # our own code
@@ -262,15 +262,30 @@ class Fetcher
262
262
  response = @worker.get( site_url )
263
263
  @worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
264
264
 
265
+ site_fetched = Time.now
266
+
267
+ ###
268
+ # Note: Net::HTTP will NOT set encoding UTF-8 etc.
269
+ # will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
270
+ # thus, set/force encoding to utf-8
271
+ site_text = response.body.to_s
272
+ site_text = site_text.force_encoding( Encoding::UTF_8 )
273
+
265
274
  if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
266
- puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
267
- puts "no change; request returns not modified (304); skipping parsing site config"
268
- return nil # no updates available; nothing to do
269
- end
270
275
 
271
- site_fetched = Time.now
276
+ if site_text.index('@include')
277
+ ## note: if the site_text includes @include
278
+ ## we must revalidate complete file hierarchy (tree) for now
279
+ ### continue;
280
+ ##
281
+ ## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???
282
+ else
283
+ puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
284
+ puts "no change; request returns not modified (304); skipping parsing site config"
285
+ return nil # no updates available; nothing to do
286
+ end
272
287
 
273
- if response.code != '200' # note Net::HTTP response.code is a string in ruby
288
+ elsif response.code != '200' # note Net::HTTP response.code is a string in ruby
274
289
 
275
290
  puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
276
291
 
@@ -279,9 +294,7 @@ class Fetcher
279
294
  http_server: response.header[ 'server' ],
280
295
  http_etag: nil,
281
296
  http_last_modified: nil,
282
- body: nil,
283
- md5: nil,
284
- fetched: feed_fetched
297
+ fetched: site_fetched
285
298
  }
286
299
  site_rec.update_attributes!( site_attribs )
287
300
 
@@ -289,27 +302,12 @@ class Fetcher
289
302
  Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
290
303
 
291
304
  return nil # sorry; no feed for parsing available
305
+ else
306
+ # assume 200; continue w/ processing
292
307
  end
293
308
 
294
309
  puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
295
310
 
296
- site_text = response.body
297
-
298
- ###
299
- # NB: Net::HTTP will NOT set encoding UTF-8 etc.
300
- # will mostly be ASCII
301
- # - try to change encoding to UTF-8 ourselves
302
- logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
303
-
304
- #####
305
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
306
-
307
- ## NB:
308
- # for now "hardcoded" to utf8 - what else can we do?
309
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
310
- site_text = site_text.force_encoding( Encoding::UTF_8 )
311
- logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"
312
-
313
311
  site_attribs = {
314
312
  http_code: response.code.to_i,
315
313
  http_server: response.header[ 'server' ],
@@ -326,6 +324,23 @@ class Fetcher
326
324
 
327
325
  site_rec.update_attributes!( site_attribs )
328
326
 
327
+
328
+ #################
329
+ ### fix: add support for http_etag cache etc. - how??
330
+ ###
331
+ ### use from_text( text, base: base ) !!!!!!!!
332
+ ### do NOT reissue first request
333
+ ##
334
+ ## fix: use special case/method for update_with_includes!!!
335
+ ## keep it simple w/o includes (do NOT mix in one method)
336
+ ## split into two methods!!!
337
+
338
+ ## retry w/ preprocessor
339
+ ## refetch if @include found w/ all includes included
340
+ if site_text.index('@include')
341
+ site_text = InclPreproc.from_url( site_url ).read
342
+ end
343
+
329
344
  ## logger.debug "site_text:"
330
345
  ## logger.debug site_text[ 0..300 ] # get first 300 chars
331
346
 
@@ -24,14 +24,16 @@ class Refresher
24
24
  @worker.debug = true # also pass along worker debug flag if set
25
25
  end
26
26
 
27
- # -- log update activity
28
- Activity.create!( text: "update sites (#{Site.count})" )
27
+ start_time = Time.now
28
+ Activity.create!( text: "start update sites (#{Site.count})" )
29
29
 
30
30
  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
31
-
32
31
  Site.order(:id).each do |site|
33
32
  update_site_worker( site ) if site.url.present? # note: only update if (source) url present
34
33
  end
34
+
35
+ total_secs = Time.now - start_time
36
+ Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
35
37
  end
36
38
 
37
39
 
@@ -42,14 +44,17 @@ class Refresher
42
44
  @worker.debug = true # also pass along worker debug flag if set
43
45
  end
44
46
 
45
- # -- log update activity
46
- Activity.create!( text: "update feeds (#{Feed.count})" )
47
+ start_time = Time.now
48
+ Activity.create!( text: "start update feeds (#{Feed.count})" )
47
49
 
48
50
  #### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
49
-
50
51
  Feed.order(:id).each do |feed|
51
52
  update_feed_worker( feed )
53
+ ### todo/fix: add catch exception in loop and log to activity log and continue w/ next feed
52
54
  end
55
+
56
+ total_secs = Time.now - start_time
57
+ Activity.create!( text: "done update feeds (#{Site.count}) in #{total_secs}s" )
53
58
  end
54
59
 
55
60
 
@@ -4,7 +4,7 @@
4
4
  module PlutoUpdate
5
5
 
6
6
  MAJOR = 1
7
- MINOR = 3
7
+ MINOR = 4
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluto-update
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-25 00:00:00.000000000 Z
11
+ date: 2015-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pluto-models
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.2.2
19
+ version: 1.3.2
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 1.2.2
26
+ version: 1.3.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: fetcher
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.4.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: preproc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rdoc
43
57
  requirement: !ruby/object:Gem::Requirement