pluto-update 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -6
- data/Rakefile +2 -1
- data/lib/pluto/update.rb +2 -1
- data/lib/pluto/update/fetcher.rb +41 -26
- data/lib/pluto/update/refresher.rb +11 -6
- data/lib/pluto/update/version.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d631c3eb45ac43efb9e2d0186270a5ff6adc0e43
|
4
|
+
data.tar.gz: bb4774dcb0d97bc05b3f19f05956f608d94bf18d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 858c5e290c621b417ab5773e3c307daed4a0c0174d00ef88729ac9a235deafc5e0c42027eaaf86220017415b9538be04c6661a7f989fedd5c87c5143c97e2fe9
|
7
|
+
data.tar.gz: 228ffdf0626f7455c73bc108bdc4639c5f2ff42d468238d2d6dcd308a8e0df3aead6f0f2cecf0d374a36b378b3babba0353ad10f26d5e8ca81ef4d102117302b
|
data/README.md
CHANGED
@@ -34,12 +34,9 @@ title = Planet Ruby
|
|
34
34
|
```
|
35
35
|
|
36
36
|
For more samples, see [`nytimes.ini`](https://github.com/feedreader/planets/blob/master/nytimes.ini),
|
37
|
-
[`js.ini`](https://github.com/feedreader/
|
38
|
-
[`dart.ini`](https://github.com/feedreader/
|
39
|
-
[`haskell.ini`](https://github.com/feedreader/planets/blob/master/haskell.ini)
|
40
|
-
[`viennarb.ini`](https://github.com/feedreader/planets/blob/master/viennarb.ini),
|
41
|
-
[`beer.ini`](https://github.com/feedreader/planets/blob/master/beer.ini),
|
42
|
-
[`football.ini`](https://github.com/feedreader/planets/blob/master/football.ini).
|
37
|
+
[`js.ini`](https://github.com/feedreader/planet-web/blob/master/js.ini),
|
38
|
+
[`dart.ini`](https://github.com/feedreader/planet-web/blob/master/dart.ini),
|
39
|
+
[`haskell.ini`](https://github.com/feedreader/planets/blob/master/haskell.ini).
|
43
40
|
|
44
41
|
|
45
42
|
|
data/Rakefile
CHANGED
@@ -18,8 +18,9 @@ Hoe.spec 'pluto-update' do
|
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
|
-
['pluto-models', '>= 1.
|
21
|
+
['pluto-models', '>= 1.3.2'],
|
22
22
|
['fetcher', '>= 0.4.4'],
|
23
|
+
['preproc', '>= 0.1.0'],
|
23
24
|
]
|
24
25
|
|
25
26
|
self.licenses = ['Public Domain']
|
data/lib/pluto/update.rb
CHANGED
data/lib/pluto/update/fetcher.rb
CHANGED
@@ -262,15 +262,30 @@ class Fetcher
|
|
262
262
|
response = @worker.get( site_url )
|
263
263
|
@worker.use_cache = false # fix/todo: restore old use_cache setting instead of false
|
264
264
|
|
265
|
+
site_fetched = Time.now
|
266
|
+
|
267
|
+
###
|
268
|
+
# Note: Net::HTTP will NOT set encoding UTF-8 etc.
|
269
|
+
# will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
270
|
+
# thus, set/force encoding to utf-8
|
271
|
+
site_text = response.body.to_s
|
272
|
+
site_text = site_text.force_encoding( Encoding::UTF_8 )
|
273
|
+
|
265
274
|
if response.code == '304' # not modified (conditional GET - e.g. using etag/last-modified)
|
266
|
-
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
267
|
-
puts "no change; request returns not modified (304); skipping parsing site config"
|
268
|
-
return nil # no updates available; nothing to do
|
269
|
-
end
|
270
275
|
|
271
|
-
|
276
|
+
if site_text.index('@include')
|
277
|
+
## note: if the site_text includes @include
|
278
|
+
## we must revalidate complete file hierarchy (tree) for now
|
279
|
+
### continue;
|
280
|
+
##
|
281
|
+
## fix/todo: use ahead-of-time preprocessor ?? in the future to simplify???
|
282
|
+
else
|
283
|
+
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
284
|
+
puts "no change; request returns not modified (304); skipping parsing site config"
|
285
|
+
return nil # no updates available; nothing to do
|
286
|
+
end
|
272
287
|
|
273
|
-
|
288
|
+
elsif response.code != '200' # note Net::HTTP response.code is a string in ruby
|
274
289
|
|
275
290
|
puts "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
276
291
|
|
@@ -279,9 +294,7 @@ class Fetcher
|
|
279
294
|
http_server: response.header[ 'server' ],
|
280
295
|
http_etag: nil,
|
281
296
|
http_last_modified: nil,
|
282
|
-
|
283
|
-
md5: nil,
|
284
|
-
fetched: feed_fetched
|
297
|
+
fetched: site_fetched
|
285
298
|
}
|
286
299
|
site_rec.update_attributes!( site_attribs )
|
287
300
|
|
@@ -289,27 +302,12 @@ class Fetcher
|
|
289
302
|
Activity.create!( text: "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}" )
|
290
303
|
|
291
304
|
return nil # sorry; no feed for parsing available
|
305
|
+
else
|
306
|
+
# assume 200; continue w/ processing
|
292
307
|
end
|
293
308
|
|
294
309
|
puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
|
295
310
|
|
296
|
-
site_text = response.body
|
297
|
-
|
298
|
-
###
|
299
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
300
|
-
# will mostly be ASCII
|
301
|
-
# - try to change encoding to UTF-8 ourselves
|
302
|
-
logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
|
303
|
-
|
304
|
-
#####
|
305
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
306
|
-
|
307
|
-
## NB:
|
308
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
309
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
310
|
-
site_text = site_text.force_encoding( Encoding::UTF_8 )
|
311
|
-
logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"
|
312
|
-
|
313
311
|
site_attribs = {
|
314
312
|
http_code: response.code.to_i,
|
315
313
|
http_server: response.header[ 'server' ],
|
@@ -326,6 +324,23 @@ class Fetcher
|
|
326
324
|
|
327
325
|
site_rec.update_attributes!( site_attribs )
|
328
326
|
|
327
|
+
|
328
|
+
#################
|
329
|
+
### fix: add support for http_etag cache etc. - how??
|
330
|
+
###
|
331
|
+
### use from_text( text, base: base ) !!!!!!!!
|
332
|
+
### do NOT reissue first request
|
333
|
+
##
|
334
|
+
## fix: use special case/method for update_with_includes!!!
|
335
|
+
## keep it simple w/o includes (do NOT mix in one method)
|
336
|
+
## split into two methods!!!
|
337
|
+
|
338
|
+
## retry w/ preprocessor
|
339
|
+
## refetch if @include found w/ all includes included
|
340
|
+
if site_text.index('@include')
|
341
|
+
site_text = InclPreproc.from_url( site_url ).read
|
342
|
+
end
|
343
|
+
|
329
344
|
## logger.debug "site_text:"
|
330
345
|
## logger.debug site_text[ 0..300 ] # get first 300 chars
|
331
346
|
|
@@ -24,14 +24,16 @@ class Refresher
|
|
24
24
|
@worker.debug = true # also pass along worker debug flag if set
|
25
25
|
end
|
26
26
|
|
27
|
-
|
28
|
-
Activity.create!( text: "update sites (#{Site.count})" )
|
27
|
+
start_time = Time.now
|
28
|
+
Activity.create!( text: "start update sites (#{Site.count})" )
|
29
29
|
|
30
30
|
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
31
|
-
|
32
31
|
Site.order(:id).each do |site|
|
33
32
|
update_site_worker( site ) if site.url.present? # note: only update if (source) url present
|
34
33
|
end
|
34
|
+
|
35
|
+
total_secs = Time.now - start_time
|
36
|
+
Activity.create!( text: "done update sites (#{Site.count}) in #{total_secs}s" )
|
35
37
|
end
|
36
38
|
|
37
39
|
|
@@ -42,14 +44,17 @@ class Refresher
|
|
42
44
|
@worker.debug = true # also pass along worker debug flag if set
|
43
45
|
end
|
44
46
|
|
45
|
-
|
46
|
-
Activity.create!( text: "update feeds (#{Feed.count})" )
|
47
|
+
start_time = Time.now
|
48
|
+
Activity.create!( text: "start update feeds (#{Feed.count})" )
|
47
49
|
|
48
50
|
#### - hack - use order(:id) instead of .all - avoids rails/activerecord 4 warnings
|
49
|
-
|
50
51
|
Feed.order(:id).each do |feed|
|
51
52
|
update_feed_worker( feed )
|
53
|
+
### todo/fix: add catch exception in loop and log to activity log and continue w/ next feed
|
52
54
|
end
|
55
|
+
|
56
|
+
total_secs = Time.now - start_time
|
57
|
+
Activity.create!( text: "done update feeds (#{Site.count}) in #{total_secs}s" )
|
53
58
|
end
|
54
59
|
|
55
60
|
|
data/lib/pluto/update/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto-update
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pluto-models
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 1.3.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.3.2
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: fetcher
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 0.4.4
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: preproc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.1.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.1.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rdoc
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|