pluto-models 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/HISTORY.md +4 -0
- data/Manifest.txt +30 -0
- data/README.md +82 -0
- data/Rakefile +41 -0
- data/config/pluto.index.yml +23 -0
- data/lib/pluto/activerecord.rb +18 -0
- data/lib/pluto/connecter.rb +96 -0
- data/lib/pluto/fetcher.rb +292 -0
- data/lib/pluto/formatter.rb +81 -0
- data/lib/pluto/installer.rb +58 -0
- data/lib/pluto/lister.rb +42 -0
- data/lib/pluto/manifest_helpers.rb +51 -0
- data/lib/pluto/models.rb +130 -0
- data/lib/pluto/models/activity.rb +8 -0
- data/lib/pluto/models/feed.rb +127 -0
- data/lib/pluto/models/item.rb +73 -0
- data/lib/pluto/models/site.rb +19 -0
- data/lib/pluto/models/subscription.rb +14 -0
- data/lib/pluto/models/utils.rb +47 -0
- data/lib/pluto/refresher.rb +130 -0
- data/lib/pluto/schema.rb +139 -0
- data/lib/pluto/subscriber.rb +102 -0
- data/lib/pluto/tasks/env.rake +25 -0
- data/lib/pluto/tasks/setup.rake +40 -0
- data/lib/pluto/tasks/stats.rake +10 -0
- data/lib/pluto/tasks/update.rake +24 -0
- data/lib/pluto/updater.rb +50 -0
- data/lib/pluto/version.rb +28 -0
- data/test/helper.rb +13 -0
- data/test/test_helpers.rb +22 -0
- metadata +262 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7d114af161602287c52ed90e78691eb9e4b04b74
|
4
|
+
data.tar.gz: 009c9f806a64391c97cee02f2d0e3f5e8a48b094
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d07c395a1a7e88beab19c3f6b5b058fd99ae4c0a48ded6021b2265186ba3560b92ec514e056ed8c9ff32c0ce0f1ca0bcb2808c8967e5e8e98fa86dceaddf236e
|
7
|
+
data.tar.gz: 401d00c0da7059d0eefd9adec5f13a18e74446796e5cf92364c61c2547c886312c26b8b65eaa5510d46d423cdc27556c43778e090843f806cd7dd1a9384b4b95
|
data/.gemtest
ADDED
File without changes
|
data/HISTORY.md
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
HISTORY.md
|
2
|
+
Manifest.txt
|
3
|
+
README.md
|
4
|
+
Rakefile
|
5
|
+
config/pluto.index.yml
|
6
|
+
lib/pluto/activerecord.rb
|
7
|
+
lib/pluto/connecter.rb
|
8
|
+
lib/pluto/fetcher.rb
|
9
|
+
lib/pluto/formatter.rb
|
10
|
+
lib/pluto/installer.rb
|
11
|
+
lib/pluto/lister.rb
|
12
|
+
lib/pluto/manifest_helpers.rb
|
13
|
+
lib/pluto/models.rb
|
14
|
+
lib/pluto/models/activity.rb
|
15
|
+
lib/pluto/models/feed.rb
|
16
|
+
lib/pluto/models/item.rb
|
17
|
+
lib/pluto/models/site.rb
|
18
|
+
lib/pluto/models/subscription.rb
|
19
|
+
lib/pluto/models/utils.rb
|
20
|
+
lib/pluto/refresher.rb
|
21
|
+
lib/pluto/schema.rb
|
22
|
+
lib/pluto/subscriber.rb
|
23
|
+
lib/pluto/tasks/env.rake
|
24
|
+
lib/pluto/tasks/setup.rake
|
25
|
+
lib/pluto/tasks/stats.rake
|
26
|
+
lib/pluto/tasks/update.rake
|
27
|
+
lib/pluto/updater.rb
|
28
|
+
lib/pluto/version.rb
|
29
|
+
test/helper.rb
|
30
|
+
test/test_helpers.rb
|
data/README.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
# pluto-models gem - planet models and generator machinery for easy (re)use
|
2
|
+
|
3
|
+
* home :: [github.com/feedreader/pluto-models](https://github.com/feedreader/pluto-models)
|
4
|
+
* bugs :: [github.com/feedreader/pluto-models/issues](https://github.com/feedreader/pluto-models/issues)
|
5
|
+
* gem :: [rubygems.org/gems/pluto-models](https://rubygems.org/gems/pluto-models)
|
6
|
+
* rdoc :: [rubydoc.info/gems/pluto-models](http://rubydoc.info/gems/pluto-models)
|
7
|
+
* forum :: [groups.google.com/group/feedreader](http://groups.google.com/group/feedreader)
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
### Planet Configuration Sample
|
14
|
+
|
15
|
+
`ruby.ini`:
|
16
|
+
|
17
|
+
```
|
18
|
+
title = Planet Ruby
|
19
|
+
|
20
|
+
[rubyflow]
|
21
|
+
title = Ruby Flow
|
22
|
+
link = http://rubyflow.com
|
23
|
+
feed = http://feeds.feedburner.com/Rubyflow?format=xml
|
24
|
+
|
25
|
+
[rubyonrails]
|
26
|
+
title = Ruby on Rails Blog
|
27
|
+
link = http://weblog.rubyonrails.org
|
28
|
+
feed = http://weblog.rubyonrails.org/feed/atom.xml
|
29
|
+
|
30
|
+
[viennarb]
|
31
|
+
title = vienna.rb Blog
|
32
|
+
link = http://vienna-rb.at
|
33
|
+
feed = http://vienna-rb.at/atom.xml
|
34
|
+
```
|
35
|
+
|
36
|
+
or `ruby.yml`:
|
37
|
+
|
38
|
+
```
|
39
|
+
title: Planet Ruby
|
40
|
+
|
41
|
+
|
42
|
+
rubyflow:
|
43
|
+
title: Ruby Flow
|
44
|
+
link: http://rubyflow.com
|
45
|
+
feed: http://feeds.feedburner.com/Rubyflow?format=xml
|
46
|
+
|
47
|
+
rubyonrails:
|
48
|
+
title: Ruby on Rails Blog
|
49
|
+
link: http://weblog.rubyonrails.org
|
50
|
+
feed: http://weblog.rubyonrails.org/feed/atom.xml
|
51
|
+
|
52
|
+
viennarb:
|
53
|
+
title: vienna.rb Blog
|
54
|
+
link: http://vienna-rb.at
|
55
|
+
feed: http://vienna-rb.at/atom.xml
|
56
|
+
```
|
57
|
+
|
58
|
+
For more samples, see [`nytimes.ini`](https://github.com/feedreader/pluto.samples/blob/master/nytimes.ini),
|
59
|
+
[`js.ini`](https://github.com/feedreader/pluto.samples/blob/master/js.ini),
|
60
|
+
[`dart.ini`](https://github.com/feedreader/pluto.samples/blob/master/dart.ini),
|
61
|
+
[`haskell.ini`](https://github.com/feedreader/pluto.samples/blob/master/haskell.ini),
|
62
|
+
[`viennarb.ini`](https://github.com/feedreader/pluto.samples/blob/master/viennarb.ini),
|
63
|
+
[`beer.ini`](https://github.com/feedreader/pluto.samples/blob/master/beer.ini),
|
64
|
+
[`football.ini`](https://github.com/feedreader/pluto.samples/blob/master/football.ini).
|
65
|
+
|
66
|
+
|
67
|
+
## Real World Usage
|
68
|
+
|
69
|
+
[`pluto`](https://github.com/feedreader/pluto) - planet generator command line tool using the pluto-models gem
|
70
|
+
[`pluto.live`](https://github.com/feedreader/pluto.live) - sample planet site; sinatra web app/starter template in ruby using the pluto-models gem
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
## License
|
75
|
+
|
76
|
+
The `pluto-models` scripts are dedicated to the public domain.
|
77
|
+
Use them as you please with no restrictions whatsoever.
|
78
|
+
|
79
|
+
## Questions? Comments?
|
80
|
+
|
81
|
+
Send them along to the [Planet Pluto and Friends Forum/Mailing List](http://groups.google.com/group/feedreader).
|
82
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/pluto/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'pluto-models' do
|
5
|
+
|
6
|
+
self.version = Pluto::VERSION
|
7
|
+
|
8
|
+
self.summary = 'pluto-models - planet models and generator machinery for easy (re)use'
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/feedreader/pluto-models']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'feedreader@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'HISTORY.md'
|
19
|
+
|
20
|
+
self.extra_deps = [
|
21
|
+
['pakman', '>= 0.5.0'],
|
22
|
+
['fetcher', '>= 0.4.4'],
|
23
|
+
['logutils', '>= 0.6.1'],
|
24
|
+
['feedutils', '>= 0.4.0'],
|
25
|
+
['props', '>= 1.1.2'],
|
26
|
+
['textutils', '>= 0.10.0'],
|
27
|
+
['gli', '>= 2.12.2'],
|
28
|
+
['activerecord'],
|
29
|
+
['logutils-activerecord', '>= 0.2.0'],
|
30
|
+
['props-activerecord', '0.1.0'],
|
31
|
+
['activityutils', '>= 0.1.0' ],
|
32
|
+
]
|
33
|
+
|
34
|
+
|
35
|
+
self.licenses = ['Public Domain']
|
36
|
+
|
37
|
+
self.spec_extras = {
|
38
|
+
required_ruby_version: '>= 1.9.2'
|
39
|
+
}
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#########################################
|
2
|
+
# Pluto Update Index for Template Packs
|
3
|
+
|
4
|
+
|
5
|
+
#############
|
6
|
+
# shortcuts for fetching template packs
|
7
|
+
# maps shortcut to URI
|
8
|
+
|
9
|
+
blank: https://raw.github.com/planet-templates/planet-blank/master/blank.txt
|
10
|
+
|
11
|
+
top: https://raw.github.com/planet-templates/planet-top/master/top.txt
|
12
|
+
|
13
|
+
news: https://raw.github.com/planet-templates/planet-news/master/news.txt
|
14
|
+
|
15
|
+
feeds: https://raw.github.com/planet-templates/planet-feeds/master/feeds.txt
|
16
|
+
|
17
|
+
classic: https://raw.github.com/planet-templates/planet-classic/master/classic.txt
|
18
|
+
|
19
|
+
|
20
|
+
####
|
21
|
+
# all:
|
22
|
+
# - add why? why not??
|
23
|
+
#
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Pluto

  # Mixin adding attribute lookups with fallbacks. The including class must
  # provide +read_attribute( key )+.
  module ActiveRecordMethods

    # Returns the value of the first attribute in +keys+ that is not nil.
    #
    # Attributes are read in the order given via +read_attribute+; a +false+
    # value counts as present and is returned. Returns nil when every
    # attribute is nil or when no keys are passed.
    #
    ### todo: use a different name e.g.:
    ##   read_attribute_cascade ??  - does anything like this exist already?
    ##   why? why not?
    def read_attribute_w_fallbacks( *keys )
      keys.each do |key|
        value = read_attribute( key )
        return value unless value.nil?
      end
      # fallthrough: every attribute was nil. The block-local +value+ is not
      # in scope here, so return nil explicitly.
      nil
    end

  end # module ActiveRecordMethods
end # module Pluto
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
|
4
|
+
# DB Connecter / Connection Manager
|
5
|
+
# lets you establish connection
|
6
|
+
|
7
|
+
class Connecter
|
8
|
+
|
9
|
+
include LogUtils::Logging
|
10
|
+
|
11
|
+
# Nothing to set up for now; kept as an explicit hook for later.
def initialize; end
|
14
|
+
|
15
|
+
|
16
|
+
# Stores the debug flag that debug? reports.
def debug=(value) @debug = value; end
|
19
|
+
|
20
|
+
# Returns the stored debug flag, or false when it is unset (nil/false).
def debug?
  @debug ? @debug : false
end
|
23
|
+
|
24
|
+
|
25
|
+
# Connects ActiveRecord to the database and creates missing tables.
#
# config - optional Hash of connection settings, passed as-is to
#          ActiveRecord::Base.establish_connection. When nil, the settings
#          are built from ENV['DATABASE_URL'] (fallback:
#          'sqlite3:///pluto.db'): a 'postgres' scheme yields a postgresql
#          config; any other scheme is used as the adapter name with the
#          URL path (minus the leading slash) as the database.
#
# The settings are printed to stdout and stored under the 'pluto' key of
# ActiveRecord::Base.configurations. After connecting, the activity and
# pluto tables are created if they do not exist yet.
#
# NOTE(review): the printed settings include the password in clear text.
def connect!( config = nil )

  if config.nil?   # use/try DATABASE_URL from environment

    logger.debug "ENV['DATABASE_URL'] - >#{ENV['DATABASE_URL']}<"

    db = URI.parse( ENV['DATABASE_URL'] || 'sqlite3:///pluto.db' )

    if db.scheme == 'postgres'
      config = {
        adapter:  'postgresql',
        host:     db.host,
        port:     db.port,
        username: db.user,
        password: db.password,
        database: db.path[1..-1],
        encoding: 'utf8'
      }
    else   # assume sqlite3
      config = {
        adapter:  db.scheme,        # sqlite3
        database: db.path[1..-1]    # pluto.db (NB: cut off leading /, thus 1..-1)
      }
    end
  end   # if config.nil?

  puts 'db settings:'
  pp config

  ### for dbbrowser and other tools add to ActiveRecord

  if ActiveRecord::Base.configurations.nil?   # todo/check: can this ever happen? remove?
    puts "ActiveRecord configurations nil - set to empty hash"
    ActiveRecord::Base.configurations = {}    # make it an empty hash
  end

  if debug?
    puts 'ar configurations (before):'
    pp ActiveRecord::Base.configurations
  end

  # note: for now always use pluto key for config storage
  ActiveRecord::Base.configurations['pluto'] = config

  if debug?
    puts 'ar configurations (after):'
    pp ActiveRecord::Base.configurations
  end

  # for debugging - disable for production use
  if debug?
    ActiveRecord::Base.logger = Logger.new( STDOUT )
  end

  ActiveRecord::Base.establish_connection( config )

  # first time? - auto-run db migration, that is, create db tables
  unless ActivityDb::Models::Activity.table_exists?
    ActivityDb::CreateDb.new.up
  end

  unless Models::Feed.table_exists?
    CreateDb.new.up
  end
end # method connect!
|
92
|
+
|
93
|
+
|
94
|
+
end # class Connecter
|
95
|
+
|
96
|
+
end # module Pluto
|
@@ -0,0 +1,292 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
|
4
|
+
class Fetcher
|
5
|
+
|
6
|
+
include LogUtils::Logging
|
7
|
+
|
8
|
+
include Models # for easy convenience access for Activity etc.
|
9
|
+
|
10
|
+
# Creates the worker (::Fetcher::Worker) that performs every HTTP request
# issued by this class.
def initialize
  @worker = ::Fetcher::Worker.new
end
|
13
|
+
|
14
|
+
# Stores the debug flag that debug? reports.
def debug=(value)
  @debug = value
end
|
15
|
+
# Returns the stored debug flag, or false when it is unset (nil/false).
def debug?
  @debug ? @debug : false
end
|
16
|
+
|
17
|
+
|
18
|
+
# Fetches +url+ through the worker and returns the response body tagged as
# UTF-8. The status line and the server / etag / last-modified headers are
# always printed to stdout.
def fetch_feed( url )
  response = @worker.get( url )

  ## if debug?
  puts "http status #{response.code} #{response.message}"

  %w[server etag last-modified].each do |name|
    header_value = response.header[name]
    puts "http header - #{name}: #{header_value} - #{header_value.class.name}"
  end
  ## end

  body = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
  #   - for now "hardcoded" to utf8 - what else can we do?
  #   - note: force_encoding only re-labels the bytes; nothing is transcoded
  logger.debug "xml.encoding.name (before): #{body.encoding.name}"
  body.force_encoding( Encoding::UTF_8 )
  logger.debug "xml.encoding.name (after): #{body.encoding.name}"

  body
end
|
48
|
+
|
49
|
+
|
50
|
+
# Simple feed fetcher; use for debugging (only/mostly).
# Does NOT change db records in any way.
#
# Fetches feed_rec.feed_url via fetch_feed, writes a copy to
# ./<key>.xml when debug? is on, and returns the result of
# FeedUtils::Parser.parse for the fetched text.
def feed_by_rec( feed_rec )
  url = feed_rec.feed_url
  key = feed_rec.key

  xml = fetch_feed( url )

  logger.debug "feed_xml:"
  logger.debug xml[ 0..500 ]   # leading part of the feed (indices 0 through 500)

  # if opts.verbose?  # also write a copy to disk
  if debug?
    logger.debug "saving feed to >./#{key}.xml<..."
    File.open( "./#{key}.xml", 'w' ) { |file| file.write( xml ) }
  end

  puts "Before parsing feed >#{key}<..."

  ## fix/todo: check for feed.nil?   -> error parsing!!!
  #    or throw exception
  FeedUtils::Parser.parse( xml )
end
|
76
|
+
|
77
|
+
|
78
|
+
# Fetches the feed for +feed_rec+ with a conditional GET (etag /
# last-modified taken from the record), updates the record's http/fetch
# attributes and parses the feed.
#
# Returns the result of FeedUtils::Parser.parse, or nil when
#   - the host lookup fails with a SocketError (logged as an Activity),
#   - the server answers 304 (not modified),
#   - the server answers anything other than 200 (record updated with the
#     status code, logged as an Activity),
#   - the body's md5 digest equals the digest stored on the record.
#
# The worker's cache flag is switched on only for the request and is always
# switched off again, even if the request raises.
def feed_by_rec_if_modified( feed_rec )   # try smart http update; will update db records
  feed_url = feed_rec.feed_url
  feed_key = feed_rec.key

  ### todo/fix: normalize/unify feed_url
  ##  - same in fetcher - use shared utility method or similar

  @worker.use_cache = true
  begin
    @worker.cache[ feed_url ] = {
      'etag'          => feed_rec.http_etag,
      'last-modified' => feed_rec.http_last_modified
    }
    response = @worker.get( feed_url )
  rescue SocketError => e
    ## catch socket error for unknown domain names (e.g. pragdave.blogs.pragprog.com)
    ###  will result in SocketError -- getaddrinfo: Name or service not known
    error_msg = "*** error: fetching feed '#{feed_key}' - #{e}"
    puts error_msg
    Activity.create!( text: error_msg )

    ### todo/fix: update feed rec in db
    return nil
  ensure
    # runs on success, on SocketError and on any other exception, so the
    # worker never stays in cache mode
    @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false
  end

  if response.code == '304'   # not modified (conditional GET - e.g. using etag/last-modified)
    puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
    puts "no change; request returns not modified (304); skipping parsing feed"
    return nil   # no updates available; nothing to do
  end

  feed_fetched = Time.now

  if response.code != '200'   # note Net::HTTP response.code is a string in ruby
    error_msg = "*** error: fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"
    puts error_msg

    feed_rec.update_attributes!(
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      body:               nil,
      md5:                nil,
      fetched:            feed_fetched
    )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: error_msg )

    return nil   # sorry; no feed for parsing available
  end

  puts "OK - fetching feed '#{feed_key}' - HTTP status #{response.code} #{response.message}"

  feed_xml = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
  #   - for now "hardcoded" to utf8 - what else can we do?
  #   - note: force_encoding only re-labels the bytes; nothing is transcoded
  logger.debug "feed_xml.encoding.name (before): #{feed_xml.encoding.name}"
  feed_xml = feed_xml.force_encoding( Encoding::UTF_8 )
  logger.debug "feed_xml.encoding.name (after): #{feed_xml.encoding.name}"

  ## check for md5 hash for response.body
  last_feed_md5 = feed_rec.md5
  feed_md5      = Digest::MD5.hexdigest( feed_xml )

  if last_feed_md5 && last_feed_md5 == feed_md5
    # not all servers handle conditional gets, so while not much can be
    #  done about the bandwidth, if the response body is identical
    #  the downstream processing (parsing, caching, ...) can be avoided.
    #  - thanks to planet mars -fido.rb for the idea, cheers.
    puts "no change; md5 digests match; skipping parsing feed"
    return nil   # no updates available; nothing to do
  end

  feed_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],   ## note: last_modified header gets stored as plain text (not datetime)
    body:               feed_xml,
    md5:                feed_md5,
    fetched:            feed_fetched
  }

  ## if debug?
  puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
  puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
  puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
  ## end

  feed_rec.update_attributes!( feed_attribs )

  logger.debug "feed_xml:"
  logger.debug feed_xml[ 0..300 ]   # leading part of the feed

  puts "Before parsing feed >#{feed_key}<..."

  ### move to feedutils
  ### logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"

  ## fix/todo: check for feed.nil?   -> error parsing!!!
  #    or throw exception
  FeedUtils::Parser.parse( feed_xml )
end
|
198
|
+
|
199
|
+
|
200
|
+
# Fetches the site config for +site_rec+ with a conditional GET (etag /
# last-modified taken from the record), updates the record's http/fetch
# attributes and loads the config.
#
# Returns the result of INI.load for the fetched text, or nil when
#   - the server answers 304 (not modified),
#   - the server answers anything other than 200 (record updated with the
#     status code, logged as an Activity).
#
# The worker's cache flag is switched on only for the request and is always
# switched off again, even if the request raises.
def site_by_rec_if_modified( site_rec )   # try smart http update; will update db records
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify site_url
  ##  - same in fetcher - use shared utility method or similar

  @worker.use_cache = true
  begin
    @worker.cache[ site_url ] = {
      'etag'          => site_rec.http_etag,
      'last-modified' => site_rec.http_last_modified
    }
    response = @worker.get( site_url )
  ensure
    # runs on success and on any exception, so the worker never stays in
    # cache mode
    @worker.use_cache = false   # fix/todo: restore old use_cache setting instead of false
  end

  if response.code == '304'   # not modified (conditional GET - e.g. using etag/last-modified)
    puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
    puts "no change; request returns not modified (304); skipping parsing site config"
    return nil   # no updates available; nothing to do
  end

  site_fetched = Time.now

  if response.code != '200'   # note Net::HTTP response.code is a string in ruby
    error_msg = "*** error: fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"
    puts error_msg

    # NOTE(review): body/md5 are cleared here but never set on success
    # below - confirm the site record has these attributes.
    site_rec.update_attributes!(
      http_code:          response.code.to_i,
      http_server:        response.header[ 'server' ],
      http_etag:          nil,
      http_last_modified: nil,
      body:               nil,
      md5:                nil,
      fetched:            site_fetched   # fixed: was the undefined local feed_fetched
    )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: error_msg )

    return nil   # sorry; no site config for parsing available
  end

  puts "OK - fetching site '#{site_key}' - HTTP status #{response.code} #{response.message}"

  site_text = response.body

  ###
  # NB: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will mostly be ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
  #   - for now "hardcoded" to utf8 - what else can we do?
  #   - note: force_encoding only re-labels the bytes; nothing is transcoded
  logger.debug "site_text.encoding.name (before): #{site_text.encoding.name}"
  site_text = site_text.force_encoding( Encoding::UTF_8 )
  logger.debug "site_text.encoding.name (after): #{site_text.encoding.name}"

  site_attribs = {
    http_code:          response.code.to_i,
    http_server:        response.header[ 'server' ],
    http_etag:          response.header[ 'etag' ],
    http_last_modified: response.header[ 'last-modified' ],   ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  }

  ## if debug?
  puts "http header - server: #{response.header['server']} - #{response.header['server'].class.name}"
  puts "http header - etag: #{response.header['etag']} - #{response.header['etag'].class.name}"
  puts "http header - last-modified: #{response.header['last-modified']} - #{response.header['last-modified'].class.name}"
  ## end

  site_rec.update_attributes!( site_attribs )

  ## logger.debug "site_text:"
  ## logger.debug site_text[ 0..300 ]   # leading part of the text

  puts "Before parsing site config >#{site_key}<..."

  # assume ini format for now
  INI.load( site_text )
end
|
289
|
+
|
290
|
+
end # class Fetcher
|
291
|
+
|
292
|
+
end # module Pluto
|