synchroniser 0.4.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/synchroniser.rb +5 -10
- data/lib/synchroniser/config.rb +6 -16
- data/lib/synchroniser/ingestor/uri.rb +22 -0
- data/spec/synchroniser_spec.rb +12 -3
- data/synchroniser.gemspec +3 -2
- data/test/docs/ingestors/rss.yml +0 -1
- data/test/docs/ingestors/rss_with_group.yml +9 -0
- metadata +6 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.2
|
1
|
+
0.6.0
|
data/lib/synchroniser.rb
CHANGED
@@ -7,6 +7,10 @@ class Synchroniser
|
|
7
7
|
def initialize(container, ingestors)
|
8
8
|
@container = container
|
9
9
|
@ingestors = ingestors
|
10
|
+
|
11
|
+
@stats = [:ingestable, :ingested, :seed].inject({}) { |v, var|
|
12
|
+
v.merge({ var => 0 })
|
13
|
+
}
|
10
14
|
end
|
11
15
|
|
12
16
|
def ingest
|
@@ -22,8 +26,6 @@ class Synchroniser
|
|
22
26
|
end
|
23
27
|
|
24
28
|
def sync
|
25
|
-
initialise_stats
|
26
|
-
|
27
29
|
verify_container
|
28
30
|
|
29
31
|
# reset flags and store the number of flags reset
|
@@ -43,18 +45,11 @@ class Synchroniser
|
|
43
45
|
end
|
44
46
|
|
45
47
|
def display_stats
|
46
|
-
@stats[:failed] = @stats[
|
48
|
+
@stats[:failed] = @stats[:ingestable] - @stats[:ingested]
|
47
49
|
puts "Initial seeding of #{@stats[:seed]}"
|
48
50
|
puts "Ingestor found #{@stats[:ingestable]} items, #{@stats[:ingested]} ingested, #{@stats[:failed]} failed."
|
49
51
|
end
|
50
52
|
|
51
|
-
def initialise_stats
|
52
|
-
@stats = {}
|
53
|
-
[:ingestable, :ingested, :seed].map() { |var|
|
54
|
-
@stats[var] = 0
|
55
|
-
}
|
56
|
-
end
|
57
|
-
|
58
53
|
end
|
59
54
|
|
60
55
|
require File.join(File.dirname(__FILE__), 'synchroniser', 'config.rb')
|
data/lib/synchroniser/config.rb
CHANGED
@@ -19,31 +19,21 @@ module Synchroniser::Config
|
|
19
19
|
raise ArgumentError, "Unable to find the stanza '#{ingestor_name}' in the configuration file supplied"
|
20
20
|
end
|
21
21
|
|
22
|
+
defaults['group'] ||= @config["params"]["container"]["defaults"]['group']
|
23
|
+
defaults['group'] ||= ingestor_name
|
24
|
+
|
22
25
|
@config["params"]["container"]["defaults"].merge!(defaults) unless defaults.empty?
|
23
26
|
end
|
24
27
|
|
25
28
|
def get_sync
|
26
29
|
container = build_container(@config['params']['container'])
|
27
|
-
ingestors = build_ingestors(
|
30
|
+
ingestors = build_ingestors()
|
28
31
|
Synchroniser.new(container, ingestors)
|
29
32
|
end
|
30
33
|
|
31
|
-
def build_ingestors(
|
34
|
+
def build_ingestors()
|
32
35
|
require @config['ingestor'] + "_ingestor.rb"
|
33
|
-
|
34
|
-
|
35
|
-
strategies = []
|
36
|
-
@config['params']['url'].each { |url|
|
37
|
-
@ingestor.load(url)
|
38
|
-
strategies << @ingestor.clone
|
39
|
-
|
40
|
-
# not sure if this will actually work
|
41
|
-
@ingestor.pagination.each do |url|
|
42
|
-
@ingestor.load(url)
|
43
|
-
strategies << @ingestor.clone
|
44
|
-
end
|
45
|
-
}
|
46
|
-
strategies
|
36
|
+
eval("#{@config['ingestor'].camelize}Ingestor.new").strategise(@config['params']['url'])
|
47
37
|
end
|
48
38
|
|
49
39
|
def build_container(container)
|
@@ -3,6 +3,12 @@ module Synchroniser::Ingestor::Uri
|
|
3
3
|
|
4
4
|
require 'open-uri'
|
5
5
|
|
6
|
+
def populate(url)
|
7
|
+
ingestor = self.clone
|
8
|
+
ingestor.load(url)
|
9
|
+
ingestor
|
10
|
+
end
|
11
|
+
|
6
12
|
def load(uri)
|
7
13
|
@data = open(uri).read
|
8
14
|
end
|
@@ -11,4 +17,20 @@ module Synchroniser::Ingestor::Uri
|
|
11
17
|
# pagination is not enabled by default
|
12
18
|
[]
|
13
19
|
end
|
20
|
+
|
21
|
+
def strategise(urls)
|
22
|
+
strategies = urls.inject([]) do |strategies, url|
|
23
|
+
strategies << populate(url)
|
24
|
+
end
|
25
|
+
|
26
|
+
# not sure if this will actually work
|
27
|
+
# for every ingestor
|
28
|
+
strategies.inject([]) do |strategies, ingestor|
|
29
|
+
# grab the pagination urls and populate strategy with a new ingestor
|
30
|
+
ingestor.pagination.inject([]) do |strategies, url|
|
31
|
+
strategies << ingestor.populate(url)
|
32
|
+
#lastly add in the original ingestor
|
33
|
+
end << ingestor
|
34
|
+
end
|
35
|
+
end
|
14
36
|
end
|
data/spec/synchroniser_spec.rb
CHANGED
@@ -9,7 +9,8 @@ describe Synchroniser do
|
|
9
9
|
@group = "def456"
|
10
10
|
|
11
11
|
# clear out the current synchronisation
|
12
|
-
SyncItem.
|
12
|
+
SyncItem.destroy_all
|
13
|
+
Post.destroy_all
|
13
14
|
end
|
14
15
|
|
15
16
|
it "should always create a new post with new data" do
|
@@ -35,13 +36,13 @@ describe Synchroniser do
|
|
35
36
|
end
|
36
37
|
|
37
38
|
it "should set the correct container group from a config" do
|
38
|
-
config = Synchroniser::Config::Params.new("test/docs/ingestors/
|
39
|
+
config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
|
39
40
|
synchroniser = config.get_sync
|
40
41
|
synchroniser.container.group.should == "rss_test_feed"
|
41
42
|
end
|
42
43
|
|
43
44
|
it "should set the correct model group from a config" do
|
44
|
-
config = Synchroniser::Config::Params.new("test/docs/ingestors/
|
45
|
+
config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
|
45
46
|
synchroniser = config.get_sync
|
46
47
|
synchroniser.ingest[0].group.should == "rss_test_feed"
|
47
48
|
end
|
@@ -59,12 +60,20 @@ describe Synchroniser do
|
|
59
60
|
it "should set container defaults corrently" do
|
60
61
|
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
|
61
62
|
synchroniser.container.test_default.should == "random_default_name"
|
63
|
+
synchroniser.container.group.should == "rss"
|
62
64
|
end
|
63
65
|
|
64
66
|
it "should sync without error" do
|
65
67
|
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
|
66
68
|
synchroniser.sync
|
67
69
|
SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
|
70
|
+
SyncItem.find(:all)[0].group.should == "rss"
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should sync without error with defined group" do
|
74
|
+
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss").get_sync
|
75
|
+
synchroniser.sync
|
76
|
+
SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
|
68
77
|
SyncItem.find(:all)[0].group.should == "rss_test_feed"
|
69
78
|
end
|
70
79
|
|
data/synchroniser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{synchroniser}
|
8
|
-
s.version = "0.4.2"
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Rob Sharp"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-09-07}
|
13
13
|
s.description = %q{A wee synchroniser for a data source and a data sink.}
|
14
14
|
s.email = %q{rob@sharp.id.au}
|
15
15
|
s.files = [
|
@@ -49,6 +49,7 @@ Gem::Specification.new do |s|
|
|
49
49
|
"synchroniser.gemspec",
|
50
50
|
"tasks/synchroniser_tasks.rake",
|
51
51
|
"test/docs/ingestors/rss.yml",
|
52
|
+
"test/docs/ingestors/rss_with_group.yml",
|
52
53
|
"test/docs/rss.xml",
|
53
54
|
"uninstall.rb"
|
54
55
|
]
|
data/test/docs/ingestors/rss.yml
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: synchroniser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
9
|
-
- 2
|
10
|
-
version: 0.4.2
|
8
|
+
- 6
|
9
|
+
- 0
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Rob Sharp
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-09-07 00:00:00 +10:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- synchroniser.gemspec
|
94
94
|
- tasks/synchroniser_tasks.rake
|
95
95
|
- test/docs/ingestors/rss.yml
|
96
|
+
- test/docs/ingestors/rss_with_group.yml
|
96
97
|
- test/docs/rss.xml
|
97
98
|
- uninstall.rb
|
98
99
|
has_rdoc: true
|