synchroniser 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/synchroniser.rb +5 -10
- data/lib/synchroniser/config.rb +6 -16
- data/lib/synchroniser/ingestor/uri.rb +22 -0
- data/spec/synchroniser_spec.rb +12 -3
- data/synchroniser.gemspec +3 -2
- data/test/docs/ingestors/rss.yml +0 -1
- data/test/docs/ingestors/rss_with_group.yml +9 -0
- metadata +6 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.2
|
1
|
+
0.6.0
|
data/lib/synchroniser.rb
CHANGED
@@ -7,6 +7,10 @@ class Synchroniser
|
|
7
7
|
def initialize(container, ingestors)
|
8
8
|
@container = container
|
9
9
|
@ingestors = ingestors
|
10
|
+
|
11
|
+
@stats = [:ingestable, :ingested, :seed].inject({}) { |v, var|
|
12
|
+
v.merge({ var => 0 })
|
13
|
+
}
|
10
14
|
end
|
11
15
|
|
12
16
|
def ingest
|
@@ -22,8 +26,6 @@ class Synchroniser
|
|
22
26
|
end
|
23
27
|
|
24
28
|
def sync
|
25
|
-
initialise_stats
|
26
|
-
|
27
29
|
verify_container
|
28
30
|
|
29
31
|
# reset flags and store the number of flags reset
|
@@ -43,18 +45,11 @@ class Synchroniser
|
|
43
45
|
end
|
44
46
|
|
45
47
|
def display_stats
|
46
|
-
@stats[:failed] = @stats[
|
48
|
+
@stats[:failed] = @stats[:ingestable] - @stats[:ingested]
|
47
49
|
puts "Initial seeding of #{@stats[:seed]}"
|
48
50
|
puts "Ingestor found #{@stats[:ingestable]} items, #{@stats[:ingested]} ingested, #{@stats[:failed]} failed."
|
49
51
|
end
|
50
52
|
|
51
|
-
def initialise_stats
|
52
|
-
@stats = {}
|
53
|
-
[:ingestable, :ingested, :seed].map() { |var|
|
54
|
-
@stats[var] = 0
|
55
|
-
}
|
56
|
-
end
|
57
|
-
|
58
53
|
end
|
59
54
|
|
60
55
|
require File.join(File.dirname(__FILE__), 'synchroniser', 'config.rb')
|
data/lib/synchroniser/config.rb
CHANGED
@@ -19,31 +19,21 @@ module Synchroniser::Config
|
|
19
19
|
raise ArgumentError, "Unable to find the stanza '#{ingestor_name}' in the configuration file supplied"
|
20
20
|
end
|
21
21
|
|
22
|
+
defaults['group'] ||= @config["params"]["container"]["defaults"]['group']
|
23
|
+
defaults['group'] ||= ingestor_name
|
24
|
+
|
22
25
|
@config["params"]["container"]["defaults"].merge!(defaults) unless defaults.empty?
|
23
26
|
end
|
24
27
|
|
25
28
|
def get_sync
|
26
29
|
container = build_container(@config['params']['container'])
|
27
|
-
ingestors = build_ingestors(
|
30
|
+
ingestors = build_ingestors()
|
28
31
|
Synchroniser.new(container, ingestors)
|
29
32
|
end
|
30
33
|
|
31
|
-
def build_ingestors(
|
34
|
+
def build_ingestors()
|
32
35
|
require @config['ingestor'] + "_ingestor.rb"
|
33
|
-
|
34
|
-
|
35
|
-
strategies = []
|
36
|
-
@config['params']['url'].each { |url|
|
37
|
-
@ingestor.load(url)
|
38
|
-
strategies << @ingestor.clone
|
39
|
-
|
40
|
-
# not sure if this will actually work
|
41
|
-
@ingestor.pagination.each do |url|
|
42
|
-
@ingestor.load(url)
|
43
|
-
strategies << @ingestor.clone
|
44
|
-
end
|
45
|
-
}
|
46
|
-
strategies
|
36
|
+
eval("#{@config['ingestor'].camelize}Ingestor.new").strategise(@config['params']['url'])
|
47
37
|
end
|
48
38
|
|
49
39
|
def build_container(container)
|
data/lib/synchroniser/ingestor/uri.rb
CHANGED
@@ -3,6 +3,12 @@ module Synchroniser::Ingestor::Uri
|
|
3
3
|
|
4
4
|
require 'open-uri'
|
5
5
|
|
6
|
+
def populate(url)
|
7
|
+
ingestor = self.clone
|
8
|
+
ingestor.load(url)
|
9
|
+
ingestor
|
10
|
+
end
|
11
|
+
|
6
12
|
def load(uri)
|
7
13
|
@data = open(uri).read
|
8
14
|
end
|
@@ -11,4 +17,20 @@ module Synchroniser::Ingestor::Uri
|
|
11
17
|
# pagination is not enabled by default
|
12
18
|
[]
|
13
19
|
end
|
20
|
+
|
21
|
+
def strategise(urls)
|
22
|
+
strategies = urls.inject([]) do |strategies, url|
|
23
|
+
strategies << populate(url)
|
24
|
+
end
|
25
|
+
|
26
|
+
# not sure if this will actually work
|
27
|
+
# for every ingestor
|
28
|
+
strategies.inject([]) do |strategies, ingestor|
|
29
|
+
# grab the pagination urls and populate strategy with a new ingestor
|
30
|
+
ingestor.pagination.inject([]) do |strategies, url|
|
31
|
+
strategies << ingestor.populate(url)
|
32
|
+
#lastly add in the original ingestor
|
33
|
+
end << ingestor
|
34
|
+
end
|
35
|
+
end
|
14
36
|
end
|
data/spec/synchroniser_spec.rb
CHANGED
@@ -9,7 +9,8 @@ describe Synchroniser do
|
|
9
9
|
@group = "def456"
|
10
10
|
|
11
11
|
# clear out the current synchronisation
|
12
|
-
SyncItem.
|
12
|
+
SyncItem.destroy_all
|
13
|
+
Post.destroy_all
|
13
14
|
end
|
14
15
|
|
15
16
|
it "should always create a new post with new data" do
|
@@ -35,13 +36,13 @@ describe Synchroniser do
|
|
35
36
|
end
|
36
37
|
|
37
38
|
it "should set the correct container group from a config" do
|
38
|
-
config = Synchroniser::Config::Params.new("test/docs/ingestors/
|
39
|
+
config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
|
39
40
|
synchroniser = config.get_sync
|
40
41
|
synchroniser.container.group.should == "rss_test_feed"
|
41
42
|
end
|
42
43
|
|
43
44
|
it "should set the correct model group from a config" do
|
44
|
-
config = Synchroniser::Config::Params.new("test/docs/ingestors/
|
45
|
+
config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
|
45
46
|
synchroniser = config.get_sync
|
46
47
|
synchroniser.ingest[0].group.should == "rss_test_feed"
|
47
48
|
end
|
@@ -59,12 +60,20 @@ describe Synchroniser do
|
|
59
60
|
it "should set container defaults corrently" do
|
60
61
|
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
|
61
62
|
synchroniser.container.test_default.should == "random_default_name"
|
63
|
+
synchroniser.container.group.should == "rss"
|
62
64
|
end
|
63
65
|
|
64
66
|
it "should sync without error" do
|
65
67
|
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
|
66
68
|
synchroniser.sync
|
67
69
|
SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
|
70
|
+
SyncItem.find(:all)[0].group.should == "rss"
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should sync without error with defined group" do
|
74
|
+
synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss").get_sync
|
75
|
+
synchroniser.sync
|
76
|
+
SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
|
68
77
|
SyncItem.find(:all)[0].group.should == "rss_test_feed"
|
69
78
|
end
|
70
79
|
|
data/synchroniser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{synchroniser}
|
8
|
-
s.version = "0.4.2"
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Rob Sharp"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-09-07}
|
13
13
|
s.description = %q{A wee synchroniser for a data source and a data sink.}
|
14
14
|
s.email = %q{rob@sharp.id.au}
|
15
15
|
s.files = [
|
@@ -49,6 +49,7 @@ Gem::Specification.new do |s|
|
|
49
49
|
"synchroniser.gemspec",
|
50
50
|
"tasks/synchroniser_tasks.rake",
|
51
51
|
"test/docs/ingestors/rss.yml",
|
52
|
+
"test/docs/ingestors/rss_with_group.yml",
|
52
53
|
"test/docs/rss.xml",
|
53
54
|
"uninstall.rb"
|
54
55
|
]
|
data/test/docs/ingestors/rss.yml
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: synchroniser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
9
|
-
- 2
|
10
|
-
version: 0.4.2
|
8
|
+
- 6
|
9
|
+
- 0
|
10
|
+
version: 0.6.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Rob Sharp
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-09-07 00:00:00 +10:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- synchroniser.gemspec
|
94
94
|
- tasks/synchroniser_tasks.rake
|
95
95
|
- test/docs/ingestors/rss.yml
|
96
|
+
- test/docs/ingestors/rss_with_group.yml
|
96
97
|
- test/docs/rss.xml
|
97
98
|
- uninstall.rb
|
98
99
|
has_rdoc: true
|