synchroniser 0.4.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.6.0
@@ -7,6 +7,10 @@ class Synchroniser
7
7
  def initialize(container, ingestors)
8
8
  @container = container
9
9
  @ingestors = ingestors
10
+
11
+ @stats = [:ingestable, :ingested, :seed].inject({}) { |v, var|
12
+ v.merge({ var => 0 })
13
+ }
10
14
  end
11
15
 
12
16
  def ingest
@@ -22,8 +26,6 @@ class Synchroniser
22
26
  end
23
27
 
24
28
  def sync
25
- initialise_stats
26
-
27
29
  verify_container
28
30
 
29
31
  # reset flags and store the number of flags reset
@@ -43,18 +45,11 @@ class Synchroniser
43
45
  end
44
46
 
45
47
  def display_stats
46
- @stats[:failed] = @stats['ingestable'] - @stats['ingested']
48
+ @stats[:failed] = @stats[:ingestable] - @stats[:ingested]
47
49
  puts "Initial seeding of #{@stats[:seed]}"
48
50
  puts "Ingestor found #{@stats[:ingestable]} items, #{@stats[:ingested]} ingested, #{@stats[:failed]} failed."
49
51
  end
50
52
 
51
- def initialise_stats
52
- @stats = {}
53
- [:ingestable, :ingested, :seed].map() { |var|
54
- @stats[var] = 0
55
- }
56
- end
57
-
58
53
  end
59
54
 
60
55
  require File.join(File.dirname(__FILE__), 'synchroniser', 'config.rb')
@@ -19,31 +19,21 @@ module Synchroniser::Config
19
19
  raise ArgumentError, "Unable to find the stanza '#{ingestor_name}' in the configuration file supplied"
20
20
  end
21
21
 
22
+ defaults['group'] ||= @config["params"]["container"]["defaults"]['group']
23
+ defaults['group'] ||= ingestor_name
24
+
22
25
  @config["params"]["container"]["defaults"].merge!(defaults) unless defaults.empty?
23
26
  end
24
27
 
25
28
  def get_sync
26
29
  container = build_container(@config['params']['container'])
27
- ingestors = build_ingestors(container)
30
+ ingestors = build_ingestors()
28
31
  Synchroniser.new(container, ingestors)
29
32
  end
30
33
 
31
- def build_ingestors(container)
34
+ def build_ingestors()
32
35
  require @config['ingestor'] + "_ingestor.rb"
33
- @ingestor = eval("#{@config['ingestor'].camelize}Ingestor.new")
34
-
35
- strategies = []
36
- @config['params']['url'].each { |url|
37
- @ingestor.load(url)
38
- strategies << @ingestor.clone
39
-
40
- # not sure if this will actually work
41
- @ingestor.pagination.each do |url|
42
- @ingestor.load(url)
43
- strategies << @ingestor.clone
44
- end
45
- }
46
- strategies
36
+ eval("#{@config['ingestor'].camelize}Ingestor.new").strategise(@config['params']['url'])
47
37
  end
48
38
 
49
39
  def build_container(container)
@@ -3,6 +3,12 @@ module Synchroniser::Ingestor::Uri
3
3
 
4
4
  require 'open-uri'
5
5
 
6
+ def populate(url)
7
+ ingestor = self.clone
8
+ ingestor.load(url)
9
+ ingestor
10
+ end
11
+
6
12
  def load(uri)
7
13
  @data = open(uri).read
8
14
  end
@@ -11,4 +17,20 @@ module Synchroniser::Ingestor::Uri
11
17
  # pagination is not enabled by default
12
18
  []
13
19
  end
20
+
21
+ def strategise(urls)
22
+ strategies = urls.inject([]) do |strategies, url|
23
+ strategies << populate(url)
24
+ end
25
+
26
+ # not sure if this will actually work
27
+ # for every ingestor
28
+ strategies.inject([]) do |strategies, ingestor|
29
+ # grab the pagination urls and populate strategy with a new ingestor
30
+ ingestor.pagination.inject([]) do |strategies, url|
31
+ strategies << ingestor.populate(url)
32
+ #lastly add in the original ingestor
33
+ end << ingestor
34
+ end
35
+ end
14
36
  end
@@ -9,7 +9,8 @@ describe Synchroniser do
9
9
  @group = "def456"
10
10
 
11
11
  # clear out the current synchronisation
12
- SyncItem.find(:all).map { |item| item.destroy }
12
+ SyncItem.destroy_all
13
+ Post.destroy_all
13
14
  end
14
15
 
15
16
  it "should always create a new post with new data" do
@@ -35,13 +36,13 @@ describe Synchroniser do
35
36
  end
36
37
 
37
38
  it "should set the correct container group from a config" do
38
- config = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss")
39
+ config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
39
40
  synchroniser = config.get_sync
40
41
  synchroniser.container.group.should == "rss_test_feed"
41
42
  end
42
43
 
43
44
  it "should set the correct model group from a config" do
44
- config = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss")
45
+ config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
45
46
  synchroniser = config.get_sync
46
47
  synchroniser.ingest[0].group.should == "rss_test_feed"
47
48
  end
@@ -59,12 +60,20 @@ describe Synchroniser do
59
60
  it "should set container defaults corrently" do
60
61
  synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
61
62
  synchroniser.container.test_default.should == "random_default_name"
63
+ synchroniser.container.group.should == "rss"
62
64
  end
63
65
 
64
66
  it "should sync without error" do
65
67
  synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
66
68
  synchroniser.sync
67
69
  SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
70
+ SyncItem.find(:all)[0].group.should == "rss"
71
+ end
72
+
73
+ it "should sync without error with defined group" do
74
+ synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss").get_sync
75
+ synchroniser.sync
76
+ SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
68
77
  SyncItem.find(:all)[0].group.should == "rss_test_feed"
69
78
  end
70
79
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{synchroniser}
8
- s.version = "0.4.2"
8
+ s.version = "0.6.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Rob Sharp"]
12
- s.date = %q{2010-08-30}
12
+ s.date = %q{2010-09-07}
13
13
  s.description = %q{A wee synchroniser for a data source and a data sink.}
14
14
  s.email = %q{rob@sharp.id.au}
15
15
  s.files = [
@@ -49,6 +49,7 @@ Gem::Specification.new do |s|
49
49
  "synchroniser.gemspec",
50
50
  "tasks/synchroniser_tasks.rake",
51
51
  "test/docs/ingestors/rss.yml",
52
+ "test/docs/ingestors/rss_with_group.yml",
52
53
  "test/docs/rss.xml",
53
54
  "uninstall.rb"
54
55
  ]
@@ -5,5 +5,4 @@ test:
5
5
  container:
6
6
  object: Post
7
7
  defaults:
8
- group: rss_test_feed
9
8
  test_default: random_default_name
@@ -0,0 +1,9 @@
1
+ test:
2
+ ingestor: rss
3
+ params:
4
+ url: test/docs/rss.xml
5
+ container:
6
+ object: Post
7
+ defaults:
8
+ test_default: random_default_name
9
+ group: rss_test_feed
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: synchroniser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 7
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 4
9
- - 2
10
- version: 0.4.2
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Rob Sharp
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-30 00:00:00 +10:00
18
+ date: 2010-09-07 00:00:00 +10:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ files:
93
93
  - synchroniser.gemspec
94
94
  - tasks/synchroniser_tasks.rake
95
95
  - test/docs/ingestors/rss.yml
96
+ - test/docs/ingestors/rss_with_group.yml
96
97
  - test/docs/rss.xml
97
98
  - uninstall.rb
98
99
  has_rdoc: true