synchroniser 0.4.2 → 0.6.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.6.0
@@ -7,6 +7,10 @@ class Synchroniser
7
7
  def initialize(container, ingestors)
8
8
  @container = container
9
9
  @ingestors = ingestors
10
+
11
+ @stats = [:ingestable, :ingested, :seed].inject({}) { |v, var|
12
+ v.merge({ var => 0 })
13
+ }
10
14
  end
11
15
 
12
16
  def ingest
@@ -22,8 +26,6 @@ class Synchroniser
22
26
  end
23
27
 
24
28
  def sync
25
- initialise_stats
26
-
27
29
  verify_container
28
30
 
29
31
  # reset flags and store the number of flags reset
@@ -43,18 +45,11 @@ class Synchroniser
43
45
  end
44
46
 
45
47
  def display_stats
46
- @stats[:failed] = @stats['ingestable'] - @stats['ingested']
48
+ @stats[:failed] = @stats[:ingestable] - @stats[:ingested]
47
49
  puts "Initial seeding of #{@stats[:seed]}"
48
50
  puts "Ingestor found #{@stats[:ingestable]} items, #{@stats[:ingested]} ingested, #{@stats[:failed]} failed."
49
51
  end
50
52
 
51
- def initialise_stats
52
- @stats = {}
53
- [:ingestable, :ingested, :seed].map() { |var|
54
- @stats[var] = 0
55
- }
56
- end
57
-
58
53
  end
59
54
 
60
55
  require File.join(File.dirname(__FILE__), 'synchroniser', 'config.rb')
@@ -19,31 +19,21 @@ module Synchroniser::Config
19
19
  raise ArgumentError, "Unable to find the stanza '#{ingestor_name}' in the configuration file supplied"
20
20
  end
21
21
 
22
+ defaults['group'] ||= @config["params"]["container"]["defaults"]['group']
23
+ defaults['group'] ||= ingestor_name
24
+
22
25
  @config["params"]["container"]["defaults"].merge!(defaults) unless defaults.empty?
23
26
  end
24
27
 
25
28
  def get_sync
26
29
  container = build_container(@config['params']['container'])
27
- ingestors = build_ingestors(container)
30
+ ingestors = build_ingestors()
28
31
  Synchroniser.new(container, ingestors)
29
32
  end
30
33
 
31
- def build_ingestors(container)
34
+ def build_ingestors()
32
35
  require @config['ingestor'] + "_ingestor.rb"
33
- @ingestor = eval("#{@config['ingestor'].camelize}Ingestor.new")
34
-
35
- strategies = []
36
- @config['params']['url'].each { |url|
37
- @ingestor.load(url)
38
- strategies << @ingestor.clone
39
-
40
- # not sure if this will actually work
41
- @ingestor.pagination.each do |url|
42
- @ingestor.load(url)
43
- strategies << @ingestor.clone
44
- end
45
- }
46
- strategies
36
+ eval("#{@config['ingestor'].camelize}Ingestor.new").strategise(@config['params']['url'])
47
37
  end
48
38
 
49
39
  def build_container(container)
@@ -3,6 +3,12 @@ module Synchroniser::Ingestor::Uri
3
3
 
4
4
  require 'open-uri'
5
5
 
6
+ def populate(url)
7
+ ingestor = self.clone
8
+ ingestor.load(url)
9
+ ingestor
10
+ end
11
+
6
12
  def load(uri)
7
13
  @data = open(uri).read
8
14
  end
@@ -11,4 +17,20 @@ module Synchroniser::Ingestor::Uri
11
17
  # pagination is not enabled by default
12
18
  []
13
19
  end
20
+
21
+ def strategise(urls)
22
+ strategies = urls.inject([]) do |strategies, url|
23
+ strategies << populate(url)
24
+ end
25
+
26
+ # not sure if this will actually work
27
+ # for every ingestor
28
+ strategies.inject([]) do |strategies, ingestor|
29
+ # grab the pagination urls and populate strategy with a new ingestor
30
+ ingestor.pagination.inject([]) do |strategies, url|
31
+ strategies << ingestor.populate(url)
32
+ #lastly add in the original ingestor
33
+ end << ingestor
34
+ end
35
+ end
14
36
  end
@@ -9,7 +9,8 @@ describe Synchroniser do
9
9
  @group = "def456"
10
10
 
11
11
  # clear out the current synchronisation
12
- SyncItem.find(:all).map { |item| item.destroy }
12
+ SyncItem.destroy_all
13
+ Post.destroy_all
13
14
  end
14
15
 
15
16
  it "should always create a new post with new data" do
@@ -35,13 +36,13 @@ describe Synchroniser do
35
36
  end
36
37
 
37
38
  it "should set the correct container group from a config" do
38
- config = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss")
39
+ config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
39
40
  synchroniser = config.get_sync
40
41
  synchroniser.container.group.should == "rss_test_feed"
41
42
  end
42
43
 
43
44
  it "should set the correct model group from a config" do
44
- config = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss")
45
+ config = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss")
45
46
  synchroniser = config.get_sync
46
47
  synchroniser.ingest[0].group.should == "rss_test_feed"
47
48
  end
@@ -59,12 +60,20 @@ describe Synchroniser do
59
60
  it "should set container defaults corrently" do
60
61
  synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
61
62
  synchroniser.container.test_default.should == "random_default_name"
63
+ synchroniser.container.group.should == "rss"
62
64
  end
63
65
 
64
66
  it "should sync without error" do
65
67
  synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss.yml", "rss").get_sync
66
68
  synchroniser.sync
67
69
  SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
70
+ SyncItem.find(:all)[0].group.should == "rss"
71
+ end
72
+
73
+ it "should sync without error with defined group" do
74
+ synchroniser = Synchroniser::Config::Params.new("test/docs/ingestors/rss_with_group.yml", "rss").get_sync
75
+ synchroniser.sync
76
+ SyncItem.find(:all)[0].item.should == "http://news.bbc.co.uk/sport1/hi/football/teams/n/newcastle_united/8329055.stm"
68
77
  SyncItem.find(:all)[0].group.should == "rss_test_feed"
69
78
  end
70
79
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{synchroniser}
8
- s.version = "0.4.2"
8
+ s.version = "0.6.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Rob Sharp"]
12
- s.date = %q{2010-08-30}
12
+ s.date = %q{2010-09-07}
13
13
  s.description = %q{A wee synchroniser for a data source and a data sink.}
14
14
  s.email = %q{rob@sharp.id.au}
15
15
  s.files = [
@@ -49,6 +49,7 @@ Gem::Specification.new do |s|
49
49
  "synchroniser.gemspec",
50
50
  "tasks/synchroniser_tasks.rake",
51
51
  "test/docs/ingestors/rss.yml",
52
+ "test/docs/ingestors/rss_with_group.yml",
52
53
  "test/docs/rss.xml",
53
54
  "uninstall.rb"
54
55
  ]
@@ -5,5 +5,4 @@ test:
5
5
  container:
6
6
  object: Post
7
7
  defaults:
8
- group: rss_test_feed
9
8
  test_default: random_default_name
@@ -0,0 +1,9 @@
1
+ test:
2
+ ingestor: rss
3
+ params:
4
+ url: test/docs/rss.xml
5
+ container:
6
+ object: Post
7
+ defaults:
8
+ test_default: random_default_name
9
+ group: rss_test_feed
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: synchroniser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 7
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 4
9
- - 2
10
- version: 0.4.2
8
+ - 6
9
+ - 0
10
+ version: 0.6.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Rob Sharp
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-30 00:00:00 +10:00
18
+ date: 2010-09-07 00:00:00 +10:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ files:
93
93
  - synchroniser.gemspec
94
94
  - tasks/synchroniser_tasks.rake
95
95
  - test/docs/ingestors/rss.yml
96
+ - test/docs/ingestors/rss_with_group.yml
96
97
  - test/docs/rss.xml
97
98
  - uninstall.rb
98
99
  has_rdoc: true