assimilate 0.2.3 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -5,4 +5,4 @@ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- # task :default => :spec
8
+ task :default => :spec
data/assimilate.gemspec CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |gem|
20
20
  gem.add_dependency "bson_ext", "~> 1.6.0"
21
21
  gem.add_dependency 'activesupport', "~> 3.2.0"
22
22
 
23
+ gem.add_development_dependency "rake", "~> 0.9.2"
23
24
  gem.add_development_dependency "rspec", "~> 2.9.0"
24
25
  gem.add_development_dependency "guard-rspec", "~> 0.7.0"
25
26
  gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
@@ -43,11 +43,14 @@ class Assimilate::Extender
43
43
 
44
44
  # if there is a field to compare on (i.e. a timestamp), then apply the update if the timestamp is newer;
45
45
  # otherwise (no timestamp) compare the hashes and apply if there are any differences.
46
- def apply_this_update(current_record, new_data)
47
- if @comparison_field && current_record[@keyfield]
48
- is_newer(current_record[@keyfield], new_data)
46
+ def apply_this_update?(current_record, new_data)
47
+ if @comparison_field && current_record[keyfield]
48
+ is_newer(current_record[keyfield], new_data)
49
+ elsif keyfield
50
+ current_record[keyfield] != new_data
49
51
  else
50
- current_record[@keyfield] != new_data
52
+ # top-level extension - compare all the attributes to be added
53
+ new_data.select {|k,v| current_record[k] != v}.any?
51
54
  end
52
55
  end
53
56
 
@@ -60,7 +63,7 @@ class Assimilate::Extender
60
63
  # @seen[key] = data
61
64
  current_record = @baseline[key]
62
65
  if current_record
63
- if apply_this_update(current_record, data)
66
+ if apply_this_update?(current_record, data)
64
67
  @changes << key
65
68
  @seen[key] = data
66
69
  else
@@ -98,11 +101,21 @@ class Assimilate::Extender
98
101
  def apply_inserts
99
102
  @adds.each do |key|
100
103
  data = @seen[key]
101
- @catalog.catalog.insert(
102
- @domainkey => domain,
103
- idfield => key,
104
- keyfield => data
105
- )
104
+ if keyfield
105
+ @catalog.catalog.insert(
106
+ @domainkey => domain,
107
+ idfield => key,
108
+ keyfield => data
109
+ )
110
+ else
111
+ # top-level extension
112
+ @catalog.catalog.insert(
113
+ data.merge(
114
+ @domainkey => domain,
115
+ idfield => key
116
+ )
117
+ )
118
+ end
106
119
  end
107
120
  end
108
121
 
@@ -110,16 +123,27 @@ class Assimilate::Extender
110
123
  def apply_updates
111
124
  @changes.each do |key|
112
125
  data = @seen[key]
113
- @catalog.catalog.update(
114
- {
115
- @domainkey => domain,
116
- idfield => key
117
- },
118
- {"$set" => {
119
- keyfield => data
126
+ if keyfield
127
+ @catalog.catalog.update(
128
+ {
129
+ @domainkey => domain,
130
+ idfield => key
131
+ },
132
+ {"$set" => {
133
+ keyfield => data
134
+ }
120
135
  }
121
- }
122
- )
136
+ )
137
+ else
138
+ # top-level extension
139
+ @catalog.catalog.update(
140
+ {
141
+ @domainkey => domain,
142
+ idfield => key
143
+ },
144
+ {"$set" => data }
145
+ )
146
+ end
123
147
  end
124
148
  end
125
149
 
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3"
3
3
  end
@@ -0,0 +1,5 @@
1
+ ID,birthdate
2
+ 1,1732/02/22
3
+ 2,1735/10/30
4
+ 6,1751/03/16
5
+ 999,1802/03/04
@@ -30,6 +30,14 @@ describe "loading extended data" do
30
30
  @extender.commit
31
31
  end
32
32
 
33
+ def import_toplevel_extended_data(datestamp, filename, opts = {})
34
+ @extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID'))
35
+ Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
36
+ @extender << rec
37
+ end
38
+ @extender.commit
39
+ end
40
+
33
41
  describe "into matching catalog entries" do
34
42
  before(:all) do
35
43
  reset_catalog
@@ -70,6 +78,46 @@ describe "loading extended data" do
70
78
  end
71
79
  end
72
80
 
81
+ describe "at top level of catalog entries" do
82
+ before(:all) do
83
+ reset_catalog
84
+ import_base_data("123")
85
+ end
86
+
87
+ before(:each) do
88
+ import_toplevel_extended_data("991", "birthdates.csv")
89
+ end
90
+
91
+ it "should capture changes" do
92
+ @extender.stats.should == {
93
+ :baseline_count => 6,
94
+ :final_count => 7,
95
+ :distinct_ids => 4,
96
+ :adds_count => 1,
97
+ :new_ids => ['999'],
98
+ :updates_count => 3,
99
+ :updated_fields => {'birthdate' => 4},
100
+ :unchanged_count => 0
101
+ }
102
+ end
103
+
104
+ it "should do no-ops on duplicate load" do
105
+ # import_extended_data("1002", "dates")
106
+ lambda {import_toplevel_extended_data("992", "birthdates.csv")}.should_not raise_error
107
+
108
+ @extender.stats.should == {
109
+ :baseline_count => 7,
110
+ :final_count => 7,
111
+ :distinct_ids => 4,
112
+ :adds_count => 0,
113
+ :new_ids => [],
114
+ :updates_count => 0,
115
+ :updated_fields => {},
116
+ :unchanged_count => 4
117
+ }
118
+ end
119
+ end
120
+
73
121
  describe "updating log entries" do
74
122
  before(:all) do
75
123
  reset_catalog
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: '0.3'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-31 00:00:00.000000000 Z
12
+ date: 2012-06-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2154717840 !ruby/object:Gem::Requirement
16
+ requirement: &2161063380 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2154717840
24
+ version_requirements: *2161063380
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2154717340 !ruby/object:Gem::Requirement
27
+ requirement: &2161062840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2154717340
35
+ version_requirements: *2161062840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2154716880 !ruby/object:Gem::Requirement
38
+ requirement: &2161062360 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2154716880
46
+ version_requirements: *2161062360
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: &2161061860 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2161061860
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: rspec
49
- requirement: &2154716420 !ruby/object:Gem::Requirement
60
+ requirement: &2161061380 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ~>
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: 2.9.0
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *2154716420
68
+ version_requirements: *2161061380
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: guard-rspec
60
- requirement: &2152593120 !ruby/object:Gem::Requirement
71
+ requirement: &2161060880 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 0.7.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *2152593120
79
+ version_requirements: *2161060880
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: ruby_gntp
71
- requirement: &2152587980 !ruby/object:Gem::Requirement
82
+ requirement: &2161060380 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,7 +87,7 @@ dependencies:
76
87
  version: 0.3.4
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *2152587980
90
+ version_requirements: *2161060380
80
91
  description: Ingest updates from CSV and apply to set of hashes
81
92
  email:
82
93
  - jmay@pobox.com
@@ -100,6 +111,7 @@ files:
100
111
  - lib/assimilate/extender.rb
101
112
  - lib/assimilate/version.rb
102
113
  - spec/data/batch_input.csv
114
+ - spec/data/birthdates.csv
103
115
  - spec/data/dates.csv
104
116
  - spec/data/duplicate_input.csv
105
117
  - spec/data/logs1.csv
@@ -135,6 +147,7 @@ specification_version: 3
135
147
  summary: Review & incorporate changes to a repository of persistent hashes in mongodb.
136
148
  test_files:
137
149
  - spec/data/batch_input.csv
150
+ - spec/data/birthdates.csv
138
151
  - spec/data/dates.csv
139
152
  - spec/data/duplicate_input.csv
140
153
  - spec/data/logs1.csv