assimilate 0.2.3 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/assimilate.gemspec +1 -0
- data/lib/assimilate/extender.rb +43 -19
- data/lib/assimilate/version.rb +1 -1
- data/spec/data/birthdates.csv +5 -0
- data/spec/lib/extend_spec.rb +48 -0
- metadata +27 -14
data/Rakefile
CHANGED
data/assimilate.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |gem|
|
|
20
20
|
gem.add_dependency "bson_ext", "~> 1.6.0"
|
21
21
|
gem.add_dependency 'activesupport', "~> 3.2.0"
|
22
22
|
|
23
|
+
gem.add_development_dependency "rake", "~> 0.9.2"
|
23
24
|
gem.add_development_dependency "rspec", "~> 2.9.0"
|
24
25
|
gem.add_development_dependency "guard-rspec", "~> 0.7.0"
|
25
26
|
gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
|
data/lib/assimilate/extender.rb
CHANGED
@@ -43,11 +43,14 @@ class Assimilate::Extender
|
|
43
43
|
|
44
44
|
# if there is a field to compare on (i.e. a timestamp), then apply the update if the timestamp is newer;
|
45
45
|
# otherwise (no timestamp) compare the hashes and apply if there are any differences.
|
46
|
-
def apply_this_update(current_record, new_data)
|
47
|
-
if @comparison_field && current_record[
|
48
|
-
is_newer(current_record[
|
46
|
+
def apply_this_update?(current_record, new_data)
|
47
|
+
if @comparison_field && current_record[keyfield]
|
48
|
+
is_newer(current_record[keyfield], new_data)
|
49
|
+
elsif keyfield
|
50
|
+
current_record[keyfield] != new_data
|
49
51
|
else
|
50
|
-
|
52
|
+
# top-level extension - compare all the attributes to be added
|
53
|
+
new_data.select {|k,v| current_record[k] != v}.any?
|
51
54
|
end
|
52
55
|
end
|
53
56
|
|
@@ -60,7 +63,7 @@ class Assimilate::Extender
|
|
60
63
|
# @seen[key] = data
|
61
64
|
current_record = @baseline[key]
|
62
65
|
if current_record
|
63
|
-
if apply_this_update(current_record, data)
|
66
|
+
if apply_this_update?(current_record, data)
|
64
67
|
@changes << key
|
65
68
|
@seen[key] = data
|
66
69
|
else
|
@@ -98,11 +101,21 @@ class Assimilate::Extender
|
|
98
101
|
def apply_inserts
|
99
102
|
@adds.each do |key|
|
100
103
|
data = @seen[key]
|
101
|
-
|
102
|
-
@
|
103
|
-
|
104
|
-
|
105
|
-
|
104
|
+
if keyfield
|
105
|
+
@catalog.catalog.insert(
|
106
|
+
@domainkey => domain,
|
107
|
+
idfield => key,
|
108
|
+
keyfield => data
|
109
|
+
)
|
110
|
+
else
|
111
|
+
# top-level extension
|
112
|
+
@catalog.catalog.insert(
|
113
|
+
data.merge(
|
114
|
+
@domainkey => domain,
|
115
|
+
idfield => key
|
116
|
+
)
|
117
|
+
)
|
118
|
+
end
|
106
119
|
end
|
107
120
|
end
|
108
121
|
|
@@ -110,16 +123,27 @@ class Assimilate::Extender
|
|
110
123
|
def apply_updates
|
111
124
|
@changes.each do |key|
|
112
125
|
data = @seen[key]
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
126
|
+
if keyfield
|
127
|
+
@catalog.catalog.update(
|
128
|
+
{
|
129
|
+
@domainkey => domain,
|
130
|
+
idfield => key
|
131
|
+
},
|
132
|
+
{"$set" => {
|
133
|
+
keyfield => data
|
134
|
+
}
|
120
135
|
}
|
121
|
-
|
122
|
-
|
136
|
+
)
|
137
|
+
else
|
138
|
+
# top-level extension
|
139
|
+
@catalog.catalog.update(
|
140
|
+
{
|
141
|
+
@domainkey => domain,
|
142
|
+
idfield => key
|
143
|
+
},
|
144
|
+
{"$set" => data }
|
145
|
+
)
|
146
|
+
end
|
123
147
|
end
|
124
148
|
end
|
125
149
|
|
data/lib/assimilate/version.rb
CHANGED
data/spec/lib/extend_spec.rb
CHANGED
@@ -30,6 +30,14 @@ describe "loading extended data" do
|
|
30
30
|
@extender.commit
|
31
31
|
end
|
32
32
|
|
33
|
+
def import_toplevel_extended_data(datestamp, filename, opts = {})
|
34
|
+
@extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID'))
|
35
|
+
Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
|
36
|
+
@extender << rec
|
37
|
+
end
|
38
|
+
@extender.commit
|
39
|
+
end
|
40
|
+
|
33
41
|
describe "into matching catalog entries" do
|
34
42
|
before(:all) do
|
35
43
|
reset_catalog
|
@@ -70,6 +78,46 @@ describe "loading extended data" do
|
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
81
|
+
describe "at top level of catalog entries" do
|
82
|
+
before(:all) do
|
83
|
+
reset_catalog
|
84
|
+
import_base_data("123")
|
85
|
+
end
|
86
|
+
|
87
|
+
before(:each) do
|
88
|
+
import_toplevel_extended_data("991", "birthdates.csv")
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should capture changes" do
|
92
|
+
@extender.stats.should == {
|
93
|
+
:baseline_count => 6,
|
94
|
+
:final_count => 7,
|
95
|
+
:distinct_ids => 4,
|
96
|
+
:adds_count => 1,
|
97
|
+
:new_ids => ['999'],
|
98
|
+
:updates_count => 3,
|
99
|
+
:updated_fields => {'birthdate' => 4},
|
100
|
+
:unchanged_count => 0
|
101
|
+
}
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should do no-ops on duplicate load" do
|
105
|
+
# import_extended_data("1002", "dates")
|
106
|
+
lambda {import_toplevel_extended_data("992", "birthdates.csv")}.should_not raise_error
|
107
|
+
|
108
|
+
@extender.stats.should == {
|
109
|
+
:baseline_count => 7,
|
110
|
+
:final_count => 7,
|
111
|
+
:distinct_ids => 4,
|
112
|
+
:adds_count => 0,
|
113
|
+
:new_ids => [],
|
114
|
+
:updates_count => 0,
|
115
|
+
:updated_fields => {},
|
116
|
+
:unchanged_count => 4
|
117
|
+
}
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
73
121
|
describe "updating log entries" do
|
74
122
|
before(:all) do
|
75
123
|
reset_catalog
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assimilate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.3'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-06-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mongo
|
16
|
-
requirement: &
|
16
|
+
requirement: &2161063380 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.6.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2161063380
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bson_ext
|
27
|
-
requirement: &
|
27
|
+
requirement: &2161062840 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.6.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2161062840
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: activesupport
|
38
|
-
requirement: &
|
38
|
+
requirement: &2161062360 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,21 @@ dependencies:
|
|
43
43
|
version: 3.2.0
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2161062360
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rake
|
49
|
+
requirement: &2161061860 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.9.2
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2161061860
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: rspec
|
49
|
-
requirement: &
|
60
|
+
requirement: &2161061380 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ~>
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: 2.9.0
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *2161061380
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: guard-rspec
|
60
|
-
requirement: &
|
71
|
+
requirement: &2161060880 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ~>
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 0.7.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *2161060880
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: ruby_gntp
|
71
|
-
requirement: &
|
82
|
+
requirement: &2161060380 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ~>
|
@@ -76,7 +87,7 @@ dependencies:
|
|
76
87
|
version: 0.3.4
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *2161060380
|
80
91
|
description: Ingest updates from CSV and apply to set of hashes
|
81
92
|
email:
|
82
93
|
- jmay@pobox.com
|
@@ -100,6 +111,7 @@ files:
|
|
100
111
|
- lib/assimilate/extender.rb
|
101
112
|
- lib/assimilate/version.rb
|
102
113
|
- spec/data/batch_input.csv
|
114
|
+
- spec/data/birthdates.csv
|
103
115
|
- spec/data/dates.csv
|
104
116
|
- spec/data/duplicate_input.csv
|
105
117
|
- spec/data/logs1.csv
|
@@ -135,6 +147,7 @@ specification_version: 3
|
|
135
147
|
summary: Review & incorporate changes to a repository of persistent hashes in mongodb.
|
136
148
|
test_files:
|
137
149
|
- spec/data/batch_input.csv
|
150
|
+
- spec/data/birthdates.csv
|
138
151
|
- spec/data/dates.csv
|
139
152
|
- spec/data/duplicate_input.csv
|
140
153
|
- spec/data/logs1.csv
|