assimilate 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/assimilate/batch.rb +14 -10
- data/lib/assimilate/catalog.rb +2 -1
- data/lib/assimilate/version.rb +1 -1
- data/lib/assimilate.rb +1 -1
- data/spec/data/batch_input.csv +7 -7
- data/spec/data/duplicate_input.csv +7 -7
- data/spec/data/updates.csv +6 -6
- data/spec/lib/batch_spec.rb +10 -6
- metadata +14 -14
data/lib/assimilate/batch.rb
CHANGED
@@ -13,7 +13,7 @@ class Assimilate::Batch
|
|
13
13
|
load_baseline
|
14
14
|
|
15
15
|
@noops = []
|
16
|
-
@changes =
|
16
|
+
@changes = {}
|
17
17
|
@adds = []
|
18
18
|
@deletes = []
|
19
19
|
@resolved = false
|
@@ -45,22 +45,26 @@ class Assimilate::Batch
|
|
45
45
|
if current_record == hash
|
46
46
|
@noops << hash
|
47
47
|
else
|
48
|
-
@changes
|
48
|
+
@changes[key] = deltas(current_record, hash)
|
49
49
|
end
|
50
50
|
else
|
51
51
|
@adds << hash
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
55
|
+
def deltas(h1,h2)
|
56
|
+
(h1.keys | h2.keys).each_with_object({}) {|k,h| h[k] = h2[k] if h1[k] != h2[k]}
|
57
|
+
end
|
58
|
+
|
55
59
|
# compute anything needed before we can write updates to permanent store
|
56
60
|
# * find records that have been deleted
|
57
61
|
def resolve
|
58
62
|
if !@resolved
|
59
63
|
@deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
|
60
64
|
|
61
|
-
@updated_field_counts = @changes.each_with_object(Hash.new(0)) do |
|
62
|
-
key = rec[idfield]
|
63
|
-
diffs =
|
65
|
+
@updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
|
66
|
+
# key = rec[idfield]
|
67
|
+
# diffs = deltas(stripped_record_for(key), rec)
|
64
68
|
diffs.keys.each do |f|
|
65
69
|
h[f] += 1
|
66
70
|
end
|
@@ -79,8 +83,8 @@ class Assimilate::Batch
|
|
79
83
|
:new_ids => @adds.map {|rec| rec[idfield]},
|
80
84
|
:deletes_count => @deleted_keys.count,
|
81
85
|
:deleted_ids => @deleted_keys,
|
82
|
-
:updates_count => @changes.
|
83
|
-
:updated_ids => @changes.
|
86
|
+
:updates_count => @changes.size,
|
87
|
+
:updated_ids => @changes.keys,
|
84
88
|
:unchanged_count => @noops.count,
|
85
89
|
:updated_fields => @updated_field_counts
|
86
90
|
}
|
@@ -128,13 +132,13 @@ class Assimilate::Batch
|
|
128
132
|
end
|
129
133
|
|
130
134
|
def apply_updates
|
131
|
-
@changes.each do |
|
135
|
+
@changes.each do |key, diffs|
|
132
136
|
@catalog.catalog.update(
|
133
137
|
{
|
134
138
|
@domainkey => domain,
|
135
|
-
idfield =>
|
139
|
+
idfield => key
|
136
140
|
},
|
137
|
-
{"$set" =>
|
141
|
+
{"$set" => diffs}
|
138
142
|
)
|
139
143
|
end
|
140
144
|
end
|
data/lib/assimilate/catalog.rb
CHANGED
data/lib/assimilate/version.rb
CHANGED
data/lib/assimilate.rb
CHANGED
data/spec/data/batch_input.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
ID,name,title
|
2
|
-
1,George Washington,President
|
3
|
-
2,John Adams,Vice President
|
4
|
-
3,Benjamin Franklin,Sage
|
5
|
-
4,Aaron Burr,Duelist
|
6
|
-
5,Alexander Hamilton,Financier
|
7
|
-
6,James Madison,Theorist
|
1
|
+
ID,name,title,spouse
|
2
|
+
1,George Washington,President,Martha
|
3
|
+
2,John Adams,Vice President,Abigail
|
4
|
+
3,Benjamin Franklin,Sage,Deborah
|
5
|
+
4,Aaron Burr,Duelist,Theodosia
|
6
|
+
5,Alexander Hamilton,Financier,Elizabeth
|
7
|
+
6,James Madison,Theorist,Dolly
|
@@ -1,7 +1,7 @@
|
|
1
|
-
ID,name,title
|
2
|
-
1,George Washington,President
|
3
|
-
2,John Adams,Vice President
|
4
|
-
3,Benjamin Franklin,Sage
|
5
|
-
4,Aaron Burr,Duelist
|
6
|
-
5,Alexander Hamilton,Financier
|
7
|
-
6,James Madison,Theorist
|
1
|
+
ID,name,title,spouse
|
2
|
+
1,George Washington,President,Martha
|
3
|
+
2,John Adams,Vice President,Abigail
|
4
|
+
3,Benjamin Franklin,Sage,Deborah
|
5
|
+
4,Aaron Burr,Duelist,Theodosia
|
6
|
+
5,Alexander Hamilton,Financier,Elizabeth
|
7
|
+
6,James Madison,Theorist,Dolly
|
data/spec/data/updates.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
ID,name,title
|
2
|
-
1,George Washington,President
|
3
|
-
2,John Adams,Vice President
|
4
|
-
3,Benjamin Franklin,Ambassador
|
5
|
-
5,Alexander Hamilton,Financier
|
6
|
-
7,Thomas Jefferson,Anti-Federalist
|
1
|
+
ID,name,title,spouse
|
2
|
+
1,George Washington,President,Martha
|
3
|
+
2,John Adams,Vice President,Abigail
|
4
|
+
3,Benjamin Franklin,Ambassador,
|
5
|
+
5,Alexander Hamilton,Financier,Elizabeth
|
6
|
+
7,Thomas Jefferson,Anti-Federalist,
|
data/spec/lib/batch_spec.rb
CHANGED
@@ -45,15 +45,16 @@ describe "importing file" do
|
|
45
45
|
|
46
46
|
it "should load the records verbatim" do
|
47
47
|
@catalog.catalog.count.should == 6
|
48
|
-
@catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
|
48
|
+
@catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
|
49
|
+
{'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage', 'spouse' => 'Deborah'}
|
49
50
|
end
|
50
51
|
|
51
52
|
it "should refuse to do a duplicate import" do
|
52
|
-
lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError)
|
53
|
+
lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for datestamp 123")
|
53
54
|
end
|
54
55
|
|
55
56
|
it "should refuse to re-import same file" do
|
56
|
-
lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError)
|
57
|
+
lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for file batch_input.csv")
|
57
58
|
end
|
58
59
|
|
59
60
|
it "should do all no-ops when importing identical data" do
|
@@ -78,9 +79,7 @@ describe "importing file" do
|
|
78
79
|
before(:all) do
|
79
80
|
reset_catalog
|
80
81
|
import_data("123")
|
81
|
-
end
|
82
82
|
|
83
|
-
before(:each) do
|
84
83
|
import_data("345", "updates.csv")
|
85
84
|
end
|
86
85
|
|
@@ -95,9 +94,14 @@ describe "importing file" do
|
|
95
94
|
:updates_count => 1,
|
96
95
|
:updated_ids => ['3'],
|
97
96
|
:unchanged_count => 3,
|
98
|
-
:updated_fields => {'title' => 1}
|
97
|
+
:updated_fields => {'title' => 1, 'spouse' => 1}
|
99
98
|
}
|
100
99
|
@catalog.active_count.should == 5
|
101
100
|
end
|
101
|
+
|
102
|
+
it "should handle deleted attributes" do
|
103
|
+
franklin = @catalog.where('ID' => '3')
|
104
|
+
franklin['spouse'].should be_nil
|
105
|
+
end
|
102
106
|
end
|
103
107
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assimilate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mongo
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157608660 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.6.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157608660
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bson_ext
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157607760 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.6.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157607760
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: activesupport
|
38
|
-
requirement: &
|
38
|
+
requirement: &2157607080 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 3.2.0
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157607080
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rspec
|
49
|
-
requirement: &
|
49
|
+
requirement: &2157606260 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 2.9.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2157606260
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: guard-rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &2157605600 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 0.7.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2157605600
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: ruby_gntp
|
71
|
-
requirement: &
|
71
|
+
requirement: &2157603980 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.3.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2157603980
|
80
80
|
description: Ingest updates from CSV and apply to set of hashes
|
81
81
|
email:
|
82
82
|
- jmay@pobox.com
|