assimilate 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@ class Assimilate::Batch
13
13
  load_baseline
14
14
 
15
15
  @noops = []
16
- @changes = []
16
+ @changes = {}
17
17
  @adds = []
18
18
  @deletes = []
19
19
  @resolved = false
@@ -45,22 +45,26 @@ class Assimilate::Batch
45
45
  if current_record == hash
46
46
  @noops << hash
47
47
  else
48
- @changes << hash
48
+ @changes[key] = deltas(current_record, hash)
49
49
  end
50
50
  else
51
51
  @adds << hash
52
52
  end
53
53
  end
54
54
 
55
+ def deltas(h1,h2)
56
+ (h1.keys | h2.keys).each_with_object({}) {|k,h| h[k] = h2[k] if h1[k] != h2[k]}
57
+ end
58
+
55
59
  # compute anything needed before we can write updates to permanent store
56
60
  # * find records that have been deleted
57
61
  def resolve
58
62
  if !@resolved
59
63
  @deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
60
64
 
61
- @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |rec,h|
62
- key = rec[idfield]
63
- diffs = rec.diff(stripped_record_for(key))
65
+ @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
66
+ # key = rec[idfield]
67
+ # diffs = deltas(stripped_record_for(key), rec)
64
68
  diffs.keys.each do |f|
65
69
  h[f] += 1
66
70
  end
@@ -79,8 +83,8 @@ class Assimilate::Batch
79
83
  :new_ids => @adds.map {|rec| rec[idfield]},
80
84
  :deletes_count => @deleted_keys.count,
81
85
  :deleted_ids => @deleted_keys,
82
- :updates_count => @changes.count,
83
- :updated_ids => @changes.map {|rec| rec[idfield]},
86
+ :updates_count => @changes.size,
87
+ :updated_ids => @changes.keys,
84
88
  :unchanged_count => @noops.count,
85
89
  :updated_fields => @updated_field_counts
86
90
  }
@@ -128,13 +132,13 @@ class Assimilate::Batch
128
132
  end
129
133
 
130
134
  def apply_updates
131
- @changes.each do |rec|
135
+ @changes.each do |key, diffs|
132
136
  @catalog.catalog.update(
133
137
  {
134
138
  @domainkey => domain,
135
- idfield => rec[idfield]
139
+ idfield => key
136
140
  },
137
- {"$set" => rec}
141
+ {"$set" => diffs}
138
142
  )
139
143
  end
140
144
  end
@@ -51,7 +51,8 @@ class Assimilate::Catalog
51
51
  end
52
52
 
53
53
  def where(params)
54
- @catalog.find(params).first.select {|k,v| k !~ /^_/}
54
+ record = @catalog.find(params).first
55
+ record && record.select {|k,v| k !~ /^_/}
55
56
  end
56
57
 
57
58
  def active_count
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/assimilate.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require "mongo"
2
- require "active_support/core_ext" # needed for Hash#diff
2
+ require "active_support/core_ext" # needed for Hash#symbolize_keys!
3
3
  require "csv"
4
4
 
5
5
  require_relative "assimilate/version"
@@ -1,7 +1,7 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Sage
5
- 4,Aaron Burr,Duelist
6
- 5,Alexander Hamilton,Financier
7
- 6,James Madison,Theorist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Sage,Deborah
5
+ 4,Aaron Burr,Duelist,Theodosia
6
+ 5,Alexander Hamilton,Financier,Elizabeth
7
+ 6,James Madison,Theorist,Dolly
@@ -1,7 +1,7 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Sage
5
- 4,Aaron Burr,Duelist
6
- 5,Alexander Hamilton,Financier
7
- 6,James Madison,Theorist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Sage,Deborah
5
+ 4,Aaron Burr,Duelist,Theodosia
6
+ 5,Alexander Hamilton,Financier,Elizabeth
7
+ 6,James Madison,Theorist,Dolly
@@ -1,6 +1,6 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Ambassador
5
- 5,Alexander Hamilton,Financier
6
- 7,Thomas Jefferson,Anti-Federalist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Ambassador,
5
+ 5,Alexander Hamilton,Financier,Elizabeth
6
+ 7,Thomas Jefferson,Anti-Federalist,
@@ -45,15 +45,16 @@ describe "importing file" do
45
45
 
46
46
  it "should load the records verbatim" do
47
47
  @catalog.catalog.count.should == 6
48
- @catalog.where('_resource' => 'testdata', 'ID' => '3').should == {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage'}
48
+ @catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
49
+ {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage', 'spouse' => 'Deborah'}
49
50
  end
50
51
 
51
52
  it "should refuse to do a duplicate import" do
52
- lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError)
53
+ lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for datestamp 123")
53
54
  end
54
55
 
55
56
  it "should refuse to re-import same file" do
56
- lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError)
57
+ lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for file batch_input.csv")
57
58
  end
58
59
 
59
60
  it "should do all no-ops when importing identical data" do
@@ -78,9 +79,7 @@ describe "importing file" do
78
79
  before(:all) do
79
80
  reset_catalog
80
81
  import_data("123")
81
- end
82
82
 
83
- before(:each) do
84
83
  import_data("345", "updates.csv")
85
84
  end
86
85
 
@@ -95,9 +94,14 @@ describe "importing file" do
95
94
  :updates_count => 1,
96
95
  :updated_ids => ['3'],
97
96
  :unchanged_count => 3,
98
- :updated_fields => {'title' => 1}
97
+ :updated_fields => {'title' => 1, 'spouse' => 1}
99
98
  }
100
99
  @catalog.active_count.should == 5
101
100
  end
101
+
102
+ it "should handle deleted attributes" do
103
+ franklin = @catalog.where('ID' => '3')
104
+ franklin['spouse'].should be_nil
105
+ end
102
106
  end
103
107
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-02 00:00:00.000000000 Z
12
+ date: 2012-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2152558540 !ruby/object:Gem::Requirement
16
+ requirement: &2157608660 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2152558540
24
+ version_requirements: *2157608660
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2152557720 !ruby/object:Gem::Requirement
27
+ requirement: &2157607760 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2152557720
35
+ version_requirements: *2157607760
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2152556760 !ruby/object:Gem::Requirement
38
+ requirement: &2157607080 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2152556760
46
+ version_requirements: *2157607080
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &2152555800 !ruby/object:Gem::Requirement
49
+ requirement: &2157606260 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.9.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2152555800
57
+ version_requirements: *2157606260
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
- requirement: &2152554060 !ruby/object:Gem::Requirement
60
+ requirement: &2157605600 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2152554060
68
+ version_requirements: *2157605600
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby_gntp
71
- requirement: &2152550400 !ruby/object:Gem::Requirement
71
+ requirement: &2157603980 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.3.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2152550400
79
+ version_requirements: *2157603980
80
80
  description: Ingest updates from CSV and apply to set of hashes
81
81
  email:
82
82
  - jmay@pobox.com