assimilate 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ class Assimilate::Batch
13
13
  load_baseline
14
14
 
15
15
  @noops = []
16
- @changes = []
16
+ @changes = {}
17
17
  @adds = []
18
18
  @deletes = []
19
19
  @resolved = false
@@ -45,22 +45,26 @@ class Assimilate::Batch
45
45
  if current_record == hash
46
46
  @noops << hash
47
47
  else
48
- @changes << hash
48
+ @changes[key] = deltas(current_record, hash)
49
49
  end
50
50
  else
51
51
  @adds << hash
52
52
  end
53
53
  end
54
54
 
55
+ def deltas(h1,h2)
56
+ (h1.keys | h2.keys).each_with_object({}) {|k,h| h[k] = h2[k] if h1[k] != h2[k]}
57
+ end
58
+
55
59
  # compute anything needed before we can write updates to permanent store
56
60
  # * find records that have been deleted
57
61
  def resolve
58
62
  if !@resolved
59
63
  @deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
60
64
 
61
- @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |rec,h|
62
- key = rec[idfield]
63
- diffs = rec.diff(stripped_record_for(key))
65
+ @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
66
+ # key = rec[idfield]
67
+ # diffs = deltas(stripped_record_for(key), rec)
64
68
  diffs.keys.each do |f|
65
69
  h[f] += 1
66
70
  end
@@ -79,8 +83,8 @@ class Assimilate::Batch
79
83
  :new_ids => @adds.map {|rec| rec[idfield]},
80
84
  :deletes_count => @deleted_keys.count,
81
85
  :deleted_ids => @deleted_keys,
82
- :updates_count => @changes.count,
83
- :updated_ids => @changes.map {|rec| rec[idfield]},
86
+ :updates_count => @changes.size,
87
+ :updated_ids => @changes.keys,
84
88
  :unchanged_count => @noops.count,
85
89
  :updated_fields => @updated_field_counts
86
90
  }
@@ -128,13 +132,13 @@ class Assimilate::Batch
128
132
  end
129
133
 
130
134
  def apply_updates
131
- @changes.each do |rec|
135
+ @changes.each do |key, diffs|
132
136
  @catalog.catalog.update(
133
137
  {
134
138
  @domainkey => domain,
135
- idfield => rec[idfield]
139
+ idfield => key
136
140
  },
137
- {"$set" => rec}
141
+ {"$set" => diffs}
138
142
  )
139
143
  end
140
144
  end
@@ -51,7 +51,8 @@ class Assimilate::Catalog
51
51
  end
52
52
 
53
53
  def where(params)
54
- @catalog.find(params).first.select {|k,v| k !~ /^_/}
54
+ record = @catalog.find(params).first
55
+ record && record.select {|k,v| k !~ /^_/}
55
56
  end
56
57
 
57
58
  def active_count
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/assimilate.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require "mongo"
2
- require "active_support/core_ext" # needed for Hash#diff
2
+ require "active_support/core_ext" # needed for Hash#symbolize_keys!
3
3
  require "csv"
4
4
 
5
5
  require_relative "assimilate/version"
@@ -1,7 +1,7 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Sage
5
- 4,Aaron Burr,Duelist
6
- 5,Alexander Hamilton,Financier
7
- 6,James Madison,Theorist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Sage,Deborah
5
+ 4,Aaron Burr,Duelist,Theodosia
6
+ 5,Alexander Hamilton,Financier,Elizabeth
7
+ 6,James Madison,Theorist,Dolly
@@ -1,7 +1,7 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Sage
5
- 4,Aaron Burr,Duelist
6
- 5,Alexander Hamilton,Financier
7
- 6,James Madison,Theorist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Sage,Deborah
5
+ 4,Aaron Burr,Duelist,Theodosia
6
+ 5,Alexander Hamilton,Financier,Elizabeth
7
+ 6,James Madison,Theorist,Dolly
@@ -1,6 +1,6 @@
1
- ID,name,title
2
- 1,George Washington,President
3
- 2,John Adams,Vice President
4
- 3,Benjamin Franklin,Ambassador
5
- 5,Alexander Hamilton,Financier
6
- 7,Thomas Jefferson,Anti-Federalist
1
+ ID,name,title,spouse
2
+ 1,George Washington,President,Martha
3
+ 2,John Adams,Vice President,Abigail
4
+ 3,Benjamin Franklin,Ambassador,
5
+ 5,Alexander Hamilton,Financier,Elizabeth
6
+ 7,Thomas Jefferson,Anti-Federalist,
@@ -45,15 +45,16 @@ describe "importing file" do
45
45
 
46
46
  it "should load the records verbatim" do
47
47
  @catalog.catalog.count.should == 6
48
- @catalog.where('_resource' => 'testdata', 'ID' => '3').should == {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage'}
48
+ @catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
49
+ {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage', 'spouse' => 'Deborah'}
49
50
  end
50
51
 
51
52
  it "should refuse to do a duplicate import" do
52
- lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError)
53
+ lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for datestamp 123")
53
54
  end
54
55
 
55
56
  it "should refuse to re-import same file" do
56
- lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError)
57
+ lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for file batch_input.csv")
57
58
  end
58
59
 
59
60
  it "should do all no-ops when importing identical data" do
@@ -78,9 +79,7 @@ describe "importing file" do
78
79
  before(:all) do
79
80
  reset_catalog
80
81
  import_data("123")
81
- end
82
82
 
83
- before(:each) do
84
83
  import_data("345", "updates.csv")
85
84
  end
86
85
 
@@ -95,9 +94,14 @@ describe "importing file" do
95
94
  :updates_count => 1,
96
95
  :updated_ids => ['3'],
97
96
  :unchanged_count => 3,
98
- :updated_fields => {'title' => 1}
97
+ :updated_fields => {'title' => 1, 'spouse' => 1}
99
98
  }
100
99
  @catalog.active_count.should == 5
101
100
  end
101
+
102
+ it "should handle deleted attributes" do
103
+ franklin = @catalog.where('ID' => '3')
104
+ franklin['spouse'].should be_nil
105
+ end
102
106
  end
103
107
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-02 00:00:00.000000000 Z
12
+ date: 2012-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2152558540 !ruby/object:Gem::Requirement
16
+ requirement: &2157608660 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2152558540
24
+ version_requirements: *2157608660
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2152557720 !ruby/object:Gem::Requirement
27
+ requirement: &2157607760 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2152557720
35
+ version_requirements: *2157607760
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2152556760 !ruby/object:Gem::Requirement
38
+ requirement: &2157607080 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2152556760
46
+ version_requirements: *2157607080
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &2152555800 !ruby/object:Gem::Requirement
49
+ requirement: &2157606260 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.9.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2152555800
57
+ version_requirements: *2157606260
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
- requirement: &2152554060 !ruby/object:Gem::Requirement
60
+ requirement: &2157605600 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2152554060
68
+ version_requirements: *2157605600
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby_gntp
71
- requirement: &2152550400 !ruby/object:Gem::Requirement
71
+ requirement: &2157603980 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.3.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2152550400
79
+ version_requirements: *2157603980
80
80
  description: Ingest updates from CSV and apply to set of hashes
81
81
  email:
82
82
  - jmay@pobox.com