assimilate 0.2.3 → 0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -5,4 +5,4 @@ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- # task :default => :spec
8
+ task :default => :spec
data/assimilate.gemspec CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |gem|
20
20
  gem.add_dependency "bson_ext", "~> 1.6.0"
21
21
  gem.add_dependency 'activesupport', "~> 3.2.0"
22
22
 
23
+ gem.add_development_dependency "rake", "~> 0.9.2"
23
24
  gem.add_development_dependency "rspec", "~> 2.9.0"
24
25
  gem.add_development_dependency "guard-rspec", "~> 0.7.0"
25
26
  gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
@@ -43,11 +43,14 @@ class Assimilate::Extender
43
43
 
44
44
  # if there is a field to compare on (i.e. a timestamp), then apply the update if the timestamp is newer;
45
45
  # otherwise (no timestamp) compare the hashes and apply if there are any differences.
46
- def apply_this_update(current_record, new_data)
47
- if @comparison_field && current_record[@keyfield]
48
- is_newer(current_record[@keyfield], new_data)
46
+ def apply_this_update?(current_record, new_data)
47
+ if @comparison_field && current_record[keyfield]
48
+ is_newer(current_record[keyfield], new_data)
49
+ elsif keyfield
50
+ current_record[keyfield] != new_data
49
51
  else
50
- current_record[@keyfield] != new_data
52
+ # top-level extension - compare all the attributes to be added
53
+ new_data.select {|k,v| current_record[k] != v}.any?
51
54
  end
52
55
  end
53
56
 
@@ -60,7 +63,7 @@ class Assimilate::Extender
60
63
  # @seen[key] = data
61
64
  current_record = @baseline[key]
62
65
  if current_record
63
- if apply_this_update(current_record, data)
66
+ if apply_this_update?(current_record, data)
64
67
  @changes << key
65
68
  @seen[key] = data
66
69
  else
@@ -98,11 +101,21 @@ class Assimilate::Extender
98
101
  def apply_inserts
99
102
  @adds.each do |key|
100
103
  data = @seen[key]
101
- @catalog.catalog.insert(
102
- @domainkey => domain,
103
- idfield => key,
104
- keyfield => data
105
- )
104
+ if keyfield
105
+ @catalog.catalog.insert(
106
+ @domainkey => domain,
107
+ idfield => key,
108
+ keyfield => data
109
+ )
110
+ else
111
+ # top-level extension
112
+ @catalog.catalog.insert(
113
+ data.merge(
114
+ @domainkey => domain,
115
+ idfield => key
116
+ )
117
+ )
118
+ end
106
119
  end
107
120
  end
108
121
 
@@ -110,16 +123,27 @@ class Assimilate::Extender
110
123
  def apply_updates
111
124
  @changes.each do |key|
112
125
  data = @seen[key]
113
- @catalog.catalog.update(
114
- {
115
- @domainkey => domain,
116
- idfield => key
117
- },
118
- {"$set" => {
119
- keyfield => data
126
+ if keyfield
127
+ @catalog.catalog.update(
128
+ {
129
+ @domainkey => domain,
130
+ idfield => key
131
+ },
132
+ {"$set" => {
133
+ keyfield => data
134
+ }
120
135
  }
121
- }
122
- )
136
+ )
137
+ else
138
+ # top-level extension
139
+ @catalog.catalog.update(
140
+ {
141
+ @domainkey => domain,
142
+ idfield => key
143
+ },
144
+ {"$set" => data }
145
+ )
146
+ end
123
147
  end
124
148
  end
125
149
 
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3"
3
3
  end
@@ -0,0 +1,5 @@
1
+ ID,birthdate
2
+ 1,1732/02/22
3
+ 2,1735/10/30
4
+ 6,1751/03/16
5
+ 999,1802/03/04
@@ -30,6 +30,14 @@ describe "loading extended data" do
30
30
  @extender.commit
31
31
  end
32
32
 
33
+ def import_toplevel_extended_data(datestamp, filename, opts = {})
34
+ @extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID'))
35
+ Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
36
+ @extender << rec
37
+ end
38
+ @extender.commit
39
+ end
40
+
33
41
  describe "into matching catalog entries" do
34
42
  before(:all) do
35
43
  reset_catalog
@@ -70,6 +78,46 @@ describe "loading extended data" do
70
78
  end
71
79
  end
72
80
 
81
+ describe "at top level of catalog entries" do
82
+ before(:all) do
83
+ reset_catalog
84
+ import_base_data("123")
85
+ end
86
+
87
+ before(:each) do
88
+ import_toplevel_extended_data("991", "birthdates.csv")
89
+ end
90
+
91
+ it "should capture changes" do
92
+ @extender.stats.should == {
93
+ :baseline_count => 6,
94
+ :final_count => 7,
95
+ :distinct_ids => 4,
96
+ :adds_count => 1,
97
+ :new_ids => ['999'],
98
+ :updates_count => 3,
99
+ :updated_fields => {'birthdate' => 4},
100
+ :unchanged_count => 0
101
+ }
102
+ end
103
+
104
+ it "should do no-ops on duplicate load" do
105
+ # import_extended_data("1002", "dates")
106
+ lambda {import_toplevel_extended_data("992", "birthdates.csv")}.should_not raise_error
107
+
108
+ @extender.stats.should == {
109
+ :baseline_count => 7,
110
+ :final_count => 7,
111
+ :distinct_ids => 4,
112
+ :adds_count => 0,
113
+ :new_ids => [],
114
+ :updates_count => 0,
115
+ :updated_fields => {},
116
+ :unchanged_count => 4
117
+ }
118
+ end
119
+ end
120
+
73
121
  describe "updating log entries" do
74
122
  before(:all) do
75
123
  reset_catalog
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: '0.3'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-31 00:00:00.000000000 Z
12
+ date: 2012-06-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2154717840 !ruby/object:Gem::Requirement
16
+ requirement: &2161063380 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2154717840
24
+ version_requirements: *2161063380
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2154717340 !ruby/object:Gem::Requirement
27
+ requirement: &2161062840 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2154717340
35
+ version_requirements: *2161062840
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2154716880 !ruby/object:Gem::Requirement
38
+ requirement: &2161062360 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,21 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2154716880
46
+ version_requirements: *2161062360
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: &2161061860 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2161061860
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: rspec
49
- requirement: &2154716420 !ruby/object:Gem::Requirement
60
+ requirement: &2161061380 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ~>
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: 2.9.0
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *2154716420
68
+ version_requirements: *2161061380
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: guard-rspec
60
- requirement: &2152593120 !ruby/object:Gem::Requirement
71
+ requirement: &2161060880 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 0.7.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *2152593120
79
+ version_requirements: *2161060880
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: ruby_gntp
71
- requirement: &2152587980 !ruby/object:Gem::Requirement
82
+ requirement: &2161060380 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,7 +87,7 @@ dependencies:
76
87
  version: 0.3.4
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *2152587980
90
+ version_requirements: *2161060380
80
91
  description: Ingest updates from CSV and apply to set of hashes
81
92
  email:
82
93
  - jmay@pobox.com
@@ -100,6 +111,7 @@ files:
100
111
  - lib/assimilate/extender.rb
101
112
  - lib/assimilate/version.rb
102
113
  - spec/data/batch_input.csv
114
+ - spec/data/birthdates.csv
103
115
  - spec/data/dates.csv
104
116
  - spec/data/duplicate_input.csv
105
117
  - spec/data/logs1.csv
@@ -135,6 +147,7 @@ specification_version: 3
135
147
  summary: Review & incorporate changes to a repository of persistent hashes in mongodb.
136
148
  test_files:
137
149
  - spec/data/batch_input.csv
150
+ - spec/data/birthdates.csv
138
151
  - spec/data/dates.csv
139
152
  - spec/data/duplicate_input.csv
140
153
  - spec/data/logs1.csv