assimilate 0.1.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/assimilate/extender.rb +19 -4
- data/lib/assimilate/version.rb +1 -1
- data/spec/data/logs1.csv +5 -0
- data/spec/data/logs2.csv +5 -0
- data/spec/lib/extend_spec.rb +89 -8
- metadata +18 -14
data/lib/assimilate/extender.rb
CHANGED
@@ -9,6 +9,7 @@ class Assimilate::Extender
|
|
9
9
|
@idfield = args[:idfield]
|
10
10
|
@filename = args[:filename]
|
11
11
|
@keyfield = args[:key]
|
12
|
+
@comparison_field = args[:compare]
|
12
13
|
|
13
14
|
load_baseline
|
14
15
|
|
@@ -29,6 +30,20 @@ class Assimilate::Extender
|
|
29
30
|
end
|
30
31
|
end
|
31
32
|
|
33
|
+
def is_newer(current_data, new_data)
|
34
|
+
new_data[@comparison_field].to_i > current_data[@comparison_field].to_i
|
35
|
+
end
|
36
|
+
|
37
|
+
# if there is a field to compare on (i.e. a timestamp), then apply the update if the timestamp is newer;
|
38
|
+
# otherwise (no timestamp) compare the hashes and apply if there are any differences.
|
39
|
+
def apply_this_update(current_record, new_data)
|
40
|
+
if @comparison_field
|
41
|
+
is_newer(current_record[@keyfield], new_data)
|
42
|
+
else
|
43
|
+
current_record[@keyfield] != new_data
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
32
47
|
def <<(record)
|
33
48
|
@seen ||= Hash.new(0)
|
34
49
|
|
@@ -38,12 +53,12 @@ class Assimilate::Extender
|
|
38
53
|
# @seen[key] = data
|
39
54
|
current_record = @baseline[key]
|
40
55
|
if current_record
|
41
|
-
if current_record
|
42
|
-
@noops << key
|
43
|
-
@seen[key] = {}
|
44
|
-
else
|
56
|
+
if apply_this_update(current_record, data)
|
45
57
|
@changes << key
|
46
58
|
@seen[key] = data
|
59
|
+
else
|
60
|
+
@noops << key
|
61
|
+
@seen[key] = {}
|
47
62
|
end
|
48
63
|
else
|
49
64
|
@adds << key
|
data/lib/assimilate/version.rb
CHANGED
data/spec/data/logs1.csv
ADDED
data/spec/data/logs2.csv
ADDED
data/spec/lib/extend_spec.rb
CHANGED
@@ -22,20 +22,20 @@ describe "loading extended data" do
|
|
22
22
|
@batcher.commit
|
23
23
|
end
|
24
24
|
|
25
|
+
def import_extended_data(datestamp, filename, opts = {})
|
26
|
+
@extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID', key: 'inauguration'))
|
27
|
+
Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
|
28
|
+
@extender << rec
|
29
|
+
end
|
30
|
+
@extender.commit
|
31
|
+
end
|
32
|
+
|
25
33
|
describe "into matching catalog entries" do
|
26
34
|
before(:all) do
|
27
35
|
reset_catalog
|
28
36
|
import_base_data("123")
|
29
37
|
end
|
30
38
|
|
31
|
-
def import_extended_data(datestamp, filename)
|
32
|
-
@extender = @catalog.extend_data(domain: 'testdata', datastamp: datestamp, idfield: 'ID', key: 'inauguration')
|
33
|
-
Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
|
34
|
-
@extender << rec
|
35
|
-
end
|
36
|
-
@extender.commit
|
37
|
-
end
|
38
|
-
|
39
39
|
before(:each) do
|
40
40
|
import_extended_data("1001", "dates.csv")
|
41
41
|
end
|
@@ -70,6 +70,87 @@ describe "loading extended data" do
|
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
73
|
+
describe "updating log entries" do
|
74
|
+
before(:all) do
|
75
|
+
reset_catalog
|
76
|
+
import_base_data("20120501")
|
77
|
+
import_extended_data("20120505", "logs1.csv")
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
before(:each) do
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should capture changes" do
|
85
|
+
@extender.stats.should == {
|
86
|
+
:baseline_count => 6,
|
87
|
+
:final_count => 6,
|
88
|
+
:distinct_ids => 4,
|
89
|
+
:adds_count => 0,
|
90
|
+
:new_ids => [],
|
91
|
+
:updates_count => 4,
|
92
|
+
:updated_fields => {'timestamp' => 4, 'event' => 4},
|
93
|
+
:unchanged_count => 0
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should load the new events" do
|
98
|
+
lambda {import_extended_data("20120506", "logs2.csv", :compare => 'timestamp')}.should_not raise_error
|
99
|
+
|
100
|
+
@extender.stats.should == {
|
101
|
+
:baseline_count => 6,
|
102
|
+
:final_count => 6,
|
103
|
+
:distinct_ids => 4,
|
104
|
+
:adds_count => 0,
|
105
|
+
:new_ids => [],
|
106
|
+
:updates_count => 4,
|
107
|
+
:updated_fields => {'timestamp' => 4, 'event' => 4},
|
108
|
+
:unchanged_count => 0
|
109
|
+
}
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe "updating log entries in reverse order" do
|
114
|
+
before(:all) do
|
115
|
+
reset_catalog
|
116
|
+
import_base_data("20120501")
|
117
|
+
import_extended_data("20120505", "logs2.csv")
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
before(:each) do
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should capture changes" do
|
125
|
+
@extender.stats.should == {
|
126
|
+
:baseline_count => 6,
|
127
|
+
:final_count => 6,
|
128
|
+
:distinct_ids => 4,
|
129
|
+
:adds_count => 0,
|
130
|
+
:new_ids => [],
|
131
|
+
:updates_count => 4,
|
132
|
+
:updated_fields => {'timestamp' => 4, 'event' => 4},
|
133
|
+
:unchanged_count => 0
|
134
|
+
}
|
135
|
+
end
|
136
|
+
|
137
|
+
it "should load the new events" do
|
138
|
+
lambda {import_extended_data("20120506", "logs1.csv", :compare => 'timestamp')}.should_not raise_error
|
139
|
+
|
140
|
+
@extender.stats.should == {
|
141
|
+
:baseline_count => 6,
|
142
|
+
:final_count => 6,
|
143
|
+
:distinct_ids => 4,
|
144
|
+
:adds_count => 0,
|
145
|
+
:new_ids => [],
|
146
|
+
:updates_count => 0,
|
147
|
+
:updated_fields => {},
|
148
|
+
:unchanged_count => 4
|
149
|
+
}
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
|
73
154
|
# test handling of multiple records for same ID in the extended-data file
|
74
155
|
# test importing data at top level (no keyfield for sub-attributes)
|
75
156
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assimilate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: '0.2'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mongo
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157154580 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.6.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157154580
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bson_ext
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157154080 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.6.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157154080
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: activesupport
|
38
|
-
requirement: &
|
38
|
+
requirement: &2156541740 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 3.2.0
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2156541740
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rspec
|
49
|
-
requirement: &
|
49
|
+
requirement: &2156539660 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 2.9.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2156539660
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: guard-rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &2156536900 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 0.7.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2156536900
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: ruby_gntp
|
71
|
-
requirement: &
|
71
|
+
requirement: &2156535700 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: 0.3.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2156535700
|
80
80
|
description: Ingest updates from CSV and apply to set of hashes
|
81
81
|
email:
|
82
82
|
- jmay@pobox.com
|
@@ -102,6 +102,8 @@ files:
|
|
102
102
|
- spec/data/batch_input.csv
|
103
103
|
- spec/data/dates.csv
|
104
104
|
- spec/data/duplicate_input.csv
|
105
|
+
- spec/data/logs1.csv
|
106
|
+
- spec/data/logs2.csv
|
105
107
|
- spec/data/test.yml
|
106
108
|
- spec/data/updates.csv
|
107
109
|
- spec/lib/batch_spec.rb
|
@@ -135,6 +137,8 @@ test_files:
|
|
135
137
|
- spec/data/batch_input.csv
|
136
138
|
- spec/data/dates.csv
|
137
139
|
- spec/data/duplicate_input.csv
|
140
|
+
- spec/data/logs1.csv
|
141
|
+
- spec/data/logs2.csv
|
138
142
|
- spec/data/test.yml
|
139
143
|
- spec/data/updates.csv
|
140
144
|
- spec/lib/batch_spec.rb
|