assimilate 0.1.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,7 @@ class Assimilate::Extender
9
9
  @idfield = args[:idfield]
10
10
  @filename = args[:filename]
11
11
  @keyfield = args[:key]
12
+ @comparison_field = args[:compare]
12
13
 
13
14
  load_baseline
14
15
 
@@ -29,6 +30,20 @@ class Assimilate::Extender
29
30
  end
30
31
  end
31
32
 
33
+ def is_newer(current_data, new_data) # true only when new_data's @comparison_field value is strictly greater than current_data's
34
+ new_data[@comparison_field].to_i > current_data[@comparison_field].to_i # both values are coerced with to_i before comparing; assumes both arguments respond to [] -- TODO confirm
35
+ end
36
+
37
+ # if there is a field to compare on (e.g. a timestamp), then apply the update only if the new value is strictly newer;
38
+ # otherwise (no comparison field) compare the hashes and apply if there are any differences.
39
+ def apply_this_update(current_record, new_data) # returns truthy when new_data should replace what is stored under @keyfield
40
+ if @comparison_field
41
+ is_newer(current_record[@keyfield], new_data) # NOTE(review): current_record[@keyfield] may be nil if nothing is stored under the key yet -- confirm is_newer tolerates that
42
+ else
43
+ current_record[@keyfield] != new_data
44
+ end
45
+ end
46
+
32
47
  def <<(record)
33
48
  @seen ||= Hash.new(0)
34
49
 
@@ -38,12 +53,12 @@ class Assimilate::Extender
38
53
  # @seen[key] = data
39
54
  current_record = @baseline[key]
40
55
  if current_record
41
- if current_record[@keyfield] == data
42
- @noops << key
43
- @seen[key] = {}
44
- else
56
+ if apply_this_update(current_record, data)
45
57
  @changes << key
46
58
  @seen[key] = data
59
+ else
60
+ @noops << key
61
+ @seen[key] = {}
47
62
  end
48
63
  else
49
64
  @adds << key
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2"
3
3
  end
@@ -0,0 +1,5 @@
1
+ timestamp,ID,event
2
+ 1201010101,1,something happened
3
+ 1201010304,2,important event
4
+ 1201010205,6,no comment
5
+ 1201000123,5,no comment
@@ -0,0 +1,5 @@
1
+ timestamp,ID,event
2
+ 1201020101,1,something else happened
3
+ 1201020304,2,another important event
4
+ 1201020205,6,a comment
5
+ 1201020123,5,no comment
@@ -22,20 +22,20 @@ describe "loading extended data" do
22
22
  @batcher.commit
23
23
  end
24
24
 
25
+ def import_extended_data(datestamp, filename, opts = {}) # spec helper: feeds every record of a data file into a fresh extender, then commits
26
+ @extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID', key: 'inauguration')) # the fixed options are the merge argument, so they override same-named keys in opts
27
+ Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
28
+ @extender << rec
29
+ end
30
+ @extender.commit
31
+ end
32
+
25
33
  describe "into matching catalog entries" do
26
34
  before(:all) do
27
35
  reset_catalog
28
36
  import_base_data("123")
29
37
  end
30
38
 
31
- def import_extended_data(datestamp, filename)
32
- @extender = @catalog.extend_data(domain: 'testdata', datastamp: datestamp, idfield: 'ID', key: 'inauguration')
33
- Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
34
- @extender << rec
35
- end
36
- @extender.commit
37
- end
38
-
39
39
  before(:each) do
40
40
  import_extended_data("1001", "dates.csv")
41
41
  end
@@ -70,6 +70,87 @@ describe "loading extended data" do
70
70
  end
71
71
  end
72
72
 
73
+ describe "updating log entries" do # imports logs1.csv without :compare, then logs2.csv with :compare => 'timestamp'
74
+ before(:all) do
75
+ reset_catalog
76
+ import_base_data("20120501")
77
+ import_extended_data("20120505", "logs1.csv") # first load: no comparison field given
78
+ end
79
+
80
+
81
+ before(:each) do # no per-example setup
82
+ end
83
+
84
+ it "should capture changes" do
85
+ @extender.stats.should == {
86
+ :baseline_count => 6,
87
+ :final_count => 6,
88
+ :distinct_ids => 4,
89
+ :adds_count => 0,
90
+ :new_ids => [],
91
+ :updates_count => 4,
92
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
93
+ :unchanged_count => 0
94
+ }
95
+ end
96
+
97
+ it "should load the new events" do
98
+ lambda {import_extended_data("20120506", "logs2.csv", :compare => 'timestamp')}.should_not raise_error # second load decides updates by comparing 'timestamp'
99
+
100
+ @extender.stats.should == {
101
+ :baseline_count => 6,
102
+ :final_count => 6,
103
+ :distinct_ids => 4,
104
+ :adds_count => 0,
105
+ :new_ids => [],
106
+ :updates_count => 4, # all four records are expected to be treated as updates
107
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
108
+ :unchanged_count => 0
109
+ }
110
+ end
111
+ end
112
+
113
+ describe "updating log entries in reverse order" do # imports logs2.csv first, then logs1.csv with :compare => 'timestamp'
114
+ before(:all) do
115
+ reset_catalog
116
+ import_base_data("20120501")
117
+ import_extended_data("20120505", "logs2.csv") # first load: no comparison field given
118
+ end
119
+
120
+
121
+ before(:each) do # no per-example setup
122
+ end
123
+
124
+ it "should capture changes" do
125
+ @extender.stats.should == {
126
+ :baseline_count => 6,
127
+ :final_count => 6,
128
+ :distinct_ids => 4,
129
+ :adds_count => 0,
130
+ :new_ids => [],
131
+ :updates_count => 4,
132
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
133
+ :unchanged_count => 0
134
+ }
135
+ end
136
+
137
+ it "should load the new events" do # NOTE(review): despite the name, the expectation below is that nothing is updated
138
+ lambda {import_extended_data("20120506", "logs1.csv", :compare => 'timestamp')}.should_not raise_error # presumably logs1.csv carries the older timestamps -- confirm against spec/data
139
+
140
+ @extender.stats.should == {
141
+ :baseline_count => 6,
142
+ :final_count => 6,
143
+ :distinct_ids => 4,
144
+ :adds_count => 0,
145
+ :new_ids => [],
146
+ :updates_count => 0, # no record is expected to be updated
147
+ :updated_fields => {},
148
+ :unchanged_count => 4 # all four records are expected to be counted as unchanged
149
+ }
150
+ end
151
+ end
152
+
153
+
73
154
  # test handling of multiple records for same ID in the extended-data file
74
155
  # test importing data at top level (no keyfield for sub-attributes)
75
156
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: '0.2'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-04 00:00:00.000000000 Z
12
+ date: 2012-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2155898600 !ruby/object:Gem::Requirement
16
+ requirement: &2157154580 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2155898600
24
+ version_requirements: *2157154580
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2155897820 !ruby/object:Gem::Requirement
27
+ requirement: &2157154080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2155897820
35
+ version_requirements: *2157154080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2155897040 !ruby/object:Gem::Requirement
38
+ requirement: &2156541740 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2155897040
46
+ version_requirements: *2156541740
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &2155896200 !ruby/object:Gem::Requirement
49
+ requirement: &2156539660 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.9.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155896200
57
+ version_requirements: *2156539660
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
- requirement: &2155895320 !ruby/object:Gem::Requirement
60
+ requirement: &2156536900 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155895320
68
+ version_requirements: *2156536900
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby_gntp
71
- requirement: &2155894100 !ruby/object:Gem::Requirement
71
+ requirement: &2156535700 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.3.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2155894100
79
+ version_requirements: *2156535700
80
80
  description: Ingest updates from CSV and apply to set of hashes
81
81
  email:
82
82
  - jmay@pobox.com
@@ -102,6 +102,8 @@ files:
102
102
  - spec/data/batch_input.csv
103
103
  - spec/data/dates.csv
104
104
  - spec/data/duplicate_input.csv
105
+ - spec/data/logs1.csv
106
+ - spec/data/logs2.csv
105
107
  - spec/data/test.yml
106
108
  - spec/data/updates.csv
107
109
  - spec/lib/batch_spec.rb
@@ -135,6 +137,8 @@ test_files:
135
137
  - spec/data/batch_input.csv
136
138
  - spec/data/dates.csv
137
139
  - spec/data/duplicate_input.csv
140
+ - spec/data/logs1.csv
141
+ - spec/data/logs2.csv
138
142
  - spec/data/test.yml
139
143
  - spec/data/updates.csv
140
144
  - spec/lib/batch_spec.rb