assimilate 0.1.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ class Assimilate::Extender
9
9
  @idfield = args[:idfield]
10
10
  @filename = args[:filename]
11
11
  @keyfield = args[:key]
12
+ @comparison_field = args[:compare]
12
13
 
13
14
  load_baseline
14
15
 
@@ -29,6 +30,20 @@ class Assimilate::Extender
29
30
  end
30
31
  end
31
32
 
33
+ def is_newer(current_data, new_data)
34
+ new_data[@comparison_field].to_i > current_data[@comparison_field].to_i
35
+ end
36
+
37
+ # if there is a field to compare on (i.e. a timestamp), then apply the update if the timestamp is newer;
38
+ # otherwise (no timestamp) compare the hashes and apply if there are any differences.
39
+ def apply_this_update(current_record, new_data)
40
+ if @comparison_field
41
+ is_newer(current_record[@keyfield], new_data)
42
+ else
43
+ current_record[@keyfield] != new_data
44
+ end
45
+ end
46
+
32
47
  def <<(record)
33
48
  @seen ||= Hash.new(0)
34
49
 
@@ -38,12 +53,12 @@ class Assimilate::Extender
38
53
  # @seen[key] = data
39
54
  current_record = @baseline[key]
40
55
  if current_record
41
- if current_record[@keyfield] == data
42
- @noops << key
43
- @seen[key] = {}
44
- else
56
+ if apply_this_update(current_record, data)
45
57
  @changes << key
46
58
  @seen[key] = data
59
+ else
60
+ @noops << key
61
+ @seen[key] = {}
47
62
  end
48
63
  else
49
64
  @adds << key
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2"
3
3
  end
@@ -0,0 +1,5 @@
1
+ timestamp,ID,event
2
+ 1201010101,1,something happened
3
+ 1201010304,2,important event
4
+ 1201010205,6,no comment
5
+ 1201000123,5,no comment
@@ -0,0 +1,5 @@
1
+ timestamp,ID,event
2
+ 1201020101,1,something else happened
3
+ 1201020304,2,another important event
4
+ 1201020205,6,a comment
5
+ 1201020123,5,no comment
@@ -22,20 +22,20 @@ describe "loading extended data" do
22
22
  @batcher.commit
23
23
  end
24
24
 
25
+ def import_extended_data(datestamp, filename, opts = {})
26
+ @extender = @catalog.extend_data(opts.merge(domain: 'testdata', datestamp: datestamp, idfield: 'ID', key: 'inauguration'))
27
+ Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
28
+ @extender << rec
29
+ end
30
+ @extender.commit
31
+ end
32
+
25
33
  describe "into matching catalog entries" do
26
34
  before(:all) do
27
35
  reset_catalog
28
36
  import_base_data("123")
29
37
  end
30
38
 
31
- def import_extended_data(datestamp, filename)
32
- @extender = @catalog.extend_data(domain: 'testdata', datastamp: datestamp, idfield: 'ID', key: 'inauguration')
33
- Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
34
- @extender << rec
35
- end
36
- @extender.commit
37
- end
38
-
39
39
  before(:each) do
40
40
  import_extended_data("1001", "dates.csv")
41
41
  end
@@ -70,6 +70,87 @@ describe "loading extended data" do
70
70
  end
71
71
  end
72
72
 
73
+ describe "updating log entries" do
74
+ before(:all) do
75
+ reset_catalog
76
+ import_base_data("20120501")
77
+ import_extended_data("20120505", "logs1.csv")
78
+ end
79
+
80
+
81
+ before(:each) do
82
+ end
83
+
84
+ it "should capture changes" do
85
+ @extender.stats.should == {
86
+ :baseline_count => 6,
87
+ :final_count => 6,
88
+ :distinct_ids => 4,
89
+ :adds_count => 0,
90
+ :new_ids => [],
91
+ :updates_count => 4,
92
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
93
+ :unchanged_count => 0
94
+ }
95
+ end
96
+
97
+ it "should load the new events" do
98
+ lambda {import_extended_data("20120506", "logs2.csv", :compare => 'timestamp')}.should_not raise_error
99
+
100
+ @extender.stats.should == {
101
+ :baseline_count => 6,
102
+ :final_count => 6,
103
+ :distinct_ids => 4,
104
+ :adds_count => 0,
105
+ :new_ids => [],
106
+ :updates_count => 4,
107
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
108
+ :unchanged_count => 0
109
+ }
110
+ end
111
+ end
112
+
113
+ describe "updating log entries in reverse order" do
114
+ before(:all) do
115
+ reset_catalog
116
+ import_base_data("20120501")
117
+ import_extended_data("20120505", "logs2.csv")
118
+ end
119
+
120
+
121
+ before(:each) do
122
+ end
123
+
124
+ it "should capture changes" do
125
+ @extender.stats.should == {
126
+ :baseline_count => 6,
127
+ :final_count => 6,
128
+ :distinct_ids => 4,
129
+ :adds_count => 0,
130
+ :new_ids => [],
131
+ :updates_count => 4,
132
+ :updated_fields => {'timestamp' => 4, 'event' => 4},
133
+ :unchanged_count => 0
134
+ }
135
+ end
136
+
137
+ it "should load the new events" do
138
+ lambda {import_extended_data("20120506", "logs1.csv", :compare => 'timestamp')}.should_not raise_error
139
+
140
+ @extender.stats.should == {
141
+ :baseline_count => 6,
142
+ :final_count => 6,
143
+ :distinct_ids => 4,
144
+ :adds_count => 0,
145
+ :new_ids => [],
146
+ :updates_count => 0,
147
+ :updated_fields => {},
148
+ :unchanged_count => 4
149
+ }
150
+ end
151
+ end
152
+
153
+
73
154
  # test handling of multiple records for same ID in the extended-data file
74
155
  # test importing data at top level (no keyfield for sub-attributes)
75
156
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: '0.2'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-04 00:00:00.000000000 Z
12
+ date: 2012-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2155898600 !ruby/object:Gem::Requirement
16
+ requirement: &2157154580 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2155898600
24
+ version_requirements: *2157154580
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2155897820 !ruby/object:Gem::Requirement
27
+ requirement: &2157154080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2155897820
35
+ version_requirements: *2157154080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2155897040 !ruby/object:Gem::Requirement
38
+ requirement: &2156541740 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2155897040
46
+ version_requirements: *2156541740
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &2155896200 !ruby/object:Gem::Requirement
49
+ requirement: &2156539660 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.9.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155896200
57
+ version_requirements: *2156539660
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
- requirement: &2155895320 !ruby/object:Gem::Requirement
60
+ requirement: &2156536900 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155895320
68
+ version_requirements: *2156536900
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby_gntp
71
- requirement: &2155894100 !ruby/object:Gem::Requirement
71
+ requirement: &2156535700 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.3.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2155894100
79
+ version_requirements: *2156535700
80
80
  description: Ingest updates from CSV and apply to set of hashes
81
81
  email:
82
82
  - jmay@pobox.com
@@ -102,6 +102,8 @@ files:
102
102
  - spec/data/batch_input.csv
103
103
  - spec/data/dates.csv
104
104
  - spec/data/duplicate_input.csv
105
+ - spec/data/logs1.csv
106
+ - spec/data/logs2.csv
105
107
  - spec/data/test.yml
106
108
  - spec/data/updates.csv
107
109
  - spec/lib/batch_spec.rb
@@ -135,6 +137,8 @@ test_files:
135
137
  - spec/data/batch_input.csv
136
138
  - spec/data/dates.csv
137
139
  - spec/data/duplicate_input.csv
140
+ - spec/data/logs1.csv
141
+ - spec/data/logs2.csv
138
142
  - spec/data/test.yml
139
143
  - spec/data/updates.csv
140
144
  - spec/lib/batch_spec.rb