assimilate 0.0.3 → 0.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -80,6 +80,7 @@ class Assimilate::Batch
80
80
  :deletes_count => @deleted_keys.count,
81
81
  :deleted_ids => @deleted_keys,
82
82
  :updates_count => @changes.count,
83
+ :updated_ids => @changes.map {|rec| rec[idfield]},
83
84
  :unchanged_count => @noops.count,
84
85
  :updated_fields => @updated_field_counts
85
86
  }
@@ -82,7 +82,7 @@ EOT
82
82
  Unchanged records: #{results[:unchanged_count]}
83
83
  New records: #{results[:adds_count]} (#{results[:new_ids].take(10).join(',')})
84
84
  Deletes: #{results[:deletes_count]} (#{results[:deleted_ids].take(10).join(',')})
85
- Updates: #{results[:updates_count]}
85
+ Updates: #{results[:updates_count]} (#{results[:updated_ids].take(10).join(',')})
86
86
  EOT
87
87
  if results[:updated_fields].any?
88
88
  $stderr.puts <<-EOT
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/assimilate.rb CHANGED
@@ -16,8 +16,7 @@ module Assimilate
16
16
  catalog = Catalog.new(:config => opts[:config])
17
17
  batcher = catalog.start_batch(opts.merge(:filename => filename))
18
18
 
19
- records = CSV.read(filename, :headers => true)
20
- records.each do |rec|
19
+ slurp(filename) do |rec|
21
20
  batcher << rec
22
21
  end
23
22
  if opts[:commit]
@@ -37,8 +36,7 @@ module Assimilate
37
36
  begin
38
37
  catalog = Catalog.new(:config => opts[:config])
39
38
  extender = catalog.extend_data(opts)
40
- records = CSV.read(filename, :headers => true)
41
- records.each do |rec|
39
+ slurp(filename) do |rec|
42
40
  extender << rec
43
41
  end
44
42
  if opts[:commit]
@@ -49,4 +47,20 @@ module Assimilate
49
47
  extender.stats
50
48
  end
51
49
  end
50
+
51
+ def self.slurp(filename)
52
+ headers = nil
53
+ CSV.read(filename).each do |row|
54
+ if !headers
55
+ headers = row.to_a
56
+ else
57
+ raise "Row count mismatch: #{row} vs #{headers}" if row.count > headers.count
58
+ hash = {}
59
+ row.zip(headers) do |v,k|
60
+ hash[k] = v.strip unless v.blank?
61
+ end
62
+ yield hash
63
+ end
64
+ end
65
+ end
52
66
  end
@@ -16,8 +16,7 @@ describe "importing file" do
16
16
  def import_data(datestamp, filename = "batch_input.csv")
17
17
  @batcher = @catalog.start_batch(domain: 'testdata', datestamp: datestamp, filename: filename, idfield: 'ID')
18
18
 
19
- @records = CSV.read(File.dirname(__FILE__) + "/../data/#{filename}", :headers => true)
20
- @records.each do |rec|
19
+ Assimilate.slurp(File.dirname(__FILE__) + "/../data/#{filename}") do |rec|
21
20
  @batcher << rec
22
21
  end
23
22
  @batcher.commit
@@ -38,15 +37,15 @@ describe "importing file" do
38
37
  :deletes_count => 0,
39
38
  :deleted_ids => [],
40
39
  :updates_count => 0,
40
+ :updated_ids => [],
41
41
  :unchanged_count => 0,
42
42
  :updated_fields => {}
43
43
  }
44
44
  end
45
45
 
46
46
  it "should load the records verbatim" do
47
- @catalog.catalog.count.should == @records.count
48
- example = @records[rand(@records.count)]
49
- @catalog.where('_resource' => 'testdata', 'ID' => example['ID']).should == example.to_hash
47
+ @catalog.catalog.count.should == 6
48
+ @catalog.where('_resource' => 'testdata', 'ID' => '3').should == {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage'}
50
49
  end
51
50
 
52
51
  it "should refuse to do a duplicate import" do
@@ -67,10 +66,11 @@ describe "importing file" do
67
66
  :deletes_count => 0,
68
67
  :deleted_ids => [],
69
68
  :updates_count => 0,
69
+ :updated_ids => [],
70
70
  :unchanged_count => 6,
71
71
  :updated_fields => {}
72
72
  }
73
- @catalog.catalog.count.should == @records.count
73
+ @catalog.catalog.count.should == 6
74
74
  end
75
75
  end
76
76
 
@@ -93,10 +93,11 @@ describe "importing file" do
93
93
  :deletes_count => 2,
94
94
  :deleted_ids => ['4', '6'],
95
95
  :updates_count => 1,
96
+ :updated_ids => ['3'],
96
97
  :unchanged_count => 3,
97
98
  :updated_fields => {'title' => 1}
98
99
  }
99
- @catalog.active_count.should == @records.count
100
+ @catalog.active_count.should == 5
100
101
  end
101
102
  end
102
103
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-05-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
16
- requirement: &2152913700 !ruby/object:Gem::Requirement
16
+ requirement: &2152558540 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.6.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2152913700
24
+ version_requirements: *2152558540
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bson_ext
27
- requirement: &2152912480 !ruby/object:Gem::Requirement
27
+ requirement: &2152557720 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.6.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2152912480
35
+ version_requirements: *2152557720
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activesupport
38
- requirement: &2152911920 !ruby/object:Gem::Requirement
38
+ requirement: &2152556760 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 3.2.0
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2152911920
46
+ version_requirements: *2152556760
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &2152911460 !ruby/object:Gem::Requirement
49
+ requirement: &2152555800 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.9.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2152911460
57
+ version_requirements: *2152555800
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
- requirement: &2152910880 !ruby/object:Gem::Requirement
60
+ requirement: &2152554060 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 0.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2152910880
68
+ version_requirements: *2152554060
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby_gntp
71
- requirement: &2152910140 !ruby/object:Gem::Requirement
71
+ requirement: &2152550400 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: 0.3.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2152910140
79
+ version_requirements: *2152550400
80
80
  description: Ingest updates from CSV and apply to set of hashes
81
81
  email:
82
82
  - jmay@pobox.com