assimilate 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/assimilate/batch.rb +14 -10
 - data/lib/assimilate/catalog.rb +2 -1
 - data/lib/assimilate/version.rb +1 -1
 - data/lib/assimilate.rb +1 -1
 - data/spec/data/batch_input.csv +7 -7
 - data/spec/data/duplicate_input.csv +7 -7
 - data/spec/data/updates.csv +6 -6
 - data/spec/lib/batch_spec.rb +10 -6
 - metadata +14 -14
 
    
        data/lib/assimilate/batch.rb
    CHANGED
    
    | 
         @@ -13,7 +13,7 @@ class Assimilate::Batch 
     | 
|
| 
       13 
13 
     | 
    
         
             
                load_baseline
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
                @noops = []
         
     | 
| 
       16 
     | 
    
         
            -
                @changes =  
     | 
| 
      
 16 
     | 
    
         
            +
                @changes = {}
         
     | 
| 
       17 
17 
     | 
    
         
             
                @adds = []
         
     | 
| 
       18 
18 
     | 
    
         
             
                @deletes = []
         
     | 
| 
       19 
19 
     | 
    
         
             
                @resolved = false
         
     | 
| 
         @@ -45,22 +45,26 @@ class Assimilate::Batch 
     | 
|
| 
       45 
45 
     | 
    
         
             
                  if current_record == hash
         
     | 
| 
       46 
46 
     | 
    
         
             
                    @noops << hash
         
     | 
| 
       47 
47 
     | 
    
         
             
                  else
         
     | 
| 
       48 
     | 
    
         
            -
                    @changes  
     | 
| 
      
 48 
     | 
    
         
            +
                    @changes[key] = deltas(current_record, hash)
         
     | 
| 
       49 
49 
     | 
    
         
             
                  end
         
     | 
| 
       50 
50 
     | 
    
         
             
                else
         
     | 
| 
       51 
51 
     | 
    
         
             
                  @adds << hash
         
     | 
| 
       52 
52 
     | 
    
         
             
                end
         
     | 
| 
       53 
53 
     | 
    
         
             
              end
         
     | 
| 
       54 
54 
     | 
    
         | 
| 
      
 55 
     | 
    
         
            +
              def deltas(h1,h2)
         
     | 
| 
      
 56 
     | 
    
         
            +
                (h1.keys | h2.keys).each_with_object({}) {|k,h| h[k] = h2[k] if h1[k] != h2[k]}
         
     | 
| 
      
 57 
     | 
    
         
            +
              end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
       55 
59 
     | 
    
         
             
              # compute anything needed before we can write updates to permanent store
         
     | 
| 
       56 
60 
     | 
    
         
             
              # * find records that have been deleted
         
     | 
| 
       57 
61 
     | 
    
         
             
              def resolve
         
     | 
| 
       58 
62 
     | 
    
         
             
                if !@resolved
         
     | 
| 
       59 
63 
     | 
    
         
             
                  @deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
         
     | 
| 
       60 
64 
     | 
    
         | 
| 
       61 
     | 
    
         
            -
                  @updated_field_counts = @changes.each_with_object(Hash.new(0)) do | 
     | 
| 
       62 
     | 
    
         
            -
                    key = rec[idfield]
         
     | 
| 
       63 
     | 
    
         
            -
                    diffs =  
     | 
| 
      
 65 
     | 
    
         
            +
                  @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
         
     | 
| 
      
 66 
     | 
    
         
            +
                    # key = rec[idfield]
         
     | 
| 
      
 67 
     | 
    
         
            +
                    # diffs = deltas(stripped_record_for(key), rec)
         
     | 
| 
       64 
68 
     | 
    
         
             
                    diffs.keys.each do |f|
         
     | 
| 
       65 
69 
     | 
    
         
             
                      h[f] += 1
         
     | 
| 
       66 
70 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -79,8 +83,8 @@ class Assimilate::Batch 
     | 
|
| 
       79 
83 
     | 
    
         
             
                  :new_ids => @adds.map {|rec| rec[idfield]},
         
     | 
| 
       80 
84 
     | 
    
         
             
                  :deletes_count => @deleted_keys.count,
         
     | 
| 
       81 
85 
     | 
    
         
             
                  :deleted_ids => @deleted_keys,
         
     | 
| 
       82 
     | 
    
         
            -
                  :updates_count => @changes. 
     | 
| 
       83 
     | 
    
         
            -
                  :updated_ids => @changes. 
     | 
| 
      
 86 
     | 
    
         
            +
                  :updates_count => @changes.size,
         
     | 
| 
      
 87 
     | 
    
         
            +
                  :updated_ids => @changes.keys,
         
     | 
| 
       84 
88 
     | 
    
         
             
                  :unchanged_count => @noops.count,
         
     | 
| 
       85 
89 
     | 
    
         
             
                  :updated_fields => @updated_field_counts
         
     | 
| 
       86 
90 
     | 
    
         
             
                }
         
     | 
| 
         @@ -128,13 +132,13 @@ class Assimilate::Batch 
     | 
|
| 
       128 
132 
     | 
    
         
             
              end
         
     | 
| 
       129 
133 
     | 
    
         | 
| 
       130 
134 
     | 
    
         
             
              def apply_updates
         
     | 
| 
       131 
     | 
    
         
            -
                @changes.each do | 
     | 
| 
      
 135 
     | 
    
         
            +
                @changes.each do |key, diffs|
         
     | 
| 
       132 
136 
     | 
    
         
             
                  @catalog.catalog.update(
         
     | 
| 
       133 
137 
     | 
    
         
             
                    {
         
     | 
| 
       134 
138 
     | 
    
         
             
                      @domainkey => domain,
         
     | 
| 
       135 
     | 
    
         
            -
                      idfield =>  
     | 
| 
      
 139 
     | 
    
         
            +
                      idfield => key
         
     | 
| 
       136 
140 
     | 
    
         
             
                    },
         
     | 
| 
       137 
     | 
    
         
            -
                    {"$set" =>  
     | 
| 
      
 141 
     | 
    
         
            +
                    {"$set" => diffs}
         
     | 
| 
       138 
142 
     | 
    
         
             
                  )
         
     | 
| 
       139 
143 
     | 
    
         
             
                end
         
     | 
| 
       140 
144 
     | 
    
         
             
              end
         
     | 
    
        data/lib/assimilate/catalog.rb
    CHANGED
    
    
    
        data/lib/assimilate/version.rb
    CHANGED
    
    
    
        data/lib/assimilate.rb
    CHANGED
    
    
    
        data/spec/data/batch_input.csv
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            ID,name,title
         
     | 
| 
       2 
     | 
    
         
            -
            1,George Washington,President
         
     | 
| 
       3 
     | 
    
         
            -
            2,John Adams,Vice President
         
     | 
| 
       4 
     | 
    
         
            -
            3,Benjamin Franklin,Sage
         
     | 
| 
       5 
     | 
    
         
            -
            4,Aaron Burr,Duelist
         
     | 
| 
       6 
     | 
    
         
            -
            5,Alexander Hamilton,Financier
         
     | 
| 
       7 
     | 
    
         
            -
            6,James Madison,Theorist
         
     | 
| 
      
 1 
     | 
    
         
            +
            ID,name,title,spouse
         
     | 
| 
      
 2 
     | 
    
         
            +
            1,George Washington,President,Martha
         
     | 
| 
      
 3 
     | 
    
         
            +
            2,John Adams,Vice President,Abigail
         
     | 
| 
      
 4 
     | 
    
         
            +
            3,Benjamin Franklin,Sage,Deborah
         
     | 
| 
      
 5 
     | 
    
         
            +
            4,Aaron Burr,Duelist,Theodosia
         
     | 
| 
      
 6 
     | 
    
         
            +
            5,Alexander Hamilton,Financier,Elizabeth
         
     | 
| 
      
 7 
     | 
    
         
            +
            6,James Madison,Theorist,Dolly
         
     | 
| 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            ID,name,title
         
     | 
| 
       2 
     | 
    
         
            -
            1,George Washington,President
         
     | 
| 
       3 
     | 
    
         
            -
            2,John Adams,Vice President
         
     | 
| 
       4 
     | 
    
         
            -
            3,Benjamin Franklin,Sage
         
     | 
| 
       5 
     | 
    
         
            -
            4,Aaron Burr,Duelist
         
     | 
| 
       6 
     | 
    
         
            -
            5,Alexander Hamilton,Financier
         
     | 
| 
       7 
     | 
    
         
            -
            6,James Madison,Theorist
         
     | 
| 
      
 1 
     | 
    
         
            +
            ID,name,title,spouse
         
     | 
| 
      
 2 
     | 
    
         
            +
            1,George Washington,President,Martha
         
     | 
| 
      
 3 
     | 
    
         
            +
            2,John Adams,Vice President,Abigail
         
     | 
| 
      
 4 
     | 
    
         
            +
            3,Benjamin Franklin,Sage,Deborah
         
     | 
| 
      
 5 
     | 
    
         
            +
            4,Aaron Burr,Duelist,Theodosia
         
     | 
| 
      
 6 
     | 
    
         
            +
            5,Alexander Hamilton,Financier,Elizabeth
         
     | 
| 
      
 7 
     | 
    
         
            +
            6,James Madison,Theorist,Dolly
         
     | 
    
        data/spec/data/updates.csv
    CHANGED
    
    | 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            ID,name,title
         
     | 
| 
       2 
     | 
    
         
            -
            1,George Washington,President
         
     | 
| 
       3 
     | 
    
         
            -
            2,John Adams,Vice President
         
     | 
| 
       4 
     | 
    
         
            -
            3,Benjamin Franklin,Ambassador
         
     | 
| 
       5 
     | 
    
         
            -
            5,Alexander Hamilton,Financier
         
     | 
| 
       6 
     | 
    
         
            -
            7,Thomas Jefferson,Anti-Federalist
         
     | 
| 
      
 1 
     | 
    
         
            +
            ID,name,title,spouse
         
     | 
| 
      
 2 
     | 
    
         
            +
            1,George Washington,President,Martha
         
     | 
| 
      
 3 
     | 
    
         
            +
            2,John Adams,Vice President,Abigail
         
     | 
| 
      
 4 
     | 
    
         
            +
            3,Benjamin Franklin,Ambassador,
         
     | 
| 
      
 5 
     | 
    
         
            +
            5,Alexander Hamilton,Financier,Elizabeth
         
     | 
| 
      
 6 
     | 
    
         
            +
            7,Thomas Jefferson,Anti-Federalist,
         
     | 
    
        data/spec/lib/batch_spec.rb
    CHANGED
    
    | 
         @@ -45,15 +45,16 @@ describe "importing file" do 
     | 
|
| 
       45 
45 
     | 
    
         | 
| 
       46 
46 
     | 
    
         
             
                it "should load the records verbatim" do
         
     | 
| 
       47 
47 
     | 
    
         
             
                  @catalog.catalog.count.should == 6
         
     | 
| 
       48 
     | 
    
         
            -
                  @catalog.where('_resource' => 'testdata', 'ID' => '3').should == 
     | 
| 
      
 48 
     | 
    
         
            +
                  @catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
         
     | 
| 
      
 49 
     | 
    
         
            +
                    {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage', 'spouse' => 'Deborah'}
         
     | 
| 
       49 
50 
     | 
    
         
             
                end
         
     | 
| 
       50 
51 
     | 
    
         | 
| 
       51 
52 
     | 
    
         
             
                it "should refuse to do a duplicate import" do
         
     | 
| 
       52 
     | 
    
         
            -
                  lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError)
         
     | 
| 
      
 53 
     | 
    
         
            +
                  lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for datestamp 123")
         
     | 
| 
       53 
54 
     | 
    
         
             
                end
         
     | 
| 
       54 
55 
     | 
    
         | 
| 
       55 
56 
     | 
    
         
             
                it "should refuse to re-import same file" do
         
     | 
| 
       56 
     | 
    
         
            -
                  lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError)
         
     | 
| 
      
 57 
     | 
    
         
            +
                  lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for file batch_input.csv")
         
     | 
| 
       57 
58 
     | 
    
         
             
                end
         
     | 
| 
       58 
59 
     | 
    
         | 
| 
       59 
60 
     | 
    
         
             
                it "should do all no-ops when importing identical data" do
         
     | 
| 
         @@ -78,9 +79,7 @@ describe "importing file" do 
     | 
|
| 
       78 
79 
     | 
    
         
             
                before(:all) do
         
     | 
| 
       79 
80 
     | 
    
         
             
                  reset_catalog
         
     | 
| 
       80 
81 
     | 
    
         
             
                  import_data("123")
         
     | 
| 
       81 
     | 
    
         
            -
                end
         
     | 
| 
       82 
82 
     | 
    
         | 
| 
       83 
     | 
    
         
            -
                before(:each) do
         
     | 
| 
       84 
83 
     | 
    
         
             
                  import_data("345", "updates.csv")
         
     | 
| 
       85 
84 
     | 
    
         
             
                end
         
     | 
| 
       86 
85 
     | 
    
         | 
| 
         @@ -95,9 +94,14 @@ describe "importing file" do 
     | 
|
| 
       95 
94 
     | 
    
         
             
                    :updates_count => 1,
         
     | 
| 
       96 
95 
     | 
    
         
             
                    :updated_ids => ['3'],
         
     | 
| 
       97 
96 
     | 
    
         
             
                    :unchanged_count => 3,
         
     | 
| 
       98 
     | 
    
         
            -
                    :updated_fields => {'title' => 1}
         
     | 
| 
      
 97 
     | 
    
         
            +
                    :updated_fields => {'title' => 1, 'spouse' => 1}
         
     | 
| 
       99 
98 
     | 
    
         
             
                  }
         
     | 
| 
       100 
99 
     | 
    
         
             
                  @catalog.active_count.should == 5
         
     | 
| 
       101 
100 
     | 
    
         
             
                end
         
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
                it "should handle deleted attributes" do
         
     | 
| 
      
 103 
     | 
    
         
            +
                  franklin = @catalog.where('ID' => '3')
         
     | 
| 
      
 104 
     | 
    
         
            +
                  franklin['spouse'].should be_nil
         
     | 
| 
      
 105 
     | 
    
         
            +
                end
         
     | 
| 
       102 
106 
     | 
    
         
             
              end
         
     | 
| 
       103 
107 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: assimilate
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.5
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,11 +9,11 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-05- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-05-03 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: mongo
         
     | 
| 
       16 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &2157608660 !ruby/object:Gem::Requirement
         
     | 
| 
       17 
17 
     | 
    
         
             
                none: false
         
     | 
| 
       18 
18 
     | 
    
         
             
                requirements:
         
     | 
| 
       19 
19 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -21,10 +21,10 @@ dependencies: 
     | 
|
| 
       21 
21 
     | 
    
         
             
                    version: 1.6.0
         
     | 
| 
       22 
22 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       23 
23 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       24 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *2157608660
         
     | 
| 
       25 
25 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       26 
26 
     | 
    
         
             
              name: bson_ext
         
     | 
| 
       27 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &2157607760 !ruby/object:Gem::Requirement
         
     | 
| 
       28 
28 
     | 
    
         
             
                none: false
         
     | 
| 
       29 
29 
     | 
    
         
             
                requirements:
         
     | 
| 
       30 
30 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -32,10 +32,10 @@ dependencies: 
     | 
|
| 
       32 
32 
     | 
    
         
             
                    version: 1.6.0
         
     | 
| 
       33 
33 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       34 
34 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       35 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *2157607760
         
     | 
| 
       36 
36 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       37 
37 
     | 
    
         
             
              name: activesupport
         
     | 
| 
       38 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &2157607080 !ruby/object:Gem::Requirement
         
     | 
| 
       39 
39 
     | 
    
         
             
                none: false
         
     | 
| 
       40 
40 
     | 
    
         
             
                requirements:
         
     | 
| 
       41 
41 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -43,10 +43,10 @@ dependencies: 
     | 
|
| 
       43 
43 
     | 
    
         
             
                    version: 3.2.0
         
     | 
| 
       44 
44 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       45 
45 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       46 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *2157607080
         
     | 
| 
       47 
47 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       48 
48 
     | 
    
         
             
              name: rspec
         
     | 
| 
       49 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 49 
     | 
    
         
            +
              requirement: &2157606260 !ruby/object:Gem::Requirement
         
     | 
| 
       50 
50 
     | 
    
         
             
                none: false
         
     | 
| 
       51 
51 
     | 
    
         
             
                requirements:
         
     | 
| 
       52 
52 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -54,10 +54,10 @@ dependencies: 
     | 
|
| 
       54 
54 
     | 
    
         
             
                    version: 2.9.0
         
     | 
| 
       55 
55 
     | 
    
         
             
              type: :development
         
     | 
| 
       56 
56 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       57 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 57 
     | 
    
         
            +
              version_requirements: *2157606260
         
     | 
| 
       58 
58 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       59 
59 
     | 
    
         
             
              name: guard-rspec
         
     | 
| 
       60 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 60 
     | 
    
         
            +
              requirement: &2157605600 !ruby/object:Gem::Requirement
         
     | 
| 
       61 
61 
     | 
    
         
             
                none: false
         
     | 
| 
       62 
62 
     | 
    
         
             
                requirements:
         
     | 
| 
       63 
63 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -65,10 +65,10 @@ dependencies: 
     | 
|
| 
       65 
65 
     | 
    
         
             
                    version: 0.7.0
         
     | 
| 
       66 
66 
     | 
    
         
             
              type: :development
         
     | 
| 
       67 
67 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       68 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 68 
     | 
    
         
            +
              version_requirements: *2157605600
         
     | 
| 
       69 
69 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       70 
70 
     | 
    
         
             
              name: ruby_gntp
         
     | 
| 
       71 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: &2157603980 !ruby/object:Gem::Requirement
         
     | 
| 
       72 
72 
     | 
    
         
             
                none: false
         
     | 
| 
       73 
73 
     | 
    
         
             
                requirements:
         
     | 
| 
       74 
74 
     | 
    
         
             
                - - ~>
         
     | 
| 
         @@ -76,7 +76,7 @@ dependencies: 
     | 
|
| 
       76 
76 
     | 
    
         
             
                    version: 0.3.4
         
     | 
| 
       77 
77 
     | 
    
         
             
              type: :development
         
     | 
| 
       78 
78 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       79 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 79 
     | 
    
         
            +
              version_requirements: *2157603980
         
     | 
| 
       80 
80 
     | 
    
         
             
            description: Ingest updates from CSV and apply to set of hashes
         
     | 
| 
       81 
81 
     | 
    
         
             
            email:
         
     | 
| 
       82 
82 
     | 
    
         
             
            - jmay@pobox.com
         
     |