ingestion_engine 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bb45d11b839f5c50bf93f06d0cdb531896170f85
4
+ data.tar.gz: 739c9202ed5b8016ae2795545c2c84ac3f53a747
5
+ SHA512:
6
+ metadata.gz: c521895788ea57ed62c767f231775a8119682c18ffdb9254df809b1d7c6478082754798886b4d9eef001080b5cdccfc3283265918b7b960352d29cf28a125657
7
+ data.tar.gz: b06c97b99a0452335dd69e424dc37a2b96777a1775193ab9996affcd1ef0e2a63be761799d51a1aaa484cb0ed6138e8aa12c70c691563c7e50829ebd88430952
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .bundle
2
+ vendor/ruby
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
# Development and test dependencies for the ingestion_engine gem.
source "https://rubygems.org"

# Test framework
gem "rspec"

# Database layer exercised by the specs
gem "sqlite3"
gem "activerecord", :require => "active_record"

# Debugging and console helpers
gem "pry-byebug"
gem "awesome_print"

# Resets the database between examples
gem "database_cleaner"
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activemodel (4.2.1)
5
+ activesupport (= 4.2.1)
6
+ builder (~> 3.1)
7
+ activerecord (4.2.1)
8
+ activemodel (= 4.2.1)
9
+ activesupport (= 4.2.1)
10
+ arel (~> 6.0)
11
+ activesupport (4.2.1)
12
+ i18n (~> 0.7)
13
+ json (~> 1.7, >= 1.7.7)
14
+ minitest (~> 5.1)
15
+ thread_safe (~> 0.3, >= 0.3.4)
16
+ tzinfo (~> 1.1)
17
+ arel (6.0.0)
18
+ awesome_print (1.6.1)
19
+ builder (3.2.2)
20
+ byebug (4.0.4)
21
+ columnize (= 0.9.0)
22
+ coderay (1.1.0)
23
+ columnize (0.9.0)
24
+ database_cleaner (1.4.1)
25
+ diff-lcs (1.2.5)
26
+ i18n (0.7.0)
27
+ json (1.8.2)
28
+ method_source (0.8.2)
29
+ minitest (5.5.1)
30
+ pry (0.10.1)
31
+ coderay (~> 1.1.0)
32
+ method_source (~> 0.8.1)
33
+ slop (~> 3.4)
34
+ pry-byebug (3.1.0)
35
+ byebug (~> 4.0)
36
+ pry (~> 0.10)
37
+ rspec (3.2.0)
38
+ rspec-core (~> 3.2.0)
39
+ rspec-expectations (~> 3.2.0)
40
+ rspec-mocks (~> 3.2.0)
41
+ rspec-core (3.2.2)
42
+ rspec-support (~> 3.2.0)
43
+ rspec-expectations (3.2.0)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.2.0)
46
+ rspec-mocks (3.2.1)
47
+ diff-lcs (>= 1.2.0, < 2.0)
48
+ rspec-support (~> 3.2.0)
49
+ rspec-support (3.2.2)
50
+ slop (3.6.0)
51
+ sqlite3 (1.3.10)
52
+ thread_safe (0.3.5)
53
+ tzinfo (1.2.2)
54
+ thread_safe (~> 0.1)
55
+
56
+ PLATFORMS
57
+ ruby
58
+
59
+ DEPENDENCIES
60
+ activerecord
61
+ awesome_print
62
+ database_cleaner
63
+ pry-byebug
64
+ rspec
65
+ sqlite3
data/bin/byebug ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'byebug' executable shipped with
# the 'byebug' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('byebug', 'byebug')
load executable
data/bin/coderay ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'coderay' executable shipped with
# the 'coderay' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('coderay', 'coderay')
load executable
data/bin/htmldiff ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'htmldiff' executable shipped with
# the 'diff-lcs' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('diff-lcs', 'htmldiff')
load executable
data/bin/ldiff ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'ldiff' executable shipped with
# the 'diff-lcs' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('diff-lcs', 'ldiff')
load executable
data/bin/pry ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'pry' executable shipped with
# the 'pry' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('pry', 'pry')
load executable
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# Binstub generated by Bundler: runs the 'rspec' executable shipped with
# the 'rspec-core' gem, resolved through this project's Gemfile.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE chosen by the caller; otherwise use the Gemfile
# two levels above the real (symlink-resolved) location of this script.
unless ENV['BUNDLE_GEMFILE']
  script_path = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", script_path)
end

require 'rubygems'
require 'bundler/setup'

executable = Gem.bin_path('rspec-core', 'rspec')
load executable
@@ -0,0 +1,9 @@
1
# Gem specification for ingestion_engine.
Gem::Specification.new do |spec|
  spec.name    = 'ingestion_engine'
  spec.version = '0.0.1'
  spec.authors = ['Brandon Mathis']
  spec.email   = ['BeMathis@gmail.com']
  spec.summary = 'Ingest and massage csv data'
  spec.license = 'MIT'

  # Package only the library sources. The previous `git ls-files` listing
  # required a git checkout at build time and shipped every tracked file,
  # including the generated invalid.csv report, Gemfile.lock, the Bundler
  # binstubs and the specs. The glob is resolved relative to this file so
  # the result does not depend on the caller's working directory.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) { Dir['lib/**/*.rb'].sort }
  spec.require_paths = ['lib']
end
data/invalid.csv ADDED
@@ -0,0 +1,3 @@
1
+ username,email,first_name,last_name,errors
2
+ BeMathis, bemathis<at>gmail.com, brandon, mathis, Email is invalid
3
+ Carrion, carrion@gmail, carrion, fost, Email is invalid
@@ -0,0 +1,43 @@
1
module IngestionEngine
  # Builds one +klass+ instance per CSV data row, hands the invalid ones to
  # IngestionEngine::Reporter and saves the remaining ones.
  #
  # The first CSV row is the header row: every (stripped) header names the
  # attribute writer (<tt>header=</tt>) that receives the matching cell.
  class Base
    attr_reader :rows, :klass, :headers

    # @param klass [Class] instantiated without arguments for every data row;
    #   instances must respond to <tt>invalid?</tt>, +errors+, +save+ and a
    #   public writer for every header
    # @param file forwarded unchanged to CSV.read
    def initialize(klass, file)
      @klass = klass
      @rows = CSV.read(file)
      # An empty file has no header row; treat it as "no columns" instead of
      # calling #map on nil.
      @headers = (@rows.shift || []).map(&:strip)
    end

    # Builds, filters, reports and saves the entities.
    #
    # @return [Array] the entities that passed validation and had +save+ called
    def ingest
      entities = []
      init entities
      dump_invalid entities
      save entities
    end

    private

    # Appends one populated +klass+ instance per non-blank row to +entities+.
    def init(entities)
      rows.each do |row|
        # CSV.read yields an empty array for blank lines; they carry no data.
        next if row.compact.empty?

        obj = klass.new
        headers.each_with_index do |header, index|
          # Empty cells and rows shorter than the header row come back as nil.
          value = row[index]
          # public_send: header names come from the file, so they must not be
          # able to reach private methods of the model.
          obj.public_send("#{header}=", value && value.strip)
        end
        entities << obj
      end
    end

    # Removes the invalid entities from +entities+ (in place) and writes them,
    # together with their error messages, through the Reporter.
    def dump_invalid(entities)
      invalids, valids = entities.partition(&:invalid?)
      entities.replace(valids)
      IngestionEngine::Reporter.new(invalids, headers).log do |invalid|
        invalid.errors.full_messages.to_sentence
      end
    end

    # Calls +save+ on every entity; the individual results are not inspected.
    def save(entities)
      entities.each { |entity| entity.save }
    end
  end
end
@@ -0,0 +1,21 @@
1
module IngestionEngine
  # Writes rejected entities, one per row, to a CSV report with an extra
  # trailing "errors" column.
  class Reporter
    # Report location used when #log is called without a path.
    DEFAULT_PATH = 'invalid.csv'.freeze

    attr_reader :entities, :headers

    # @param entities [Array] objects responding to every name in +headers+
    # @param headers [Array<String>] attribute names, in column order
    def initialize(entities, headers)
      @entities = entities
      @headers = headers
    end

    # Writes the report, overwriting any existing file at +path+.
    #
    # Rows are emitted through CSV so values containing commas, quotes or
    # newlines are quoted instead of corrupting the column layout, and error
    # messages keep their punctuation.
    #
    # @param path [String] destination file; defaults to DEFAULT_PATH in the
    #   current working directory
    # @yieldparam entity the entity being written
    # @yieldreturn [String, nil] text for the "errors" column; the column is
    #   left empty when nil is returned or no block is given
    def log(path = DEFAULT_PATH)
      CSV.open(path, 'w') do |csv|
        csv << headers + ['errors']
        entities.each do |entity|
          message = block_given? ? yield(entity) : nil
          csv << headers.map { |header| entity.public_send(header) } + [message]
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require_relative 'ingestion_engine/base'
2
+ require_relative 'ingestion_engine/reporter'
3
+ require 'csv'
4
+
5
# Top-level namespace of the gem; the classes living in it are defined in
# the files required above.
module IngestionEngine; end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
# ActiveRecord model used as the ingestion target in the examples below:
# a first name is required and, on create, the email must match the format.
class User < ActiveRecord::Base
  validates :first_name, presence: true
  validates_format_of :email,
                      with: /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i,
                      on: :create
end
7
+
8
describe IngestionEngine::Base do
  describe '#ingest' do
    # A path rather than File.open(...): an opened File was never closed, so
    # every example leaked a file handle.
    let(:csv) { 'spec/sample_csvs/users.csv' }
    let(:emails) { User.all.map(&:email) }
    let(:usernames) { User.all.map(&:username) }
    # Parsed report written by the ingestion run. Options are passed as
    # keywords; a positional options hash is rejected by current csv versions.
    let(:invalids) { CSV.read('invalid.csv', headers: true, header_converters: :symbol) }

    it 'saves the given items' do
      IngestionEngine::Base.new(User, csv).ingest
      expect(User.count).to eq 3
      expect(usernames).to include 'BeMathis'
      expect(usernames).to include 'Carrion'
      expect(usernames).to include 'durrhurrdurr'
      expect(emails).to include 'bemathis@gmail.com'
      expect(emails).to include 'carrion@gmail.com'
      expect(emails).to include 'durrhurrdurr@gmail.com'
    end

    context 'with missing attr that is required' do
      let(:csv) { 'spec/sample_csvs/users_with_missing_first_name.csv' }

      it 'does not save invalid items' do
        IngestionEngine::Base.new(User, csv).ingest
        expect(User.count).to eq 2
        expect(usernames).to include 'durrhurrdurr'
        expect(usernames).to include 'Carrion'
        expect(usernames).to_not include 'BeMathis'
      end

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        # Compare the whole column: looping over the rows would pass
        # vacuously if the report contained no rows at all.
        expect(invalids.map { |row| row[:username] }).to eq ['BeMathis']
      end
    end

    context 'with bad formatted email' do
      let(:csv) { 'spec/sample_csvs/users_with_bad_email.csv' }

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        reported = invalids.map { |row| row[:username] }
        messages = invalids.map { |row| row[:errors].strip }
        expect(reported).to eq %w[BeMathis Carrion]
        expect(messages).to eq ['Email is invalid', 'Email is invalid']
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis@gmail.com, brandon, mathis
3
+ Carrion, carrion@gmail.com, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis<at>gmail.com, brandon, mathis
3
+ Carrion, carrion@gmail, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis@gmail.com, , mathis
3
+ Carrion, carrion@gmail.com, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,39 @@
1
# Shared RSpec setup: loads the bundle and the gem, connects ActiveRecord to
# a scratch SQLite database and manages the users table used by the specs.
Bundler.require
require 'tmpdir'
require 'ingestion_engine'
require 'rspec/core'

# Keep the scratch database in the platform's temp directory instead of a
# hard-coded /tmp path.
ActiveRecord::Base.establish_connection(
  adapter: 'sqlite3',
  database: File.join(Dir.tmpdir, 'ingestion_engine.sqlite')
)

connection = ActiveRecord::Base.connection

RSpec.configure do |config|
  config.mock_with :rspec

  config.before(:suite) do
    DatabaseCleaner.strategy = :transaction
    DatabaseCleaner.clean_with(:truncation)
  end

  # Wrap every example in DatabaseCleaner so records do not leak between
  # examples.
  config.around(:each) do |example|
    DatabaseCleaner.cleaning do
      example.run
    end
  end

  config.before(:all) do
    # force: true recreates the table, so a table left in the on-disk
    # database by an aborted earlier run no longer makes setup fail.
    connection.create_table :users, force: true do |t|
      t.string :first_name
      t.string :last_name
      t.string :email
      t.string :username
    end
  end

  config.after(:all) do
    connection.drop_table :users if connection.table_exists?(:users)
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ingestion_engine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brandon Mathis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - BeMathis@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - bin/byebug
24
+ - bin/coderay
25
+ - bin/htmldiff
26
+ - bin/ldiff
27
+ - bin/pry
28
+ - bin/rspec
29
+ - ingestion_engine.gemspec
30
+ - invalid.csv
31
+ - lib/ingestion_engine.rb
32
+ - lib/ingestion_engine/base.rb
33
+ - lib/ingestion_engine/reporter.rb
34
+ - spec/lib/ingestion_engine/base_spec.rb
35
+ - spec/sample_csvs/users.csv
36
+ - spec/sample_csvs/users_with_bad_email.csv
37
+ - spec/sample_csvs/users_with_missing_first_name.csv
38
+ - spec/spec_helper.rb
39
+ homepage:
40
+ licenses:
41
+ - MIT
42
+ metadata: {}
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 2.4.6
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Ingest and massage csv data
63
+ test_files: []