ingestion_engine 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bb45d11b839f5c50bf93f06d0cdb531896170f85
4
+ data.tar.gz: 739c9202ed5b8016ae2795545c2c84ac3f53a747
5
+ SHA512:
6
+ metadata.gz: c521895788ea57ed62c767f231775a8119682c18ffdb9254df809b1d7c6478082754798886b4d9eef001080b5cdccfc3283265918b7b960352d29cf28a125657
7
+ data.tar.gz: b06c97b99a0452335dd69e424dc37a2b96777a1775193ab9996affcd1ef0e2a63be761799d51a1aaa484cb0ed6138e8aa12c70c691563c7e50829ebd88430952
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .bundle
2
+ vendor/ruby
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'rspec'
4
+ gem 'sqlite3'
5
+ gem 'activerecord', require: 'active_record'
6
+ gem 'pry-byebug'
7
+ gem 'awesome_print'
8
+ gem 'database_cleaner'
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activemodel (4.2.1)
5
+ activesupport (= 4.2.1)
6
+ builder (~> 3.1)
7
+ activerecord (4.2.1)
8
+ activemodel (= 4.2.1)
9
+ activesupport (= 4.2.1)
10
+ arel (~> 6.0)
11
+ activesupport (4.2.1)
12
+ i18n (~> 0.7)
13
+ json (~> 1.7, >= 1.7.7)
14
+ minitest (~> 5.1)
15
+ thread_safe (~> 0.3, >= 0.3.4)
16
+ tzinfo (~> 1.1)
17
+ arel (6.0.0)
18
+ awesome_print (1.6.1)
19
+ builder (3.2.2)
20
+ byebug (4.0.4)
21
+ columnize (= 0.9.0)
22
+ coderay (1.1.0)
23
+ columnize (0.9.0)
24
+ database_cleaner (1.4.1)
25
+ diff-lcs (1.2.5)
26
+ i18n (0.7.0)
27
+ json (1.8.2)
28
+ method_source (0.8.2)
29
+ minitest (5.5.1)
30
+ pry (0.10.1)
31
+ coderay (~> 1.1.0)
32
+ method_source (~> 0.8.1)
33
+ slop (~> 3.4)
34
+ pry-byebug (3.1.0)
35
+ byebug (~> 4.0)
36
+ pry (~> 0.10)
37
+ rspec (3.2.0)
38
+ rspec-core (~> 3.2.0)
39
+ rspec-expectations (~> 3.2.0)
40
+ rspec-mocks (~> 3.2.0)
41
+ rspec-core (3.2.2)
42
+ rspec-support (~> 3.2.0)
43
+ rspec-expectations (3.2.0)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.2.0)
46
+ rspec-mocks (3.2.1)
47
+ diff-lcs (>= 1.2.0, < 2.0)
48
+ rspec-support (~> 3.2.0)
49
+ rspec-support (3.2.2)
50
+ slop (3.6.0)
51
+ sqlite3 (1.3.10)
52
+ thread_safe (0.3.5)
53
+ tzinfo (1.2.2)
54
+ thread_safe (~> 0.1)
55
+
56
+ PLATFORMS
57
+ ruby
58
+
59
+ DEPENDENCIES
60
+ activerecord
61
+ awesome_print
62
+ database_cleaner
63
+ pry-byebug
64
+ rspec
65
+ sqlite3
data/bin/byebug ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'byebug' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('byebug', 'byebug')
data/bin/coderay ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'coderay' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('coderay', 'coderay')
data/bin/htmldiff ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'htmldiff' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('diff-lcs', 'htmldiff')
data/bin/ldiff ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'ldiff' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('diff-lcs', 'ldiff')
data/bin/pry ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'pry' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('pry', 'pry')
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rspec-core', 'rspec')
@@ -0,0 +1,9 @@
# Gem specification for ingestion_engine: package identity, authorship and
# the list of files shipped in the built gem.
Gem::Specification.new do |spec|
  spec.name    = "ingestion_engine"
  spec.version = '0.0.1'
  spec.authors = ['Brandon Mathis']
  spec.email   = ['BeMathis@gmail.com']
  spec.summary = 'Ingest and massage csv data'
  spec.license = 'MIT'
  # Ship every file tracked by git. `-z` makes git print raw, NUL-terminated
  # paths; without it, paths containing unusual characters are printed in a
  # quoted/escaped form that does not match any file on disk.
  spec.files   = `git ls-files -z`.split("\x0")
end
data/invalid.csv ADDED
@@ -0,0 +1,3 @@
1
+ username,email,first_name,last_name,errors
2
+ BeMathis, bemathis<at>gmail.com, brandon, mathis, Email is invalid
3
+ Carrion, carrion@gmail, carrion, fost, Email is invalid
@@ -0,0 +1,43 @@
module IngestionEngine
  # Reads a CSV file whose first row names the attributes of +klass+, builds
  # one +klass+ instance per remaining row, hands the invalid instances to
  # IngestionEngine::Reporter and calls +save+ on the valid ones.
  class Base
    attr_reader :rows, :klass, :headers

    # @param klass [Class] responds to +new+; its instances must respond to
    #   "<header>=" for every CSV header, plus +invalid?+, +errors+ and +save+
    # @param file [String] CSV file to load (handed straight to CSV.read)
    def initialize(klass, file)
      @klass = klass
      @rows = CSV.read(file)
      # An empty file has no header row; fall back to "no headers" rather
      # than calling a method on nil.
      @headers = (@rows.shift || []).map { |header| header.to_s.strip }
    end

    # Builds an entity per data row, reports the invalid ones and saves the
    # rest.
    #
    # @return [Array] the valid entities, after +save+ was called on each
    def ingest
      entities = []
      init entities
      dump_invalid entities
      save entities
    end

    private

    # Appends one populated +klass+ instance per data row to +entities+.
    def init(entities)
      rows.each do |row|
        obj = klass.new
        headers.each_with_index do |header, index|
          value = row[index]
          # CSV yields nil for empty cells and for rows shorter than the
          # header row, so only strip real strings. public_send keeps
          # header names taken from the file away from private writers.
          obj.public_send("#{header}=", value && value.strip)
        end
        entities << obj
      end
    end

    # Removes the invalid entities from +entities+ (in place) and passes
    # them to the Reporter together with their joined error messages.
    def dump_invalid(entities)
      # Single pass; avoids deleting by equality, which is quadratic and
      # could also drop a valid entity that compares equal to an invalid one.
      invalids, valids = entities.partition(&:invalid?)
      entities.replace(valids)
      IngestionEngine::Reporter.new(invalids, headers).log do |invalid|
        invalid.errors.full_messages.to_sentence
      end
    end

    # Calls +save+ on every entity and returns the collection.
    def save(entities)
      entities.each { |entity| entity.save }
    end
  end
end
@@ -0,0 +1,21 @@
require 'csv'

module IngestionEngine
  # Writes rejected entities to a CSV file: a header row, then one row per
  # entity holding the value of every header attribute plus a trailing
  # "errors" column.
  class Reporter
    # File written when +log+ is called without an explicit path.
    DEFAULT_PATH = 'invalid.csv'.freeze

    attr_reader :entities, :headers

    # @param entities [Array] objects responding to each name in +headers+
    # @param headers [Array<String>] attribute names, used as column titles
    def initialize(entities, headers)
      @entities = entities
      @headers = headers
    end

    # Overwrites +path+ with the report.
    #
    # @param path [String] destination file; defaults to DEFAULT_PATH,
    #   resolved against the current working directory
    # @yieldparam entity [Object] the entity being written
    # @yieldreturn [#to_s] text for that entity's "errors" column
    # @return [Array] the reported entities
    def log(path = DEFAULT_PATH)
      # CSV takes care of quoting, so values and messages that contain
      # commas, quotes or newlines survive a round trip unchanged.
      CSV.open(path, 'w') do |csv|
        csv << (headers + ['errors'])
        entities.each do |entity|
          msg = yield(entity)
          values = headers.map { |header| entity.public_send(header) }
          csv << (values + [msg.to_s])
        end
      end
      entities
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require_relative 'ingestion_engine/base'
2
+ require_relative 'ingestion_engine/reporter'
3
+ require 'csv'
4
+
5
+ module IngestionEngine
6
+ end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
# Minimal ActiveRecord model the specs ingest into.
class User < ActiveRecord::Base
  validates_presence_of :first_name
  validates :email, format: { with: /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i, on: :create }
end

describe IngestionEngine::Base do
  describe '#ingest' do
    # Fixtures are passed as paths; opening a File here would leave one
    # unclosed handle behind for every example.
    let(:csv) { 'spec/sample_csvs/users.csv' }
    let(:emails) { User.all.map(&:email) }
    let(:usernames) { User.all.map(&:username) }

    it 'saves the given items' do
      IngestionEngine::Base.new(User, csv).ingest
      expect(User.count).to eq 3
      expect(usernames).to include 'BeMathis'
      expect(usernames).to include 'Carrion'
      expect(usernames).to include 'durrhurrdurr'
      expect(emails).to include 'bemathis@gmail.com'
      expect(emails).to include 'carrion@gmail.com'
      expect(emails).to include 'durrhurrdurr@gmail.com'
    end

    context 'with missing attr that is required' do
      let(:csv) { 'spec/sample_csvs/users_with_missing_first_name.csv' }

      it 'does not save invalid items' do
        IngestionEngine::Base.new(User, csv).ingest
        expect(User.count).to eq 2
        expect(usernames).to include 'durrhurrdurr'
        expect(usernames).to include 'Carrion'
        expect(usernames).to_not include 'BeMathis'
      end

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        # Options are passed as keywords (a positional hash is not accepted
        # on Ruby 3), and the whole column is compared so that an empty
        # report fails instead of passing without running any assertion.
        invalids = CSV.read('invalid.csv', headers: true)
        expect(invalids.map { |row| row['username'] }).to eq ['BeMathis']
      end
    end

    context 'with bad formatted email' do
      let(:csv) { 'spec/sample_csvs/users_with_bad_email.csv' }

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        # CSV.read opens and closes the report itself.
        invalids = CSV.read('invalid.csv', headers: true, header_converters: :symbol)
        expect(invalids[0][:username]).to eq 'BeMathis'
        expect(invalids[1][:username]).to eq 'Carrion'
        expect(invalids[0][:errors].strip).to eq 'Email is invalid'
        expect(invalids[1][:errors].strip).to eq 'Email is invalid'
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis@gmail.com, brandon, mathis
3
+ Carrion, carrion@gmail.com, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis<at>gmail.com, brandon, mathis
3
+ Carrion, carrion@gmail, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,4 @@
1
+ username, email, first_name, last_name
2
+ BeMathis, bemathis@gmail.com, , mathis
3
+ Carrion, carrion@gmail.com, carrion, fost
4
+ durrhurrdurr, durrhurrdurr@gmail.com, durr, hurrdurr
@@ -0,0 +1,39 @@
1
+ Bundler.require
2
+ require 'ingestion_engine'
3
+ require 'rspec/core'
4
+
5
+
# The specs run against a SQLite database stored in a file, so its contents
# outlive a single test run.
ActiveRecord::Base.establish_connection(
  :adapter => "sqlite3",
  :database => "/tmp/ingestion_engine.sqlite"
)

connection = ActiveRecord::Base.connection

RSpec.configure do |config|
  config.mock_with :rspec

  config.before(:suite) do
    DatabaseCleaner.strategy = :transaction
    DatabaseCleaner.clean_with(:truncation)
  end

  # Wrap every example in DatabaseCleaner's cleaning block.
  config.around(:each) do |example|
    DatabaseCleaner.cleaning do
      example.run
    end
  end

  config.before(:all) do
    # force: true drops any existing users table first. Because the database
    # file persists between runs, a run interrupted before the after(:all)
    # hook would otherwise make the next run fail with "table already exists".
    connection.create_table :users, force: true do |t|
      t.string :first_name
      t.string :last_name
      t.string :email
      t.string :username
    end
  end

  config.after(:all) do
    connection.drop_table :users
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ingestion_engine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brandon Mathis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - BeMathis@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - bin/byebug
24
+ - bin/coderay
25
+ - bin/htmldiff
26
+ - bin/ldiff
27
+ - bin/pry
28
+ - bin/rspec
29
+ - ingestion_engine.gemspec
30
+ - invalid.csv
31
+ - lib/ingestion_engine.rb
32
+ - lib/ingestion_engine/base.rb
33
+ - lib/ingestion_engine/reporter.rb
34
+ - spec/lib/ingestion_engine/base_spec.rb
35
+ - spec/sample_csvs/users.csv
36
+ - spec/sample_csvs/users_with_bad_email.csv
37
+ - spec/sample_csvs/users_with_missing_first_name.csv
38
+ - spec/spec_helper.rb
39
+ homepage:
40
+ licenses:
41
+ - MIT
42
+ metadata: {}
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 2.4.6
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Ingest and massage csv data
63
+ test_files: []