ingestion_engine 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +65 -0
- data/bin/byebug +16 -0
- data/bin/coderay +16 -0
- data/bin/htmldiff +16 -0
- data/bin/ldiff +16 -0
- data/bin/pry +16 -0
- data/bin/rspec +16 -0
- data/ingestion_engine.gemspec +9 -0
- data/invalid.csv +3 -0
- data/lib/ingestion_engine/base.rb +43 -0
- data/lib/ingestion_engine/reporter.rb +21 -0
- data/lib/ingestion_engine.rb +6 -0
- data/spec/lib/ingestion_engine/base_spec.rb +57 -0
- data/spec/sample_csvs/users.csv +4 -0
- data/spec/sample_csvs/users_with_bad_email.csv +4 -0
- data/spec/sample_csvs/users_with_missing_first_name.csv +4 -0
- data/spec/spec_helper.rb +39 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bb45d11b839f5c50bf93f06d0cdb531896170f85
|
4
|
+
data.tar.gz: 739c9202ed5b8016ae2795545c2c84ac3f53a747
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c521895788ea57ed62c767f231775a8119682c18ffdb9254df809b1d7c6478082754798886b4d9eef001080b5cdccfc3283265918b7b960352d29cf28a125657
|
7
|
+
data.tar.gz: b06c97b99a0452335dd69e424dc37a2b96777a1775193ab9996affcd1ef0e2a63be761799d51a1aaa484cb0ed6138e8aa12c70c691563c7e50829ebd88430952
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activemodel (4.2.1)
|
5
|
+
activesupport (= 4.2.1)
|
6
|
+
builder (~> 3.1)
|
7
|
+
activerecord (4.2.1)
|
8
|
+
activemodel (= 4.2.1)
|
9
|
+
activesupport (= 4.2.1)
|
10
|
+
arel (~> 6.0)
|
11
|
+
activesupport (4.2.1)
|
12
|
+
i18n (~> 0.7)
|
13
|
+
json (~> 1.7, >= 1.7.7)
|
14
|
+
minitest (~> 5.1)
|
15
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
16
|
+
tzinfo (~> 1.1)
|
17
|
+
arel (6.0.0)
|
18
|
+
awesome_print (1.6.1)
|
19
|
+
builder (3.2.2)
|
20
|
+
byebug (4.0.4)
|
21
|
+
columnize (= 0.9.0)
|
22
|
+
coderay (1.1.0)
|
23
|
+
columnize (0.9.0)
|
24
|
+
database_cleaner (1.4.1)
|
25
|
+
diff-lcs (1.2.5)
|
26
|
+
i18n (0.7.0)
|
27
|
+
json (1.8.2)
|
28
|
+
method_source (0.8.2)
|
29
|
+
minitest (5.5.1)
|
30
|
+
pry (0.10.1)
|
31
|
+
coderay (~> 1.1.0)
|
32
|
+
method_source (~> 0.8.1)
|
33
|
+
slop (~> 3.4)
|
34
|
+
pry-byebug (3.1.0)
|
35
|
+
byebug (~> 4.0)
|
36
|
+
pry (~> 0.10)
|
37
|
+
rspec (3.2.0)
|
38
|
+
rspec-core (~> 3.2.0)
|
39
|
+
rspec-expectations (~> 3.2.0)
|
40
|
+
rspec-mocks (~> 3.2.0)
|
41
|
+
rspec-core (3.2.2)
|
42
|
+
rspec-support (~> 3.2.0)
|
43
|
+
rspec-expectations (3.2.0)
|
44
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
45
|
+
rspec-support (~> 3.2.0)
|
46
|
+
rspec-mocks (3.2.1)
|
47
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
48
|
+
rspec-support (~> 3.2.0)
|
49
|
+
rspec-support (3.2.2)
|
50
|
+
slop (3.6.0)
|
51
|
+
sqlite3 (1.3.10)
|
52
|
+
thread_safe (0.3.5)
|
53
|
+
tzinfo (1.2.2)
|
54
|
+
thread_safe (~> 0.1)
|
55
|
+
|
56
|
+
PLATFORMS
|
57
|
+
ruby
|
58
|
+
|
59
|
+
DEPENDENCIES
|
60
|
+
activerecord
|
61
|
+
awesome_print
|
62
|
+
database_cleaner
|
63
|
+
pry-byebug
|
64
|
+
rspec
|
65
|
+
sqlite3
|
data/bin/byebug
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'byebug' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('byebug', 'byebug')
|
data/bin/coderay
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'coderay' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('coderay', 'coderay')
|
data/bin/htmldiff
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'htmldiff' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('diff-lcs', 'htmldiff')
|
data/bin/ldiff
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'ldiff' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('diff-lcs', 'ldiff')
|
data/bin/pry
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'pry' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('pry', 'pry')
|
data/bin/rspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'rspec' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('rspec-core', 'rspec')
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Gem specification for ingestion_engine.
#
# The packaged file list is taken from git and then filtered so that
# development-only files do not ship with the gem: the Bundler binstubs under
# bin/, the test suite under spec/, and the invalid.csv report that the
# library writes into the working directory when it runs.
Gem::Specification.new do |spec|
  spec.name    = "ingestion_engine"
  spec.version = '0.0.1'
  spec.authors = ['Brandon Mathis']
  spec.email   = ['BeMathis@gmail.com']
  spec.summary = 'Ingest and massage csv data'
  spec.license = 'MIT'

  # -z makes git emit NUL-separated paths, so names containing whitespace or
  # newlines are not split apart.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.start_with?('bin/', 'spec/') || path == 'invalid.csv'
  end
end
|
data/invalid.csv
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module IngestionEngine
  # Builds one +klass+ instance per CSV data row, hands the invalid ones to
  # IngestionEngine::Reporter and saves the remaining ones.
  class Base
    attr_reader :rows, :klass, :headers

    # klass - class instantiated (without arguments) once per data row. Its
    #         instances must respond to "<header>=" for every header and to
    #         invalid?, errors and save.
    # file  - CSV source passed to CSV.read; the first row is the header row.
    def initialize(klass, file)
      @klass   = klass
      @rows    = CSV.read(file)
      # An empty file has no header row to shift off; treat it as having no
      # headers instead of calling map on nil.
      @headers = Array(@rows.shift).map(&:strip)
    end

    # Runs the whole pipeline: build, report invalid entities, save the rest.
    #
    # Returns the entities that passed validation and were sent #save.
    def ingest
      entities = []
      init entities
      dump_invalid entities
      save entities
    end

    private

    # Appends one populated +klass+ instance per data row to +entities+.
    def init(entities)
      rows.each do |row|
        obj = klass.new
        headers.each_with_index do |header, index|
          value = row[index]
          # CSV parses an unquoted empty field as nil, and a short row has no
          # cell at all; pass nil through rather than calling strip on it.
          value = value.strip if value
          # Header names come from the file, so only public setters may be
          # reached through them.
          obj.public_send("#{header}=", value)
        end
        entities << obj
      end
    end

    # Removes the invalid entities from +entities+ in place and writes them,
    # with their validation messages, through the Reporter.
    def dump_invalid(entities)
      # partition asks each entity invalid? exactly once and avoids the
      # repeated Array#delete scans.
      invalids, valids = entities.partition(&:invalid?)
      entities.replace(valids)
      IngestionEngine::Reporter.new(invalids, headers).log do |invalid|
        invalid.errors.full_messages.to_sentence
      end
    end

    # Sends #save to every entity and returns +entities+.
    def save(entities)
      entities.each(&:save)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'csv'

module IngestionEngine
  # Writes rejected entities, together with the reason each one was rejected,
  # to a CSV report.
  class Reporter
    # Report location used when #log is called without a path.
    DEFAULT_PATH = 'invalid.csv'.freeze

    attr_reader :entities, :headers

    # entities - objects to report; each must respond to every name in
    #            +headers+.
    # headers  - attribute names, used both as column titles and as the
    #            reader methods called on each entity.
    def initialize(entities, headers)
      @entities = entities
      @headers  = headers
    end

    # Writes a header row (+headers+ plus "errors") followed by one row per
    # entity. The block receives each entity and returns the text for that
    # entity's "errors" column.
    #
    # Rows are generated by the CSV library, so the header and data rows use
    # the same delimiter, and values or messages containing commas, quotes or
    # newlines are quoted instead of being altered.
    #
    # path - file to create or overwrite; defaults to DEFAULT_PATH, which is
    #        resolved against the current working directory.
    #
    # Returns the reported entities.
    def log(path = DEFAULT_PATH)
      CSV.open(path, 'w') do |csv|
        csv << (headers + ['errors'])
        entities.each do |entity|
          message = yield(entity)
          values  = headers.map { |header| entity.public_send(header) }
          csv << (values + [message])
        end
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# ActiveRecord model used as the ingestion target in this spec file.
class User < ActiveRecord::Base
  # A first name must be present.
  validates :first_name, presence: true
  # The email must match the pattern below; the check runs on create only.
  validates :email, format: { with: /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\z/i, on: :create }
end
|
7
|
+
|
8
|
+
describe IngestionEngine::Base do
  describe '#ingest' do
    # The sample files are passed as paths rather than open File objects, so
    # no file handle is left unclosed by the examples.
    let(:csv) { 'spec/sample_csvs/users.csv' }
    let(:emails) { User.all.map(&:email) }
    let(:usernames) { User.all.map(&:username) }

    it 'saves the given items' do
      IngestionEngine::Base.new(User, csv).ingest
      expect(User.count).to eq 3
      expect(usernames).to include 'BeMathis'
      expect(usernames).to include 'Carrion'
      expect(usernames).to include 'durrhurrdurr'
      expect(emails).to include 'bemathis@gmail.com'
      expect(emails).to include 'carrion@gmail.com'
      expect(emails).to include 'durrhurrdurr@gmail.com'
    end

    context 'with missing attr that is required' do
      let(:csv) { 'spec/sample_csvs/users_with_missing_first_name.csv' }

      it 'does not save invalid items' do
        IngestionEngine::Base.new(User, csv).ingest
        expect(User.count).to eq 2
        expect(usernames).to include 'durrhurrdurr'
        expect(usernames).to include 'Carrion'
        expect(usernames).to_not include 'BeMathis'
      end

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        # Options are passed as keywords: a positional options hash is no
        # longer converted to keyword arguments on Ruby 3.
        reported = CSV.read('invalid.csv', headers: true)
        # Asserting on the collected column, instead of inside a per-row loop,
        # makes the example fail when the report contains no data rows.
        expect(reported.map { |row| row['username'] }).to eq ['BeMathis']
      end
    end

    context 'with bad formatted email' do
      let(:csv) { 'spec/sample_csvs/users_with_bad_email.csv' }

      it 'dumps the bad entry into invalid records csv' do
        IngestionEngine::Base.new(User, csv).ingest
        # CSV.read opens and closes the report itself.
        invalids = CSV.read('invalid.csv', headers: true, header_converters: :symbol)
        expect(invalids[0][:username]).to eq 'BeMathis'
        expect(invalids[1][:username]).to eq 'Carrion'
        expect(invalids[0][:errors].strip).to eq 'Email is invalid'
        expect(invalids[1][:errors].strip).to eq 'Email is invalid'
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
Bundler.require
|
2
|
+
require 'ingestion_engine'
|
3
|
+
require 'rspec/core'
|
4
|
+
|
5
|
+
|
6
|
+
# The specs run against a SQLite database stored in a file under /tmp.
ActiveRecord::Base.establish_connection(
  :adapter => "sqlite3",
  :database => "/tmp/ingestion_engine.sqlite"
)

connection = ActiveRecord::Base.connection

RSpec.configure do |config|
  config.mock_with :rspec

  config.before(:suite) do
    DatabaseCleaner.strategy = :transaction
    DatabaseCleaner.clean_with(:truncation)
  end

  # Wrap every example in DatabaseCleaner's cleaning block.
  config.around(:each) do |example|
    DatabaseCleaner.cleaning do
      example.run
    end
  end

  config.before(:all) do
    # The database is a file that outlives the process, so a users table can
    # still exist if an earlier run ended before the after(:all) hook dropped
    # it. force: true drops any such table first instead of failing because
    # the table already exists.
    connection.create_table :users, force: true do |t|
      t.string :first_name
      t.string :last_name
      t.string :email
      t.string :username
    end
  end

  config.after(:all) do
    connection.drop_table :users
  end
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ingestion_engine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brandon Mathis
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- BeMathis@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".gitignore"
|
21
|
+
- Gemfile
|
22
|
+
- Gemfile.lock
|
23
|
+
- bin/byebug
|
24
|
+
- bin/coderay
|
25
|
+
- bin/htmldiff
|
26
|
+
- bin/ldiff
|
27
|
+
- bin/pry
|
28
|
+
- bin/rspec
|
29
|
+
- ingestion_engine.gemspec
|
30
|
+
- invalid.csv
|
31
|
+
- lib/ingestion_engine.rb
|
32
|
+
- lib/ingestion_engine/base.rb
|
33
|
+
- lib/ingestion_engine/reporter.rb
|
34
|
+
- spec/lib/ingestion_engine/base_spec.rb
|
35
|
+
- spec/sample_csvs/users.csv
|
36
|
+
- spec/sample_csvs/users_with_bad_email.csv
|
37
|
+
- spec/sample_csvs/users_with_missing_first_name.csv
|
38
|
+
- spec/spec_helper.rb
|
39
|
+
homepage:
|
40
|
+
licenses:
|
41
|
+
- MIT
|
42
|
+
metadata: {}
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 2.4.6
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: Ingest and massage csv data
|
63
|
+
test_files: []
|