itiel 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.autotest +17 -0
- data/.gitignore +13 -0
- data/.gitlab-ci.yml +36 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/Gemfile.rails.4.0 +7 -0
- data/Gemfile.rails.4.1 +7 -0
- data/Gemfile.rails.4.2 +7 -0
- data/README.markdown +106 -0
- data/Rakefile +13 -0
- data/build.sh +10 -0
- data/features/extract/database_table.feature +16 -0
- data/features/extract/sql_script.feature +17 -0
- data/features/load/database_table_loader.feature +21 -0
- data/features/lookup/csv_file.feature +41 -0
- data/features/lookup/database_table.feature +43 -0
- data/features/script/ruby_script.feature +19 -0
- data/features/step_definitions/csv_steps.rb +15 -0
- data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
- data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
- data/features/step_definitions/extractor/database_steps.rb +27 -0
- data/features/step_definitions/extractor/database_table_steps.rb +8 -0
- data/features/step_definitions/extractor/extraction_steps.rb +3 -0
- data/features/step_definitions/flow_steps.rb +9 -0
- data/features/step_definitions/loader/csv_file_steps.rb +4 -0
- data/features/step_definitions/loader/database_table_steps.rb +14 -0
- data/features/step_definitions/lookup/lookup_steps.rb +35 -0
- data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
- data/features/step_definitions/stream_steps.rb +8 -0
- data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
- data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
- data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
- data/features/step_definitions/transformation/map_values_step.rb +4 -0
- data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
- data/features/step_definitions/transformation/select_column_steps.rb +3 -0
- data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
- data/features/support/database.yml +1 -0
- data/features/support/env.rb +13 -0
- data/features/transform/transformations.feature +123 -0
- data/itiel.gemspec +34 -0
- data/lib/itiel.rb +45 -0
- data/lib/itiel/db/connection.rb +24 -0
- data/lib/itiel/db/sql_connectable.rb +33 -0
- data/lib/itiel/db/truncator.rb +30 -0
- data/lib/itiel/extract/chained_step.rb +22 -0
- data/lib/itiel/extract/csv_file.rb +31 -0
- data/lib/itiel/extract/custom_sql.rb +38 -0
- data/lib/itiel/extract/database_table.rb +23 -0
- data/lib/itiel/job.rb +116 -0
- data/lib/itiel/load/chained_step.rb +37 -0
- data/lib/itiel/load/csv_file.rb +45 -0
- data/lib/itiel/load/database_table.rb +34 -0
- data/lib/itiel/load/input_output_behavior.rb +36 -0
- data/lib/itiel/logger.rb +47 -0
- data/lib/itiel/lookup/chained_step.rb +35 -0
- data/lib/itiel/lookup/csv_file.rb +16 -0
- data/lib/itiel/lookup/database_table.rb +36 -0
- data/lib/itiel/lookup/hash_lookup.rb +35 -0
- data/lib/itiel/nameable.rb +6 -0
- data/lib/itiel/script/chained_step.rb +18 -0
- data/lib/itiel/script/ruby_script.rb +31 -0
- data/lib/itiel/script/sql_script.rb +29 -0
- data/lib/itiel/transform/calculated_columns.rb +47 -0
- data/lib/itiel/transform/chained_step.rb +27 -0
- data/lib/itiel/transform/constant_column.rb +35 -0
- data/lib/itiel/transform/input_output_behavior.rb +44 -0
- data/lib/itiel/transform/map_values.rb +43 -0
- data/lib/itiel/transform/remove_column.rb +33 -0
- data/lib/itiel/transform/rename_column.rb +43 -0
- data/lib/itiel/transform/select_column.rb +37 -0
- data/lib/itiel/version.rb +3 -0
- data/spec/db/sql_connectable_spec.rb +20 -0
- data/spec/extract/chained_step_spec.rb +31 -0
- data/spec/extract/csv_file_spec.rb +22 -0
- data/spec/extract/custom_sql_spec.rb +19 -0
- data/spec/extract/database_table_spec.rb +22 -0
- data/spec/job_spec.rb +80 -0
- data/spec/loader/chained_step_spec.rb +39 -0
- data/spec/loader/csv_file_spec.rb +69 -0
- data/spec/loader/database_table_spec.rb +29 -0
- data/spec/lookup/hash_lookup_spec.rb +108 -0
- data/spec/nameable_spec.rb +17 -0
- data/spec/script/chained_step_spec.rb +24 -0
- data/spec/script/ruby_script_spec.rb +18 -0
- data/spec/script/sql_script_spec.rb +41 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/config/database.yml +1 -0
- data/spec/support/config/sources.yml +9 -0
- data/spec/transform/calculated_columns_spec.rb +36 -0
- data/spec/transform/chained_step_spec.rb +36 -0
- data/spec/transform/constant_column_spec.rb +22 -0
- data/spec/transform/map_values_spec.rb +26 -0
- data/spec/transform/rename_column_spec.rb +25 -0
- data/spec/transform/select_column_spec.rb +21 -0
- metadata +344 -0
@@ -0,0 +1,6 @@
|
|
1
|
+
# Builds a CustomSQL extractor from the step's multi-line query, binds it to
# the :test connection and keeps the result of #extract in @extraction.
When /^I create a Itiel::Extract::CustomSQL object with the query:$/ do |query|
  @custom_sql = Itiel::Extract::CustomSQL.new(query)

  # The connection file is configured on the class, the connection name on
  # the instance.
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
  @custom_sql.connection = :test

  @extraction = @custom_sql.extract
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Creates the named table by delegating to the matching
# "a <name> database table" step, then inserts every row of the Cucumber
# table into it through the :test Sequel connection.
Given /^an? (.+) database table with rows:$/ do |table_name, table|
  step "a #{table_name} database table"

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  table.hashes.each { |attributes| dataset.insert(attributes) }
end
|
9
|
+
|
10
|
+
# Creates the blogposts table (id, title, author) on the :test connection.
Given /^a blogposts database table$/ do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:blogposts) do
    Integer :id
    String  :title
    String  :author
  end
end
|
19
|
+
|
20
|
+
# Creates the authors table (id, name) on the :test connection.
Given /^an? authors database table$/ do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:authors) do
    Integer :id
    String  :name
  end
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# Builds a DatabaseTable extractor for the given table on the :test
# connection, registers it as @source when no source has been set yet, and
# keeps the result of #extract in @extraction.
When /^I create a Itiel::Extract::DatabaseTable object for the '(.*?)' table$/ do |table_name|
  @database_table = Itiel::Extract::DatabaseTable.new.tap do |extractor|
    extractor.connection = :test
    extractor.table_name = table_name
  end
  @source ||= @database_table

  @extraction = @database_table.extract
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Builds a DatabaseTable loader on the :test connection for the table named
# in the step text and stores it as @destination.
#
# The captured table name is now passed through instead of being ignored in
# favour of a hard-coded "blogposts", so the step does what its wording says
# for any table.
Given /^I create a Loader::DatabaseTable object for the "([^"]*)" table$/ do |table_name|
  @destination = Itiel::Load::DatabaseTable.new :test, table_name
end
|
4
|
+
|
5
|
+
# Chains @source to @destination with >> and then starts the source.
When /^I load the source into the destination$/ do
  # Wire the flow first ...
  @source >> @destination

  # ... then kick it off from the head of the chain.
  @source.start
end
|
9
|
+
|
10
|
+
# Asserts that the named table on the :test connection holds exactly the
# expected number of rows.
Then /^the "([^"]*)" table should have (\d+) records$/ do |table_name, record_count|
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  expected_count = record_count.to_i # captured from the step text as a String

  expect(dataset.count).to eq expected_count
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Builds a DatabaseTable lookup for the given table on the :test connection
# and stores it as @lookup.
Given(/^I create a Itiel::Lookup::DatabaseTable object for the "(.*?)"$/) do |table_name|
  @lookup = Itiel::Lookup::DatabaseTable.new.tap do |lookup|
    lookup.connection = :test
    lookup.table_name = table_name
  end
end
|
6
|
+
|
7
|
+
# Builds a CSVFile lookup for the named file under tmp/ and stores it as
# @lookup.
Given(/^I create a Itiel::Lookup::CSVFile object with the "(.*?)" file$/) do |file_name|
  csv_path = File.join('tmp', file_name)
  @lookup = Itiel::Lookup::CSVFile.new(csv_path)
end
|
10
|
+
|
11
|
+
# Sets the lookup's join mapping: source column name => lookup column name.
Given(/^the lookup joins the "(.*?)" column in the source with the "(.*?)" column in the lookup stream$/) do |source_column, lookup_column|
  join_mapping = { source_column => lookup_column }
  @lookup.lookup_columns = join_mapping
end
|
14
|
+
|
15
|
+
# Sets which lookup column is appended to the source stream, and under what
# name: original column name => new column name.
Given(/^the lookup appends the "(.*?)" column on the lookup stream to the source as "(.*?)"$/) do |original_name, new_name|
  appended_columns = { original_name => new_name }
  @lookup.joined_columns = appended_columns
end
|
18
|
+
|
19
|
+
# Compares the lookup output with the expected Cucumber table, row by row and
# column by column. Output rows are read with symbol keys and compared as
# strings, since Cucumber table cells are strings.
Then(/^the output for the lookup should be:$/) do |table|
  actual_rows = @lookup.output
  columns     = table.headers

  table.hashes.each_with_index do |expected_row, position|
    columns.each do |column|
      actual_value = actual_rows[position][column.to_sym].to_s
      expect(actual_value).to eq expected_row[column]
    end
  end
end
|
27
|
+
|
28
|
+
# Creates the orders table (id, email) on the :test connection.
Given /^an? orders database table$/ do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:orders) do
    Integer :id
    String  :email
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Builds a CalculatedColumns transformation whose block returns two derived
# columns per row: "name_state" (name-state) and "id_name" (id-name).
Given /^I create a Transformation::CalculatedColumns object$/ do
  @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
    {
      'name_state' => "#{row['name']}-#{row['state']}",
      'id_name'    => "#{row['id']}-#{row['name']}"
    }
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Connection URL for the "test" connection: a SQLite database file named test.db.
test: 'sqlite://test.db'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Cucumber support file: starts coverage, loads the library and configures
# per-scenario state for features tagged @database.

require 'simplecov'

# Coverage is started before the library is required.
SimpleCov.start

require 'itiel'
require 'csv'

# Send the library's log output to tmp/itiel.log.
Itiel::Logger.logger = Logger.new File.join('tmp', 'itiel.log')

Before '@database' do
  # Remove any existing test.db before the scenario runs.
  # File.exist? replaces File.exists?, which was deprecated and then removed
  # in Ruby 3.2.
  File.unlink 'test.db' if File.exist?('test.db')
  @database_config_path = File.join('features', 'support', 'database.yml')
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
Feature: Transformations
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given a "source.csv" file with the following rows:
|
5
|
+
| id | name | state |
|
6
|
+
| 1 | john | active |
|
7
|
+
| 2 | ruby | active |
|
8
|
+
| 3 | rails | inactive |
|
9
|
+
| 4 | pete | active |
|
10
|
+
|
11
|
+
# @source = Itiel::Extract::CSVFile.new('source.csv')
|
12
|
+
And I create a Extractor::CSVFile object with "source.csv"
|
13
|
+
# @destination = Itiel::Load::CSVFile.new('destination.csv')
|
14
|
+
And I create a Loader::CSVFile object with "destination.csv"
|
15
|
+
|
16
|
+
|
17
|
+
Scenario: Append a constant value column to the original stream
|
18
|
+
# @constant_column = Itiel::Transform::ConstantColumn.new("constant" => "value")
|
19
|
+
Given I create a Transformation::ConstantColumn object with "constant" => "value"
|
20
|
+
|
21
|
+
# @source >> @constant_column >> @destination
|
22
|
+
And the data flows in the following direction:
|
23
|
+
| @source |
|
24
|
+
| @constant_column |
|
25
|
+
| @destination |
|
26
|
+
|
27
|
+
# @source.start
|
28
|
+
When I start the source
|
29
|
+
|
30
|
+
Then the "destination.csv" file should exist with the following content:
|
31
|
+
| id | name | state | constant |
|
32
|
+
| 1 | john | active | value |
|
33
|
+
| 2 | ruby | active | value |
|
34
|
+
| 3 | rails | inactive | value |
|
35
|
+
| 4 | pete | active | value |
|
36
|
+
|
37
|
+
Scenario: Map Values in a column
|
38
|
+
# @map_values = Itiel::Transform::MapValues.new("state" => { "active" => 1, "inactive" => 0 })
|
39
|
+
Given I create a Transformation::MapValues object for the state column to integer
|
40
|
+
|
41
|
+
# @source.next_step = @map_values
|
42
|
+
# @map_values.next_step = @destination
|
43
|
+
And the data flows in the following direction:
|
44
|
+
| @source |
|
45
|
+
| @map_values |
|
46
|
+
| @destination |
|
47
|
+
|
48
|
+
# @source.start
|
49
|
+
When I start the source
|
50
|
+
|
51
|
+
Then the "destination.csv" file should exist with the following content:
|
52
|
+
| id | name | state |
|
53
|
+
| 1 | john | 1 |
|
54
|
+
| 2 | ruby | 1 |
|
55
|
+
| 3 | rails | 0 |
|
56
|
+
| 4 | pete | 1 |
|
57
|
+
|
58
|
+
|
59
|
+
Scenario: Rename an existing column
|
60
|
+
# @rename_column = Itiel::Transform::RenameColumn.new("state" => "status")
|
61
|
+
Given I create a Transformation::RenameColumn object from "state" to "status"
|
62
|
+
|
63
|
+
# @source.next_step = @rename_column
|
64
|
+
# @rename_column.next_step = @destination
|
65
|
+
And the data flows in the following direction:
|
66
|
+
| @source |
|
67
|
+
| @rename_column |
|
68
|
+
| @destination |
|
69
|
+
|
70
|
+
# @source.start
|
71
|
+
When I start the source
|
72
|
+
|
73
|
+
Then the "destination.csv" file should exist with the following content:
|
74
|
+
| id | name | status |
|
75
|
+
| 1 | john | active |
|
76
|
+
| 2 | ruby | active |
|
77
|
+
| 3 | rails | inactive |
|
78
|
+
| 4 | pete | active |
|
79
|
+
|
80
|
+
Scenario: Select specific columns from the stream
|
81
|
+
# @select_column = Itiel::Transform::SelectColumn.new("id", "state")
|
82
|
+
Given I create a Transformation::SelectColumn object with "id" and "state"
|
83
|
+
|
84
|
+
# @source.next_step = @select_column
|
85
|
+
# @select_column.next_step = @destination
|
86
|
+
And the data flows in the following direction:
|
87
|
+
| @source |
|
88
|
+
| @select_column |
|
89
|
+
| @destination |
|
90
|
+
|
91
|
+
# @source.start
|
92
|
+
When I start the source
|
93
|
+
|
94
|
+
Then the "destination.csv" file should exist with the following content:
|
95
|
+
| id | state |
|
96
|
+
| 1 | active |
|
97
|
+
| 2 | active |
|
98
|
+
| 3 | inactive |
|
99
|
+
| 4 | active |
|
100
|
+
|
101
|
+
Scenario: Create multiple columns with the result of a calculation with other columns
|
102
|
+
|
103
|
+
# @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
|
104
|
+
# name_state = "#{row['name']}-#{row['state']}"
|
105
|
+
# id_name = "#{row['id']}-#{row['name']}"
|
106
|
+
# { 'name_state' => name_state, 'id_name' => id_name }
|
107
|
+
# end
|
108
|
+
Given I create a Transformation::CalculatedColumns object
|
109
|
+
|
110
|
+
And the data flows in the following direction:
|
111
|
+
| @source |
|
112
|
+
| @calculated_columns |
|
113
|
+
| @destination |
|
114
|
+
|
115
|
+
# @source.start
|
116
|
+
When I start the source
|
117
|
+
|
118
|
+
Then the "destination.csv" file should exist with the following content:
|
119
|
+
| id | name | state | name_state | id_name |
|
120
|
+
| 1 | john | active | john-active | 1-john |
|
121
|
+
| 2 | ruby | active | ruby-active | 2-ruby |
|
122
|
+
| 3 | rails | inactive | rails-inactive | 3-rails |
|
123
|
+
| 4 | pete | active | pete-active | 4-pete |
|
data/itiel.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "itiel/version"
|
4
|
+
|
5
|
+
# Gem specification for itiel.
#
# The obsolete `rubyforge_project` attribute has been dropped: RubyForge no
# longer exists and RubyGems deprecates the setter.
Gem::Specification.new do |s|
  s.name        = "itiel"
  s.version     = Itiel::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["David Padilla"]
  s.email       = ["david@padilla.cc"]
  s.homepage    = "https://gitlab.com/dabit/itiel"
  s.summary     = %q{DSL for ETL in Ruby}
  s.description = %q{DSL for ETL in Ruby}

  # File lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency 'activesupport', '~> 4.0'
  s.add_dependency 'activerecord' , '~> 4.0'
  s.add_dependency 'sequel'

  # Development and test dependencies
  s.add_development_dependency 'cucumber'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec', '~> 3.5.0'
  s.add_development_dependency 'rr'
  s.add_development_dependency 'minitest'
  s.add_development_dependency 'sqlite3'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'turn'
end
|
data/lib/itiel.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'active_support'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
# Library-wide error classes.
#
# They derive from StandardError rather than Exception so that a plain
# `rescue` (or `rescue => e`) catches them. Rescuing them by class name, or
# through Exception, keeps working as before.
module Itiel
  class MethodNotImplementedException < StandardError; end
  class MissingConnection             < StandardError; end
  class UndefinedNextStepException    < StandardError; end
  class SQLSentenceNotProvided        < StandardError; end
end
|
11
|
+
|
12
|
+
require 'itiel/job'
|
13
|
+
require 'itiel/logger'
|
14
|
+
require 'itiel/nameable'
|
15
|
+
|
16
|
+
require 'itiel/db/connection'
|
17
|
+
require 'itiel/db/sql_connectable.rb'
|
18
|
+
require 'itiel/db/truncator.rb'
|
19
|
+
|
20
|
+
require 'itiel/extract/chained_step'
|
21
|
+
require 'itiel/extract/csv_file'
|
22
|
+
require 'itiel/extract/custom_sql'
|
23
|
+
require 'itiel/extract/database_table'
|
24
|
+
|
25
|
+
require 'itiel/load/chained_step'
|
26
|
+
require 'itiel/load/csv_file'
|
27
|
+
require 'itiel/load/database_table'
|
28
|
+
|
29
|
+
require 'itiel/lookup/chained_step'
|
30
|
+
require 'itiel/lookup/hash_lookup'
|
31
|
+
require 'itiel/lookup/database_table'
|
32
|
+
require 'itiel/lookup/csv_file'
|
33
|
+
|
34
|
+
require 'itiel/script/chained_step'
|
35
|
+
require 'itiel/script/sql_script'
|
36
|
+
require 'itiel/script/ruby_script'
|
37
|
+
|
38
|
+
require 'itiel/transform/chained_step'
|
39
|
+
require 'itiel/transform/calculated_columns'
|
40
|
+
require 'itiel/transform/constant_column'
|
41
|
+
require 'itiel/transform/rename_column'
|
42
|
+
require 'itiel/transform/remove_column'
|
43
|
+
require 'itiel/transform/select_column'
|
44
|
+
require 'itiel/transform/input_output_behavior'
|
45
|
+
require 'itiel/transform/map_values'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Itiel
  module DB
    # Looks up a connection string by name in a YAML sources file.
    #
    # sources_file    - path of the YAML file; defaults to 'config/sources.yml'.
    # connection_name - key to read from that file.
    class Connection
      include Itiel::Nameable

      attr_accessor :sources_file, :connection_name

      def initialize
        self.sources_file = 'config/sources.yml'
      end

      # Returns the entry stored under connection_name in the sources file.
      #
      # The value is memoized after the first successful read. Pass
      # refresh = true to force the file to be read again.
      def connection_string(refresh = false)
        @connection = load_from_file if refresh || !@connection
        @connection
      end

      private

      # Reads the YAML file and returns the entry for connection_name
      # (nil when the key is absent).
      def load_from_file
        sources = YAML.load_file(sources_file)
        sources[connection_name]
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
module Itiel
  module DB
    # Mixin for steps that talk to a SQL database through Sequel.
    #
    # Including it adds instance-level `connection` and `connection_file_path`
    # accessors and extends the including class with ClassMethods.
    module SQLConnectable
      attr_accessor :connection, :connection_file_path

      module ClassMethods
        # Path of the YAML file that maps connection names to connection
        # strings. Defaults to 'config/database.yml'.
        #
        # Stored in a class variable of this module, so the value is shared
        # by every class that includes SQLConnectable.
        def connection_file_path
          @@connection_file_path ||= 'config/database.yml'
        end

        def connection_file_path=(value)
          @@connection_file_path = value
        end

        # Opens a Sequel connection for the named entry of the connection
        # file. A new connection is opened on every call.
        def sequel_connection(connection_name)
          Sequel.connect read_connection_string(connection_name)
        end

        protected

        # Returns the connection string stored under connection_name
        # (Symbol or String), or nil when the file has no such entry.
        #
        # The parsed file is cached together with the path it was read from.
        # It is re-read when connection_file_path changes; previously the
        # first file loaded was kept forever and later path changes were
        # silently ignored.
        def read_connection_string(connection_name)
          path = connection_file_path
          unless @connections && @connections_path == path
            @connections      = YAML.load_file(path)
            @connections_path = path
          end
          @connections[connection_name.to_s]
        end
      end

      def self.included(receiver)
        receiver.extend ClassMethods
      end
    end
  end
end
|