itiel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.autotest +17 -0
- data/.gitignore +13 -0
- data/.gitlab-ci.yml +36 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/Gemfile.rails.4.0 +7 -0
- data/Gemfile.rails.4.1 +7 -0
- data/Gemfile.rails.4.2 +7 -0
- data/README.markdown +106 -0
- data/Rakefile +13 -0
- data/build.sh +10 -0
- data/features/extract/database_table.feature +16 -0
- data/features/extract/sql_script.feature +17 -0
- data/features/load/database_table_loader.feature +21 -0
- data/features/lookup/csv_file.feature +41 -0
- data/features/lookup/database_table.feature +43 -0
- data/features/script/ruby_script.feature +19 -0
- data/features/step_definitions/csv_steps.rb +15 -0
- data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
- data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
- data/features/step_definitions/extractor/database_steps.rb +27 -0
- data/features/step_definitions/extractor/database_table_steps.rb +8 -0
- data/features/step_definitions/extractor/extraction_steps.rb +3 -0
- data/features/step_definitions/flow_steps.rb +9 -0
- data/features/step_definitions/loader/csv_file_steps.rb +4 -0
- data/features/step_definitions/loader/database_table_steps.rb +14 -0
- data/features/step_definitions/lookup/lookup_steps.rb +35 -0
- data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
- data/features/step_definitions/stream_steps.rb +8 -0
- data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
- data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
- data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
- data/features/step_definitions/transformation/map_values_step.rb +4 -0
- data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
- data/features/step_definitions/transformation/select_column_steps.rb +3 -0
- data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
- data/features/support/database.yml +1 -0
- data/features/support/env.rb +13 -0
- data/features/transform/transformations.feature +123 -0
- data/itiel.gemspec +34 -0
- data/lib/itiel.rb +45 -0
- data/lib/itiel/db/connection.rb +24 -0
- data/lib/itiel/db/sql_connectable.rb +33 -0
- data/lib/itiel/db/truncator.rb +30 -0
- data/lib/itiel/extract/chained_step.rb +22 -0
- data/lib/itiel/extract/csv_file.rb +31 -0
- data/lib/itiel/extract/custom_sql.rb +38 -0
- data/lib/itiel/extract/database_table.rb +23 -0
- data/lib/itiel/job.rb +116 -0
- data/lib/itiel/load/chained_step.rb +37 -0
- data/lib/itiel/load/csv_file.rb +45 -0
- data/lib/itiel/load/database_table.rb +34 -0
- data/lib/itiel/load/input_output_behavior.rb +36 -0
- data/lib/itiel/logger.rb +47 -0
- data/lib/itiel/lookup/chained_step.rb +35 -0
- data/lib/itiel/lookup/csv_file.rb +16 -0
- data/lib/itiel/lookup/database_table.rb +36 -0
- data/lib/itiel/lookup/hash_lookup.rb +35 -0
- data/lib/itiel/nameable.rb +6 -0
- data/lib/itiel/script/chained_step.rb +18 -0
- data/lib/itiel/script/ruby_script.rb +31 -0
- data/lib/itiel/script/sql_script.rb +29 -0
- data/lib/itiel/transform/calculated_columns.rb +47 -0
- data/lib/itiel/transform/chained_step.rb +27 -0
- data/lib/itiel/transform/constant_column.rb +35 -0
- data/lib/itiel/transform/input_output_behavior.rb +44 -0
- data/lib/itiel/transform/map_values.rb +43 -0
- data/lib/itiel/transform/remove_column.rb +33 -0
- data/lib/itiel/transform/rename_column.rb +43 -0
- data/lib/itiel/transform/select_column.rb +37 -0
- data/lib/itiel/version.rb +3 -0
- data/spec/db/sql_connectable_spec.rb +20 -0
- data/spec/extract/chained_step_spec.rb +31 -0
- data/spec/extract/csv_file_spec.rb +22 -0
- data/spec/extract/custom_sql_spec.rb +19 -0
- data/spec/extract/database_table_spec.rb +22 -0
- data/spec/job_spec.rb +80 -0
- data/spec/loader/chained_step_spec.rb +39 -0
- data/spec/loader/csv_file_spec.rb +69 -0
- data/spec/loader/database_table_spec.rb +29 -0
- data/spec/lookup/hash_lookup_spec.rb +108 -0
- data/spec/nameable_spec.rb +17 -0
- data/spec/script/chained_step_spec.rb +24 -0
- data/spec/script/ruby_script_spec.rb +18 -0
- data/spec/script/sql_script_spec.rb +41 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/config/database.yml +1 -0
- data/spec/support/config/sources.yml +9 -0
- data/spec/transform/calculated_columns_spec.rb +36 -0
- data/spec/transform/chained_step_spec.rb +36 -0
- data/spec/transform/constant_column_spec.rb +22 -0
- data/spec/transform/map_values_spec.rb +26 -0
- data/spec/transform/rename_column_spec.rb +25 -0
- data/spec/transform/select_column_spec.rb +21 -0
- metadata +344 -0
@@ -0,0 +1,6 @@
|
|
1
|
+
When /^I create a Itiel::Extract::CustomSQL object with the query:$/ do |string|
|
2
|
+
@custom_sql = Itiel::Extract::CustomSQL.new string
|
3
|
+
Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
|
4
|
+
@custom_sql.connection = :test
|
5
|
+
@extraction = @custom_sql.extract
|
6
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
Given /^an? (.+) database table with rows:$/ do |table_name, table|
|
2
|
+
step "a #{table_name} database table"
|
3
|
+
|
4
|
+
rows = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
|
5
|
+
table.hashes.each do |row|
|
6
|
+
rows.insert row
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
Given /^a blogposts database table$/ do
|
11
|
+
Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
|
12
|
+
|
13
|
+
Itiel::Extract::CustomSQL.sequel_connection(:test).create_table :blogposts do
|
14
|
+
Integer :id
|
15
|
+
String :title
|
16
|
+
String :author
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
Given /^an? authors database table$/ do
|
21
|
+
Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
|
22
|
+
|
23
|
+
Itiel::Extract::CustomSQL.sequel_connection(:test).create_table :authors do
|
24
|
+
Integer :id
|
25
|
+
String :name
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
When /^I create a Itiel::Extract::DatabaseTable object for the '(.*?)' table$/ do |table_name|
|
2
|
+
@database_table = Itiel::Extract::DatabaseTable.new
|
3
|
+
@database_table.connection = :test
|
4
|
+
@database_table.table_name = table_name
|
5
|
+
@source ||= @database_table
|
6
|
+
|
7
|
+
@extraction = @database_table.extract
|
8
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
Given /^I create a Loader::DatabaseTable object for the "([^"]*)" table$/ do |arg1|
|
2
|
+
# Build the loader for the table named in the step text instead of a fixed table name.
@destination = Itiel::Load::DatabaseTable.new :test, arg1
|
3
|
+
end
|
4
|
+
|
5
|
+
When /^I load the source into the destination$/ do
|
6
|
+
@source >> @destination
|
7
|
+
@source.start
|
8
|
+
end
|
9
|
+
|
10
|
+
Then /^the "([^"]*)" table should have (\d+) records$/ do |table_name, record_count|
|
11
|
+
Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
|
12
|
+
|
13
|
+
expect(Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym].count).to eq record_count.to_i
|
14
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
Given(/^I create a Itiel::Lookup::DatabaseTable object for the "(.*?)"$/) do |table_name|
|
2
|
+
@lookup = Itiel::Lookup::DatabaseTable.new
|
3
|
+
@lookup.connection = :test
|
4
|
+
@lookup.table_name = table_name
|
5
|
+
end
|
6
|
+
|
7
|
+
Given(/^I create a Itiel::Lookup::CSVFile object with the "(.*?)" file$/) do |file_name|
|
8
|
+
@lookup = Itiel::Lookup::CSVFile.new(File.join('tmp', file_name))
|
9
|
+
end
|
10
|
+
|
11
|
+
Given(/^the lookup joins the "(.*?)" column in the source with the "(.*?)" column in the lookup stream$/) do |source_column, lookup_column|
|
12
|
+
@lookup.lookup_columns = { source_column => lookup_column }
|
13
|
+
end
|
14
|
+
|
15
|
+
Given(/^the lookup appends the "(.*?)" column on the lookup stream to the source as "(.*?)"$/) do |original_name, new_name|
|
16
|
+
@lookup.joined_columns = { original_name => new_name }
|
17
|
+
end
|
18
|
+
|
19
|
+
Then(/^the output for the lookup should be:$/) do |table|
|
20
|
+
result = @lookup.output
|
21
|
+
table.hashes.each_with_index do |row, index|
|
22
|
+
table.headers.each do |column|
|
23
|
+
expect(result[index][column.to_sym].to_s).to eq row[column]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
Given /^an? orders database table$/ do
|
29
|
+
Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
|
30
|
+
|
31
|
+
Itiel::Extract::CustomSQL.sequel_connection(:test).create_table :orders do
|
32
|
+
Integer :id
|
33
|
+
String :email
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
Given /^I create a Transformation::CalculatedColumns object$/ do
|
2
|
+
@calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
|
3
|
+
name_state = "#{row['name']}-#{row['state']}"
|
4
|
+
id_name = "#{row['id']}-#{row['name']}"
|
5
|
+
{ 'name_state' => name_state, 'id_name' => id_name }
|
6
|
+
end
|
7
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
test: 'sqlite://test.db'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
|
3
|
+
SimpleCov.start
|
4
|
+
|
5
|
+
require 'itiel'
|
6
|
+
require 'csv'
|
7
|
+
|
8
|
+
Itiel::Logger.logger = Logger.new File.join('tmp', 'itiel.log')
|
9
|
+
|
10
|
+
Before '@database' do
|
11
|
+
# Remove a leftover database file; File.exist? is used because File.exists? was removed in Ruby 3.2.
File.unlink 'test.db' if File.exist?('test.db')
|
12
|
+
@database_config_path = File.join('features', 'support', 'database.yml')
|
13
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
Feature: Transformations
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given a "source.csv" file with the following rows:
|
5
|
+
| id | name | state |
|
6
|
+
| 1 | john | active |
|
7
|
+
| 2 | ruby | active |
|
8
|
+
| 3 | rails | inactive |
|
9
|
+
| 4 | pete | active |
|
10
|
+
|
11
|
+
# @source = Itiel::Extract::CSVFile.new('source.csv')
|
12
|
+
And I create a Extractor::CSVFile object with "source.csv"
|
13
|
+
# @destination = Itiel::Load::CSVFile.new('destination.csv')
|
14
|
+
And I create a Loader::CSVFile object with "destination.csv"
|
15
|
+
|
16
|
+
|
17
|
+
Scenario: Append a constant value column to the original stream
|
18
|
+
# @constant_column = Itiel::Transform::ConstantColumn.new("constant" => "value")
|
19
|
+
Given I create a Transformation::ConstantColumn object with "constant" => "value"
|
20
|
+
|
21
|
+
# @source >> @constant_column >> @destination
|
22
|
+
And the data flows in the following direction:
|
23
|
+
| @source |
|
24
|
+
| @constant_column |
|
25
|
+
| @destination |
|
26
|
+
|
27
|
+
# @source.start
|
28
|
+
When I start the source
|
29
|
+
|
30
|
+
Then the "destination.csv" file should exist with the following content:
|
31
|
+
| id | name | state | constant |
|
32
|
+
| 1 | john | active | value |
|
33
|
+
| 2 | ruby | active | value |
|
34
|
+
| 3 | rails | inactive | value |
|
35
|
+
| 4 | pete | active | value |
|
36
|
+
|
37
|
+
Scenario: Map Values in a column
|
38
|
+
# @map_values = Itiel::Transform::MapValues.new("state" => { "active" => 1, "inactive" => 0 })
|
39
|
+
Given I create a Transformation::MapValues object for the state column to integer
|
40
|
+
|
41
|
+
# @source.next_step = @map_values
|
42
|
+
# @map_values.next_step = @destination
|
43
|
+
And the data flows in the following direction:
|
44
|
+
| @source |
|
45
|
+
| @map_values |
|
46
|
+
| @destination |
|
47
|
+
|
48
|
+
# @source.start
|
49
|
+
When I start the source
|
50
|
+
|
51
|
+
Then the "destination.csv" file should exist with the following content:
|
52
|
+
| id | name | state |
|
53
|
+
| 1 | john | 1 |
|
54
|
+
| 2 | ruby | 1 |
|
55
|
+
| 3 | rails | 0 |
|
56
|
+
| 4 | pete | 1 |
|
57
|
+
|
58
|
+
|
59
|
+
Scenario: Rename an existing column
|
60
|
+
# @rename_column = Itiel::Transform::RenameColumn.new("state" => "status")
|
61
|
+
Given I create a Transformation::RenameColumn object from "state" to "status"
|
62
|
+
|
63
|
+
# @source.next_step = @rename_column
|
64
|
+
# @rename_column.next_step = @destination
|
65
|
+
And the data flows in the following direction:
|
66
|
+
| @source |
|
67
|
+
| @rename_column |
|
68
|
+
| @destination |
|
69
|
+
|
70
|
+
# @source.start
|
71
|
+
When I start the source
|
72
|
+
|
73
|
+
Then the "destination.csv" file should exist with the following content:
|
74
|
+
| id | name | status |
|
75
|
+
| 1 | john | active |
|
76
|
+
| 2 | ruby | active |
|
77
|
+
| 3 | rails | inactive |
|
78
|
+
| 4 | pete | active |
|
79
|
+
|
80
|
+
Scenario: Select specific columns from the stream
|
81
|
+
# @select_column = Itiel::Transform::SelectColumn.new("id", "state")
|
82
|
+
Given I create a Transformation::SelectColumn object with "id" and "state"
|
83
|
+
|
84
|
+
# @source.next_step = @select_column
|
85
|
+
# @select_column.next_step = @destination
|
86
|
+
And the data flows in the following direction:
|
87
|
+
| @source |
|
88
|
+
| @select_column |
|
89
|
+
| @destination |
|
90
|
+
|
91
|
+
# @source.start
|
92
|
+
When I start the source
|
93
|
+
|
94
|
+
Then the "destination.csv" file should exist with the following content:
|
95
|
+
| id | state |
|
96
|
+
| 1 | active |
|
97
|
+
| 2 | active |
|
98
|
+
| 3 | inactive |
|
99
|
+
| 4 | active |
|
100
|
+
|
101
|
+
Scenario: Create multiple columns with the result of a calculation with other columns
|
102
|
+
|
103
|
+
# @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
|
104
|
+
# name_state = "#{row['name']}-#{row['state']}"
|
105
|
+
# id_name = "#{row['id']}-#{row['name']}"
|
106
|
+
# { 'name_state' => name_state, 'id_name' => id_name }
|
107
|
+
# end
|
108
|
+
Given I create a Transformation::CalculatedColumns object
|
109
|
+
|
110
|
+
And the data flows in the following direction:
|
111
|
+
| @source |
|
112
|
+
| @calculated_columns |
|
113
|
+
| @destination |
|
114
|
+
|
115
|
+
# @source.start
|
116
|
+
When I start the source
|
117
|
+
|
118
|
+
Then the "destination.csv" file should exist with the following content:
|
119
|
+
| id | name | state | name_state | id_name |
|
120
|
+
| 1 | john | active | john-active | 1-john |
|
121
|
+
| 2 | ruby | active | ruby-active | 2-ruby |
|
122
|
+
| 3 | rails | inactive | rails-inactive | 3-rails |
|
123
|
+
| 4 | pete | active | pete-active | 4-pete |
|
data/itiel.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "itiel/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "itiel"
|
7
|
+
s.version = Itiel::VERSION
|
8
|
+
s.platform = Gem::Platform::RUBY
|
9
|
+
s.authors = ["David Padilla"]
|
10
|
+
s.email = ["david@padilla.cc"]
|
11
|
+
s.homepage = "https://gitlab.com/dabit/itiel"
|
12
|
+
s.summary = %q{DSL for ETL in Ruby}
|
13
|
+
s.description = %q{DSL for ETL in Ruby}
|
14
|
+
|
15
|
+
s.rubyforge_project = "itiel"
|
16
|
+
|
17
|
+
s.files = `git ls-files`.split("\n")
|
18
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
|
+
s.require_paths = ["lib"]
|
21
|
+
|
22
|
+
s.add_dependency 'activesupport', '~> 4.0'
|
23
|
+
s.add_dependency 'activerecord' , '~> 4.0'
|
24
|
+
s.add_dependency 'sequel'
|
25
|
+
|
26
|
+
s.add_development_dependency 'cucumber'
|
27
|
+
s.add_development_dependency 'rake'
|
28
|
+
s.add_development_dependency 'rspec', '~> 3.5.0'
|
29
|
+
s.add_development_dependency 'rr'
|
30
|
+
s.add_development_dependency 'minitest'
|
31
|
+
s.add_development_dependency 'sqlite3'
|
32
|
+
s.add_development_dependency 'simplecov'
|
33
|
+
s.add_development_dependency 'turn'
|
34
|
+
end
|
data/lib/itiel.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'active_support'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
module Itiel
|
6
|
+
# Error type named for an unimplemented method; extends StandardError so a plain `rescue` handles it.
class MethodNotImplementedException < StandardError; end
|
7
|
+
# Error type named for a missing connection; extends StandardError so a plain `rescue` handles it.
class MissingConnection < StandardError; end
|
8
|
+
# Error type named for an undefined next step; extends StandardError so a plain `rescue` handles it.
class UndefinedNextStepException < StandardError; end
|
9
|
+
# Error type named for a missing SQL sentence; extends StandardError so a plain `rescue` handles it.
class SQLSentenceNotProvided < StandardError; end
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'itiel/job'
|
13
|
+
require 'itiel/logger'
|
14
|
+
require 'itiel/nameable'
|
15
|
+
|
16
|
+
require 'itiel/db/connection'
|
17
|
+
require 'itiel/db/sql_connectable.rb'
|
18
|
+
require 'itiel/db/truncator.rb'
|
19
|
+
|
20
|
+
require 'itiel/extract/chained_step'
|
21
|
+
require 'itiel/extract/csv_file'
|
22
|
+
require 'itiel/extract/custom_sql'
|
23
|
+
require 'itiel/extract/database_table'
|
24
|
+
|
25
|
+
require 'itiel/load/chained_step'
|
26
|
+
require 'itiel/load/csv_file'
|
27
|
+
require 'itiel/load/database_table'
|
28
|
+
|
29
|
+
require 'itiel/lookup/chained_step'
|
30
|
+
require 'itiel/lookup/hash_lookup'
|
31
|
+
require 'itiel/lookup/database_table'
|
32
|
+
require 'itiel/lookup/csv_file'
|
33
|
+
|
34
|
+
require 'itiel/script/chained_step'
|
35
|
+
require 'itiel/script/sql_script'
|
36
|
+
require 'itiel/script/ruby_script'
|
37
|
+
|
38
|
+
require 'itiel/transform/chained_step'
|
39
|
+
require 'itiel/transform/calculated_columns'
|
40
|
+
require 'itiel/transform/constant_column'
|
41
|
+
require 'itiel/transform/rename_column'
|
42
|
+
require 'itiel/transform/remove_column'
|
43
|
+
require 'itiel/transform/select_column'
|
44
|
+
require 'itiel/transform/input_output_behavior'
|
45
|
+
require 'itiel/transform/map_values'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Itiel
|
2
|
+
module DB
|
3
|
+
class Connection
|
4
|
+
include Itiel::Nameable
|
5
|
+
|
6
|
+
attr_accessor :sources_file
|
7
|
+
attr_accessor :connection_name
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
self.sources_file = 'config/sources.yml'
|
11
|
+
end
|
12
|
+
|
13
|
+
def connection_string(refresh = false)
|
14
|
+
@connection = ( refresh ? load_from_file : @connection ||= load_from_file)
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
def load_from_file
|
19
|
+
yaml = YAML.load_file(self.sources_file)
|
20
|
+
yaml[self.connection_name]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
module Itiel
|
4
|
+
module DB
|
5
|
+
module SQLConnectable
|
6
|
+
attr_accessor :connection, :connection_file_path
|
7
|
+
|
8
|
+
module ClassMethods
|
9
|
+
def connection_file_path
|
10
|
+
@@connection_file_path ||= 'config/database.yml'
|
11
|
+
end
|
12
|
+
|
13
|
+
def connection_file_path=(value)
|
14
|
+
@@connection_file_path = value
|
15
|
+
end
|
16
|
+
|
17
|
+
def sequel_connection(connection_name)
|
18
|
+
Sequel.connect read_connection_string(connection_name)
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
def read_connection_string(connection_name)
|
23
|
+
@connections ||= YAML.load_file(connection_file_path)
|
24
|
+
@connections[connection_name.to_s]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.included(receiver)
|
29
|
+
receiver.extend ClassMethods
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|