itiel 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,3 @@
1
# Builds an Itiel::Extract::CSVFile for the named file under tmp/ and keeps it
# as @source for later steps.
When(/^I create a Extractor::CSVFile object with "([^"]*)"$/) do |csv_name|
  csv_path = File.join("tmp", csv_name)
  @source = Itiel::Extract::CSVFile.new(csv_path)
end
@@ -0,0 +1,6 @@
1
# Creates a CustomSQL extractor from the doc-string query, points it at the
# :test connection and stores the extracted result in @extraction.
When(/^I create a Itiel::Extract::CustomSQL object with the query:$/) do |query|
  @custom_sql = Itiel::Extract::CustomSQL.new(query)

  # The connection file path is configured on the class, the connection name
  # on the instance.
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
  @custom_sql.connection = :test

  @extraction = @custom_sql.extract
end
@@ -0,0 +1,27 @@
1
# Creates the named table by delegating to its "a <name> database table" step,
# then inserts one record per row of the Gherkin table.
Given(/^an? (.+) database table with rows:$/) do |table_name, table|
  step "a #{table_name} database table"

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  table.hashes.each { |record| dataset.insert(record) }
end

# Creates the blogposts table (id, title, author) on the :test connection.
Given(/^a blogposts database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:blogposts) do
    Integer :id
    String :title
    String :author
  end
end

# Creates the authors table (id, name) on the :test connection.
Given(/^an? authors database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:authors) do
    Integer :id
    String :name
  end
end
@@ -0,0 +1,8 @@
1
# Builds a DatabaseTable extractor for the given table on the :test connection,
# registers it as @source when no source has been set yet, and stores the
# extracted result in @extraction.
When(/^I create a Itiel::Extract::DatabaseTable object for the '(.*?)' table$/) do |name|
  @database_table = Itiel::Extract::DatabaseTable.new
  extractor = @database_table

  extractor.connection = :test
  extractor.table_name = name
  @source ||= extractor

  @extraction = extractor.extract
end
@@ -0,0 +1,3 @@
1
# Asserts the number of rows held in @extraction.
Then(/^the extraction for that object should have (\d+) rows?$/) do |expected|
  expected_rows = expected.to_i
  expect(@extraction.count).to eq(expected_rows)
end
@@ -0,0 +1,9 @@
1
# Wires the pipeline described by the table. Every cell is expected to hold an
# instance-variable name (e.g. "@source"); the cells are joined with " >> " and
# the resulting expression is evaluated in the context of the running step,
# e.g. "@source >> @constant_column >> @destination".
When(/^the data flows in the following direction:$/) do |table|
  # NOTE(review): this evaluates text taken from the feature file as Ruby.
  # That is acceptable only because feature files are trusted test code.
  instance_eval table.raw.flatten.join(" >> ")
end

# Starts the step stored in @source.
When(/^I start the source$/) do
  @source.start
end
@@ -0,0 +1,4 @@
1
# Builds an Itiel::Load::CSVFile for the named file under tmp/ and keeps it as
# @destination.
When(/^I create a Loader::CSVFile object with "([^"]*)"$/) do |csv_name|
  csv_path = File.join('tmp', csv_name)
  # NOTE(review): the meaning of the second argument (false) is defined by
  # Itiel::Load::CSVFile, which is not visible here.
  @destination = Itiel::Load::CSVFile.new(csv_path, false)
end
4
+
@@ -0,0 +1,14 @@
1
# Builds a database-table loader on the :test connection for the table named
# in the step text and keeps it as @destination.
Given(/^I create a Loader::DatabaseTable object for the "([^"]*)" table$/) do |table_name|
  # Use the captured name instead of a hard-coded "blogposts" so the step does
  # what its text says for any table.
  @destination = Itiel::Load::DatabaseTable.new :test, table_name
end

# Chains @source into @destination and starts the source.
When(/^I load the source into the destination$/) do
  @source >> @destination
  @source.start
end

# Asserts the number of records in the given table on the :test connection.
Then(/^the "([^"]*)" table should have (\d+) records$/) do |table_name, record_count|
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  expect(dataset.count).to eq record_count.to_i
end
@@ -0,0 +1,35 @@
1
# Builds a database-table lookup on the :test connection and keeps it as @lookup.
Given(/^I create a Itiel::Lookup::DatabaseTable object for the "(.*?)"$/) do |name|
  @lookup = Itiel::Lookup::DatabaseTable.new
  @lookup.connection = :test
  @lookup.table_name = name
end

# Builds a CSV lookup for the named file under tmp/ and keeps it as @lookup.
Given(/^I create a Itiel::Lookup::CSVFile object with the "(.*?)" file$/) do |csv_name|
  csv_path = File.join('tmp', csv_name)
  @lookup = Itiel::Lookup::CSVFile.new(csv_path)
end

# Sets lookup_columns to a one-entry hash: source column => lookup column.
Given(/^the lookup joins the "(.*?)" column in the source with the "(.*?)" column in the lookup stream$/) do |source_key, lookup_key|
  @lookup.lookup_columns = { source_key => lookup_key }
end

# Sets joined_columns to a one-entry hash: lookup column => name in the output.
Given(/^the lookup appends the "(.*?)" column on the lookup stream to the source as "(.*?)"$/) do |lookup_name, output_name|
  @lookup.joined_columns = { lookup_name => output_name }
end

# Compares @lookup.output with the expected table cell by cell. Output rows are
# read with symbol keys and stringified before comparison.
Then(/^the output for the lookup should be:$/) do |table|
  actual = @lookup.output

  table.hashes.each_with_index do |expected_row, position|
    table.headers.each do |header|
      cell = actual[position][header.to_sym].to_s
      expect(cell).to eq(expected_row[header])
    end
  end
end

# Creates the orders table (id, email) on the :test connection.
Given(/^an? orders database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:orders) do
    Integer :id
    String :email
  end
end
@@ -0,0 +1,5 @@
1
# Wraps the doc-string Ruby source in a block handed to
# Itiel::Script::RubyScript and keeps the result as @ruby_script.
Given(/^I create a Itiel::Script::RubyScript object with the following block of code:$/) do |script|
  # `row` looks unused but must keep its name: the evaluated text presumably
  # refers to it (string evaluation can see the surrounding block's locals).
  @ruby_script = Itiel::Script::RubyScript.new { |row| instance_eval(script) }
end
@@ -0,0 +1,8 @@
1
# Stores the Gherkin table (as an array of hashes) as the input stream.
Given(/^the following data is in the stream:$/) do |table|
  @stream = table.hashes
end

# Runs @ruby_script over @stream and compares the result with the table.
Then(/^the resulting output stream should be:$/) do |table|
  actual = @ruby_script.execute(@stream)
  expected = table.hashes
  expect(actual).to eq(expected)
end
@@ -0,0 +1,5 @@
1
# Builds a calculated column named 'new_id' whose value is the integer value of
# the quoted column multiplied by the factor given in the step text.
Given(/^I create a Transformation::CalculatedColumn object that multiplies "([^"]*)" times (\d+)$/) do |column_name, times|
  # Honour the captured column and factor rather than hard-coding 'id' and 3,
  # so the step does what its text says.
  factor = times.to_i

  @calculated_column = Itiel::Transform::CalculatedColumn.new('new_id') do |columns|
    columns[column_name].to_i * factor
  end
end
@@ -0,0 +1,7 @@
1
# Builds a CalculatedColumns transformation whose block returns two derived
# values per row: "name-state" and "id-name".
Given(/^I create a Transformation::CalculatedColumns object$/) do
  @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
    {
      'name_state' => "#{row['name']}-#{row['state']}",
      'id_name'    => "#{row['id']}-#{row['name']}"
    }
  end
end
@@ -0,0 +1,3 @@
1
# Builds a ConstantColumn transformation from a single column => value pair.
When(/^I create a Transformation::ConstantColumn object with "([^"]*)" => "([^"]*)"$/) do |name, constant|
  columns = { name => constant }
  @constant_column = Itiel::Transform::ConstantColumn.new(columns)
end
@@ -0,0 +1,4 @@
1
# Builds a MapValues transformation that maps the "state" column:
# "active" => 1, "inactive" => 0.
Given(/^I create a Transformation::MapValues object for the state column to integer$/) do
  state_mapping = { "active" => 1, "inactive" => 0 }
  @map_values = Itiel::Transform::MapValues.new({ "state" => state_mapping })
end
@@ -0,0 +1,3 @@
1
# Builds a RenameColumn transformation from a single old => new name pair.
Given(/^I create a Transformation::RenameColumn object from "([^"]*)" to "([^"]*)"$/) do |from, to|
  renames = { from => to }
  @rename_column = Itiel::Transform::RenameColumn.new(renames)
end
@@ -0,0 +1,3 @@
1
# Builds a SelectColumn transformation for the two quoted column names.
Given(/^I create a Transformation::SelectColumn object with "([^"]*)" and "([^"]*)"$/) do |first_column, second_column|
  @select_column = Itiel::Transform::SelectColumn.new(first_column, second_column)
end
@@ -0,0 +1,3 @@
1
# Builds a SingleColumnSort transformation for the quoted column.
# NOTE(review): no single_column_sort file appears in the gem's file list or in
# the requires of lib/itiel.rb -- confirm this class exists.
When(/^I create a Transformation::SingleColumnSort object by "([^"]*)"$/) do |sort_column|
  @single_column_sort = Itiel::Transform::SingleColumnSort.new(sort_column)
end
@@ -0,0 +1 @@
1
+ test: 'sqlite://test.db'
@@ -0,0 +1,13 @@
1
# Cucumber support file: starts coverage, loads the library and prepares
# per-scenario database state.
require 'simplecov'

# Coverage is started before the library is required.
SimpleCov.start

require 'itiel'
require 'csv'

# Send Itiel's log output to tmp/itiel.log.
Itiel::Logger.logger = Logger.new File.join('tmp', 'itiel.log')

# Scenarios tagged @database start from a fresh SQLite file and know where the
# connection settings live.
Before '@database' do
  # File.exist? replaces File.exists?, which is deprecated and no longer
  # available in Ruby 3.2+.
  File.unlink 'test.db' if File.exist?('test.db')
  @database_config_path = File.join('features', 'support', 'database.yml')
end
@@ -0,0 +1,123 @@
1
+ Feature: Transformations
2
+
3
+ Background:
4
+ Given a "source.csv" file with the following rows:
5
+ | id | name | state |
6
+ | 1 | john | active |
7
+ | 2 | ruby | active |
8
+ | 3 | rails | inactive |
9
+ | 4 | pete | active |
10
+
11
+ # @source = Itiel::Extract::CSVFile.new('source.csv')
12
+ And I create a Extractor::CSVFile object with "source.csv"
13
+ # @destination = Itiel::Load::CSVFile.new('destination.csv')
14
+ And I create a Loader::CSVFile object with "destination.csv"
15
+
16
+
17
+ Scenario: Append a constant value column to the original stream
18
+ # @constant_column = Itiel::Transform::ConstantColumn.new("constant" => "value")
19
+ Given I create a Transformation::ConstantColumn object with "constant" => "value"
20
+
21
+ # @source >> @constant_column >> @destination
22
+ And the data flows in the following direction:
23
+ | @source |
24
+ | @constant_column |
25
+ | @destination |
26
+
27
+ # @source.start
28
+ When I start the source
29
+
30
+ Then the "destination.csv" file should exist with the following content:
31
+ | id | name | state | constant |
32
+ | 1 | john | active | value |
33
+ | 2 | ruby | active | value |
34
+ | 3 | rails | inactive | value |
35
+ | 4 | pete | active | value |
36
+
37
+ Scenario: Map Values in a column
38
+ # @map_values = Itiel::Transform::MapValues.new("state" => { "active" => 1, "inactive" => 0 })
39
+ Given I create a Transformation::MapValues object for the state column to integer
40
+
41
+ # @source.next_step = @map_values
42
+ # @map_values.next_step = @destination
43
+ And the data flows in the following direction:
44
+ | @source |
45
+ | @map_values |
46
+ | @destination |
47
+
48
+ # @source.start
49
+ When I start the source
50
+
51
+ Then the "destination.csv" file should exist with the following content:
52
+ | id | name | state |
53
+ | 1 | john | 1 |
54
+ | 2 | ruby | 1 |
55
+ | 3 | rails | 0 |
56
+ | 4 | pete | 1 |
57
+
58
+
59
+ Scenario: Rename an existing column
60
+ # @rename_column = Itiel::Transform::RenameColumn.new("state" => "status")
61
+ Given I create a Transformation::RenameColumn object from "state" to "status"
62
+
63
+ # @source.next_step = @rename_column
64
+ # @rename_column.next_step = @destination
65
+ And the data flows in the following direction:
66
+ | @source |
67
+ | @rename_column |
68
+ | @destination |
69
+
70
+ # @source.start
71
+ When I start the source
72
+
73
+ Then the "destination.csv" file should exist with the following content:
74
+ | id | name | status |
75
+ | 1 | john | active |
76
+ | 2 | ruby | active |
77
+ | 3 | rails | inactive |
78
+ | 4 | pete | active |
79
+
80
+ Scenario: Select specific columns from the stream
81
+ # @select_column = Itiel::Transform::SelectColumn.new("id", "state")
82
+ Given I create a Transformation::SelectColumn object with "id" and "state"
83
+
84
+ # @source.next_step = @select_column
85
+ # @select_column.next_step = @destination
86
+ And the data flows in the following direction:
87
+ | @source |
88
+ | @select_column |
89
+ | @destination |
90
+
91
+ # @source.start
92
+ When I start the source
93
+
94
+ Then the "destination.csv" file should exist with the following content:
95
+ | id | state |
96
+ | 1 | active |
97
+ | 2 | active |
98
+ | 3 | inactive |
99
+ | 4 | active |
100
+
101
+ Scenario: Create multiple columns with the result of a calculation with other columns
102
+
103
+ # @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
104
+ # name_state = "#{row['name']}-#{row['state']}"
105
+ # id_name = "#{row['id']}-#{row['name']}"
106
+ # { 'name_state' => name_state, 'id_name' => id_name }
107
+ # end
108
+ Given I create a Transformation::CalculatedColumns object
109
+
110
+ And the data flows in the following direction:
111
+ | @source |
112
+ | @calculated_columns |
113
+ | @destination |
114
+
115
+ # @source.start
116
+ When I start the source
117
+
118
+ Then the "destination.csv" file should exist with the following content:
119
+ | id | name | state | name_state | id_name |
120
+ | 1 | john | active | john-active | 1-john |
121
+ | 2 | ruby | active | ruby-active | 2-ruby |
122
+ | 3 | rails | inactive | rails-inactive | 3-rails |
123
+ | 4 | pete | active | pete-active | 4-pete |
@@ -0,0 +1,34 @@
1
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "itiel/version"

# Gem specification for itiel. File lists are taken from git, so the gem must
# be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "itiel"
  s.version     = Itiel::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["David Padilla"]
  s.email       = ["david@padilla.cc"]
  s.homepage    = "https://gitlab.com/dabit/itiel"
  s.summary     = %q{DSL for ETL in Ruby}
  s.description = %q{DSL for ETL in Ruby}

  # The rubyforge_project attribute was dropped here: RubyGems deprecated it
  # and it has no replacement.

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies.
  s.add_dependency 'activesupport', '~> 4.0'
  s.add_dependency 'activerecord' , '~> 4.0'
  s.add_dependency 'sequel'

  # Development and test dependencies.
  s.add_development_dependency 'cucumber'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec', '~> 3.5.0'
  s.add_development_dependency 'rr'
  s.add_development_dependency 'minitest'
  s.add_development_dependency 'sqlite3'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'turn'
end
@@ -0,0 +1,45 @@
1
+ require 'yaml'
2
+ require 'active_support'
3
+ require 'active_support/core_ext'
4
+
5
module Itiel
  # Library-specific error types.
  #
  # They derive from StandardError rather than Exception so that a bare
  # `rescue` (which only handles StandardError and its subclasses) catches
  # them. Code that rescues these classes by name, or rescues Exception, is
  # unaffected.
  class MethodNotImplementedException < StandardError; end
  class MissingConnection             < StandardError; end
  class UndefinedNextStepException    < StandardError; end
  class SQLSentenceNotProvided        < StandardError; end
end
11
+
12
+ require 'itiel/job'
13
+ require 'itiel/logger'
14
+ require 'itiel/nameable'
15
+
16
+ require 'itiel/db/connection'
17
+ require 'itiel/db/sql_connectable.rb'
18
+ require 'itiel/db/truncator.rb'
19
+
20
+ require 'itiel/extract/chained_step'
21
+ require 'itiel/extract/csv_file'
22
+ require 'itiel/extract/custom_sql'
23
+ require 'itiel/extract/database_table'
24
+
25
+ require 'itiel/load/chained_step'
26
+ require 'itiel/load/csv_file'
27
+ require 'itiel/load/database_table'
28
+
29
+ require 'itiel/lookup/chained_step'
30
+ require 'itiel/lookup/hash_lookup'
31
+ require 'itiel/lookup/database_table'
32
+ require 'itiel/lookup/csv_file'
33
+
34
+ require 'itiel/script/chained_step'
35
+ require 'itiel/script/sql_script'
36
+ require 'itiel/script/ruby_script'
37
+
38
+ require 'itiel/transform/chained_step'
39
+ require 'itiel/transform/calculated_columns'
40
+ require 'itiel/transform/constant_column'
41
+ require 'itiel/transform/rename_column'
42
+ require 'itiel/transform/remove_column'
43
+ require 'itiel/transform/select_column'
44
+ require 'itiel/transform/input_output_behavior'
45
+ require 'itiel/transform/map_values'
@@ -0,0 +1,24 @@
1
module Itiel
  module DB
    # Looks up a named entry in a YAML sources file.
    #
    # sources_file defaults to 'config/sources.yml'; connection_name is the
    # key read from that file.
    class Connection
      include Itiel::Nameable

      attr_accessor :sources_file, :connection_name

      def initialize
        self.sources_file = 'config/sources.yml'
      end

      # Returns the entry stored under connection_name in the sources file.
      #
      # The value is memoised; pass refresh = true to read the file again.
      # A nil/false cached value is also re-read on the next call.
      def connection_string(refresh = false)
        @connection = load_from_file if refresh || !@connection
        @connection
      end

      private

      # Parses the sources file and returns the entry for connection_name.
      def load_from_file
        sources = YAML.load_file(sources_file)
        sources[connection_name]
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sequel'
2
+
3
module Itiel
  module DB
    # Mixin that lets a class open Sequel connections described in a YAML
    # file mapping connection names to connection strings.
    #
    # Including the module adds the instance accessors below and extends the
    # including class with ClassMethods.
    module SQLConnectable
      attr_accessor :connection, :connection_file_path

      module ClassMethods
        # Path of the YAML file holding the connection strings. Defaults to
        # 'config/database.yml'.
        #
        # The path lives in a class variable of this module, so it is shared
        # by every class that includes SQLConnectable: setting it through one
        # class changes it for all of them.
        def connection_file_path
          @@connection_file_path ||= 'config/database.yml'
        end

        def connection_file_path=(value)
          @@connection_file_path = value
        end

        # Opens a Sequel connection using the string stored under
        # connection_name in the connection file.
        def sequel_connection(connection_name)
          Sequel.connect read_connection_string(connection_name)
        end

        protected

        # Returns the connection string stored under connection_name (symbol
        # or string), or nil when the file has no such entry.
        #
        # Parsed files are cached per path. A single unkeyed cache would keep
        # serving the first file's contents after connection_file_path was
        # changed.
        def read_connection_string(connection_name)
          path = connection_file_path
          @connections ||= {}
          @connections[path] ||= YAML.load_file(path)
          @connections[path][connection_name.to_s]
        end
      end

      def self.included(receiver)
        receiver.extend ClassMethods
      end
    end
  end
end
+ end