itiel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,3 @@
1
# Builds the CSV extractor used as @source by later steps; the file is
# looked up inside the tmp directory.
When(/^I create a Extractor::CSVFile object with "([^"]*)"$/) do |filename|
  csv_path = File.join("tmp", filename)
  @source = Itiel::Extract::CSVFile.new(csv_path)
end
@@ -0,0 +1,6 @@
1
# Creates a CustomSQL extractor from the step's doc string, points the class
# at the scenario's database config, selects the :test connection and stores
# the extracted rows in @extraction.
When(/^I create a Itiel::Extract::CustomSQL object with the query:$/) do |query|
  @custom_sql = Itiel::Extract::CustomSQL.new(query)
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path
  @custom_sql.connection = :test
  @extraction = @custom_sql.extract
end
@@ -0,0 +1,27 @@
1
# Creates the named table by delegating to the matching
# "a <name> database table" step, then inserts one record per table row.
Given(/^an? (.+) database table with rows:$/) do |table_name, table|
  step "a #{table_name} database table"

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  table.hashes.each { |record| dataset.insert(record) }
end

# Creates an empty blogposts table (id, title, author) on the :test connection.
Given(/^a blogposts database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:blogposts) do
    Integer :id
    String :title
    String :author
  end
end

# Creates an empty authors table (id, name) on the :test connection.
Given(/^an? authors database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:authors) do
    Integer :id
    String :name
  end
end
@@ -0,0 +1,8 @@
1
# Builds a DatabaseTable extractor for the given table on the :test
# connection. It becomes @source only when no source was set by an earlier
# step, and its extracted rows are stored in @extraction.
When(/^I create a Itiel::Extract::DatabaseTable object for the '(.*?)' table$/) do |table_name|
  @database_table = Itiel::Extract::DatabaseTable.new
  @database_table.connection = :test
  @database_table.table_name = table_name
  @source = @database_table unless @source

  @extraction = @database_table.extract
end
@@ -0,0 +1,3 @@
1
# Asserts the number of rows held in @extraction.
Then(/^the extraction for that object should have (\d+) rows?$/) do |expected_count|
  expect(@extraction.count).to eq(expected_count.to_i)
end
@@ -0,0 +1,9 @@
1
# Wires the listed steps together: the table cells (e.g. "@source",
# "@constant_column", "@destination") are joined with " >> " and the
# resulting expression is evaluated against the current scenario object.
# NOTE(review): this evals feature-file text; acceptable for trusted test
# fixtures only.
When(/^the data flows in the following direction:$/) do |table|
  # Kept as-is: string instance_eval can see caller locals, so a feature
  # could in principle reference it.
  last_step = nil

  instance_eval(table.raw.flatten.join(" >> "))
end

# Kicks off the pipeline from its first step.
When(/^I start the source$/) do
  @source.start
end
@@ -0,0 +1,4 @@
1
# Builds the CSV loader used as @destination, writing inside the tmp
# directory. The second constructor argument is passed as false
# (NOTE(review): its meaning is defined by Itiel::Load::CSVFile — confirm).
When(/^I create a Loader::CSVFile object with "([^"]*)"$/) do |filename|
  target_path = File.join('tmp', filename)
  @destination = Itiel::Load::CSVFile.new(target_path, false)
end
4
+
@@ -0,0 +1,14 @@
1
# Builds the database loader used as @destination on the :test connection.
# The table name captured from the step text is used, rather than a
# hard-coded "blogposts", so the step does what its wording says.
Given(/^I create a Loader::DatabaseTable object for the "([^"]*)" table$/) do |table_name|
  @destination = Itiel::Load::DatabaseTable.new :test, table_name
end

# Chains the source to the destination and runs the pipeline.
When(/^I load the source into the destination$/) do
  @source >> @destination
  @source.start
end

# Asserts the number of records present in the given table.
Then(/^the "([^"]*)" table should have (\d+) records$/) do |table_name, record_count|
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  dataset = Itiel::Extract::CustomSQL.sequel_connection(:test)[table_name.to_sym]
  expect(dataset.count).to eq(record_count.to_i)
end
@@ -0,0 +1,35 @@
1
# Builds a database-backed lookup for the given table on the :test connection.
Given(/^I create a Itiel::Lookup::DatabaseTable object for the "(.*?)"$/) do |table_name|
  @lookup = Itiel::Lookup::DatabaseTable.new
  @lookup.connection = :test
  @lookup.table_name = table_name
end

# Builds a CSV-backed lookup reading from the tmp directory.
Given(/^I create a Itiel::Lookup::CSVFile object with the "(.*?)" file$/) do |file_name|
  lookup_path = File.join('tmp', file_name)
  @lookup = Itiel::Lookup::CSVFile.new(lookup_path)
end

# Declares which source column is matched against which lookup column.
Given(/^the lookup joins the "(.*?)" column in the source with the "(.*?)" column in the lookup stream$/) do |source_column, lookup_column|
  join_map = { source_column => lookup_column }
  @lookup.lookup_columns = join_map
end

# Declares which lookup column is appended to the source, and under what name.
Given(/^the lookup appends the "(.*?)" column on the lookup stream to the source as "(.*?)"$/) do |original_name, new_name|
  appended = { original_name => new_name }
  @lookup.joined_columns = appended
end

# Compares the lookup output with the expected table, cell by cell. Output
# rows are read with symbol keys and their values compared as strings.
Then(/^the output for the lookup should be:$/) do |table|
  actual_rows = @lookup.output
  table.hashes.each_with_index do |expected_row, position|
    table.headers.each do |header|
      expect(actual_rows[position][header.to_sym].to_s).to eq(expected_row[header])
    end
  end
end

# Creates an empty orders table (id, email) on the :test connection.
Given(/^an? orders database table$/) do
  Itiel::Extract::CustomSQL.connection_file_path = @database_config_path

  database = Itiel::Extract::CustomSQL.sequel_connection(:test)
  database.create_table(:orders) do
    Integer :id
    String :email
  end
end
@@ -0,0 +1,5 @@
1
# Wraps the step's doc string in a RubyScript step. The text is evaluated
# each time the block runs; `row` and `script` stay visible to the evaluated
# code, so their names must not change.
# NOTE(review): evals feature-file text; trusted test fixtures only.
Given(/^I create a Itiel::Script::RubyScript object with the following block of code:$/) do |script|
  @ruby_script = Itiel::Script::RubyScript.new { |row| instance_eval(script) }
end
@@ -0,0 +1,8 @@
1
# Stores the table rows (as an array of hashes) to be used as the input stream.
Given(/^the following data is in the stream:$/) do |table|
  @stream = table.hashes
end

# Runs the Ruby script step over the stored stream and compares the result
# with the expected table.
Then(/^the resulting output stream should be:$/) do |table|
  expect(@ruby_script.execute(@stream)).to eq(table.hashes)
end
@@ -0,0 +1,5 @@
1
# Builds a calculated column named 'new_id' whose value is the given source
# column multiplied by the given factor. Both values come from the step
# text; they are no longer hard-coded to 'id' and 3.
Given(/^I create a Transformation::CalculatedColumn object that multiplies "([^"]*)" times (\d+)$/) do |column_name, times|
  factor = times.to_i
  @calculated_column = Itiel::Transform::CalculatedColumn.new 'new_id' do |columns|
    columns[column_name].to_i * factor
  end
end
@@ -0,0 +1,7 @@
1
# Builds a CalculatedColumns step whose block returns two derived values per
# row: "<name>-<state>" under 'name_state' and "<id>-<name>" under 'id_name'.
Given(/^I create a Transformation::CalculatedColumns object$/) do
  @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
    {
      'name_state' => "#{row['name']}-#{row['state']}",
      'id_name' => "#{row['id']}-#{row['name']}"
    }
  end
end
@@ -0,0 +1,3 @@
1
# Builds a ConstantColumn step from a single column => value pair.
When(/^I create a Transformation::ConstantColumn object with "([^"]*)" => "([^"]*)"$/) do |column, value|
  constants = { column => value }
  @constant_column = Itiel::Transform::ConstantColumn.new(constants)
end
@@ -0,0 +1,4 @@
1
# Builds a MapValues step that maps the "state" column: "active" => 1,
# "inactive" => 0.
Given(/^I create a Transformation::MapValues object for the state column to integer$/) do
  state_mapping = { "active" => 1, "inactive" => 0 }
  @map_values = Itiel::Transform::MapValues.new({ "state" => state_mapping })
end
@@ -0,0 +1,3 @@
1
# Builds a RenameColumn step from a single old-name => new-name pair.
Given(/^I create a Transformation::RenameColumn object from "([^"]*)" to "([^"]*)"$/) do |original_name, new_name|
  renames = { original_name => new_name }
  @rename_column = Itiel::Transform::RenameColumn.new(renames)
end
@@ -0,0 +1,3 @@
1
# Builds a SelectColumn step from the two column names in the step text.
Given(/^I create a Transformation::SelectColumn object with "([^"]*)" and "([^"]*)"$/) do |column1, column2|
  @select_column = Itiel::Transform::SelectColumn.new(column1, column2)
end
@@ -0,0 +1,3 @@
1
# Builds a SingleColumnSort step for the given column.
# NOTE(review): the file list shows no lib/itiel/transform file for
# SingleColumnSort — confirm the class exists.
When(/^I create a Transformation::SingleColumnSort object by "([^"]*)"$/) do |column_name|
  @single_column_sort = Itiel::Transform::SingleColumnSort.new(column_name)
end
@@ -0,0 +1 @@
1
+ test: 'sqlite://test.db'
@@ -0,0 +1,13 @@
1
# Cucumber support file: starts coverage before the library is loaded,
# routes Itiel's log to tmp/itiel.log, and resets the SQLite database before
# every scenario tagged @database.
require 'simplecov'

SimpleCov.start

require 'itiel'
require 'csv'

Itiel::Logger.logger = Logger.new File.join('tmp', 'itiel.log')

Before '@database' do
  # File.exist? replaces File.exists?, which was deprecated and then removed
  # in Ruby 3.2.
  File.unlink 'test.db' if File.exist?('test.db')
  @database_config_path = File.join('features', 'support', 'database.yml')
end
@@ -0,0 +1,123 @@
1
+ Feature: Transformations
2
+
3
+ Background:
4
+ Given a "source.csv" file with the following rows:
5
+ | id | name | state |
6
+ | 1 | john | active |
7
+ | 2 | ruby | active |
8
+ | 3 | rails | inactive |
9
+ | 4 | pete | active |
10
+
11
+ # @source = Itiel::Extract::CSVFile.new('source.csv')
12
+ And I create a Extractor::CSVFile object with "source.csv"
13
+ # @destination = Itiel::Load::CSVFile.new('destination.csv')
14
+ And I create a Loader::CSVFile object with "destination.csv"
15
+
16
+
17
+ Scenario: Append a constant value column to the original stream
18
+ # @constant_column = Itiel::Transform::ConstantColumn.new("constant" => "value")
19
+ Given I create a Transformation::ConstantColumn object with "constant" => "value"
20
+
21
+ # @source >> @constant_column >> @destination
22
+ And the data flows in the following direction:
23
+ | @source |
24
+ | @constant_column |
25
+ | @destination |
26
+
27
+ # @source.start
28
+ When I start the source
29
+
30
+ Then the "destination.csv" file should exist with the following content:
31
+ | id | name | state | constant |
32
+ | 1 | john | active | value |
33
+ | 2 | ruby | active | value |
34
+ | 3 | rails | inactive | value |
35
+ | 4 | pete | active | value |
36
+
37
+ Scenario: Map Values in a column
38
+ # @map_values = Itiel::Transform::MapValues.new("state" => { "active" => 1, "inactive" => 0 })
39
+ Given I create a Transformation::MapValues object for the state column to integer
40
+
41
+ # @source.next_step = @map_values
42
+ # @map_values.next_step = @destination
43
+ And the data flows in the following direction:
44
+ | @source |
45
+ | @map_values |
46
+ | @destination |
47
+
48
+ # @source.start
49
+ When I start the source
50
+
51
+ Then the "destination.csv" file should exist with the following content:
52
+ | id | name | state |
53
+ | 1 | john | 1 |
54
+ | 2 | ruby | 1 |
55
+ | 3 | rails | 0 |
56
+ | 4 | pete | 1 |
57
+
58
+
59
+ Scenario: Rename an existing column
60
+ # @rename_column = Itiel::Transform::RenameColumn.new("state" => "status")
61
+ Given I create a Transformation::RenameColumn object from "state" to "status"
62
+
63
+ # @source.next_step = @rename_column
64
+ # @rename_column.next_step = @destination
65
+ And the data flows in the following direction:
66
+ | @source |
67
+ | @rename_column |
68
+ | @destination |
69
+
70
+ # @source.start
71
+ When I start the source
72
+
73
+ Then the "destination.csv" file should exist with the following content:
74
+ | id | name | status |
75
+ | 1 | john | active |
76
+ | 2 | ruby | active |
77
+ | 3 | rails | inactive |
78
+ | 4 | pete | active |
79
+
80
+ Scenario: Select specific columns from the stream
81
+ # @select_column = Itiel::Transform::SelectColumn.new("id", "state")
82
+ Given I create a Transformation::SelectColumn object with "id" and "state"
83
+
84
+ # @source.next_step = @select_column
85
+ # @select_column.next_step = @destination
86
+ And the data flows in the following direction:
87
+ | @source |
88
+ | @select_column |
89
+ | @destination |
90
+
91
+ # @source.start
92
+ When I start the source
93
+
94
+ Then the "destination.csv" file should exist with the following content:
95
+ | id | state |
96
+ | 1 | active |
97
+ | 2 | active |
98
+ | 3 | inactive |
99
+ | 4 | active |
100
+
101
+ Scenario: Create multiple columns with the result of a calculation with other columns
102
+
103
+ # @calculated_columns = Itiel::Transform::CalculatedColumns.new do |row|
104
+ # name_state = "#{row['name']}-#{row['state']}"
105
+ # id_name = "#{row['id']}-#{row['name']}"
106
+ # { 'name_state' => name_state, 'id_name' => id_name }
107
+ # end
108
+ Given I create a Transformation::CalculatedColumns object
109
+
110
+ And the data flows in the following direction:
111
+ | @source |
112
+ | @calculated_columns |
113
+ | @destination |
114
+
115
+ # @source.start
116
+ When I start the source
117
+
118
+ Then the "destination.csv" file should exist with the following content:
119
+ | id | name | state | name_state | id_name |
120
+ | 1 | john | active | john-active | 1-john |
121
+ | 2 | ruby | active | ruby-active | 2-ruby |
122
+ | 3 | rails | inactive | rails-inactive | 3-rails |
123
+ | 4 | pete | active | pete-active | 4-pete |
@@ -0,0 +1,34 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for itiel. The version is read from lib/itiel/version,
# so lib/ is put on the load path first.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "itiel/version"

Gem::Specification.new do |s|
  s.name        = "itiel"
  s.version     = Itiel::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["David Padilla"]
  s.email       = ["david@padilla.cc"]
  s.homepage    = "https://gitlab.com/dabit/itiel"
  s.summary     = %q{DSL for ETL in Ruby}
  s.description = %q{DSL for ETL in Ruby}

  # The deprecated `rubyforge_project` attribute is intentionally not set:
  # RubyGems warns about it and it has no replacement.

  # File lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies.
  s.add_dependency 'activesupport', '~> 4.0'
  s.add_dependency 'activerecord' , '~> 4.0'
  s.add_dependency 'sequel'

  # Development and test dependencies.
  s.add_development_dependency 'cucumber'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec', '~> 3.5.0'
  s.add_development_dependency 'rr'
  s.add_development_dependency 'minitest'
  s.add_development_dependency 'sqlite3'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'turn'
end
@@ -0,0 +1,45 @@
1
+ require 'yaml'
2
+ require 'active_support'
3
+ require 'active_support/core_ext'
4
+
5
module Itiel
  # Library-specific error classes.
  #
  # They derive from StandardError rather than Exception so that a plain
  # `rescue` (which only handles StandardError) catches them. Code that
  # rescues these classes by name, or rescues Exception, is unaffected.
  class MethodNotImplementedException < StandardError; end
  class MissingConnection             < StandardError; end
  class UndefinedNextStepException    < StandardError; end
  class SQLSentenceNotProvided        < StandardError; end
end
11
+
12
+ require 'itiel/job'
13
+ require 'itiel/logger'
14
+ require 'itiel/nameable'
15
+
16
+ require 'itiel/db/connection'
17
+ require 'itiel/db/sql_connectable.rb'
18
+ require 'itiel/db/truncator.rb'
19
+
20
+ require 'itiel/extract/chained_step'
21
+ require 'itiel/extract/csv_file'
22
+ require 'itiel/extract/custom_sql'
23
+ require 'itiel/extract/database_table'
24
+
25
+ require 'itiel/load/chained_step'
26
+ require 'itiel/load/csv_file'
27
+ require 'itiel/load/database_table'
28
+
29
+ require 'itiel/lookup/chained_step'
30
+ require 'itiel/lookup/hash_lookup'
31
+ require 'itiel/lookup/database_table'
32
+ require 'itiel/lookup/csv_file'
33
+
34
+ require 'itiel/script/chained_step'
35
+ require 'itiel/script/sql_script'
36
+ require 'itiel/script/ruby_script'
37
+
38
+ require 'itiel/transform/chained_step'
39
+ require 'itiel/transform/calculated_columns'
40
+ require 'itiel/transform/constant_column'
41
+ require 'itiel/transform/rename_column'
42
+ require 'itiel/transform/remove_column'
43
+ require 'itiel/transform/select_column'
44
+ require 'itiel/transform/input_output_behavior'
45
+ require 'itiel/transform/map_values'
@@ -0,0 +1,24 @@
1
module Itiel
  module DB
    # Looks up a connection entry by name in a YAML sources file.
    #
    # sources_file    - path of the YAML file; defaults to 'config/sources.yml'.
    # connection_name - key to read from that file.
    class Connection
      include Itiel::Nameable

      attr_accessor :sources_file
      attr_accessor :connection_name

      def initialize
        self.sources_file = 'config/sources.yml'
      end

      # Returns the entry stored under connection_name in the sources file.
      # The value is memoized; pass refresh = true to re-read the file.
      def connection_string(refresh = false)
        return @connection = load_from_file if refresh

        @connection ||= load_from_file
      end

      private

      # Parses the sources file and returns the entry for connection_name.
      def load_from_file
        sources = YAML.load_file(sources_file)
        sources[connection_name]
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sequel'
2
+
3
module Itiel
  module DB
    # Mixin for steps that talk to a SQL database through Sequel.
    #
    # Including it adds instance accessors for `connection` and
    # `connection_file_path`, and extends the including class with
    # ClassMethods.
    module SQLConnectable
      attr_accessor :connection, :connection_file_path

      module ClassMethods
        # Path of the YAML file holding the connection strings; defaults to
        # 'config/database.yml'. It is stored in a class variable of this
        # module, so the value is shared by every class that includes
        # SQLConnectable.
        def connection_file_path
          @@connection_file_path ||= 'config/database.yml'
        end

        def connection_file_path=(value)
          @@connection_file_path = value
        end

        # Opens a Sequel connection using the string stored under
        # connection_name in the connection file.
        def sequel_connection(connection_name)
          Sequel.connect read_connection_string(connection_name)
        end

        protected

        # Returns the connection string stored under connection_name
        # (symbol or string).
        #
        # The parsed file is cached per class. The cache is reloaded when
        # connection_file_path has changed since this method last loaded the
        # file; previously the first file read was kept forever, so changing
        # the path afterwards returned stale data.
        def read_connection_string(connection_name)
          path = connection_file_path
          path_changed = @connections_path && @connections_path != path
          if !@connections || path_changed
            @connections = YAML.load_file(path)
            @connections_path = path
          end
          @connections[connection_name.to_s]
        end
      end

      def self.included(receiver)
        receiver.extend ClassMethods
      end
    end
  end
end
+ end