itiel 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.autotest +17 -0
- data/.gitignore +13 -0
- data/.gitlab-ci.yml +36 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/Gemfile.rails.4.0 +7 -0
- data/Gemfile.rails.4.1 +7 -0
- data/Gemfile.rails.4.2 +7 -0
- data/README.markdown +106 -0
- data/Rakefile +13 -0
- data/build.sh +10 -0
- data/features/extract/database_table.feature +16 -0
- data/features/extract/sql_script.feature +17 -0
- data/features/load/database_table_loader.feature +21 -0
- data/features/lookup/csv_file.feature +41 -0
- data/features/lookup/database_table.feature +43 -0
- data/features/script/ruby_script.feature +19 -0
- data/features/step_definitions/csv_steps.rb +15 -0
- data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
- data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
- data/features/step_definitions/extractor/database_steps.rb +27 -0
- data/features/step_definitions/extractor/database_table_steps.rb +8 -0
- data/features/step_definitions/extractor/extraction_steps.rb +3 -0
- data/features/step_definitions/flow_steps.rb +9 -0
- data/features/step_definitions/loader/csv_file_steps.rb +4 -0
- data/features/step_definitions/loader/database_table_steps.rb +14 -0
- data/features/step_definitions/lookup/lookup_steps.rb +35 -0
- data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
- data/features/step_definitions/stream_steps.rb +8 -0
- data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
- data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
- data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
- data/features/step_definitions/transformation/map_values_step.rb +4 -0
- data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
- data/features/step_definitions/transformation/select_column_steps.rb +3 -0
- data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
- data/features/support/database.yml +1 -0
- data/features/support/env.rb +13 -0
- data/features/transform/transformations.feature +123 -0
- data/itiel.gemspec +34 -0
- data/lib/itiel.rb +45 -0
- data/lib/itiel/db/connection.rb +24 -0
- data/lib/itiel/db/sql_connectable.rb +33 -0
- data/lib/itiel/db/truncator.rb +30 -0
- data/lib/itiel/extract/chained_step.rb +22 -0
- data/lib/itiel/extract/csv_file.rb +31 -0
- data/lib/itiel/extract/custom_sql.rb +38 -0
- data/lib/itiel/extract/database_table.rb +23 -0
- data/lib/itiel/job.rb +116 -0
- data/lib/itiel/load/chained_step.rb +37 -0
- data/lib/itiel/load/csv_file.rb +45 -0
- data/lib/itiel/load/database_table.rb +34 -0
- data/lib/itiel/load/input_output_behavior.rb +36 -0
- data/lib/itiel/logger.rb +47 -0
- data/lib/itiel/lookup/chained_step.rb +35 -0
- data/lib/itiel/lookup/csv_file.rb +16 -0
- data/lib/itiel/lookup/database_table.rb +36 -0
- data/lib/itiel/lookup/hash_lookup.rb +35 -0
- data/lib/itiel/nameable.rb +6 -0
- data/lib/itiel/script/chained_step.rb +18 -0
- data/lib/itiel/script/ruby_script.rb +31 -0
- data/lib/itiel/script/sql_script.rb +29 -0
- data/lib/itiel/transform/calculated_columns.rb +47 -0
- data/lib/itiel/transform/chained_step.rb +27 -0
- data/lib/itiel/transform/constant_column.rb +35 -0
- data/lib/itiel/transform/input_output_behavior.rb +44 -0
- data/lib/itiel/transform/map_values.rb +43 -0
- data/lib/itiel/transform/remove_column.rb +33 -0
- data/lib/itiel/transform/rename_column.rb +43 -0
- data/lib/itiel/transform/select_column.rb +37 -0
- data/lib/itiel/version.rb +3 -0
- data/spec/db/sql_connectable_spec.rb +20 -0
- data/spec/extract/chained_step_spec.rb +31 -0
- data/spec/extract/csv_file_spec.rb +22 -0
- data/spec/extract/custom_sql_spec.rb +19 -0
- data/spec/extract/database_table_spec.rb +22 -0
- data/spec/job_spec.rb +80 -0
- data/spec/loader/chained_step_spec.rb +39 -0
- data/spec/loader/csv_file_spec.rb +69 -0
- data/spec/loader/database_table_spec.rb +29 -0
- data/spec/lookup/hash_lookup_spec.rb +108 -0
- data/spec/nameable_spec.rb +17 -0
- data/spec/script/chained_step_spec.rb +24 -0
- data/spec/script/ruby_script_spec.rb +18 -0
- data/spec/script/sql_script_spec.rb +41 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/config/database.yml +1 -0
- data/spec/support/config/sources.yml +9 -0
- data/spec/transform/calculated_columns_spec.rb +36 -0
- data/spec/transform/chained_step_spec.rb +36 -0
- data/spec/transform/constant_column_spec.rb +22 -0
- data/spec/transform/map_values_spec.rb +26 -0
- data/spec/transform/rename_column_spec.rb +25 -0
- data/spec/transform/select_column_spec.rb +21 -0
- metadata +344 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # This module defines the input and output behavior of Lookup Steps.
    #
    # Whenever the instance receives input, it calls lookup! and sets its
    # return value as the input of its next_step, if one is defined.
    #
    # All the classes that include this module must implement lookup!
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step, :output

        # Allows chaining steps with `step >> following_step`.
        alias :>> :next_step=

        #
        # Receives the data stream, stores the result of lookup! in output
        # and forwards that result to next_step when one is set.
        #
        # The received and processed events are both logged with the size of
        # the incoming stream; the processed event is logged only after the
        # following step has been fed.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          self.output = lookup!(input_stream)
          next_step.input = output if next_step
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder that always raises; including classes must provide
        # their own lookup! that returns the resulting stream.
        #
        def lookup!(input_stream)
          raise "lookup is not implemented"
        end
      end

      # Mixes InstanceMethods into every class that includes this module.
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # Joins the main data stream with another lookup stream and appends the specified
    # columns from the lookup stream to the input stream.
    #
    # Example:
    #
    # Set up a lookup to the *authors* table:
    #
    #     @database_lookup = Itiel::Lookup::DatabaseTable.new
    #     @database_lookup.connection = :test
    #     @database_lookup.table_name = "authors"
    #
    # Join the "author_name" column on the input stream with the "name" column in the lookup stream.
    #
    #     @database_lookup.lookup_columns = { "author_name" => "name" }
    #
    # Join the "id" column in the lookup stream as "author_id"
    #
    #     @database_lookup.joined_columns = { "id" => "author_id" }
    #
    class DatabaseTable
      include ChainedStep
      include HashLookup
      include Itiel::DB::SQLConnectable

      attr_accessor :table_name

      #
      # Returns every row of table_name, read through the connection obtained
      # from sequel_connection. This is the lookup_source that HashLookup
      # indexes.
      #
      def lookup_source
        db = self.class.sequel_connection(connection)
        db[table_name.to_sym].all
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # Hash based join between the input stream and a lookup source.
    #
    # lookup_columns maps the input column to the lookup column used for the
    # join; only its first pair is used. joined_columns maps each lookup
    # column to the name it takes once appended to the input rows.
    #
    # Including classes must implement lookup_source and return an
    # enumerable of rows indexed by symbol keys.
    #
    module HashLookup
      attr_accessor :lookup_columns, :joined_columns

      #
      # Merges the joined columns into every row of input_stream (the rows
      # are modified in place) and returns the collected rows. Rows without
      # a match receive the joined columns set to nil.
      #
      def lookup!(input_stream)
        origin_column = nil
        input_stream.collect do |row|
          # Resolved on the first row only, so an empty stream never touches
          # lookup_columns.
          origin_column ||= lookup_columns.first[0].to_sym
          row.merge!(lookup_stream[row[origin_column]] || empty_joined_columns)
        end
      end

      #
      # Index of the lookup source keyed by the value of the lookup column.
      # Each entry holds only the joined columns, already renamed. The index
      # is built once and memoized; when a key repeats, the last row wins.
      #
      def lookup_stream
        @lookup ||= begin
          key_column = nil
          lookup_source.each_with_object({}) do |row, index|
            key_column ||= lookup_columns.first[1].to_sym
            index[row[key_column]] = joined_columns.each_with_object({}) do |(original, target), clean_row|
              clean_row[target.to_sym] = row[original.to_sym]
            end
          end
        end
      end

      # Hash with every joined target column set to nil, used for misses.
      def empty_joined_columns
        joined_columns.each_with_object({}) do |(_original, target), blank|
          blank[target.to_sym] = nil
        end
      end

      # Placeholder that always raises; including classes must override it.
      def lookup_source
        raise 'lookup_source is not implemented'
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Defines how script steps take part in a chain of steps.
    #
    # When the step receives input it calls execute with the stream and then
    # hands the same stream object to next_step, if one is defined. The
    # return value of execute is ignored.
    #
    # Classes including this module must implement execute.
    #
    module ChainedStep
      attr_accessor :next_step

      # Allows chaining steps with `step >> following_step`.
      alias :>> :next_step=

      # Runs the script and forwards the received stream to the next step.
      def input=(input_stream)
        execute(input_stream)
        next_step.input = input_stream if next_step
      end

      # Placeholder that always raises; including classes must override it.
      def execute(*)
        raise "execute is not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Process the stream with ruby
    #
    # Initialize it with a block that will yield each row of the stream.
    #
    # Usage:
    #
    #     Itiel::Script::RubyScript.new do |row|
    #       row["slug"] = row["title"]
    #     end
    #
    class RubyScript
      include ChainedStep

      attr_accessor :block

      # Stores the given block; it can also be assigned later through block=.
      def initialize(&block)
        self.block = block
      end

      #
      # Calls the stored block once per row of input_stream. The return
      # value of the block is not used, so a block only affects the stream
      # by modifying the row it receives.
      #
      # Fails on a non-empty stream when no block has been set, because the
      # call is then made on nil.
      #
      def execute(input_stream)
        input_stream.each { |row| block.call(row) }
      end
    end
  end
end
|
31
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Executes a SQL script or command on the specified
    # connection
    #
    class SQLScript
      include ChainedStep
      include Itiel::DB::SQLConnectable

      attr_accessor :connection
      attr_accessor :sql

      # The first argument, when given, is the SQL to run; the rest are ignored.
      def initialize(*args)
        self.sql = args.first
      end

      #
      # Sends sql to the database obtained from sequel_connection for the
      # configured connection. The incoming stream is ignored.
      #
      # NOTE(review): sanity_check is not called here, so a missing
      # connection or sql only surfaces as whatever error the database
      # layer raises -- confirm whether that is intended.
      #
      def execute(*)
        db = self.class.sequel_connection(connection)
        db << sql
      end

      #
      # Raises Itiel::MissingConnection when no connection is set and
      # Itiel::SQLSentenceNotProvided when no sql is set.
      #
      def sanity_check
        raise Itiel::MissingConnection unless connection
        raise Itiel::SQLSentenceNotProvided unless sql
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This class allows us to create multiple columns, or replace the current
    # value of a column, with the result of executing a block. The block must
    # return a hash whose keys are the columns you wish to add.
    #
    # Example:
    #
    #     calculated = CalculatedColumns.new do |row|
    #       total = row[:price] * row[:quantity]
    #       tax   = total * 0.2
    #       { :total => total, :tax => tax }
    #     end
    #
    #     calculated.next_step = following_step
    #     calculated.input = [{:price => 10.0, :quantity => 3},{:price => 4.0, :quantity => 5}]
    #
    # following_step then receives as its input:
    #
    #     [{:price => 10.0, :quantity => 3, :total => 30.0, :tax => 6.0},
    #      {:price => 4.0, :quantity => 5, :total => 20.0, :tax => 4.0}]
    #
    # It is important to note that you have to use casting on numeric fields
    # just to make sure that it is from the right type. Stored column types
    # may vary depending on the source.
    #
    class CalculatedColumns
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :arguments

      # Requires a block; raises "Missing block" otherwise.
      def initialize(&block)
        raise "Missing block" unless block_given?
        @block = block
      end

      #
      # Merges the hash returned by the block into every row, modifying the
      # rows in place, and returns input_stream itself.
      #
      # Raises Itiel::UndefinedNextStepException when next_step is not set.
      #
      def transform!(input_stream)
        sanity_check
        input_stream.each do |object|
          object.merge!(@block.call(object))
        end
      end

      # Ensures there is a next step to hand the transformed stream to.
      def sanity_check
        raise Itiel::UndefinedNextStepException, "Undefined next_step" unless next_step
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Defines the behavior of the Transformations
    # and how they handle the data stream.
    #
    # All classes including this module should implement a
    # transform! method that does the real transformation.
    #
    # That method will be called as soon as the transformation
    # receives input.
    #
    module ChainedStep
      attr_accessor :next_step

      # Allows chaining steps with `step >> following_step`.
      alias :>> :next_step=

      #
      # Transforms the stream and assigns the result as the input of
      # next_step. next_step must be set beforehand: unlike the Lookup and
      # Script chained steps there is no guard for a missing next step here.
      #
      def input=(stream)
        next_step.input = transform!(stream)
      end

      # Placeholder that always raises; including classes must override it.
      def transform!(stream)
        raise Itiel::MethodNotImplementedException,
              "You need to define the transform! method in the implementing class"
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This transformation appends a new Column
    # with a constant value to the data stream
    #
    #     Itiel::Transform::ConstantColumn.new( "column" => "Constant Value")
    #
    # Or, specify the column and the value later:
    #
    #     transformation = Itiel::Transform::ConstantColumn.new
    #
    #     transformation.append = { "column" => "Constant Value" }
    #
    # The resulting stream will have a column named "column" with the value
    # "Constant Value" for all rows.
    #
    class ConstantColumn
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :append

      # The first argument, when given, is the hash of columns to append.
      def initialize(*args)
        self.append = args.first
      end

      #
      # Returns a new stream in which every row is merged with the append
      # hash. Rows of input_stream are not modified; on a key clash the
      # value from append wins.
      #
      # append must have been set to a hash before this is called.
      #
      def transform!(input_stream)
        input_stream.map { |row| row.merge(append) }
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This module defines the behavior for input and output
    # on all our transformations.
    #
    # Whenever a transformation receives input, it stores the result of
    # transform! on the output
    #
    # All the classes in Itiel::Transform should implement transform!
    #
    module InputOutputBehavior
      module InstanceMethods
        # The last stream assigned through input=.
        attr_reader :input

        # Logs the size of the received stream and stores it.
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          @input = input_stream
        end

        #
        # Returns cached output by default, call it with true to run the
        # transformation before returning the output
        #
        # The processed event is logged on every call, cached or not.
        #
        # NOTE(review): assigning a new input does not clear the cache, so
        # output keeps returning the previous result until it is called
        # with true -- confirm this is intended.
        #
        def output(retransform = false)
          @output = transform!(input) if retransform || !@output
          Itiel::Logger.log_processed(self, @output.size)
          @output
        end

        #
        # This method has to be implemented in the class
        #
        def transform!(input_stream) ; end
      end

      # Mixes InstanceMethods into every class that includes this module.
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Maps a field value to different values
    #
    # Usage:
    #
    #     @transformation = Itiel::Transform::MapValues.new(
    #       {
    #         "active" => { true => "yes", false => "no" }
    #       }
    #     )
    #
    # This would map all the values on the active column, true to yes and false to no
    #
    class MapValues
      include ChainedStep
      include Nameable

      attr_accessor :mapping

      # mapping is a hash of column => { original value => replacement }.
      def initialize(mapping)
        self.mapping = mapping
      end

      #
      # Returns a new stream of new rows in which the values of mapped
      # columns are replaced according to mapping. Values without an entry,
      # and columns that are not mapped, are copied unchanged.
      #
      # The replacement is chosen by key presence, so a value may be mapped
      # to false or nil (for example "no" => false).
      #
      def transform!(input_stream)
        input_stream.map do |stream_row|
          stream_row.each_with_object({}) do |(column, value), new_row|
            new_row[column] =
              if mapping.key?(column) && mapping[column].key?(value)
                mapping[column][value]
              else
                value
              end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This transformation removes specific columns from the data stream
    #
    # Usage:
    #
    #     @transformer = Itiel::Transform::RemoveColumn.new("order_id")
    #
    # In the example, the output stream would not have the order_id column
    #
    class RemoveColumn
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :mappings

      # Every argument is the name of a column to remove.
      def initialize(*args)
        self.mappings = args
      end

      #
      # Returns a new stream whose rows lack the columns listed in mappings.
      # Rows of input_stream are not modified. Column names are compared as
      # given, so "order_id" does not remove :order_id.
      #
      def transform!(input_stream)
        input_stream.map do |object|
          object.reject { |key, _value| mappings.include?(key) }
        end
      end
    end
  end
end
|
33
|
+
|