itiel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.autotest +17 -0
- data/.gitignore +13 -0
- data/.gitlab-ci.yml +36 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/Gemfile.rails.4.0 +7 -0
- data/Gemfile.rails.4.1 +7 -0
- data/Gemfile.rails.4.2 +7 -0
- data/README.markdown +106 -0
- data/Rakefile +13 -0
- data/build.sh +10 -0
- data/features/extract/database_table.feature +16 -0
- data/features/extract/sql_script.feature +17 -0
- data/features/load/database_table_loader.feature +21 -0
- data/features/lookup/csv_file.feature +41 -0
- data/features/lookup/database_table.feature +43 -0
- data/features/script/ruby_script.feature +19 -0
- data/features/step_definitions/csv_steps.rb +15 -0
- data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
- data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
- data/features/step_definitions/extractor/database_steps.rb +27 -0
- data/features/step_definitions/extractor/database_table_steps.rb +8 -0
- data/features/step_definitions/extractor/extraction_steps.rb +3 -0
- data/features/step_definitions/flow_steps.rb +9 -0
- data/features/step_definitions/loader/csv_file_steps.rb +4 -0
- data/features/step_definitions/loader/database_table_steps.rb +14 -0
- data/features/step_definitions/lookup/lookup_steps.rb +35 -0
- data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
- data/features/step_definitions/stream_steps.rb +8 -0
- data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
- data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
- data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
- data/features/step_definitions/transformation/map_values_step.rb +4 -0
- data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
- data/features/step_definitions/transformation/select_column_steps.rb +3 -0
- data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
- data/features/support/database.yml +1 -0
- data/features/support/env.rb +13 -0
- data/features/transform/transformations.feature +123 -0
- data/itiel.gemspec +34 -0
- data/lib/itiel.rb +45 -0
- data/lib/itiel/db/connection.rb +24 -0
- data/lib/itiel/db/sql_connectable.rb +33 -0
- data/lib/itiel/db/truncator.rb +30 -0
- data/lib/itiel/extract/chained_step.rb +22 -0
- data/lib/itiel/extract/csv_file.rb +31 -0
- data/lib/itiel/extract/custom_sql.rb +38 -0
- data/lib/itiel/extract/database_table.rb +23 -0
- data/lib/itiel/job.rb +116 -0
- data/lib/itiel/load/chained_step.rb +37 -0
- data/lib/itiel/load/csv_file.rb +45 -0
- data/lib/itiel/load/database_table.rb +34 -0
- data/lib/itiel/load/input_output_behavior.rb +36 -0
- data/lib/itiel/logger.rb +47 -0
- data/lib/itiel/lookup/chained_step.rb +35 -0
- data/lib/itiel/lookup/csv_file.rb +16 -0
- data/lib/itiel/lookup/database_table.rb +36 -0
- data/lib/itiel/lookup/hash_lookup.rb +35 -0
- data/lib/itiel/nameable.rb +6 -0
- data/lib/itiel/script/chained_step.rb +18 -0
- data/lib/itiel/script/ruby_script.rb +31 -0
- data/lib/itiel/script/sql_script.rb +29 -0
- data/lib/itiel/transform/calculated_columns.rb +47 -0
- data/lib/itiel/transform/chained_step.rb +27 -0
- data/lib/itiel/transform/constant_column.rb +35 -0
- data/lib/itiel/transform/input_output_behavior.rb +44 -0
- data/lib/itiel/transform/map_values.rb +43 -0
- data/lib/itiel/transform/remove_column.rb +33 -0
- data/lib/itiel/transform/rename_column.rb +43 -0
- data/lib/itiel/transform/select_column.rb +37 -0
- data/lib/itiel/version.rb +3 -0
- data/spec/db/sql_connectable_spec.rb +20 -0
- data/spec/extract/chained_step_spec.rb +31 -0
- data/spec/extract/csv_file_spec.rb +22 -0
- data/spec/extract/custom_sql_spec.rb +19 -0
- data/spec/extract/database_table_spec.rb +22 -0
- data/spec/job_spec.rb +80 -0
- data/spec/loader/chained_step_spec.rb +39 -0
- data/spec/loader/csv_file_spec.rb +69 -0
- data/spec/loader/database_table_spec.rb +29 -0
- data/spec/lookup/hash_lookup_spec.rb +108 -0
- data/spec/nameable_spec.rb +17 -0
- data/spec/script/chained_step_spec.rb +24 -0
- data/spec/script/ruby_script_spec.rb +18 -0
- data/spec/script/sql_script_spec.rb +41 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/config/database.yml +1 -0
- data/spec/support/config/sources.yml +9 -0
- data/spec/transform/calculated_columns_spec.rb +36 -0
- data/spec/transform/chained_step_spec.rb +36 -0
- data/spec/transform/constant_column_spec.rb +22 -0
- data/spec/transform/map_values_spec.rb +26 -0
- data/spec/transform/rename_column_spec.rb +25 -0
- data/spec/transform/select_column_spec.rb +21 -0
- metadata +344 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # Input/output plumbing shared by the Lookup steps.
    #
    # Assigning to input runs lookup! on the received stream, stores the
    # result in output and, when a next_step has been set, hands that result
    # over as the next step's input.
    #
    # Every class that includes this module must implement lookup!
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step, :output

        # step >> other is shorthand for step.next_step = other
        alias_method :>>, :next_step=

        #
        # Receives the stream, looks it up and forwards the result.
        #
        # Both log calls report the size of the *received* stream.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          self.output = lookup!(input_stream)

          if next_step
            next_step.input = output
          end

          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder that always raises; including classes are expected to
        # provide the real implementation.
        #
        def lookup!(input_stream)
          raise RuntimeError, "lookup is not implemented"
        end
      end

      #
      # Mixes InstanceMethods into whatever includes ChainedStep.
      #
      def self.included(receiver)
        receiver.send(:include, InstanceMethods)
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # Joins the main data stream with the rows of a database table and
    # appends the specified columns from that table to the input stream.
    #
    # Example:
    #
    # Set up a lookup to the *authors* table:
    #
    #     @database_lookup = Itiel::Lookup::DatabaseTable.new
    #     @database_lookup.connection = :test
    #     @database_lookup.table_name = "authors"
    #
    # Join the "author_name" column on the input stream with the "name" column in the lookup stream.
    #
    #     @database_lookup.lookup_columns = { "author_name" => "name" }
    #
    # Join the "id" column in the lookup stream as "author_id"
    #
    #     @database_lookup.joined_columns = { "id" => "author_id" }
    #
    class DatabaseTable
      include ChainedStep
      include HashLookup
      include Itiel::DB::SQLConnectable

      # Name of the table to read the lookup rows from (String or Symbol).
      attr_accessor :table_name

      #
      # Returns every row of table_name, read through the handle returned by
      # the class-level sequel_connection helper.
      #
      # NOTE(review): connection and sequel_connection are not defined in
      # this class; presumably they come from Itiel::DB::SQLConnectable.
      #
      def lookup_source
        db = self.class.sequel_connection(connection)
        # A single to_sym is enough; the previous second call was a no-op.
        db[table_name.to_sym].all
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Lookup
    #
    # In-memory hash join used by the Lookup steps.
    #
    # The including class provides lookup_source, an enumerable of rows that
    # can be indexed with symbol keys. The join is configured with:
    #
    # lookup_columns:: pairs of input column => lookup column. Only the
    #                  FIRST pair is used.
    # joined_columns:: pairs of lookup column => name under which its value
    #                  is added to the input row.
    #
    # Column names are converted with to_sym before rows are indexed.
    #
    module HashLookup
      attr_accessor :lookup_columns, :joined_columns

      #
      # Adds the joined columns to every row of input_stream and returns the
      # rows as an array.
      #
      # Rows are updated in place (merge!), so the objects in the returned
      # array are the very same objects that were passed in. Rows without a
      # match receive the joined columns with nil values.
      #
      def lookup!(input_stream)
        origin_column = nil

        input_stream.collect do |row|
          # Resolved once, on the first row, instead of once per row. Doing
          # it lazily keeps an empty stream from touching lookup_columns.
          origin_column ||= lookup_columns.first[0].to_sym

          row.merge!(lookup_stream[row[origin_column]] || empty_joined_columns)
        end
      end

      #
      # Index of the lookup rows: value of the lookup column => hash with the
      # joined columns already renamed to their target names.
      #
      # Built on first use and memoized in @lookup. When several lookup rows
      # share the same key, the last one wins.
      #
      def lookup_stream
        @lookup ||= begin
          key_column = nil

          lookup_source.each_with_object({}) do |row, index|
            clean_row = joined_columns.each_with_object({}) do |(original, target), columns|
              columns[target.to_sym] = row[original.to_sym]
            end

            # Same lazy, compute-once treatment as in lookup!
            key_column ||= lookup_columns.first[1].to_sym
            index[row[key_column]] = clean_row
          end
        end
      end

      #
      # Hash with every joined target column set to nil; merged into rows
      # that have no match in the lookup stream.
      #
      def empty_joined_columns
        joined_columns.each_with_object({}) do |(_original, target), blank|
          blank[target.to_sym] = nil
        end
      end

      #
      # Placeholder that always raises; including classes are expected to
      # return the rows to join against.
      #
      def lookup_source
        raise 'lookup_source is not implemented'
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Chaining behavior for the Script steps.
    #
    # When a script step receives input it calls execute with the stream and
    # then passes the stream it received (not the return value of execute)
    # on to next_step, if one has been set.
    #
    # Including classes are expected to override execute.
    #
    module ChainedStep
      attr_accessor :next_step

      # step >> other is shorthand for step.next_step = other
      alias_method :>>, :next_step=

      def input=(input_stream)
        execute(input_stream)
        return unless next_step

        next_step.input = input_stream
      end

      #
      # Placeholder that always raises, whatever arguments it is given.
      #
      def execute(*)
        raise RuntimeError, "execute is not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Runs a piece of ruby against the stream.
    #
    # The block given to new is called once per row, with the row as its
    # only argument.
    #
    # Usage:
    #
    #     Itiel::Script::RubyScript.new do |row|
    #       row["slug"] = row["title"]
    #     end
    #
    class RubyScript
      include ChainedStep

      # The callable invoked for every row; nil when new was given no block.
      attr_accessor :block

      def initialize(&block)
        self.block = block
      end

      #
      # Calls the stored block for each row of input_stream. The return
      # values of the block are not collected.
      #
      def execute(input_stream)
        input_stream.each { |row| block.call(row) }
      end
    end
  end
end
|
31
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Itiel
  module Script
    #
    # Runs a SQL script or command against the configured connection.
    #
    class SQLScript
      include ChainedStep
      include Itiel::DB::SQLConnectable

      attr_accessor :connection
      attr_accessor :sql

      #
      # The first positional argument, if any, becomes the SQL to run; any
      # further arguments are ignored.
      #
      def initialize(*args)
        self.sql = args.first
      end

      #
      # Sends sql to the handle returned by the class-level
      # sequel_connection helper. Arguments are accepted and ignored.
      #
      # NOTE(review): sequel_connection is presumably provided by
      # Itiel::DB::SQLConnectable. sanity_check is not called from here.
      #
      def execute(*)
        database = self.class.sequel_connection(connection)
        database << sql
      end

      #
      # Raises when the step is missing its connection or its SQL; the
      # connection is checked first.
      #
      def sanity_check
        raise Itiel::MissingConnection unless connection
        raise Itiel::SQLSentenceNotProvided unless sql
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Adds columns to each row, or replaces the value of existing ones, with
    # the result of a block. The block receives the row and must return a
    # hash whose keys are the columns to set.
    #
    # Example:
    #
    #     calculated = CalculatedColumns.new do |row|
    #       total = row[:price] * row[:quantity]
    #       tax   = total * 0.2
    #       { :total => total, :tax => tax }
    #     end
    #
    #     calculated.next_step = some_step
    #     calculated.transform!([{:price => 12.50, :quantity => 3}, {:price => 4.95, :quantity => 5}])
    #
    # The rows now also carry :total (37.5 and 24.75) and :tax.
    #
    # It is important to note that you have to cast numeric fields yourself
    # to make sure they have the right type. Stored column types may vary
    # depending on the source.
    #
    class CalculatedColumns
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :arguments

      #
      # Raises "Missing block" when called without a block.
      #
      def initialize(&block)
        raise "Missing block" if block.nil?

        @block = block
      end

      #
      # Merges the block's result into every row, in place, and returns the
      # stream it was given. Requires next_step to be set (see sanity_check).
      #
      def transform!(input_stream)
        sanity_check
        input_stream.each { |row| row.merge!(@block.call(row)) }
      end

      #
      # Raises Itiel::UndefinedNextStepException when no next_step is set.
      #
      def sanity_check
        return if next_step

        raise Itiel::UndefinedNextStepException, "Undefined next_step"
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Defines how the Transformations take part in a chain and how they
    # handle the data stream.
    #
    # All classes including this module should implement a transform!
    # method that does the real transformation.
    #
    # That method is called as soon as the transformation receives input,
    # and its result is handed to next_step.
    #
    module ChainedStep
      attr_accessor :next_step

      # step >> other is shorthand for step.next_step = other
      alias_method :>>, :next_step=

      #
      # Transforms the stream and passes the result to next_step.
      #
      # next_step is used without a guard, so receiving input before a
      # next_step has been set fails with NoMethodError.
      #
      def input=(stream)
        downstream = next_step
        downstream.input = transform!(stream)
      end

      #
      # Placeholder that always raises Itiel::MethodNotImplementedException.
      #
      def transform!(stream)
        raise Itiel::MethodNotImplementedException,
              "You need to define the transform! method in the implementing class"
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Appends one or more columns holding a constant value to every row of
    # the data stream.
    #
    #     Itiel::Transform::ConstantColumn.new("column" => "Constant Value")
    #
    # The column and its value can also be given later:
    #
    #     transformation = Itiel::Transform::ConstantColumn.new
    #
    #     transformation.append = { "column" => "Constant Value" }
    #
    # The resulting stream will have a column named "column" with the value
    # "Constant Value" for all rows.
    #
    class ConstantColumn
      include ChainedStep
      include Itiel::Nameable

      # Hash of column => constant value merged into each row.
      attr_accessor :append

      #
      # The first positional argument, if any, is used as the append hash.
      #
      def initialize(*args)
        self.append = args.first
      end

      #
      # Returns new rows made of each input row merged with append. The
      # input rows themselves are left untouched; on a key clash the value
      # from append wins.
      #
      def transform!(input_stream)
        input_stream.map { |row| row.merge(append) }
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This module defines the behavior for input and output on the
    # transformations that include it.
    #
    # Whenever a transformation is asked for its output, it stores the
    # result of transform! and returns it.
    #
    # All the classes including this module should implement transform!
    #
    module InputOutputBehavior
      module InstanceMethods
        # The stream most recently assigned through input=.
        attr_reader :input

        #
        # Stores the stream to transform and logs its size.
        #
        # The cached output is dropped here as well; otherwise output would
        # keep returning the result computed for the previous input.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          @output = nil
          @input = input_stream
        end

        #
        # Returns the cached output by default; call it with true to run the
        # transformation again before returning the output.
        #
        # The size of the output is logged on every call, cached or not.
        #
        def output(retransform = false)
          @output = transform!(input) if retransform || !@output
          Itiel::Logger.log_processed(self, @output.size)
          @output
        end

        #
        # This method has to be implemented in the class.
        #
        # The default does nothing and returns nil, which makes output fail
        # when it asks the result for its size.
        #
        def transform!(input_stream) ; end
      end

      #
      # Mixes InstanceMethods into whatever includes InputOutputBehavior.
      #
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # Maps the values of a column to different values.
    #
    # Usage:
    #
    #     @transformation = Itiel::Transform::MapValues.new(
    #       {
    #         "active" => { true => "yes", false => "no" }
    #       }
    #     )
    #
    # This would map all the values on the active column, true to yes and
    # false to no. Values without an entry, and columns that are not
    # mentioned in the mapping, pass through unchanged.
    #
    class MapValues
      include ChainedStep
      include Nameable

      # Hash of column => { original value => replacement value }.
      attr_accessor :mapping

      def initialize(mapping)
        self.mapping = mapping
      end

      #
      # Returns a new array of new row hashes with the mapped values; the
      # input rows are not modified.
      #
      def transform!(input_stream)
        input_stream.map do |stream_row|
          stream_row.each_with_object({}) do |(column, value), new_row|
            new_row[column] =
              if !mapping.key?(column)
                value
              elsif mapping[column].key?(value)
                # An explicit entry always wins, even when it maps to false
                # or nil. The previous `|| value` dropped such mappings.
                mapping[column][value]
              else
                # No explicit entry: same fallback as before, which still
                # honours a default set on the value hash.
                mapping[column][value] || value
              end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Itiel
  module Transform
    #
    # This transformation removes specific columns from the data stream.
    #
    # Usage:
    #
    #     @transformer = Itiel::Transform::RemoveColumn.new("order_id")
    #
    # In the example, the output stream would not have the order_id column.
    #
    class RemoveColumn
      include ChainedStep
      include Itiel::Nameable

      # The column names to drop, exactly as they appear as row keys.
      attr_accessor :mappings

      #
      # Every positional argument is a column to remove.
      #
      def initialize(*args)
        self.mappings = args
      end

      #
      # Returns a new array with a copy of each row that lacks the columns
      # listed in mappings. The input rows are not modified.
      #
      def transform!(input_stream)
        input_stream.map do |row|
          row.reject { |key, _value| mappings.include?(key) }
        end
      end
    end
  end
end
|
33
|
+
|