itiel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.autotest +17 -0
- data/.gitignore +13 -0
- data/.gitlab-ci.yml +36 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/Gemfile.rails.4.0 +7 -0
- data/Gemfile.rails.4.1 +7 -0
- data/Gemfile.rails.4.2 +7 -0
- data/README.markdown +106 -0
- data/Rakefile +13 -0
- data/build.sh +10 -0
- data/features/extract/database_table.feature +16 -0
- data/features/extract/sql_script.feature +17 -0
- data/features/load/database_table_loader.feature +21 -0
- data/features/lookup/csv_file.feature +41 -0
- data/features/lookup/database_table.feature +43 -0
- data/features/script/ruby_script.feature +19 -0
- data/features/step_definitions/csv_steps.rb +15 -0
- data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
- data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
- data/features/step_definitions/extractor/database_steps.rb +27 -0
- data/features/step_definitions/extractor/database_table_steps.rb +8 -0
- data/features/step_definitions/extractor/extraction_steps.rb +3 -0
- data/features/step_definitions/flow_steps.rb +9 -0
- data/features/step_definitions/loader/csv_file_steps.rb +4 -0
- data/features/step_definitions/loader/database_table_steps.rb +14 -0
- data/features/step_definitions/lookup/lookup_steps.rb +35 -0
- data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
- data/features/step_definitions/stream_steps.rb +8 -0
- data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
- data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
- data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
- data/features/step_definitions/transformation/map_values_step.rb +4 -0
- data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
- data/features/step_definitions/transformation/select_column_steps.rb +3 -0
- data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
- data/features/support/database.yml +1 -0
- data/features/support/env.rb +13 -0
- data/features/transform/transformations.feature +123 -0
- data/itiel.gemspec +34 -0
- data/lib/itiel.rb +45 -0
- data/lib/itiel/db/connection.rb +24 -0
- data/lib/itiel/db/sql_connectable.rb +33 -0
- data/lib/itiel/db/truncator.rb +30 -0
- data/lib/itiel/extract/chained_step.rb +22 -0
- data/lib/itiel/extract/csv_file.rb +31 -0
- data/lib/itiel/extract/custom_sql.rb +38 -0
- data/lib/itiel/extract/database_table.rb +23 -0
- data/lib/itiel/job.rb +116 -0
- data/lib/itiel/load/chained_step.rb +37 -0
- data/lib/itiel/load/csv_file.rb +45 -0
- data/lib/itiel/load/database_table.rb +34 -0
- data/lib/itiel/load/input_output_behavior.rb +36 -0
- data/lib/itiel/logger.rb +47 -0
- data/lib/itiel/lookup/chained_step.rb +35 -0
- data/lib/itiel/lookup/csv_file.rb +16 -0
- data/lib/itiel/lookup/database_table.rb +36 -0
- data/lib/itiel/lookup/hash_lookup.rb +35 -0
- data/lib/itiel/nameable.rb +6 -0
- data/lib/itiel/script/chained_step.rb +18 -0
- data/lib/itiel/script/ruby_script.rb +31 -0
- data/lib/itiel/script/sql_script.rb +29 -0
- data/lib/itiel/transform/calculated_columns.rb +47 -0
- data/lib/itiel/transform/chained_step.rb +27 -0
- data/lib/itiel/transform/constant_column.rb +35 -0
- data/lib/itiel/transform/input_output_behavior.rb +44 -0
- data/lib/itiel/transform/map_values.rb +43 -0
- data/lib/itiel/transform/remove_column.rb +33 -0
- data/lib/itiel/transform/rename_column.rb +43 -0
- data/lib/itiel/transform/select_column.rb +37 -0
- data/lib/itiel/version.rb +3 -0
- data/spec/db/sql_connectable_spec.rb +20 -0
- data/spec/extract/chained_step_spec.rb +31 -0
- data/spec/extract/csv_file_spec.rb +22 -0
- data/spec/extract/custom_sql_spec.rb +19 -0
- data/spec/extract/database_table_spec.rb +22 -0
- data/spec/job_spec.rb +80 -0
- data/spec/loader/chained_step_spec.rb +39 -0
- data/spec/loader/csv_file_spec.rb +69 -0
- data/spec/loader/database_table_spec.rb +29 -0
- data/spec/lookup/hash_lookup_spec.rb +108 -0
- data/spec/nameable_spec.rb +17 -0
- data/spec/script/chained_step_spec.rb +24 -0
- data/spec/script/ruby_script_spec.rb +18 -0
- data/spec/script/sql_script_spec.rb +41 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/config/database.yml +1 -0
- data/spec/support/config/sources.yml +9 -0
- data/spec/transform/calculated_columns_spec.rb +36 -0
- data/spec/transform/chained_step_spec.rb +36 -0
- data/spec/transform/constant_column_spec.rb +22 -0
- data/spec/transform/map_values_spec.rb +26 -0
- data/spec/transform/rename_column_spec.rb +25 -0
- data/spec/transform/select_column_spec.rb +21 -0
- metadata +344 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
module Itiel
  module DB
    #
    # Truncates the specified tables
    #
    # Usage:
    #
    #     @truncator = Itiel::DB::Truncator.new "tables", "to", "truncate"
    #     @truncator.connection = :database
    #     @truncator.truncate!
    #
    # The table names can also be handed over as a single array:
    #
    #     @truncator = Itiel::DB::Truncator.new %w[tables to truncate]
    #
    class Truncator
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      attr_accessor :tables

      #
      # tables - the table names, either as separate arguments or as one
      #          array. Each name must respond to to_sym.
      #
      def initialize(*tables)
        # flatten so that new(%w[a b]) behaves like new("a", "b")
        self.tables = tables.flatten
      end

      #
      # Truncates every table listed in +tables+ and returns that list.
      # Nothing is connected to when the list is empty.
      #
      def truncate!
        return tables if tables.empty?

        # The connection does not depend on the table, so fetch it once
        db = self.class.sequel_connection(connection)
        tables.each { |table| db[table.to_sym].truncate }
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Itiel
  module Extract
    #
    # Defines how the initial extractors behave
    #
    # All classes including this module must define the +extract+ method,
    # whose return value is the stream handed to the next step
    #
    module ChainedStep
      attr_accessor :next_step

      # extractor >> step is a shorthand for extractor.next_step = step
      alias :>> :next_step=

      #
      # Runs the extraction and assigns the result to the input of
      # +next_step+ when a next step has been wired.
      #
      # Returns the extracted stream in both cases.
      #
      def start
        stream = extract
        # An extractor without a next step has nobody to forward to
        next_step.input = stream if next_step
        stream
      end

      #
      # This method must be implemented in the class
      #
      def extract
        raise Itiel::MethodNotImplementedException.new "extract is not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Itiel
  module Extract
    #
    # Extracts all the rows of the specified CSV file, treating its first
    # line as the header row, and sends them to its next step
    #
    # Usage:
    #
    #     csv_file = Itiel::Extract::CSVFile.new('FileName.csv')
    #     csv_file.start
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :file_name

      def initialize(file_name)
        self.file_name = file_name
      end

      #
      # Returns an array with one hash per data row, keyed by column header
      #
      def extract
        table = CSV.read(file_name, :headers => true)
        table.map { |row| row.to_hash }
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
module Itiel
  module Extract
    #
    # Creates a stream from the rows returned by the specified SQL query.
    # The connection must be defined in a config file, which is
    # config/database.yml by default.
    #
    # Usage:
    #
    #     @custom_sql = Itiel::Extract::CustomSQL.new
    #     @custom_sql.connection = :test
    #     @custom_sql.script = 'SELECT * FROM some_table'
    #
    # A different path for the config file can be set at class level:
    #
    #     Itiel::Extract::CustomSQL.connection_file_path = 'path_to_my_config/database.yml'
    #
    class CustomSQL
      include ChainedStep
      include Itiel::DB::SQLConnectable
      include Itiel::Nameable

      attr_accessor :script

      #
      # The first argument, when given, is used as the SQL script
      #
      def initialize(*args)
        self.script = args.first
      end

      #
      # Runs the script on the configured connection and returns all rows
      #
      def extract
        self.class.sequel_connection(connection)[script].all
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Itiel
  module Extract
    #
    # Extracts all the contents of a database table into the stream
    # and passes it on to its next_step
    #
    # Usage:
    #
    #     @extractor = Itiel::Extract::DatabaseTable.new
    #     @extractor.connection = :test
    #     @extractor.table_name = 'test_table'
    #
    class DatabaseTable < CustomSQL
      attr_accessor :table_name

      #
      # Returns every row of +table_name+
      #
      def extract
        self.class.sequel_connection(connection)[table_name.to_sym].all
      end
    end
  end
end
|
data/lib/itiel/job.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
module Itiel
  class Job
    attr_accessor :block

    #
    # Pass a block to this method to process the ETL steps in order
    #
    # Understands a single line per step:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source
    #       job.step @destination
    #       ...
    #     end
    #
    # In this case the @source.output is sent to the @destination's input.
    # A third step on the list would receive the @destination's output as
    # its input
    #
    # Another way to do this is by passing hashes to the step method:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => @destination
    #     end
    #
    # You want to use this syntax when creating more complex flows. For
    # example, you could send a step output to several inputs
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => [ @destination, @second_destination ]
    #     end
    #
    def self.run(&block)
      Itiel::Logger.log_start_job(self)
      yield new
      Itiel::Logger.log_end_job(self)
    end

    #
    # Use it to define the job steps and then run the job at a later date.
    #
    # It returns an instance of a job, you can call run! on that instance
    # later to actually run the steps defined on the block of the job
    #
    # Uses the same syntax as run:
    #
    #     job = Itiel::Job.define do |job|
    #       job.step @source => @destination
    #     end
    #
    #     job.run!
    #
    # Or
    #
    #     job = Itiel::Job.define do |job|
    #       job.step @source
    #       job.step @destination
    #       ...
    #     end
    #
    #     job.run!
    #
    def self.define(&block)
      new(&block)
    end

    #
    # Use it to run the steps on a previously defined Job
    #
    def run!
      # Forget the stream left over by a previous run; otherwise the first
      # step of this run would be handed the last output of the previous one
      @stream = nil

      Itiel::Logger.log_start_job(self)
      block.call(self)
      Itiel::Logger.log_end_job(self)
    end

    #
    # Call inside the run block to denote a data flow
    #
    # Accepts either a single step object or a hash of
    # source => destination (or source => [destinations]) pairs
    #
    def step(*args)
      flow = args[0]

      if flow.is_a?(Hash)
        hash_based_step(flow)
      else
        single_line_step(flow)
      end
    end

    private

    def initialize(&block)
      self.block = block
      @stream = nil
      super()
    end

    #
    # Wires every source => destination pair of the hash, not only the
    # first one. An empty hash wires nothing.
    #
    def hash_based_step(*args)
      args[0].each do |source, destination|
        if destination.is_a?(Array)
          destination.each { |object| object.input = source.output }
        else
          destination.input = source.output
        end
      end
    end

    #
    # The first step of a run only provides output; every later step is
    # fed the previous output and then provides the next one
    #
    def single_line_step(object)
      object.input = @stream if @stream
      @stream = object.output
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Itiel
  module Load
    #
    # This module defines the input and output behavior for Loader steps
    #
    # Whenever the instance receives input, it calls persist with it and
    # then passes that same input on to its next step, if it has one
    #
    # All the classes including this module should implement persist
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step

        # loader >> step is a shorthand for loader.next_step = step
        alias :>> :next_step=

        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)

          following = next_step
          following.input = input_stream if following

          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Including classes are expected to override this method
        #
        def persist(input_stream)
          raise Itiel::MethodNotImplementedException.new "persist is not implemented"
        end
      end

      def self.included(receiver)
        receiver.send(:include, InstanceMethods)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Itiel
  module Load
    #
    # Loads the data stream into a CSV file
    #
    # Usage:
    #
    #     @csv_file = Itiel::Load::CSVFile.new('filename.csv')
    #     @csv_file.input = []
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      #
      # file_name - path of the CSV file to write
      # append    - when true (the default) rows are appended to the file,
      #             otherwise the file is overwritten
      #
      def initialize(file_name, append=true)
        @append = append
        @file_name = file_name
      end

      #
      # Writes one CSV line per row of input_stream. The columns are the
      # union of the keys of all rows, in order of first appearance; a row
      # lacking a key gets an empty cell for it.
      #
      # The header line is left out when appending to a file that already
      # exists.
      #
      def persist(input_stream)
        # flat_map only removes the per-row nesting, so a key that is itself
        # an array stays a single header
        headers = input_stream.flat_map(&:keys).uniq
        mode = @append ? "ab" : "w"
        # Must be decided before CSV.open, which creates a missing file
        skip_headers = skip_headers?

        CSV.open(@file_name, mode) do |csv|
          csv << headers unless skip_headers
          input_stream.each { |row| csv << row.values_at(*headers) }
        end
      end

      private

      def skip_headers?
        File.exist?(@file_name) && @append
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Itiel
  module Load
    #
    # Loads the stream into a database table.
    #
    # Usage:
    #
    #     @loader = Itiel::Load::DatabaseTable.new :connection, "table_name"
    #
    class DatabaseTable
      include ChainedStep
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      attr_accessor :table_name

      def initialize(connection, table_name)
        self.connection = connection
        self.table_name = table_name
      end

      #
      # Inserts every element of input_stream into the table, one insert
      # per element
      #
      def persist(input_stream)
        dataset = table
        input_stream.each { |element| dataset.insert(element) }
      end

      #
      # Returns the dataset for +table_name+ on this loader's connection.
      #
      # The database handle is memoized per instance. A class variable
      # would be shared by every loader, so a loader configured with
      # another connection would write through the first loader's handle.
      #
      def table
        @db ||= self.class.sequel_connection(connection)
        @db[table_name.to_sym]
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Itiel
  module Load
    #
    # This module defines the input and output behavior for Loader steps
    #
    # Whenever a loader receives input, it calls persist with it and then
    # passes that same input on to its next step, if it has one
    #
    # All the classes including this module should implement persist and
    # provide a next_step accessor
    #
    module ChainedStep
      module InstanceMethods
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)
          # Forward the stream to the next step; assigning it to next_step
          # itself would replace the step with the data
          next_step.input = input_stream if next_step
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # This method has to be implemented in the class
        #
        def persist(input_stream)
          raise "persist is not implemented"
        end

        private
        attr_writer :output
      end

      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
|
data/lib/itiel/logger.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Itiel
  #
  # Writes progress messages for jobs and steps.
  #
  # Messages go to Itiel::Logger.logger, which can be set to any object
  # responding to info; when unset, a ::Logger on STDOUT is created on the
  # first message.
  #
  class Logger
    class << self
      attr_accessor :logger
    end

    # Logs how many rows the object received
    def self.log_received(object, size)
      msg = "#{object_name(object)} received #{size} rows"
      self.log_with_time(msg)
    end

    # Logs how many rows the object processed
    def self.log_processed(object, size)
      msg = "#{object_name(object)} processed #{size} rows"
      self.log_with_time(msg)
    end

    def self.log_start_job(object)
      msg = "#{object_name(object)} run at #{Time.now}"
      self.enclosed_with_time(msg)
    end

    def self.log_end_job(object)
      msg = "#{object_name(object)} finished at #{Time.now}"
      self.enclosed_with_time(msg)
    end

    # The methods below are helpers of the ones above. A bare `private`
    # does not affect `def self.` methods, so they have always been
    # callable from outside and are left public.

    #
    # The object's own name when it has one, otherwise its class name
    #
    def self.object_name(object)
      own_name = object.name if object.respond_to?(:name)
      own_name || object.class.name
    end

    def self.info(message)
      self.logger ||= ::Logger.new(STDOUT)
      self.logger.info(message)
    end

    def self.log_with_time(msg)
      self.info("#{Time.now} - #{msg}")
    end

    # Logs msg between two separator lines
    def self.enclosed_with_time(msg)
      self.info("\n==================================================\n")
      self.info(msg)
      self.info("\n==================================================\n")
    end
  end
end
|