itiel 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,30 @@
1
module Itiel
  module DB
    #
    # Truncates specified tables
    #
    # Usage:
    #
    #     @truncator = Itiel::DB::Truncator.new "tables", "to", "truncate"
    #     @truncator.connection = :database
    #     @truncator.truncate!
    #
    class Truncator
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      # Names of the tables that truncate! will empty
      attr_accessor :tables

      #
      # tables - any number of table names; each one must respond to to_sym
      #
      def initialize(*tables)
        self.tables = tables
      end

      #
      # Truncates every table listed in +tables+ and returns +tables+.
      #
      # The database handle is looked up once and reused for all tables
      # instead of once per table. Nothing is looked up when there are no
      # tables to truncate.
      #
      # NOTE(review): connection and sequel_connection are presumably
      # provided by Itiel::DB::SQLConnectable -- confirm.
      #
      def truncate!
        return tables if tables.empty?

        db = self.class.sequel_connection(connection)
        tables.each { |table| db[table.to_sym].truncate }
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Itiel
  module Extract
    #
    # Behavior shared by the steps that open a flow (the extractors)
    #
    # Classes including this module are expected to provide their own
    # extract method; the version defined here only raises.
    #
    module ChainedStep
      attr_accessor :next_step

      # source >> destination is shorthand for source.next_step = destination
      alias_method :>>, :next_step=

      #
      # Runs extract and hands its result to the next step's input
      #
      def start
        next_step.input = extract
      end

      #
      # Placeholder to be overridden by the including class
      #
      def extract
        raise Itiel::MethodNotImplementedException, "extract is not implemented"
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'csv'
2
+
3
module Itiel
  module Extract
    #
    # Reads the given CSV file, treating its first line as the header row,
    # and produces one hash per data row keyed by the header names
    #
    # Usage:
    #
    #     csv_file = Itiel::Extract::CSVFile.new('FileName.csv')
    #     csv_file.start
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      # Path of the CSV file to read
      attr_accessor :file_name

      def initialize(file_name)
        self.file_name = file_name
      end

      #
      # Returns an array with one hash per data row of the file
      #
      def extract
        table = CSV.read(file_name, headers: true)
        table.map { |row| row.to_hash }
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'sequel'
2
+
3
module Itiel
  module Extract
    #
    # Builds a stream out of the rows returned by a SQL query.
    # By default the connection has to be declared in
    # config/database.yml.
    #
    # Usage:
    #
    #     @custom_sql = Itiel::Extract::CustomSQL.new
    #     @custom_sql.connection = :test
    #     @custom_sql.script = 'SELECT * FROM some_table'
    #
    #
    # A different config file can be configured at class level
    #
    #     Itiel::Extract::CustomSQL.connection_file_path = 'path_to_my_config/database.yml'
    #
    class CustomSQL
      include ChainedStep
      include Itiel::DB::SQLConnectable
      include Itiel::Nameable

      # The SQL text to run
      attr_accessor :script

      #
      # The first argument, when given, is used as the script; any
      # further arguments are ignored
      #
      def initialize(*args)
        self.script = args.first
      end

      #
      # Runs the script on the configured connection and returns all rows
      #
      def extract
        database = self.class.sequel_connection(connection)
        dataset = database[script]
        dataset.all
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Itiel
  module Extract
    #
    # Extracts the whole content of a database table into the stream
    # so it can be passed on to its next_step
    #
    # Usage:
    #
    #     @extractor = Itiel::Extract::DatabaseTable.new
    #     @extractor.connection = :test
    #     @extractor.table_name = 'test_table'
    #
    #
    class DatabaseTable < CustomSQL
      # Name of the table to read; must respond to to_sym
      attr_accessor :table_name

      #
      # Returns every row of table_name from the configured connection
      #
      def extract
        database = self.class.sequel_connection(connection)
        dataset = database[table_name.to_sym]
        dataset.all
      end
    end
  end
end
@@ -0,0 +1,116 @@
1
module Itiel
  class Job
    # The block holding the step definitions; used by run!
    attr_accessor :block

    #
    # Pass a block to this method to process the ETL steps in order
    #
    #
    # Understands a single line per step:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source
    #       job.step @destination
    #
    #       ...
    #     end
    #
    #
    # In this case the @source.output is sent to the @destination's input
    # A third step on the list would send the @destination's output to its
    # input
    #
    #
    # Another way to do this is by passing hashes to the step method:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => @destination
    #     end
    #
    #
    # You want to use this syntax when creating more complex flows. For example,
    # you could send a step output to several inputs
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => [ @destination, @second_destination ]
    #     end
    #
    # Several flows can also be declared in a single call
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => @destination, @other_source => @other_destination
    #     end
    #
    def self.run(&block)
      Itiel::Logger.log_start_job(self)
      yield self.new
      Itiel::Logger.log_end_job(self)
    end

    #
    # Use it to define the job steps and then run the job at a later date.
    #
    # It returns an instance of a job, you can call run! on that instance later
    # to actually run the steps defined on the block of the job
    #
    # Uses the same syntax as run:
    #
    #     job = Itiel::Job.define do |job|
    #       job.step @source => @destination
    #     end
    #
    #     job.run!
    #
    # Or
    #
    #     job = Itiel::Job.define do |job|
    #       job.step @source
    #       job.step @destination
    #
    #       ...
    #     end
    #
    #     job.run!
    #
    def self.define(&block)
      self.new(&block)
    end

    #
    # Use it to run the steps on a previously defined Job
    #
    # The stored block is called with this job as its only argument
    #
    def run!
      Itiel::Logger.log_start_job(self)
      self.block.call(self)
      Itiel::Logger.log_end_job(self)
    end

    #
    # Call inside the run block to denote a data flow
    #
    # Only the first argument is used. A Hash is read as
    # source => destination(s) pairs; anything else is treated as the
    # next step of a linear flow.
    #
    def step(*args)
      if args[0].is_a?(Hash)
        hash_based_step(args[0])
      else
        single_line_step(args[0])
      end
    end

    private

    def initialize(&block)
      self.block = block
      super
    end

    #
    # Wires every source => destination pair of the hash. A destination
    # may be a single step or an Array of steps. source.output is read
    # once per destination.
    #
    # Every pair is processed (not just the first one) and an empty
    # hash is a no-op.
    #
    def hash_based_step(flows)
      flows.each do |source, destination|
        # Array() is deliberately avoided: it would call to_a on a
        # single destination that happens to respond to it.
        if destination.is_a?(Array)
          destination.each { |object| object.input = source.output }
        else
          destination.input = source.output
        end
      end
    end

    #
    # Feeds the output of the previous single-line step (if any) into
    # the object and remembers the object's output for the next step
    #
    def single_line_step(object)
      object.input = @stream if @stream
      @stream = object.output
    end
  end
end
@@ -0,0 +1,37 @@
1
module Itiel
  module Load
    #
    # Input handling shared by the Loader steps
    #
    # Assigning input to an instance logs the received row count, calls
    # persist with the stream, passes the same stream on to next_step
    # when one is set, and finally logs the processed row count.
    #
    # Including classes are expected to provide their own persist method.
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step

        # loader >> other is shorthand for loader.next_step = other
        alias_method :>>, :next_step=

        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)

          follower = next_step
          follower.input = input_stream if follower

          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder to be overridden by the including class
        #
        def persist(input_stream)
          raise Itiel::MethodNotImplementedException, "persist is not implemented"
        end
      end

      # Mixes InstanceMethods into whatever class includes ChainedStep
      def self.included(base)
        base.send(:include, InstanceMethods)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'csv'
2
+
3
module Itiel
  module Load
    #
    # Loads the data stream into a CSV file
    #
    # Usage:
    #
    #     @csv_file = Itiel::Load::CSVFile.new('filename.csv')
    #     @csv_file.input = []
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      #
      # file_name - path of the CSV file to write
      # append    - when true (the default) rows are added to the end of
      #             the file; when false the file is overwritten
      #
      def initialize(file_name, append=true)
        @append = append
        @file_name = file_name
      end

      #
      # Writes every row of the stream to the file.
      #
      # The header row is the union of the keys of all rows, in order of
      # first appearance. A row that lacks one of the keys gets whatever
      # row[key] returns for it (nil for a plain Hash).
      #
      def persist(input_stream)
        headers = input_stream.flat_map(&:keys).uniq
        mode = @append ? "ab" : "w"
        # Decided before CSV.open, which creates the file if it is missing.
        skip_headers = skip_headers?

        CSV.open(@file_name, mode) do |csv|
          csv << headers unless skip_headers
          input_stream.each do |row|
            csv << headers.map { |header| row[header] }
          end
        end
      end

      private

      #
      # Headers are skipped only when appending to a file that already
      # has content. File.size? is nil for a missing or zero-byte file,
      # so an existing but empty file still gets its header row.
      #
      def skip_headers?
        @append && !File.size?(@file_name).nil?
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Itiel
  module Load
    #
    # Loads the stream into a database table.
    #
    # Usage:
    #
    #     @loader = Itiel::Load::DatabaseTable.new :connection, "table_name"
    #
    class DatabaseTable
      include ChainedStep
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      # Name of the destination table; must respond to to_sym
      attr_accessor :table_name

      #
      # connection - identifier handed to sequel_connection
      # table_name - name of the table the rows are inserted into
      #
      def initialize(connection, table_name)
        self.connection = connection
        self.table_name = table_name
      end

      #
      # Inserts every element of the stream into the table, one at a time
      #
      def persist(input_stream)
        dataset = table
        input_stream.each { |element| dataset.insert(element) }
      end

      #
      # Returns the dataset for table_name.
      #
      # The database handle is memoized per instance. It used to live in
      # a class variable, which made every loader reuse the handle opened
      # by the first one even when it was given a different connection.
      #
      def table
        @db ||= self.class.sequel_connection(connection)
        @db[table_name.to_sym]
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Itiel
  module Load
    #
    # This module defines the input and output behavior for Loader steps
    #
    # Whenever a loader receives input, it calls persist with the stream
    # and then passes the same stream on to its next_step, if it has one
    #
    # All the classes including this module should implement persist
    #
    module ChainedStep
      module InstanceMethods
        # The step that receives the stream once it has been persisted.
        # Declared here so the module works without the including class
        # having to define it.
        attr_accessor :next_step

        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)
          # Forward the stream to the next step's input. Assigning it to
          # next_step itself would replace the step with the data.
          self.next_step.input = input_stream if self.next_step
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # This method has to be implemented in the class
        #
        def persist(input_stream)
          raise "persist is not implemented"
        end

        private
        attr_writer :output
      end

      # Mixes InstanceMethods into whatever class includes ChainedStep
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Itiel
  #
  # Writes step and job progress messages through a shared logger
  #
  # Set Itiel::Logger.logger to send the output somewhere else; when it
  # is not set, a standard library Logger writing to STDOUT is created
  # on first use.
  #
  class Logger
    class << self
      attr_accessor :logger
    end

    # Logs how many rows the object received
    def self.log_received(object, size)
      log_with_time("#{object_name(object)} received #{size} rows")
    end

    # Logs how many rows the object processed
    def self.log_processed(object, size)
      log_with_time("#{object_name(object)} processed #{size} rows")
    end

    # Logs the start of a job, framed by separator lines
    def self.log_start_job(object)
      enclosed_with_time("#{object_name(object)} run at #{Time.now}")
    end

    # Logs the end of a job, framed by separator lines
    def self.log_end_job(object)
      enclosed_with_time("#{object_name(object)} finished at #{Time.now}")
    end

    # The helpers below are internal. A bare `private` has no effect on
    # methods defined with `def self.`, so they have always been publicly
    # callable and are kept that way.

    # Uses the object's own name when it has one, its class name otherwise
    def self.object_name(object)
      object.respond_to?(:name) ? object.name : object.class.name
    end

    def self.info(message)
      self.logger ||= begin
        # Loaded lazily: only needed when no logger was configured.
        require 'logger'
        ::Logger.new(STDOUT)
      end
      logger.info(message)
    end

    def self.log_with_time(msg)
      info("#{Time.now} - #{msg}")
    end

    def self.enclosed_with_time(msg)
      separator = "\n==================================================\n"
      info(separator)
      info(msg)
      info(separator)
    end
  end
end