itiel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,30 @@
1
module Itiel
  module DB
    #
    # Truncates the specified tables
    #
    # Usage:
    #
    #     @truncator = Itiel::DB::Truncator.new "tables", "to", "truncate"
    #     @truncator.connection = :database
    #     @truncator.truncate!
    #
    class Truncator
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      # Names of the tables to truncate; each one is converted with to_sym
      attr_accessor :tables

      #
      # Accepts any number of table names and stores them in +tables+
      #
      def initialize(*tables)
        self.tables = tables
      end

      #
      # Truncates every table listed in +tables+ and returns +tables+.
      #
      # The connection is looked up once and reused for all the tables.
      # With an empty list nothing is looked up at all, so no connection
      # is needed in that case.
      #
      # NOTE(review): +connection+ and +sequel_connection+ are presumably
      # provided by Itiel::DB::SQLConnectable - confirm
      #
      def truncate!
        return tables if tables.empty?

        db = self.class.sequel_connection(connection)
        tables.each { |table| db[table.to_sym].truncate }
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Itiel
  module Extract
    #
    # Shared behavior of the steps that open a chain by extracting data
    #
    # Classes including this module are expected to override extract;
    # the version defined here only raises
    #
    module ChainedStep
      # The step whose input= receives the extracted data
      attr_accessor :next_step

      # source >> destination is shorthand for source.next_step = destination
      alias_method :>>, :next_step=

      #
      # Runs extract and assigns the result to the next step's input
      #
      def start
        receiver = next_step
        receiver.input = extract
      end

      #
      # Placeholder; always raises until the including class overrides it
      #
      def extract
        raise Itiel::MethodNotImplementedException.new("extract is not implemented")
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'csv'
2
+
3
module Itiel
  module Extract
    #
    # Reads all the rows of the specified CSV file, treating its first
    # line as the headers, and returns them as an array of hashes
    #
    # Usage:
    #
    #     csv_file = Itiel::Extract::CSVFile.new('FileName.csv')
    #     csv_file.start
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      # Path of the CSV file to read
      attr_accessor :file_name

      def initialize(file_name)
        self.file_name = file_name
      end

      #
      # Returns one hash per data row, keyed by the header names
      #
      def extract
        CSV.read(file_name, :headers => true).map(&:to_hash)
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'sequel'
2
+
3
module Itiel
  module Extract
    #
    # Creates a stream from the rows returned by the specified SQL query.
    # According to the original notes, the connection must be defined in
    # a file that lives at config/database.yml by default.
    #
    # Usage:
    #
    #     @custom_sql = Itiel::Extract::CustomSQL.new
    #     @custom_sql.connection = :test
    #     @custom_sql.script = 'SELECT * FROM some_table'
    #
    # A different path for the config file can be set at class level:
    #
    #     Itiel::Extract::CustomSQL.connection_file_path = 'path_to_my_config/database.yml'
    #
    class CustomSQL
      include ChainedStep
      include Itiel::DB::SQLConnectable
      include Itiel::Nameable

      # The SQL text to run
      attr_accessor :script

      #
      # The first argument, when given, becomes the script; any other
      # argument is ignored
      #
      def initialize(*args)
        self.script = args.first
      end

      #
      # Runs the script on the configured connection and returns all rows
      #
      def extract
        self.class.sequel_connection(connection)[script].all
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Itiel
  module Extract
    #
    # Extracts the whole content of a database table into the stream
    #
    # Usage:
    #
    #     @extractor = Itiel::Extract::DatabaseTable.new
    #     @extractor.connection = :test
    #     @extractor.table_name = 'test_table'
    #
    class DatabaseTable < CustomSQL
      # Name of the table to read; converted with to_sym when extracting
      attr_accessor :table_name

      #
      # Returns every row of table_name, read through the configured
      # connection
      #
      def extract
        dataset = self.class.sequel_connection(connection)[table_name.to_sym]
        dataset.all
      end
    end
  end
end
@@ -0,0 +1,116 @@
1
module Itiel
  class Job
    # The block holding the step definitions. It is set by Job.define and
    # stays nil on the instance that Job.run yields.
    attr_accessor :block

    #
    # Pass a block to this method to process the ETL steps in order.
    # The start and the end of the run are reported to Itiel::Logger.
    #
    # Understands a single line per step:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source
    #       job.step @destination
    #
    #       ...
    #     end
    #
    # In this case @source.output is sent to @destination's input. A third
    # step on the list would receive @destination's output as its input.
    #
    # Another way to do this is by passing hashes to the step method:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => @destination
    #     end
    #
    # Use this syntax when creating more complex flows. For example, a
    # step output can be sent to several inputs:
    #
    #     Itiel::Job.run do |job|
    #       job.step @source => [ @destination, @second_destination ]
    #     end
    #
    def self.run(&block)
      Itiel::Logger.log_start_job(self)
      yield new
      Itiel::Logger.log_end_job(self)
    end

    #
    # Use it to define the job steps and run the job at a later date.
    #
    # It returns an instance of a job; call run! on that instance later
    # to actually run the steps defined in the block.
    #
    # Uses the same syntax as run:
    #
    #     job = Itiel::Job.define do |job|
    #       job.step @source => @destination
    #     end
    #
    #     job.run!
    #
    def self.define(&block)
      new(&block)
    end

    #
    # Runs the steps of a previously defined Job by calling its block
    # with the job itself, reporting start and end to Itiel::Logger
    #
    def run!
      Itiel::Logger.log_start_job(self)
      block.call(self)
      Itiel::Logger.log_end_job(self)
    end

    #
    # Call inside the run block to denote a data flow.
    #
    # Only the first argument is used: a Hash is read as
    # source => destination(s) pairs, anything else as the next step of
    # a single line flow.
    #
    def step(*args)
      flow = args.first
      if flow.is_a?(Hash)
        hash_based_step(flow)
      else
        single_line_step(flow)
      end
    end

    private

    def initialize(&block)
      self.block = block
      super
    end

    #
    # Wires every source => destination pair of the hash, not just the
    # first one. A destination may be a single step or an Array of steps;
    # source.output is read once for each destination it feeds.
    #
    def hash_based_step(*args)
      args.first.each do |source, destination|
        # Array() is avoided on purpose: it would call to_a on a step
        destinations = destination.is_a?(Array) ? destination : [destination]
        destinations.each { |object| object.input = source.output }
      end
    end

    #
    # Feeds the previous step's output, when there is one, into this
    # step and then remembers this step's output for the next call
    #
    def single_line_step(object)
      object.input = @stream if @stream
      @stream = object.output
    end
  end
end
@@ -0,0 +1,37 @@
1
module Itiel
  module Load
    #
    # Defines the input behavior of the Loader steps
    #
    # Whenever an instance receives input it calls persist with it and
    # then hands the same input over to its next step, when it has one
    #
    # Classes including this module should implement persist
    #
    module ChainedStep
      module InstanceMethods
        # The step that receives the input after it has been persisted
        attr_accessor :next_step

        # loader >> other is shorthand for loader.next_step = other
        alias_method :>>, :next_step=

        #
        # Persists the stream, forwards it unchanged to the next step and
        # reports the stream size to Itiel::Logger before and after
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)
          following = next_step
          following.input = input_stream if following
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder; always raises until the including class overrides it
        #
        def persist(input_stream)
          raise Itiel::MethodNotImplementedException.new("persist is not implemented")
        end
      end

      # Mixes InstanceMethods into every class that includes ChainedStep
      def self.included(receiver)
        receiver.include(InstanceMethods)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'csv'
2
+
3
module Itiel
  module Load
    #
    # Loads the data stream into a CSV file
    #
    # Usage:
    #
    #     @csv_file = Itiel::Load::CSVFile.new('filename.csv')
    #     @csv_file.input = []
    #
    class CSVFile
      include ChainedStep
      include Itiel::Nameable

      #
      # file_name - path of the CSV file to write
      # append    - when true (the default) rows are added to the end of
      #             the file; otherwise the file is overwritten
      #
      def initialize(file_name, append=true)
        @append = append
        @file_name = file_name
      end

      #
      # Writes every row of the stream to the file. The columns are the
      # union of the keys of all the rows, in order of first appearance;
      # a row lacking a key gets an empty cell for it.
      #
      # The header row is left out when appending to a file that already
      # has content, and when the stream has no keys at all, so that an
      # empty stream never leaves a blank line where a later append would
      # expect the headers.
      #
      def persist(input_stream)
        headers = input_stream.flat_map(&:keys).uniq
        mode = @append ? "ab" : "w"
        # Must be decided before CSV.open, which creates the file
        write_headers = !(skip_headers? || headers.empty?)

        CSV.open(@file_name, mode) do |csv|
          csv << headers if write_headers
          input_stream.each do |row|
            csv << headers.map { |header| row[header] }
          end
        end
      end

      private

      #
      # True when appending to a file that already has content.
      # File.size? returns nil for a missing or zero-byte file, so an
      # existing but empty file still gets its header row.
      #
      def skip_headers?
        @append && !File.size?(@file_name).nil?
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Itiel
  module Load
    #
    # Loads the stream into a database table.
    #
    # Usage:
    #
    #     @loader = Itiel::Load::DatabaseTable.new :connection, "table_name"
    #
    class DatabaseTable
      include ChainedStep
      include Itiel::Nameable
      include Itiel::DB::SQLConnectable

      # Name of the destination table; converted with to_sym when used
      attr_accessor :table_name

      #
      # connection - stored through connection=
      #              NOTE(review): presumably the name of an entry in the
      #              database config, handled by SQLConnectable - confirm
      # table_name - the table that receives the rows
      #
      def initialize(connection, table_name)
        self.connection = connection
        self.table_name = table_name
      end

      #
      # Inserts every element of the stream into the table, one insert
      # per element
      #
      def persist(input_stream)
        input_stream.each do |element|
          table.insert(element)
        end
      end

      #
      # Returns the dataset for table_name.
      #
      # The database handle is memoized per instance. It used to live in a
      # class variable, which made every loader reuse the handle opened by
      # the first one, whatever connection it had been given.
      #
      def table
        @db ||= self.class.sequel_connection(connection)
        @db[table_name.to_sym]
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Itiel
  module Load
    #
    # Defines the input behavior of the Loader steps
    #
    # Whenever a loader receives input it calls persist with it and then
    # hands the same input over to its next step, when it has one
    #
    # Classes including this module should implement persist
    #
    module ChainedStep
      module InstanceMethods
        #
        # Persists the stream, forwards it unchanged to the next step's
        # input and reports the stream size to Itiel::Logger before and
        # after.
        #
        # The stream is assigned to next_step.input; it used to be
        # assigned to next_step itself, replacing the step with the data.
        #
        # NOTE(review): next_step is not defined in this module, it must
        # come from the including class or another definition - confirm
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          persist(input_stream)
          next_step.input = input_stream if next_step
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # This method has to be implemented in the class
        #
        def persist(input_stream)
          raise "persist is not implemented"
        end

        private

        attr_writer :output
      end

      # Mixes InstanceMethods into every class that includes ChainedStep
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
require 'logger'

module Itiel
  #
  # Writes the progress messages of jobs and steps.
  #
  # Everything goes to Itiel::Logger.logger at info level. When no
  # logger has been assigned, one writing to STDOUT is created on the
  # first message.
  #
  class Logger
    class << self
      attr_accessor :logger
    end

    #
    # Logs how many rows the step received, prefixed with the time
    #
    def self.log_received(object, size)
      log_with_time("#{object_name(object)} received #{size} rows")
    end

    #
    # Logs how many rows the step processed, prefixed with the time
    #
    def self.log_processed(object, size)
      log_with_time("#{object_name(object)} processed #{size} rows")
    end

    #
    # Logs the start of a job between two separator lines
    #
    def self.log_start_job(object)
      enclosed_with_time("#{object_name(object)} run at #{Time.now}")
    end

    #
    # Logs the end of a job between two separator lines
    #
    def self.log_end_job(object)
      enclosed_with_time("#{object_name(object)} finished at #{Time.now}")
    end

    # The helpers below stay callable from outside: they were written
    # under a bare `private`, which has no effect on `def self.` methods.

    #
    # The object's name when it responds to name, its class name otherwise
    #
    def self.object_name(object)
      object.respond_to?(:name) ? object.name : object.class.name
    end

    #
    # Sends the message to the logger, creating the STDOUT one if needed
    #
    def self.info(message)
      self.logger ||= ::Logger.new(STDOUT)
      logger.info(message)
    end

    def self.log_with_time(msg)
      info("#{Time.now} - #{msg}")
    end

    def self.enclosed_with_time(msg)
      info("\n==================================================\n")
      info(msg)
      info("\n==================================================\n")
    end
  end
end