itiel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,35 @@
module Itiel
  module Lookup
    #
    # Input and output behavior shared by the Lookup steps.
    #
    # Assigning to +input+ runs lookup! on the received stream, stores the
    # result in +output+ and, when a next_step has been set, hands that
    # result over as the input of the next step.
    #
    # Every class that includes this module must implement lookup!
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step, :output

        # step >> other is shorthand for step.next_step = other
        alias_method :>>, :next_step=

        #
        # Receives the stream, looks it up and forwards the result.
        # Itiel::Logger is notified before and after the work is done,
        # both times with the size of the received stream.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          self.output = lookup!(input_stream)

          downstream = next_step
          downstream.input = output if downstream

          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder; including classes are expected to provide
        # their own lookup!
        #
        def lookup!(input_stream)
          raise "lookup is not implemented"
        end
      end

      # Mixes InstanceMethods into whichever class includes ChainedStep
      def self.included(receiver)
        receiver.send(:include, InstanceMethods)
      end
    end
  end
end
@@ -0,0 +1,16 @@
module Itiel
  module Lookup
    #
    # Lookup step whose lookup stream comes from a CSV file.
    #
    # The joining itself is provided by HashLookup; this class only
    # supplies the rows through lookup_source.
    #
    class CSVFile
      include ChainedStep
      include HashLookup

      # file_name: path handed to CSV.table when the lookup rows are read
      def initialize(file_name)
        @file_name = file_name
      end

      # Reads the file given to the constructor with CSV.table
      def lookup_source
        CSV.table(@file_name)
      end
    end
  end
end
@@ -0,0 +1,36 @@
module Itiel
  module Lookup
    #
    # Joins the main data stream with another lookup stream and appends the
    # specified columns from the lookup stream to the input stream.
    #
    # Example:
    #
    # Set up a lookup to the *authors* table:
    #
    #     @database_lookup = Itiel::Lookup::DatabaseTable.new
    #     @database_lookup.connection = :test
    #     @database_lookup.table_name = "authors"
    #
    # Join the "author_name" column on the input stream with the "name"
    # column in the lookup stream:
    #
    #     @database_lookup.lookup_columns = { "author_name" => "name" }
    #
    # Join the "id" column in the lookup stream as "author_id":
    #
    #     @database_lookup.joined_columns = { "id" => "author_id" }
    #
    class DatabaseTable
      include ChainedStep
      include HashLookup
      include Itiel::DB::SQLConnectable

      # Name of the table the lookup rows are read from (String or Symbol)
      attr_accessor :table_name

      #
      # Returns every row of table_name, read through the connection
      # obtained from sequel_connection.
      #
      # NOTE(review): sequel_connection and connection are not defined in
      # this class; presumably they come from Itiel::DB::SQLConnectable.
      #
      def lookup_source
        db = self.class.sequel_connection(connection)
        # A single to_sym is enough; the original converted twice.
        db[table_name.to_sym].all
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Itiel
  module Lookup
    #
    # Hash based join between an input stream and a lookup source.
    #
    # lookup_columns maps the input column to the lookup column, and
    # joined_columns maps lookup columns to the names they get in the
    # output. Only the first pair of lookup_columns is used for the join.
    #
    # Including classes must implement lookup_source.
    #
    module HashLookup
      attr_accessor :lookup_columns, :joined_columns

      #
      # Merges the joined columns into every row of input_stream (the rows
      # are modified in place with merge!) and returns the resulting rows.
      # Rows without a match receive the joined columns set to nil.
      #
      def lookup!(input_stream)
        input_stream.map do |row|
          origin_column = lookup_columns.first.first.to_sym
          match = lookup_stream[row[origin_column]]
          row.merge!(match || empty_joined_columns)
        end
      end

      #
      # Index of the lookup source, keyed by the value of the lookup
      # column. Each entry only holds the joined columns, already renamed.
      # The index is built on first use and then kept in @lookup.
      #
      def lookup_stream
        @lookup ||= lookup_source.each_with_object({}) do |source_row, index|
          picked = joined_columns.each_with_object({}) do |(original, target), clean_row|
            clean_row[target.to_sym] = source_row[original.to_sym]
          end

          index[source_row[lookup_columns.first.last.to_sym]] = picked
        end
      end

      # Hash with every joined target column set to nil
      def empty_joined_columns
        joined_columns.each_with_object({}) do |(_original, target), blank|
          blank[target.to_sym] = nil
        end
      end

      # Placeholder; including classes provide the rows to look up against
      def lookup_source
        raise 'lookup_source is not implemented'
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Itiel
  #
  # Mixin that adds two plain read/write attributes to a step:
  # step_name and debug.
  #
  module Nameable
    attr_accessor :step_name
    attr_accessor :debug
  end
end
@@ -0,0 +1,18 @@
module Itiel
  module Script
    #
    # Input behavior shared by the Script steps.
    #
    # Assigning to +input+ calls execute with the stream and then passes
    # the very same stream on to next_step, when one has been set.
    #
    # Including classes must implement execute.
    #
    module ChainedStep
      attr_accessor :next_step

      # step >> other is shorthand for step.next_step = other
      alias_method :>>, :next_step=

      def input=(input_stream)
        execute(input_stream)
        return unless next_step

        next_step.input = input_stream
      end

      # Placeholder; including classes provide their own execute
      def execute(*)
        raise "execute is not implemented"
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Itiel
  module Script
    #
    # Processes the stream with plain ruby.
    #
    # Initialize it with a block; the block is called once for each row
    # of the stream.
    #
    # Usage:
    #
    #     Itiel::Script::RubyScript.new do |row|
    #       row["slug"] = row["title"]
    #     end
    #
    class RubyScript
      include ChainedStep

      # The block that is called with every row
      attr_accessor :block

      def initialize(&block)
        self.block = block
      end

      # Calls the stored block with each row of input_stream
      def execute(input_stream)
        input_stream.each { |row| block.call(row) }
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Itiel
  module Script
    #
    # Executes a SQL script or command on the specified connection.
    #
    # The SQL is taken from the first constructor argument, or can be
    # assigned later through +sql+.
    #
    class SQLScript
      include ChainedStep
      include Itiel::DB::SQLConnectable

      attr_accessor :connection, :sql

      # Only the first argument is used; it becomes the SQL to run
      def initialize(*args)
        self.sql = args.first
      end

      # Sends the SQL to the connection; the received stream is ignored
      def execute(*)
        self.class.sequel_connection(connection) << sql
      end

      #
      # Raises when the connection or the SQL is missing.
      #
      # NOTE(review): execute does not call this method; confirm whether
      # callers are expected to invoke it themselves.
      #
      def sanity_check
        raise Itiel::MissingConnection unless connection
        raise Itiel::SQLSentenceNotProvided.new unless sql
      end
    end
  end
end
@@ -0,0 +1,47 @@
module Itiel
  module Transform
    #
    # Creates several columns at once, or replaces the current value of
    # existing ones, with the result of a block. The block receives the
    # row and must return a hash whose keys are the columns to set.
    #
    # Example:
    #
    #     calculated = CalculatedColumns.new do |row|
    #       total = row[:price] * row[:quantity]
    #       tax   = total * 0.2
    #       { :total => total, :tax => tax }
    #     end
    #
    #     calculated.transform!([{ :price => 10, :quantity => 3 }])
    #     # => [{ :price => 10, :quantity => 3, :total => 30, :tax => 6.0 }]
    #
    # Remember to cast numeric fields inside the block so they have the
    # type you expect; stored column types may vary depending on the source.
    #
    class CalculatedColumns
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :arguments

      # A block is mandatory; without one a RuntimeError is raised
      def initialize(&block)
        raise "Missing block" if block.nil?
        @block = block
      end

      #
      # Merges the hash returned by the block into every row, in place.
      # Requires next_step to be set (see sanity_check).
      #
      def transform!(input_stream)
        sanity_check
        input_stream.each { |row| row.merge!(@block.call(row)) }
      end

      # Raises Itiel::UndefinedNextStepException when there is no next_step
      def sanity_check
        raise Itiel::UndefinedNextStepException.new("Undefined next_step") unless next_step
      end
    end
  end
end
@@ -0,0 +1,27 @@
module Itiel
  module Transform
    #
    # Defines the behavior of the transformations and how they handle
    # the data stream.
    #
    # All classes including this module should implement a transform!
    # method that does the real transformation.
    #
    # That method is called as soon as the transformation receives input,
    # and its result is handed to next_step as input.
    #
    module ChainedStep
      attr_accessor :next_step

      # step >> other is shorthand for step.next_step = other
      alias :>> :next_step=

      #
      # Transforms the stream and forwards the result to next_step.
      #
      # When no next_step has been set the transformation still runs but
      # nothing is forwarded, which matches how the Lookup and Script
      # chained steps treat the last step of a chain. Previously this
      # raised NoMethodError on nil.
      #
      def input=(stream)
        transformed = transform!(stream)
        next_step.input = transformed if next_step
      end

      #
      # Placeholder; raises Itiel::MethodNotImplementedException unless
      # the including class defines its own transform!
      #
      def transform!(stream)
        raise Itiel::MethodNotImplementedException.new "You need to define the transform! method in the implementing class"
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Itiel
  module Transform
    #
    # Appends a new column with a constant value to the data stream.
    #
    #     Itiel::Transform::ConstantColumn.new("column" => "Constant Value")
    #
    # Or specify the column and the value later:
    #
    #     transformation = Itiel::Transform::ConstantColumn.new
    #     transformation.append = { "column" => "Constant Value" }
    #
    # The resulting stream has a column named "column" holding
    # "Constant Value" on every row.
    #
    class ConstantColumn
      include ChainedStep
      include Itiel::Nameable

      # Hash of column => constant value merged into every row
      attr_accessor :append

      # Only the first argument is used; it becomes +append+
      def initialize(*args)
        self.append = args[0]
      end

      # Returns new rows (built with merge); the input rows are not modified
      def transform!(input_stream)
        input_stream.map { |row| row.merge(append) }
      end
    end
  end
end
@@ -0,0 +1,44 @@
module Itiel
  module Transform
    #
    # Defines the input and output behavior of the transformations.
    #
    # The received input is stored as is; the result of transform! is
    # computed when +output+ is read and kept for later reads.
    #
    # Classes including this module should implement transform!
    #
    module InputOutputBehavior
      module InstanceMethods
        attr_reader :input

        # Stores the stream and reports its size to Itiel::Logger
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          @input = input_stream
        end

        #
        # Returns the cached output by default; call it with true to run
        # the transformation again before returning the output.
        # The size of the output is reported to Itiel::Logger on every call.
        #
        def output(retransform = false)
          @output = transform!(input) if retransform || !@output
          Itiel::Logger.log_processed(self, @output.size)
          @output
        end

        #
        # This method has to be implemented in the class;
        # this default does nothing and returns nil.
        #
        def transform!(input_stream)
        end
      end

      # Mixes InstanceMethods into whichever class includes this module
      def self.included(receiver)
        receiver.send(:include, InstanceMethods)
      end
    end
  end
end
@@ -0,0 +1,43 @@
module Itiel
  module Transform
    #
    # Maps the values of a column to different values.
    #
    # Usage:
    #
    #     @transformation = Itiel::Transform::MapValues.new(
    #       {
    #         "active" => { true => "yes", false => "no" }
    #       }
    #     )
    #
    # This maps the values of the "active" column: true becomes "yes" and
    # false becomes "no". Values without a replacement, and columns that
    # are not part of the mapping, are copied unchanged.
    #
    class MapValues
      include ChainedStep
      include Itiel::Nameable

      # Hash of column => { original value => replacement value }
      attr_accessor :mapping

      def initialize(mapping)
        self.mapping = mapping
      end

      #
      # Returns a new array with a new hash per row; the input rows are
      # not modified.
      #
      def transform!(input_stream)
        input_stream.map do |stream_row|
          stream_row.each_with_object({}) do |(column, value), new_row|
            new_row[column] = mapped_value(column, value)
          end
        end
      end

      private

      #
      # Replacement for value in column, or value itself when there is none.
      #
      # A replacement that is explicitly listed is always honored, even
      # when it is false or nil (the previous `|| value` fallback dropped
      # those). For values that are not listed the old fallback is kept,
      # so a default set on the replacements hash still applies.
      #
      def mapped_value(column, value)
        return value unless mapping.key?(column)

        replacements = mapping[column]
        return replacements[value] if replacements.key?(value)

        replacements[value] || value
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Itiel
  module Transform
    #
    # Removes specific columns from the data stream.
    #
    # Usage:
    #
    #     @transformer = Itiel::Transform::RemoveColumn.new("order_id")
    #
    # In the example, the output stream would not have the order_id column.
    #
    class RemoveColumn
      include ChainedStep
      include Itiel::Nameable

      # Array with the names of the columns to remove
      attr_accessor :mappings

      # Every argument is taken as the name of a column to remove
      def initialize(*args)
        self.mappings = args
      end

      # Returns new rows without the listed columns; input rows are not modified
      def transform!(input_stream)
        input_stream.map do |object|
          object.reject { |key, _value| mappings.include?(key) }
        end
      end
    end
  end
end