itiel 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,35 @@
module Itiel
  module Lookup
    #
    # This module defines the input and output behavior of Lookup steps.
    #
    # Whenever the instance receives input, it calls lookup! and stores the
    # return value as its output. If a next_step is defined, that output is
    # handed to it as its input.
    #
    # All the classes that include this module must implement lookup!
    #
    module ChainedStep
      module InstanceMethods
        attr_accessor :next_step, :output

        # Allows chaining steps with `step >> other_step`; returns other_step.
        alias :>> :next_step=

        #
        # Receives the stream, runs the lookup on it and forwards the
        # result to next_step when there is one.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          self.output = lookup!(input_stream)
          next_step.input = output if next_step
          Itiel::Logger.log_processed(self, input_stream.size)
        end

        #
        # Placeholder; raises a RuntimeError until the including class
        # provides its own lookup!
        #
        def lookup!(input_stream)
          raise "lookup is not implemented"
        end
      end

      # Mixes InstanceMethods into whatever includes ChainedStep.
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
@@ -0,0 +1,16 @@
module Itiel
  module Lookup
    #
    # Lookup step whose lookup stream is read from a CSV file.
    #
    # This class only supplies lookup_source; the chaining and joining
    # behavior comes from the included ChainedStep and HashLookup modules.
    #
    class CSVFile
      include ChainedStep
      include HashLookup

      #
      # file_name is the path handed to CSV.table when the lookup source
      # is read.
      #
      def initialize(file_name)
        @file_name = file_name
      end

      #
      # Returns the contents of the file as read by CSV.table.
      #
      def lookup_source
        CSV.table(@file_name)
      end
    end
  end
end
@@ -0,0 +1,36 @@
module Itiel
  module Lookup
    #
    # Joins the main data stream with another lookup stream and appends the specified
    # columns from the lookup stream to the input stream.
    #
    # Example:
    #
    # Set up a lookup to the *authors* table:
    #
    #     @database_lookup = Itiel::Lookup::DatabaseTable.new
    #     @database_lookup.connection = :test
    #     @database_lookup.table_name = "authors"
    #
    # Join the "author_name" column on the input stream with the "name" column in the lookup stream.
    #
    #     @database_lookup.lookup_columns = { "author_name" => "name" }
    #
    # Join the "id" column in the lookup stream as "author_id"
    #
    #     @database_lookup.joined_columns = { "id" => "author_id" }
    #
    class DatabaseTable
      include ChainedStep
      include HashLookup
      include Itiel::DB::SQLConnectable

      attr_accessor :table_name

      #
      # Returns every row of table_name, read through the connection
      # obtained from sequel_connection.
      #
      def lookup_source
        db = self.class.sequel_connection(connection)
        db[table_name.to_sym].all
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Itiel
  module Lookup
    #
    # Hash based join between an input stream and a lookup source.
    #
    # lookup_columns is a hash of { input column => lookup column } naming
    # the columns to join on; only its first pair is used.
    #
    # joined_columns is a hash of { lookup column => output column } naming
    # the lookup columns that get appended to every input row.
    #
    # Column names are converted to symbols before rows are accessed.
    #
    # Including classes must implement lookup_source.
    #
    module HashLookup
      attr_accessor :lookup_columns, :joined_columns

      #
      # Merges the joined columns into each row of input_stream and returns
      # the rows. Rows are modified in place. A row without a match in the
      # lookup gets every joined column set to nil.
      #
      def lookup!(input_stream)
        input_stream.map do |row|
          origin_column = lookup_columns.first[0].to_sym
          row.merge!(lookup_stream[row[origin_column]] || empty_joined_columns)
        end
      end

      #
      # Index of the lookup source, keyed by the value of the lookup column.
      # Each entry holds only the joined columns, already renamed. The index
      # is built on first use and memoized.
      #
      def lookup_stream
        @lookup ||= lookup_source.each_with_object({}) do |row, index|
          clean_row = {}
          joined_columns.each do |original, target|
            clean_row[target.to_sym] = row[original.to_sym]
          end

          index[row[lookup_columns.first[1].to_sym]] = clean_row
        end
      end

      #
      # Hash with every output column set to nil, used for unmatched rows.
      #
      def empty_joined_columns
        joined_columns.each_with_object({}) do |(_original, target), blank|
          blank[target.to_sym] = nil
        end
      end

      #
      # Placeholder; raises a RuntimeError until the including class
      # provides the rows to look up against.
      #
      def lookup_source
        raise 'lookup_source is not implemented'
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Itiel
  #
  # Gives the including class a step_name and a debug accessor.
  #
  module Nameable
    attr_accessor :step_name, :debug
  end
end
@@ -0,0 +1,18 @@
module Itiel
  module Script
    #
    # Defines how Script steps handle the data stream.
    #
    # Whenever the instance receives input, it calls execute with the
    # stream and then hands the same stream object to next_step, if one
    # is defined.
    #
    # All the classes that include this module must implement execute.
    #
    module ChainedStep
      attr_accessor :next_step

      # Allows chaining steps with `step >> other_step`; returns other_step.
      alias :>> :next_step=

      def input=(input_stream)
        execute(input_stream)
        next_step.input = input_stream if next_step
      end

      #
      # Placeholder; raises a RuntimeError until the including class
      # provides its own execute.
      #
      def execute(*)
        raise "execute is not implemented"
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Itiel
  module Script
    #
    # Process the stream with ruby
    #
    # Initialize it with a block; the block is called once with each row
    # of the stream.
    #
    # Usage:
    #
    #     Itiel::Script::RubyScript.new do |row|
    #       row["slug"] = row["title"]
    #     end
    #
    class RubyScript
      include ChainedStep

      attr_accessor :block

      def initialize(&block)
        self.block = block
      end

      #
      # Calls the stored block with every row of input_stream, in order.
      #
      def execute(input_stream)
        input_stream.each do |row|
          block.call(row)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Itiel
  module Script
    #
    # Executes a SQL script or command on the specified
    # connection
    #
    class SQLScript
      include ChainedStep
      include Itiel::DB::SQLConnectable

      attr_accessor :connection
      attr_accessor :sql

      #
      # The first argument, when given, is the SQL to run; it can also be
      # assigned later through sql=.
      #
      def initialize(*args)
        self.sql = args[0]
      end

      #
      # Sends sql to the database obtained from sequel_connection. Any
      # arguments (such as the input stream) are ignored.
      #
      def execute(*)
        db = self.class.sequel_connection(connection)
        db << sql
      end

      #
      # Raises Itiel::MissingConnection when no connection is set and
      # Itiel::SQLSentenceNotProvided when no sql is set.
      #
      # NOTE(review): nothing in this class calls sanity_check; confirm
      # whether execute is expected to run it first.
      #
      def sanity_check
        raise Itiel::MissingConnection unless connection
        raise Itiel::SQLSentenceNotProvided unless sql
      end
    end
  end
end
@@ -0,0 +1,47 @@
module Itiel
  module Transform
    #
    # This class allows us to create multiple columns, or to replace the
    # current value of a column, with the result of executing a block. The
    # block must return a hash with the columns you wish to add as the
    # hash keys.
    #
    # Example:
    #
    #     calculated = CalculatedColumns.new do |row|
    #       total = row[:price] * row[:quantity]
    #       tax   = total * 0.2
    #       { :total => total, :tax => tax }
    #     end
    #
    #     calculated.next_step = some_step
    #     calculated.transform!([{:price => 12.50, :quantity => 3}, {:price => 4.95, :quantity => 5}])
    #     => [{:price => 12.50, :quantity => 3, :total => 37.5, :tax => 7.5},
    #         {:price => 4.95, :quantity => 5, :total => 24.75, :tax => 4.95}]
    #
    # It is important to note that you have to use casting on numeric fields
    # just to make sure that it is from the right type. Stored column types
    # may vary depending on the source.
    #
    class CalculatedColumns
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :arguments

      #
      # Raises a RuntimeError ("Missing block") when called without a block.
      #
      def initialize(&block)
        raise "Missing block" unless block_given?
        @block = block
      end

      #
      # Merges the hash returned by the block into every row, in place, and
      # returns input_stream. A next_step must be set beforehand.
      #
      def transform!(input_stream)
        sanity_check
        input_stream.each do |row|
          row.merge!(@block.call(row))
        end
      end

      #
      # Raises Itiel::UndefinedNextStepException when next_step is not set.
      #
      def sanity_check
        raise Itiel::UndefinedNextStepException, "Undefined next_step" unless next_step
      end
    end
  end
end
@@ -0,0 +1,27 @@
module Itiel
  module Transform
    #
    # Defines the behavior of the Transformations
    # and how they handle the data stream.
    #
    # All classes including this module should implement a
    # transform! method that does the real transformation.
    #
    # That method will be called as soon as the transformation
    # receives input, and its result is handed to next_step.
    #
    module ChainedStep
      attr_accessor :next_step

      # Allows chaining steps with `step >> other_step`; returns other_step.
      alias :>> :next_step=

      #
      # Transforms the stream and passes the result on to next_step.
      #
      # Raises Itiel::UndefinedNextStepException when next_step is not set,
      # instead of failing with a NoMethodError on nil once the
      # transformation has already run.
      #
      def input=(stream)
        raise Itiel::UndefinedNextStepException, "Undefined next_step" unless next_step

        next_step.input = transform!(stream)
      end

      #
      # Placeholder; raises Itiel::MethodNotImplementedException until the
      # including class provides its own transform!
      #
      def transform!(stream)
        raise Itiel::MethodNotImplementedException, "You need to define the transform! method in the implementing class"
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Itiel
  module Transform
    #
    # This transformation appends a new Column
    # with a constant value to the data stream
    #
    #     Itiel::Transform::ConstantColumn.new("column" => "Constant Value")
    #
    # Or, specify the column and the value later:
    #
    #     transformation = Itiel::Transform::ConstantColumn.new
    #
    #     transformation.append = { "column" => "Constant Value" }
    #
    # The resulting stream will have a column named "column" with the value
    # "Constant Value" for all rows.
    #
    class ConstantColumn
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :append

      #
      # The first argument, when given, is the hash of constant columns;
      # it can also be assigned later through append=.
      #
      def initialize(*args)
        self.append = args.first
      end

      #
      # Returns a new array with a copy of every row merged with append.
      # The input rows themselves are left untouched.
      #
      def transform!(input_stream)
        input_stream.map { |row| row.merge(append) }
      end
    end
  end
end
@@ -0,0 +1,44 @@
module Itiel
  module Transform
    #
    # This module defines the behavior for input and output
    # on all our transformations.
    #
    # Whenever a transformation is asked for its output, it stores the
    # result of calling transform! with the input it received.
    #
    # All the classes that include this module should implement transform!
    #
    module InputOutputBehavior
      module InstanceMethods
        #
        # Stores the stream; nothing is transformed until output is called.
        #
        def input=(input_stream)
          Itiel::Logger.log_received(self, input_stream.size)
          @input = input_stream
        end

        def input
          @input
        end

        #
        # Returns cached output by default, call it with true to run the
        # transformation before returning the output
        #
        def output(retransform = false)
          # Transform on demand: when forced, or when nothing is cached yet.
          @output = transform!(input) if retransform || !@output
          Itiel::Logger.log_processed(self, @output.size)
          @output
        end

        #
        # This method has to be implemented in the class
        #
        def transform!(input_stream) ; end
      end

      # Mixes InstanceMethods into whatever includes InputOutputBehavior.
      def self.included(receiver)
        receiver.send :include, InstanceMethods
      end
    end
  end
end
@@ -0,0 +1,43 @@
module Itiel
  module Transform
    #
    # Maps a field value to different values
    #
    # Usage:
    #
    #     @transformation = Itiel::Transform::MapValues.new(
    #       {
    #         "active" => { true => "yes", false => "no" }
    #       }
    #     )
    #
    # This would map all the values on the active column, true to yes and false to no
    #
    # Values that have no entry in the mapping of their column, and columns
    # that have no mapping at all, are passed through unchanged.
    #
    class MapValues
      include ChainedStep
      include Nameable

      attr_accessor :mapping

      #
      # mapping is a hash of { column => { original value => new value } }.
      #
      def initialize(mapping)
        self.mapping = mapping
      end

      #
      # Returns a new array of new row hashes with the mapped values; the
      # input rows are left untouched.
      #
      def transform!(input_stream)
        input_stream.map do |stream_row|
          stream_row.each_with_object({}) do |(column, value), new_row|
            # key? rather than `||`, so that a value deliberately mapped to
            # false or nil is not replaced by the original value.
            new_row[column] =
              if mapping.key?(column) && mapping[column].key?(value)
                mapping[column][value]
              else
                value
              end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Itiel
  module Transform
    #
    # This transformation removes specific columns from the data stream
    #
    # Usage:
    #
    #     @transformer = Itiel::Transform::RemoveColumn.new("order_id")
    #
    # In the example, the output stream would not have the order_id column
    #
    class RemoveColumn
      include ChainedStep
      include Itiel::Nameable

      attr_accessor :mappings

      #
      # Every argument is the name of a column to remove.
      #
      def initialize(*args)
        self.mappings = args
      end

      #
      # Returns a new array holding a copy of every row without the columns
      # listed in mappings. Column names are compared as given, without any
      # conversion.
      #
      def transform!(input_stream)
        input_stream.map do |row|
          row.reject { |column, _value| mappings.include?(column) }
        end
      end
    end
  end
end