itiel 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 319124d81efc2cd9b0963c9f1693862d7347170e
4
+ data.tar.gz: 9e89b3071f090742e273d80a0023f703064f7325
5
+ SHA512:
6
+ metadata.gz: e4b71fb0ccdcec9e48cae895382e069b67680924b56901e034a10e6e452448081b2ea0fa5538e63e334ed2c15760dfa0143799795cbd753488fc72ba5b0d638f
7
+ data.tar.gz: cc404b8fa1e0404e93caa8ab29d031a1496f7d74fe4440d55a524ff623d6cc6afc4ca62e87c19806ae18955768e14d1facb30728f0a41a4c1453f75fc0419620
@@ -0,0 +1,17 @@
1
+ require 'autotest/fsevent'
2
+ require 'autotest/growl'
3
+ require 'autotest/fsevent'
4
+ require 'autotest/timestamp'
5
+
6
+ Autotest.add_hook :initialize do |at|
7
+ at.clear_mappings
8
+
9
+ %w{.git coverage/ README gemspec DS_Store features/ Gemfile.lock Rakefile Gemfile}.each do |exception|
10
+ at.add_exception exception
11
+ end
12
+
13
+ at.add_mapping %r%^lib/itiel/(.*)\.rb$% do |_, m|
14
+ ["test/unit/#{m[1]}_test.rb"]
15
+ end
16
+
17
+ end
@@ -0,0 +1,13 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile*.lock
4
+ pkg/*
5
+ **.swp
6
+ .rvmrc
7
+ **.sqlite3
8
+ tmp
9
+ doc
10
+ coverage
11
+ test.db
12
+ tags
13
+ .idea
@@ -0,0 +1,36 @@
1
+ cache:
2
+ paths:
3
+ - vendor/ruby
4
+
5
+ before_script:
6
+ - ruby -v # Print out ruby version for debugging
7
+ - gem install bundler --no-ri --no-rdoc # Bundler is not installed with the image
8
+ - bundle install -j $(nproc) --path vendor # Install dependencies into ./vendor/ruby
9
+
10
+ tests:2.3.rails.4.0:
11
+ image: "ruby:2.3"
12
+ variables:
13
+ BUNDLE_GEMFILE: "Gemfile.rails.4.0"
14
+ script:
15
+ - bundle exec rake
16
+
17
+ tests:2.3.rails.4.1:
18
+ image: "ruby:2.3"
19
+ variables:
20
+ BUNDLE_GEMFILE: "Gemfile.rails.4.1"
21
+ script:
22
+ - bundle exec rake
23
+
24
+ tests:2.2.rails.4.0:
25
+ image: "ruby:2.2"
26
+ variables:
27
+ BUNDLE_GEMFILE: "Gemfile.rails.4.0"
28
+ script:
29
+ - bundle exec rake
30
+
31
+ tests:2.2.rails.4.1:
32
+ image: "ruby:2.2"
33
+ variables:
34
+ BUNDLE_GEMFILE: "Gemfile.rails.4.1"
35
+ script:
36
+ - bundle exec rake
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -0,0 +1 @@
1
+ 2.3.2
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.3.4
5
+ - 2.2.7
6
+
7
+ gemfile:
8
+ - Gemfile.rails.4.0
9
+ - Gemfile.rails.4.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.0.0'
7
+ gem 'activesupport', '~> 4.0.0'
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.1.0'
7
+ gem 'activesupport', '~> 4.1.0'
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.2.0'
7
+ gem 'activesupport', '~> 4.2.0'
@@ -0,0 +1,106 @@
1
+ # Itiel
2
+
3
+ [![build status](https://gitlab.com/dabit/itiel/badges/master/build.svg)](https://gitlab.com/dabit/itiel/commits/master)
4
+ [![coverage report](https://gitlab.com/dabit/itiel/badges/master/coverage.svg)](https://gitlab.com/dabit/itiel/commits/master)
5
+
6
+ Hopefully, it will be an awesome Framework to do ETL with Ruby. It
7
+ should only work with *Ruby 2.0+*.
8
+
9
+ [A working example](https://github.com/railsmx/rails-mx-blog/blob/master/itiel/posts.rb)
10
+
11
+ # This README is work in progress
12
+
13
+ You should not trust it, until I clean it up.
14
+
15
+ For better examples and documentation, refer to the *features* folder.
16
+
17
+
18
+ # TODO: Everything I want it to be, below:
19
+
20
+ ### Define the Itiel project structure
21
+
22
+ I have in mind something like this:
23
+
24
+ /itiel_example/
25
+ |~config/
26
+ | `-sources.yml
27
+ |~lib/
28
+ | `-transformation_class1.rb
29
+ |~jobs/
30
+ | `-main_job.rb
31
+ |~tasks/
32
+ | `-itiel.rake
33
+ |-Gemfile
34
+ |-Gemfile.lock
35
+
36
+ ### Controllers
37
+
38
+ Take care of defining and running the jobs. It's where you put all the
39
+ code like the example above.
40
+
41
+ ### A command line bin to generate the basic structure
42
+
43
+ itiel new project_name.
44
+
45
+ Use thor?
46
+
47
+ ### Rake
48
+
49
+ rake itiel:run
50
+
51
+ ### Mongo
52
+
53
+ Picture yourself migrating from SQL to Mongo
54
+
55
+ ### Joins, Scripts and lots more transformations
56
+
57
+ Some on my mind:
58
+
59
+ Itiel::Joins::DatabaseTable
60
+ Itiel::Lookups::DatabaseTable
61
+ Itiel::Loads::FTP
62
+ Itiel::Loads::HTTPRequest
63
+ Itiel::Loads::MongoDBDocument
64
+ Itiel::Script::ExecuteInSystem
65
+ Itiel::Extracts::HTTP
66
+ Itiel::Extracts::MongoDBDocument
67
+ Itiel::Transforms::CustomSort
68
+ Itiel::Transforms::MapValues
69
+ Itiel::Transforms::IfNil
70
+
71
+ Just ideas, I have nothing on how to implement most of them
72
+
73
+ ### Filters and conditional stream routing
74
+
75
+ If the column X has a value of Y, send true output to this step, send
76
+ false output to this other step
77
+
78
+ ### Slowly Changing Dimensions
79
+
80
+ Can't have ETL without these
81
+
82
+ ### Test helpers
83
+
84
+ So you can TDD your ETL
85
+
86
+ ### Logging
87
+
88
+ It would be extra nice if all the steps logged statistics
89
+
90
+ * Processing X rows of Y
91
+ * Time spent
92
+ * Transformation Name
93
+ * Configurable log level
94
+
95
+ ### Viz
96
+
97
+ rake itiel:graph
98
+
99
+ Creates a graphical representation of your ETL flow
100
+
101
+ ### Ruby 1.8 and 1.9
102
+
103
+ Not really, Ruby 2.0+ is enough. I don't have plans to make it
104
+ work on 1.8 or 1.9
105
+
106
+
@@ -0,0 +1,13 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ require 'cucumber/rake/task'
6
+
7
+ task :default => [:spec, :cucumber]
8
+
9
+ RSpec::Core::RakeTask.new
10
+
11
+ Cucumber::Rake::Task.new do |task|
12
+ task.cucumber_opts = %w{--format progress}
13
+ end
@@ -0,0 +1,10 @@
1
+ #! /bin/bash -e
2
+
3
+ source "$HOME/.rvm/scripts/rvm"
4
+ rvm 1.9.2
5
+ rvm gemset create itiel
6
+ rvm use 1.9.2@itiel
7
+ bundle -v || gem install bundler
8
+ bundle install
9
+ rake
10
+
@@ -0,0 +1,16 @@
1
+ Feature: Extract data from a Database Table
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ # @database_table = Itiel::Extract::DatabaseTable.new
12
+ # @database_table.connection = :test
13
+ # @database_table.table_name = 'blogposts'
14
+ When I create a Itiel::Extract::DatabaseTable object for the 'blogposts' table
15
+ Then the extraction for that object should have 3 rows
16
+
@@ -0,0 +1,17 @@
1
+ Feature: Extract rows from a database using a custom SQL script
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ When I create a Itiel::Extract::CustomSQL object with the query:
12
+ """
13
+ SELECT * FROM blogposts WHERE id = 3;
14
+ """
15
+
16
+ Then the extraction for that object should have 1 row
17
+
@@ -0,0 +1,21 @@
1
+ Feature: Load data into a database table
2
+
3
+ Background:
4
+ Given a blogposts database table
5
+ And a "source.csv" file with the following rows:
6
+ | id | title | author |
7
+ | 1 | Pride and Prejudice | Jane Austen |
8
+ | 2 | Treasure Island | R L Stevenson |
9
+ | 3 | Steve Jobs | Walter Isaacson |
10
+ | 4 | The thank you economy | Gary Vaynerchuk |
11
+
12
+ # @source = Itiel::Extract::CSVFile.new('source.csv')
13
+ And I create a Extractor::CSVFile object with "source.csv"
14
+ # @destination = Itiel::Load::DatabaseTable.new :test, "blogposts"
15
+ And I create a Loader::DatabaseTable object for the "blogposts" table
16
+
17
+ @database
18
+ Scenario:
19
+ When I load the source into the destination
20
+ Then the "blogposts" table should have 4 records
21
+
@@ -0,0 +1,41 @@
1
+ Feature: Lookup data from a CSV file
2
+
3
+ @database
4
+ Scenario:
5
+ Given an orders database table with rows:
6
+ | id | email |
7
+ | 1 | john@example.com |
8
+ | 2 | tim@test.com |
9
+ | 3 | math@example.com |
10
+
11
+ And a "customers.csv" file with the following rows:
12
+ | id | email |
13
+ | 1 | john@example.com |
14
+ | 2 | tim@test.com |
15
+ | 3 | math@example.com |
16
+
17
+ # @database_table = Itiel::Extract::DatabaseTable.new
18
+ # @database_table.connection = :test
19
+ # @database_table.table_name = 'orders'
20
+ And I create a Itiel::Extract::DatabaseTable object for the 'orders' table
21
+
22
+ # @lookup = Itiel::Lookup::CSVFile.new('customers.csv')
23
+ And I create a Itiel::Lookup::CSVFile object with the "customers.csv" file
24
+
25
+ # @lookup.lookup_columns = { :email => :email }
26
+ And the lookup joins the "email" column in the source with the "email" column in the lookup stream
27
+
28
+ # @lookup.joined_columns = { :id => :customer_id }
29
+ And the lookup appends the "id" column on the lookup stream to the source as "customer_id"
30
+
31
+ And the data flows in the following direction:
32
+ | @database_table |
33
+ | @lookup |
34
+
35
+ When I start the source
36
+
37
+ Then the output for the lookup should be:
38
+ | id | email | customer_id |
39
+ | 1 | john@example.com | 1 |
40
+ | 2 | tim@test.com | 2 |
41
+ | 3 | math@example.com | 3 |
@@ -0,0 +1,43 @@
1
+ Feature: Lookup data from a Database Table
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ And an authors database table with rows:
12
+ | id | name |
13
+ | 1 | Tim Taylor |
14
+ | 2 | John Miller |
15
+ | 3 | John Mellencamp |
16
+
17
+ # @database_table = Itiel::Extract::DatabaseTable.new
18
+ # @database_table.connection = :test
19
+ # @database_table.table_name = 'blogposts'
20
+ And I create a Itiel::Extract::DatabaseTable object for the 'blogposts' table
21
+
22
+ # @lookup = Itiel::Lookup::DatabaseTable.new
23
+ # @lookup.connection = :test
24
+ # @lookup.table_name = 'authors'
25
+ And I create a Itiel::Lookup::DatabaseTable object for the "authors"
26
+
27
+ # @lookup.lookup_columns = { :author => :name }
28
+ And the lookup joins the "author" column in the source with the "name" column in the lookup stream
29
+
30
+ # @lookup.joined_columns = { :id => :author_id }
31
+ And the lookup appends the "id" column on the lookup stream to the source as "author_id"
32
+
33
+ And the data flows in the following direction:
34
+ | @database_table |
35
+ | @lookup |
36
+
37
+ When I start the source
38
+
39
+ Then the output for the lookup should be:
40
+ | id | title | author | author_id |
41
+ | 1 | The great tool | Tim Taylor | 1 |
42
+ | 2 | A football anecdote | John Miller | 2 |
43
+ | 3 | Historical music | John Mellencamp | 3 |
@@ -0,0 +1,19 @@
1
+ Feature: Ruby scripting
2
+
3
+ Scenario:
4
+ Given the following data is in the stream:
5
+ | id | title | author |
6
+ | 1 | The great tool | Tim Taylor |
7
+ | 2 | A football anecdote | John Miller |
8
+ | 3 | Historical music | John Mellencamp |
9
+
10
+ And I create a Itiel::Script::RubyScript object with the following block of code:
11
+ """
12
+ row["slug"] = row["title"].parameterize
13
+ """
14
+
15
+ Then the resulting output stream should be:
16
+ | id | title | author | slug |
17
+ | 1 | The great tool | Tim Taylor | the-great-tool |
18
+ | 2 | A football anecdote | John Miller | a-football-anecdote |
19
+ | 3 | Historical music | John Mellencamp | historical-music |
@@ -0,0 +1,15 @@
1
+ Given /^a "([^"]*)" file with the following rows:$/ do |filename, table|
2
+ CSV.open File.join('tmp', filename), "wb" do |csv|
3
+ table.raw.each { |row| csv << row }
4
+ end
5
+ end
6
+
7
+ Then /^the "([^"]*)" file should exist with the following content:$/ do |filename, table|
8
+ expected_file = CSV.generate do |csv|
9
+ table.raw.each { |row| csv << row }
10
+ end
11
+
12
+ resulting_file = File.new(File.join('tmp', filename), "rb").read
13
+
14
+ expect(resulting_file).to eq expected_file
15
+ end