itiel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +17 -0
  3. data/.gitignore +13 -0
  4. data/.gitlab-ci.yml +36 -0
  5. data/.rspec +2 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/Gemfile +4 -0
  9. data/Gemfile.rails.4.0 +7 -0
  10. data/Gemfile.rails.4.1 +7 -0
  11. data/Gemfile.rails.4.2 +7 -0
  12. data/README.markdown +106 -0
  13. data/Rakefile +13 -0
  14. data/build.sh +10 -0
  15. data/features/extract/database_table.feature +16 -0
  16. data/features/extract/sql_script.feature +17 -0
  17. data/features/load/database_table_loader.feature +21 -0
  18. data/features/lookup/csv_file.feature +41 -0
  19. data/features/lookup/database_table.feature +43 -0
  20. data/features/script/ruby_script.feature +19 -0
  21. data/features/step_definitions/csv_steps.rb +15 -0
  22. data/features/step_definitions/extractor/csv_file_steps.rb +3 -0
  23. data/features/step_definitions/extractor/custom_sql_steps.rb +6 -0
  24. data/features/step_definitions/extractor/database_steps.rb +27 -0
  25. data/features/step_definitions/extractor/database_table_steps.rb +8 -0
  26. data/features/step_definitions/extractor/extraction_steps.rb +3 -0
  27. data/features/step_definitions/flow_steps.rb +9 -0
  28. data/features/step_definitions/loader/csv_file_steps.rb +4 -0
  29. data/features/step_definitions/loader/database_table_steps.rb +14 -0
  30. data/features/step_definitions/lookup/lookup_steps.rb +35 -0
  31. data/features/step_definitions/scripting/ruby_script_steps.rb +5 -0
  32. data/features/step_definitions/stream_steps.rb +8 -0
  33. data/features/step_definitions/transformation/calculated_column_steps.rb +5 -0
  34. data/features/step_definitions/transformation/calculated_columns_steps.rb +7 -0
  35. data/features/step_definitions/transformation/constant_column_steps.rb +3 -0
  36. data/features/step_definitions/transformation/map_values_step.rb +4 -0
  37. data/features/step_definitions/transformation/rename_column_steps.rb +3 -0
  38. data/features/step_definitions/transformation/select_column_steps.rb +3 -0
  39. data/features/step_definitions/transformation/single_column_sort_steps.rb +3 -0
  40. data/features/support/database.yml +1 -0
  41. data/features/support/env.rb +13 -0
  42. data/features/transform/transformations.feature +123 -0
  43. data/itiel.gemspec +34 -0
  44. data/lib/itiel.rb +45 -0
  45. data/lib/itiel/db/connection.rb +24 -0
  46. data/lib/itiel/db/sql_connectable.rb +33 -0
  47. data/lib/itiel/db/truncator.rb +30 -0
  48. data/lib/itiel/extract/chained_step.rb +22 -0
  49. data/lib/itiel/extract/csv_file.rb +31 -0
  50. data/lib/itiel/extract/custom_sql.rb +38 -0
  51. data/lib/itiel/extract/database_table.rb +23 -0
  52. data/lib/itiel/job.rb +116 -0
  53. data/lib/itiel/load/chained_step.rb +37 -0
  54. data/lib/itiel/load/csv_file.rb +45 -0
  55. data/lib/itiel/load/database_table.rb +34 -0
  56. data/lib/itiel/load/input_output_behavior.rb +36 -0
  57. data/lib/itiel/logger.rb +47 -0
  58. data/lib/itiel/lookup/chained_step.rb +35 -0
  59. data/lib/itiel/lookup/csv_file.rb +16 -0
  60. data/lib/itiel/lookup/database_table.rb +36 -0
  61. data/lib/itiel/lookup/hash_lookup.rb +35 -0
  62. data/lib/itiel/nameable.rb +6 -0
  63. data/lib/itiel/script/chained_step.rb +18 -0
  64. data/lib/itiel/script/ruby_script.rb +31 -0
  65. data/lib/itiel/script/sql_script.rb +29 -0
  66. data/lib/itiel/transform/calculated_columns.rb +47 -0
  67. data/lib/itiel/transform/chained_step.rb +27 -0
  68. data/lib/itiel/transform/constant_column.rb +35 -0
  69. data/lib/itiel/transform/input_output_behavior.rb +44 -0
  70. data/lib/itiel/transform/map_values.rb +43 -0
  71. data/lib/itiel/transform/remove_column.rb +33 -0
  72. data/lib/itiel/transform/rename_column.rb +43 -0
  73. data/lib/itiel/transform/select_column.rb +37 -0
  74. data/lib/itiel/version.rb +3 -0
  75. data/spec/db/sql_connectable_spec.rb +20 -0
  76. data/spec/extract/chained_step_spec.rb +31 -0
  77. data/spec/extract/csv_file_spec.rb +22 -0
  78. data/spec/extract/custom_sql_spec.rb +19 -0
  79. data/spec/extract/database_table_spec.rb +22 -0
  80. data/spec/job_spec.rb +80 -0
  81. data/spec/loader/chained_step_spec.rb +39 -0
  82. data/spec/loader/csv_file_spec.rb +69 -0
  83. data/spec/loader/database_table_spec.rb +29 -0
  84. data/spec/lookup/hash_lookup_spec.rb +108 -0
  85. data/spec/nameable_spec.rb +17 -0
  86. data/spec/script/chained_step_spec.rb +24 -0
  87. data/spec/script/ruby_script_spec.rb +18 -0
  88. data/spec/script/sql_script_spec.rb +41 -0
  89. data/spec/spec_helper.rb +24 -0
  90. data/spec/support/config/database.yml +1 -0
  91. data/spec/support/config/sources.yml +9 -0
  92. data/spec/transform/calculated_columns_spec.rb +36 -0
  93. data/spec/transform/chained_step_spec.rb +36 -0
  94. data/spec/transform/constant_column_spec.rb +22 -0
  95. data/spec/transform/map_values_spec.rb +26 -0
  96. data/spec/transform/rename_column_spec.rb +25 -0
  97. data/spec/transform/select_column_spec.rb +21 -0
  98. metadata +344 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 319124d81efc2cd9b0963c9f1693862d7347170e
4
+ data.tar.gz: 9e89b3071f090742e273d80a0023f703064f7325
5
+ SHA512:
6
+ metadata.gz: e4b71fb0ccdcec9e48cae895382e069b67680924b56901e034a10e6e452448081b2ea0fa5538e63e334ed2c15760dfa0143799795cbd753488fc72ba5b0d638f
7
+ data.tar.gz: cc404b8fa1e0404e93caa8ab29d031a1496f7d74fe4440d55a524ff623d6cc6afc4ca62e87c19806ae18955768e14d1facb30728f0a41a4c1453f75fc0419620
@@ -0,0 +1,17 @@
1
+ require 'autotest/fsevent'
2
+ require 'autotest/growl'
3
+ require 'autotest/fsevent'
4
+ require 'autotest/timestamp'
5
+
6
+ Autotest.add_hook :initialize do |at|
7
+ at.clear_mappings
8
+
9
+ %w{.git coverage/ README gemspec DS_Store features/ Gemfile.lock Rakefile Gemfile}.each do |exception|
10
+ at.add_exception exception
11
+ end
12
+
13
+ at.add_mapping %r%^lib/itiel/(.*)\.rb$% do |_, m|
14
+ ["test/unit/#{m[1]}_test.rb"]
15
+ end
16
+
17
+ end
@@ -0,0 +1,13 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile*.lock
4
+ pkg/*
5
+ **.swp
6
+ .rvmrc
7
+ **.sqlite3
8
+ tmp
9
+ doc
10
+ coverage
11
+ test.db
12
+ tags
13
+ .idea
@@ -0,0 +1,36 @@
1
+ cache:
2
+ paths:
3
+ - vendor/ruby
4
+
5
+ before_script:
6
+ - ruby -v # Print out ruby version for debugging
7
+ - gem install bundler --no-ri --no-rdoc # Bundler is not installed with the image
8
+ - bundle install -j $(nproc) --path vendor # Install dependencies into ./vendor/ruby
9
+
10
+ tests:2.3.rails.4.0:
11
+ image: "ruby:2.3"
12
+ variables:
13
+ BUNDLE_GEMFILE: "Gemfile.rails.4.0"
14
+ script:
15
+ - bundle exec rake
16
+
17
+ tests:2.3.rails.4.1:
18
+ image: "ruby:2.3"
19
+ variables:
20
+ BUNDLE_GEMFILE: "Gemfile.rails.4.1"
21
+ script:
22
+ - bundle exec rake
23
+
24
+ tests:2.2.rails.4.0:
25
+ image: "ruby:2.2"
26
+ variables:
27
+ BUNDLE_GEMFILE: "Gemfile.rails.4.0"
28
+ script:
29
+ - bundle exec rake
30
+
31
+ tests:2.2.rails.4.1:
32
+ image: "ruby:2.2"
33
+ variables:
34
+ BUNDLE_GEMFILE: "Gemfile.rails.4.1"
35
+ script:
36
+ - bundle exec rake
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
@@ -0,0 +1 @@
1
+ 2.3.2
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.3.4
5
+ - 2.2.7
6
+
7
+ gemfile:
8
+ - Gemfile.rails.4.0
9
+ - Gemfile.rails.4.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.0.0'
7
+ gem 'activesupport', '~> 4.0.0'
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.1.0'
7
+ gem 'activesupport', '~> 4.1.0'
@@ -0,0 +1,7 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itiel.gemspec
4
+ gemspec
5
+
6
+ gem 'activerecord', '~> 4.2.0'
7
+ gem 'activesupport', '~> 4.2.0'
@@ -0,0 +1,106 @@
1
+ # Itiel
2
+
3
+ [![build status](https://gitlab.com/dabit/itiel/badges/master/build.svg)](https://gitlab.com/dabit/itiel/commits/master)
4
+ [![coverage report](https://gitlab.com/dabit/itiel/badges/master/coverage.svg)](https://gitlab.com/dabit/itiel/commits/master)
5
+
6
+ Hopefully, it will be an awesome Framework to do ETL with Ruby. It
7
+ should only work with *Ruby 2.0* or newer.
8
+
9
+ [A working example](https://github.com/railsmx/rails-mx-blog/blob/master/itiel/posts.rb)
10
+
11
+ # This README is work in progress
12
+
13
+ You should not trust it, until I clean it up.
14
+
15
+ For better examples and documentation, refer to the *features* folder.
16
+
17
+
18
+ # TODO: Everything I want it to be, below:
19
+
20
+ ### Define the Itiel project structure
21
+
22
+ I have in mind something like this:
23
+
24
+ /itiel_example/
25
+ |~config/
26
+ | `-sources.yml
27
+ |~lib/
28
+ | `-transformation_class1.rb
29
+ |~jobs/
30
+ | `-main_job.rb
31
+ |~tasks/
32
+ | `-itiel.rake
33
+ |-Gemfile
34
+ |-Gemfile.lock
35
+
36
+ ### Controllers
37
+
38
+ Take care of defining and running the jobs. It's where you put all the
39
+ code like the example above.
40
+
41
+ ### A command line bin to generate the basic structure
42
+
43
+ itiel new project_name.
44
+
45
+ Use thor?
46
+
47
+ ### Rake
48
+
49
+ rake itiel:run
50
+
51
+ ### Mongo
52
+
53
+ Picture yourself migrating from SQL to Mongo
54
+
55
+ ### Joins, Scripts and lots more transformations
56
+
57
+ Some on my mind:
58
+
59
+ Itiel::Joins::DatabaseTable
60
+ Itiel::Lookups::DatabaseTable
61
+ Itiel::Loads::FTP
62
+ Itiel::Loads::HTTPRequest
63
+ Itiel::Loads::MongoDBDocument
64
+ Itiel::Script::ExecuteInSystem
65
+ Itiel::Extracts::HTTP
66
+ Itiel::Extracts::MongoDBDocument
67
+ Itiel::Transforms::CustomSort
68
+ Itiel::Transforms::MapValues
69
+ Itiel::Transforms::IfNil
70
+
71
+ Just ideas, I have nothing on how to implement most of them
72
+
73
+ ### Filters and conditional stream routing
74
+
75
+ If the column X has a value of Y, send true output to this step, send
76
+ false output to this other step
77
+
78
+ ### Slowly Changing Dimensions
79
+
80
+ Can't have ETL without these
81
+
82
+ ### Test helpers
83
+
84
+ So you can TDD your ETL
85
+
86
+ ### Logging
87
+
88
+ It would be extra nice if all the steps logged statistics
89
+
90
+ * Processing X rows of Y
91
+ * Time spent
92
+ * Transformation Name
93
+ * Configurable log level
94
+
95
+ ### Viz
96
+
97
+ rake itiel:graph
98
+
99
+ Creates a graphical representation of your ETL flow
100
+
101
+ ### Ruby 1.8 and 1.9
102
+
103
+ Not really, Ruby 2.0+ is enough. I don't have plans to make it
104
+ work on 1.8 or 1.9
105
+
106
+
@@ -0,0 +1,13 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ require 'cucumber/rake/task'
6
+
7
+ task :default => [:spec, :cucumber]
8
+
9
+ RSpec::Core::RakeTask.new
10
+
11
+ Cucumber::Rake::Task.new do |task|
12
+ task.cucumber_opts = %w{--format progress}
13
+ end
@@ -0,0 +1,10 @@
1
+ #! /bin/bash -e
2
+
3
+ source "$HOME/.rvm/scripts/rvm"
4
+ rvm 1.9.2
5
+ rvm gemset create itiel
6
+ rvm use 1.9.2@itiel
7
+ bundle -v || gem install bundler
8
+ bundle install
9
+ rake
10
+
@@ -0,0 +1,16 @@
1
+ Feature: Extract data from a Database Table
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ # @database_table = Itiel::Extract::DatabaseTable.new
12
+ # @database_table.connection = :test
13
+ # @database_table.table_name = 'blogposts'
14
+ When I create a Itiel::Extract::DatabaseTable object for the 'blogposts' table
15
+ Then the extraction for that object should have 3 rows
16
+
@@ -0,0 +1,17 @@
1
+ Feature: Extract rows from a database using a custom SQL script
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ When I create a Itiel::Extract::CustomSQL object with the query:
12
+ """
13
+ SELECT * FROM blogposts WHERE id = 3;
14
+ """
15
+
16
+ Then the extraction for that object should have 1 row
17
+
@@ -0,0 +1,21 @@
1
+ Feature: Load data into a database table
2
+
3
+ Background:
4
+ Given a blogposts database table
5
+ And a "source.csv" file with the following rows:
6
+ | id | title | author |
7
+ | 1 | Pride and Prejudice | Jane Austen |
8
+ | 2 | Treasure Island | R L Stevenson |
9
+ | 3 | Steve Jobs | Walter Isaacson |
10
+ | 4 | The thank you economy | Gary Vaynerchuk |
11
+
12
+ # @source = Itiel::Extract::CSVFile.new('source.csv')
13
+ And I create a Extractor::CSVFile object with "source.csv"
14
+ # @destination = Itiel::Load::DatabaseTable.new :test, "blogposts"
15
+ And I create a Loader::DatabaseTable object for the "blogposts" table
16
+
17
+ @database
18
+ Scenario:
19
+ When I load the source into the destination
20
+ Then the "blogposts" table should have 4 records
21
+
@@ -0,0 +1,41 @@
1
+ Feature: Lookup data from a CSV file
2
+
3
+ @database
4
+ Scenario:
5
+ Given an orders database table with rows:
6
+ | id | email |
7
+ | 1 | john@example.com |
8
+ | 2 | tim@test.com |
9
+ | 3 | math@example.com |
10
+
11
+ And a "customers.csv" file with the following rows:
12
+ | id | email |
13
+ | 1 | john@example.com |
14
+ | 2 | tim@test.com |
15
+ | 3 | math@example.com |
16
+
17
+ # @database_table = Itiel::Extract::DatabaseTable.new
18
+ # @database_table.connection = :test
19
+ # @database_table.table_name = 'orders'
20
+ And I create a Itiel::Extract::DatabaseTable object for the 'orders' table
21
+
22
+ # @lookup = Itiel::Lookup::CSVFile.new('customers.csv')
23
+ And I create a Itiel::Lookup::CSVFile object with the "customers.csv" file
24
+
25
+ # @lookup.lookup_columns = { :email => :email }
26
+ And the lookup joins the "email" column in the source with the "email" column in the lookup stream
27
+
28
+ # @lookup.joined_columns = { :id => :customer_id }
29
+ And the lookup appends the "id" column on the lookup stream to the source as "customer_id"
30
+
31
+ And the data flows in the following direction:
32
+ | @database_table |
33
+ | @lookup |
34
+
35
+ When I start the source
36
+
37
+ Then the output for the lookup should be:
38
+ | id | email | customer_id |
39
+ | 1 | john@example.com | 1 |
40
+ | 2 | tim@test.com | 2 |
41
+ | 3 | math@example.com | 3 |
@@ -0,0 +1,43 @@
1
+ Feature: Lookup data from a Database Table
2
+
3
+ @database
4
+ Scenario:
5
+ Given a blogposts database table with rows:
6
+ | id | title | author |
7
+ | 1 | The great tool | Tim Taylor |
8
+ | 2 | A football anecdote | John Miller |
9
+ | 3 | Historical music | John Mellencamp |
10
+
11
+ And an authors database table with rows:
12
+ | id | name |
13
+ | 1 | Tim Taylor |
14
+ | 2 | John Miller |
15
+ | 3 | John Mellencamp |
16
+
17
+ # @database_table = Itiel::Extract::DatabaseTable.new
18
+ # @database_table.connection = :test
19
+ # @database_table.table_name = 'blogposts'
20
+ And I create a Itiel::Extract::DatabaseTable object for the 'blogposts' table
21
+
22
+ # @lookup = Itiel::Lookup::DatabaseTable.new
23
+ # @lookup.connection = :test
24
+ # @lookup.table_name = 'authors'
25
+ And I create a Itiel::Lookup::DatabaseTable object for the "authors"
26
+
27
+ # @lookup.lookup_columns = { :author => :name }
28
+ And the lookup joins the "author" column in the source with the "name" column in the lookup stream
29
+
30
+ # @lookup.joined_columns = { :id => :author_id }
31
+ And the lookup appends the "id" column on the lookup stream to the source as "author_id"
32
+
33
+ And the data flows in the following direction:
34
+ | @database_table |
35
+ | @lookup |
36
+
37
+ When I start the source
38
+
39
+ Then the output for the lookup should be:
40
+ | id | title | author | author_id |
41
+ | 1 | The great tool | Tim Taylor | 1 |
42
+ | 2 | A football anecdote | John Miller | 2 |
43
+ | 3 | Historical music | John Mellencamp | 3 |
@@ -0,0 +1,19 @@
1
+ Feature: Ruby scripting
2
+
3
+ Scenario:
4
+ Given the following data is in the stream:
5
+ | id | title | author |
6
+ | 1 | The great tool | Tim Taylor |
7
+ | 2 | A football anecdote | John Miller |
8
+ | 3 | Historical music | John Mellencamp |
9
+
10
+ And I create a Itiel::Script::RubyScript object with the following block of code:
11
+ """
12
+ row["slug"] = row["title"].parameterize
13
+ """
14
+
15
+ Then the resulting output stream should be:
16
+ | id | title | author | slug |
17
+ | 1 | The great tool | Tim Taylor | the-great-tool |
18
+ | 2 | A football anecdote | John Miller | a-football-anecdote |
19
+ | 3 | Historical music | John Mellencamp | historical-music |
@@ -0,0 +1,15 @@
1
+ Given /^a "([^"]*)" file with the following rows:$/ do |filename, table|
2
+ CSV.open File.join('tmp', filename), "wb" do |csv|
3
+ table.raw.each { |row| csv << row }
4
+ end
5
+ end
6
+
7
+ Then /^the "([^"]*)" file should exist with the following content:$/ do |filename, table|
8
+ expected_file = CSV.generate do |csv|
9
+ table.raw.each { |row| csv << row }
10
+ end
11
+
12
+ resulting_file = File.new(File.join('tmp', filename), "rb").read
13
+
14
+ expect(resulting_file).to eq expected_file
15
+ end