mini_etl 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bf6d9d9f6c520143a0ed4798dae23ac0dd17d6acd338e4c954a650433c111fc5
4
+ data.tar.gz: 3b2cc9482751be5af528bfa9046eb55dd53a8d8a19eb368f80f775dc6747c72b
5
+ SHA512:
6
+ metadata.gz: acecce229c1db06eef8cabf6e2278bcfd4b0da83976e2e30101c6a3a1d0908cc002932ce292ff7489e430aaff95adf6677426072ebe03151194b696efe3e4286
7
+ data.tar.gz: 159a16e1c228a3b19485534a065b8c84a44bf93e54e1c19316723166d39550f0e3fdfe4a88184e8114854d167e83fa6d225783c97aad10d54e4762f5f894bdaa
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ # Ignore generated docs
2
+ /.yardoc
3
+ /_yardoc
4
+ /doc
5
+ README.html
6
+
7
+ # Ignore generated files
8
+ /.bundle
9
+ /pkg
10
+ /samples
11
+ /tmp
12
+
13
+ # Ignore test generated files
14
+ .rspec
15
+ .rspec_status
16
+ /coverage
17
+ /spec/reports
18
+ spec/examples.txt
data/.rubocop.yml ADDED
@@ -0,0 +1,98 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ AllCops:
4
+ RubyInterpreters:
5
+ - ruby
6
+ # Include common Ruby source files.
7
+ Include:
8
+ - '**/*.rb'
9
+ - '**/*.gemfile'
10
+ - '**/*.gemspec'
11
+ - '**/*.jb'
12
+ - '**/*.jbuilder'
13
+ - '**/*.rake'
14
+ - '**/*.spec'
15
+ Exclude:
16
+ - 'node_modules/**/*'
17
+ - 'vendor/**/*'
18
+ - '.git/**/*'
19
+ # Default formatter will be used if no `-f/--format` option is given.
20
+ DefaultFormatter: progress
21
+ # Cop names are displayed in offense messages by default. Change behavior
22
+ # by overriding DisplayCopNames, or by giving the `--no-display-cop-names`
23
+ # option.
24
+ DisplayCopNames: true
25
+ # Style guide URLs are not displayed in offense messages by default. Change
26
+ # behavior by overriding `DisplayStyleGuide`, or by giving the
27
+ # `-S/--display-style-guide` option.
28
+ DisplayStyleGuide: false
29
+ # When specifying style guide URLs, any paths and/or fragments will be
30
+ # evaluated relative to the base URL.
31
+ StyleGuideBaseURL: https://github.com/rubocop-hq/ruby-style-guide
32
+ # Extra details are not displayed in offense messages by default. Change
33
+ # behavior by overriding ExtraDetails, or by giving the
34
+ # `-E/--extra-details` option.
35
+ ExtraDetails: false
36
+ # Additional cops that do not reference a style guide rule may be enabled by
37
+ # default. Change behavior by overriding `StyleGuideCopsOnly`, or by giving
38
+ # the `--only-guide-cops` option.
39
+ StyleGuideCopsOnly: false
40
+ # All cops except the ones configured `Enabled: false` in this file are enabled by default.
41
+ # Change this behavior by overriding either `DisabledByDefault` or `EnabledByDefault`.
42
+ # When `DisabledByDefault` is `true`, all cops in the default configuration
43
+ # are disabled, and only cops in user configuration are enabled. This makes
44
+ # cops opt-in instead of opt-out. Note that when `DisabledByDefault` is `true`,
45
+ # cops in user configuration will be enabled even if they don't set the
46
+ # Enabled parameter.
47
+ # When `EnabledByDefault` is `true`, all cops, even those configured `Enabled: false`
48
+ # in this file are enabled by default. Cops can still be disabled in user configuration.
49
+ # Note that it is invalid to set both EnabledByDefault and DisabledByDefault
50
+ # to true in the same configuration.
51
+ EnabledByDefault: false
52
+ DisabledByDefault: false
53
+ # Enables the result cache if `true`. Can be overridden by the `--cache` command
54
+ # line option.
55
+ UseCache: true
56
+ # Threshold for how many files can be stored in the result cache before some
57
+ # of the files are automatically removed.
58
+ MaxFilesInCache: 20000
59
+ # The cache will be stored in "rubocop_cache" under this directory. If
60
+ # CacheRootDirectory is ~ (nil), which it is by default, the root will be
61
+ # taken from the environment variable `$XDG_CACHE_HOME` if it is set, or if
62
+ # `$XDG_CACHE_HOME` is not set, it will be `$HOME/.cache/`.
63
+ CacheRootDirectory: ~
64
+ # It is possible for a malicious user to know the location of RuboCop's cache
65
+ # directory by looking at CacheRootDirectory, and create a symlink in its
66
+ # place that could cause RuboCop to overwrite unintended files, or read
67
+ # malicious input. If you are certain that your cache location is secure from
68
+ # this kind of attack, and wish to use a symlinked cache location, set this
69
+ # value to "true".
70
+ AllowSymlinksInCacheRootDirectory: false
71
+ # What MRI version of the Ruby interpreter is the inspected code intended to
72
+ # run on? (If there is more than one, set this to the lowest version.)
73
+ # If a value is specified for TargetRubyVersion then it is used. Acceptable
74
+ # values are specified as a float (e.g. 2.5); the teeny version of Ruby
75
+ # should not be included. If the project specifies a Ruby version in the
76
+ # .ruby-version file, Gemfile or gems.rb file, RuboCop will try to determine
77
+ # the desired version of Ruby by inspecting the .ruby-version file first,
78
+ # followed by the Gemfile.lock or gems.locked file. (Although the Ruby version
79
+ # is specified in the Gemfile or gems.rb file, RuboCop reads the final value
80
+ # from the lock file.) If the Ruby version is still unresolved, RuboCop will
81
+ # use the oldest officially supported Ruby version (currently Ruby 2.3).
82
+ TargetRubyVersion: ~
83
+ # What version of Rails is the inspected code using? If a value is specified
84
+ # for TargetRailsVersion then it is used. Acceptable values are specified
85
+ # as a float (i.e. 5.1); the patch version of Rails should not be included.
86
+ # If TargetRailsVersion is not set, RuboCop will parse the Gemfile.lock or
87
+ # gems.locked file to find the version of Rails that has been bound to the
88
+ # application. If neither of those files exist, RuboCop will use Rails 5.0
89
+ # as the default.
90
+ TargetRailsVersion: ~
91
+
92
+ Style/FrozenStringLiteralComment:
93
+ Enabled: false
94
+ Metrics/LineLength:
95
+ Max: 150
96
+ Metrics/BlockLength:
97
+ Exclude:
98
+ - 'spec/**/*'
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,14 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2019-06-04 10:13:24 -0500 using RuboCop version 0.71.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ Style/Documentation:
11
+ Exclude:
12
+ - 'spec/**/*'
13
+ - 'test/**/*'
14
+ - 'lib/etl.rb'
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2019-06-06
10
+
11
+ ### Added
12
+ - [Task] Improve spec coverage
13
+ - [Base] Introduce Generator for structure generation
14
+ - [Base] Factor out strategies
15
+ - [Base] General extraction strategy, still static
16
+ - [Base] Basic, more useful documentation
17
+ - [Base] CSV Sample strategy: File generation and sourcing
18
+ - [Base] Initial application
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
5
+ ruby '2.6.3'
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,74 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ mini_etl (0.2.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.0)
10
+ coderay (1.1.2)
11
+ concurrent-ruby (1.1.5)
12
+ diff-lcs (1.3)
13
+ docile (1.3.1)
14
+ faker (1.9.3)
15
+ i18n (>= 0.7)
16
+ i18n (1.6.0)
17
+ concurrent-ruby (~> 1.0)
18
+ jaro_winkler (1.5.2)
19
+ json (2.2.0)
20
+ method_source (0.9.2)
21
+ parallel (1.17.0)
22
+ parser (2.6.3.0)
23
+ ast (~> 2.4.0)
24
+ pry (0.12.2)
25
+ coderay (~> 1.1.0)
26
+ method_source (~> 0.9.0)
27
+ rainbow (3.0.0)
28
+ rake (10.5.0)
29
+ rspec (3.8.0)
30
+ rspec-core (~> 3.8.0)
31
+ rspec-expectations (~> 3.8.0)
32
+ rspec-mocks (~> 3.8.0)
33
+ rspec-core (3.8.0)
34
+ rspec-support (~> 3.8.0)
35
+ rspec-expectations (3.8.3)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.8.0)
38
+ rspec-mocks (3.8.0)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.8.0)
41
+ rspec-support (3.8.0)
42
+ rubocop (0.71.0)
43
+ jaro_winkler (~> 1.5.1)
44
+ parallel (~> 1.10)
45
+ parser (>= 2.6)
46
+ rainbow (>= 2.2.2, < 4.0)
47
+ ruby-progressbar (~> 1.7)
48
+ unicode-display_width (>= 1.4.0, < 1.7)
49
+ ruby-progressbar (1.10.1)
50
+ simplecov (0.16.1)
51
+ docile (~> 1.1)
52
+ json (>= 1.8, < 3)
53
+ simplecov-html (~> 0.10.0)
54
+ simplecov-html (0.10.2)
55
+ unicode-display_width (1.6.0)
56
+
57
+ PLATFORMS
58
+ ruby
59
+
60
+ DEPENDENCIES
61
+ bundler (~> 1.16)
62
+ faker (~> 1.9)
63
+ mini_etl!
64
+ pry (~> 0.12.2)
65
+ rake (~> 10.0)
66
+ rspec (~> 3.0)
67
+ rubocop (~> 0.71.0)
68
+ simplecov (~> 0.16.1)
69
+
70
+ RUBY VERSION
71
+ ruby 2.6.3p62
72
+
73
+ BUNDLED WITH
74
+ 1.17.2
data/README.md ADDED
@@ -0,0 +1,145 @@
1
+ # Mini ETL
2
+
3
+ Basic toolkit for Extract/Transform/Load operations. Abstracts the details of
4
+ performing sourcing, intermediate structure generation and data persistence.
5
+
6
+ ## Usage
7
+
8
+ ### Sourcing
9
+
10
+ A `MiniEtl` `Process` is kicked off by configuring a process. For a basic CSV file
11
+ deserialize and bulk load:
12
+
13
+ ```
14
+ process = MiniEtl.create_process do |process|
15
+ process.source.type = :csv
16
+ process.source.location = 'samples/small.csv'
17
+ end
18
+
19
+ process.bootstrap
20
+ ```
21
+
22
+ TODO: Write a strategy for HTTP, use JSON server
23
+
24
+ ```
25
+ process = MiniEtl.create_process do |process|
26
+ process.source.type = :http
27
+ process.source.location = 'localhost:8080/sample'
28
+ end
29
+
30
+ process.bootstrap
31
+ ```
32
+
33
+ Strategies are available for CSV and JSON. If you need something else entirely,
34
+ a manual source can be used instead:
35
+
36
+ ```
37
+ process = MiniEtl.create_process do |process|
38
+ process.source.type = :manual
39
+ process.source.method = Proc.new do
40
+ ...
41
+ end
42
+ end
43
+ ```
44
+
45
+ ### Structure generation
46
+
47
+ Once data sourcing is complete, data can be fetched in-place.
48
+
49
+ ```
50
+ process = MiniEtl.create_process do |process|
51
+ process.source.type = :csv
52
+ process.source.location = 'samples/small.csv'
53
+ end
54
+
55
+ process.bootstrap
56
+ process.generate
57
+
58
+ process.generator.structures # intermediate structure for bulk import
59
+ ```
60
+
61
+ If the data source is too large to process in memory, an iterator can be given
62
+ instead:
63
+
64
+ ```
65
+ process = MiniEtl.create_process do |process|
66
+ process.source.type = :csv
67
+ process.source.location = 'samples/large.csv'
68
+ process.generator.lazy = true
69
+ end
70
+
71
+ process.bootstrap
72
+ process.generator.start do |structures|
73
+ ...
74
+ end
75
+ ```
76
+
77
+ ### Data persistence
78
+
79
+ Finally, once the data is shaped the way you need it, it can be persisted in
80
+ whatever way you like. The receiver class is expected to respond to
81
+ `.create(args)`
82
+
83
+ ```
84
+ process = MiniEtl.create_process do |process|
85
+ process.source.type = :csv
86
+ process.source.location = 'samples/large.csv'
87
+ process.store.type = Person # An active record model
88
+ end
89
+
90
+ process.bootstrap
91
+ process.generate
92
+ process.persist
93
+ ```
94
+
95
+ In this way, any arbitrary store can be created,
96
+
97
+ ```
98
+ class Payroll
99
+ Struct.new(:target, :name, :last_name, ...)
100
+ @@data = []
101
+
102
+ def create(params = {})
103
+ @@data << Struct::Target.new(name: params[:name], last_name: params[:last_name], ...)
104
+ end
105
+ end
106
+
107
+ process = MiniEtl.create_process do |process|
108
+ process.source.type = :csv
109
+ process.source.location = 'samples/small.csv'
110
+ process.store.type = Payroll
111
+ end
112
+
113
+ process.bootstrap
114
+ process.generate
115
+ process.persist
116
+ ```
117
+
118
+ ## Development
119
+
120
+ TODO: Test stuff
121
+ `$ rake`
122
+
123
+ Runs rspec, rubocop, generates coverage report
124
+
125
+ TODO: Explain how to generate csv files and the rest of the samples
126
+ NOTE: This will take ~5.5 mins, super slow, would need a parallel version
127
+ ```
128
+ $ rake sample:csv:all
129
+ ```
130
+
131
+ TODO: Explain how to use JSON Server to provide a fake API
132
+
133
+ ```
134
+ $ npm install -g json-server
135
+ $ rake sample:json:small
136
+ $ json-server --watch samples/small.json --port 3001
137
+ ```
138
+
139
+ API is now available at `localhost:3001/payroll`
140
+
141
+ ...
142
+
143
+ ## Contributing
144
+
145
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ggalindezb/mini_etl.
data/Rakefile ADDED
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+ require_relative 'lib/tasks/support/generation'
6
+
7
+ require 'rubygems'
8
+ require 'bundler'
9
+ Bundler.require(:default, :development)
10
+
11
+ RSpec::Core::RakeTask.new(:spec)
12
+
13
+ task default: :spec_n_cop
14
+
15
+ task :spec_n_cop do
16
+ Rake::Task['spec'].invoke
17
+ require 'rubocop'
18
+ cli = RuboCop::CLI.new
19
+ cli.run
20
+ end
21
+
22
# Sample data generation tasks, e.g. `rake sample:csv:small` or `rake sample:json:all`.
namespace :sample do
  # Makes generate_csv / generate_json callable from the task bodies below
  include Support::Generation

  namespace :csv do
    desc 'Generate CSV samples'
    task :all do
      generate_csv(:small)
      generate_csv(:medium)
      generate_csv(:large)
    end

    desc 'Generate a CSV sample, ~1 MB'
    task :small do
      generate_csv(:small)
    end

    desc 'Generate a CSV sample, ~10 MB'
    task :medium do
      generate_csv(:medium)
    end

    desc 'Generate a CSV sample, ~100 MB'
    task :large do
      generate_csv(:large)
    end
  end

  namespace :json do
    # This description previously read 'Generate CSV samples' (copy-paste slip),
    # which mislabelled the task in `rake -T`.
    desc 'Generate JSON samples'
    task :all do
      generate_json(:small)
      generate_json(:medium)
      generate_json(:large)
    end

    desc 'Generate a JSON sample, ~2 MB'
    task :small do
      generate_json(:small)
    end

    desc 'Generate a JSON sample, ~20 MB'
    task :medium do
      generate_json(:medium)
    end

    desc 'Generate a JSON sample, ~200 MB'
    task :large do
      generate_json(:large)
    end
  end
end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'mini_etl'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/mini_etl.rb ADDED
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_etl/version'
4
+
5
+ require 'rubygems'
6
+ require 'bundler'
7
+ Bundler.require(:default)
8
+
9
+ require 'mini_etl/util/status'
10
+ require 'mini_etl/strategy'
11
+ require 'mini_etl/strategies/csv_strategy'
12
+ require 'mini_etl/source'
13
+ require 'mini_etl/generator'
14
+ require 'mini_etl/process'
15
+
16
+ # Place exception here
17
module MiniEtl
  # Builds a new Process and passes it to the given block so the caller can
  # configure it.
  #
  # Returns the configured Process, or nil when no block is given.
  def self.create_process(&block)
    return nil unless block

    # tap yields the fresh process to the block and then returns the process
    Process.new.tap(&block)
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Generate record information from a source.
  #
  # A generator is bootstrapped with a data type and the raw data, then
  # #transform asks the strategy registered for that type to build the payload.
  class Generator
    VALID_STATES = { initialized: 0, bootstrapped: 1, transformed: 2, failed: 3 }.freeze

    include Status

    attr_reader :payload
    attr_writer :type
    attr_accessor :lazy, :data

    # TODO: This needs to know the type of the receiver
    def initialize
      @payload = []
      @lazy = false
      initialized!
    end

    # Stores the data type and raw data and moves to the bootstrapped state.
    # Raises ArgumentError when either argument is nil.
    def bootstrap(type, data)
      raise ArgumentError if [type, data].any?(&:nil?)

      @type = type
      @data = data
      bootstrapped!
    end

    # Builds the payload from the stored data using the strategy for the stored
    # type. When the generator has not been bootstrapped it is marked as failed
    # and nil is returned.
    def transform
      unless bootstrapped?
        failed!
        return
      end

      @payload = MiniEtl::Strategy.for(@type).generate(@data)
      transformed!
    end

    # TODO: This needs to feed a block with source data, to stream a structure
    # generation instead of doing it in place
    # def start
    #   yield @structure.next if @lazy && block_given?
    # end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # ETL Process wrapper
  # Should this go all the way?
  # Probably
  #
  # Owns one Source and one Generator and drives them through the
  # bootstrap and generate steps, tracking its own state along the way.
  class Process
    VALID_STATES = { initialized: 0, bootstrapped: 1, generated: 2, finished: 3, failed: 4 }.freeze

    include Status
    attr_reader :source, :generator

    def initialize
      @source = Source.new
      @generator = Generator.new

      initialized!
    end

    # Validates and fetches the source, then hands its type and payload to the
    # generator. The process is marked as failed when it is not in the
    # initialized state or the source does not validate.
    def bootstrap
      return failed! unless initialized? && @source.validate

      @source.fetch
      @generator.bootstrap(@source.type, @source.payload)
      bootstrapped!
    end

    # Runs the generator transformation. The process is marked as failed when
    # it has not been bootstrapped.
    #
    # TODO: Parse it, bear in mind this will be in memory and may need to be split
    # TODO: Transform it into useful bits
    #       This may be done in rails. Provide a useful interface in that case
    # TODO: Load the thing wherever it needs to go
    def generate
      return failed! unless bootstrapped?

      @generator.transform
      generated!
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Source data from a given type and location.
  #
  # The strategy registered for #type decides whether the source is valid and
  # how its data is fetched; the fetched data is kept in #payload.
  class Source
    VALID_STATES = {
      initialized: 0,
      validated: 1,
      sourced: 2,
      failed: 3
    }.freeze

    include Status

    attr_accessor :type, :location, :data
    attr_reader :payload
    ACCEPTED_PARAMS = %i[type location data].freeze

    # params may carry initial values for any of ACCEPTED_PARAMS; other keys
    # are ignored and missing keys leave the attribute nil.
    def initialize(params = {})
      ACCEPTED_PARAMS.each do |param|
        instance_variable_set "@#{param}".to_sym, params[param]
      end
      @payload = []
      initialized!
    end

    # Asks the strategy for #type whether this source is usable.
    #
    # Returns the strategy's verdict and moves to the validated or failed state
    # accordingly. Returns false and moves to the failed state when no strategy
    # exists for the type.
    def validate
      strategy = MiniEtl::Strategy.for(@type)
      if strategy.nil?
        # The previous `return false && failed!` short-circuited on `false`,
        # so the failed state was never recorded.
        failed!
        return false
      end

      strategy.validate(self).tap { |valid| valid ? validated! : failed! }
    end

    # Loads the payload through the strategy for #type. Only a validated source
    # is fetched; otherwise the source is marked as failed.
    def fetch
      strategy = MiniEtl::Strategy.for(@type)

      if strategy && validated?
        @payload = strategy.fetch(self)
        sourced!
      else
        failed!
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module MiniEtl
  module Strategies
    # Extract/Transform strategy for a CSV input
    class CSVStrategy
      class << self
        # Returns true when the source location points at an existing path.
        # A source without a location is reported as invalid (false) instead of
        # raising, so callers can mark the source as failed.
        def validate(source)
          location = source.location
          return false if location.nil?

          File.exist?(location)
        end

        # Returns the full content of the file at the source location as a String.
        def fetch(source)
          File.read(source.location)
        end

        # Parses CSV text into an array of rows, each row an array of fields.
        def generate(data)
          CSV.parse(data)
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Fetches a Strategy kind class for a given type of data
  # extraction/transformation
  class Strategy
    class << self
      # Looks up the strategy class for a type by name: the type is upcased and
      # suffixed with "Strategy" (:csv resolves to Strategies::CSVStrategy).
      #
      # Returns the strategy class, or nil when no such strategy exists. A type
      # that cannot form a legal constant name (for example 'my-type') also
      # yields nil rather than a NameError.
      def for(type)
        strategy_constant = "#{type.to_s.upcase}Strategy"
        # Resolved outside the rescue so a missing Strategies module still surfaces
        registry = Strategies

        begin
          registry.const_get(strategy_constant) if registry.const_defined?(strategy_constant)
        rescue NameError
          # const_defined? raises NameError for names that are not valid constants
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Track a status.
  #
  # Including this module gives a class a `status` reader plus, for every state
  # name, a `<state>?` predicate and a `<state>!` setter. The states come from
  # the including class's VALID_STATES constant when it defines one (it must be
  # defined before the include), otherwise from DEFAULT_STATES.
  module Status
    DEFAULT_STATES = {
      initialized: 0,
      finished: 1,
      failed: 2
    }.freeze

    def self.included(base)
      states = base.const_defined?(:VALID_STATES) ? base.const_get(:VALID_STATES) : DEFAULT_STATES

      # Define the methods on the including class. Defining them here without
      # class_eval would put them on Status itself, where every including class
      # shares them and the last class to include overwrites the state values
      # of all the others.
      base.class_eval do
        attr_reader :status

        states.each do |verb, value|
          define_method("#{verb}?") { @status == value }
          define_method("#{verb}!") { @status = value }
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MiniEtl
4
+ VERSION = '0.2.0'
5
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Support
  # Test files generation.
  #
  # Sample files are written to a `samples` directory relative to the current
  # working directory; record values are produced by Faker.
  module Generation
    # COLUMNS = %w[name last_name nationality origin phone bank iban currency segment].freeze

    # Number of records written for each named sample size
    RECORD_SIZE = {
      small: 8_500,
      medium: 85_000,
      large: 825_000
    }.freeze

    # Writes samples/<size>.csv with one comma-separated record per line.
    #
    # size must be a key of RECORD_SIZE; anything else raises KeyError before
    # any file is created.
    def generate_csv(size)
      records = RECORD_SIZE.fetch(size)
      check_dir

      File.open("samples/#{size}.csv", 'w') do |sample_file|
        # puts ends every record with a newline; plain write would glue all
        # records together into a single CSV row.
        records.times { sample_file.puts(dummy_data.join(',')) }
      end
    end

    # Writes samples/<size>.json containing a single JSON array of record objects.
    #
    # size must be a key of RECORD_SIZE; anything else raises KeyError before
    # any file is created.
    def generate_json(size)
      records = RECORD_SIZE.fetch(size)
      check_dir

      File.open("samples/#{size}.json", 'w') do |sample_file|
        sample_file.write('[')
        # Every record but the last is followed by a comma; the last closes the array
        records.pred.times { sample_file.write(json_string + ',') }
        sample_file.write(json_string + ']')
      end
    end

    private

    # Creates the samples directory when it does not exist yet
    def check_dir
      Dir.mkdir('samples') unless Dir.exist?('samples')
    end

    # Keys of a JSON record, in the same order as the values from dummy_data
    def dummy_names
      %i[name last_name nationality capital_city phone_number bank iban currency industry]
    end

    # One record of fake values, in the same order as dummy_names
    def dummy_data
      [
        Faker::Name.first_name,
        Faker::Name.last_name,
        Faker::Nation.nationality,
        Faker::Nation.capital_city,
        Faker::PhoneNumber.phone_number_with_country_code,
        Faker::Bank.name,
        Faker::Bank.iban,
        Faker::Currency.code,
        Faker::IndustrySegments.industry
      ]
    rescue Faker::UniqueGenerator::RetryLimitExceeded
      # Reset the unique generators and build the record again, so callers
      # always receive a record rather than the return value of `clear`.
      Faker::UniqueGenerator.clear
      retry
    end

    # A single record serialized as a JSON object
    def json_string
      JSON.dump(Hash[dummy_names.zip(dummy_data)])
    end
  end
end
data/mini_etl.gemspec ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'mini_etl/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'mini_etl'
9
+ spec.version = MiniEtl::VERSION
10
+ spec.authors = ['Gerardo Galindez']
11
+ spec.email = ['ggalindezb@gmail.com']
12
+
13
+ spec.summary = 'Extract/Transform/Load wrapper'
14
+ spec.homepage = 'https://github.com/ggalindezb/mini_etl'
15
+
16
+ # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
17
+ # to allow pushing to a single host or delete this section to allow pushing to any host.
18
+ # if spec.respond_to?(:metadata)
19
+ # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+
21
+ # spec.metadata["homepage_uri"] = spec.homepage
22
+ # spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
23
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
24
+ # else
25
+ # raise "RubyGems 2.0 or newer is required to protect against " \
26
+ # "public gem pushes."
27
+ # end
28
+
29
+ # Specify which files should be added to the gem when it is released.
30
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
31
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
32
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
33
+ end
34
+ spec.bindir = 'exe'
35
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
36
+ spec.require_paths = ['lib']
37
+
38
+ spec.add_development_dependency 'bundler', '~> 1.16'
39
+ spec.add_development_dependency 'faker', '~> 1.9'
40
+ spec.add_development_dependency 'pry', '~> 0.12.2'
41
+ spec.add_development_dependency 'rake', '~> 10.0'
42
+ spec.add_development_dependency 'rspec', '~> 3.0'
43
+ spec.add_development_dependency 'rubocop', '~> 0.71.0'
44
+ spec.add_development_dependency 'simplecov', '~> 0.16.1'
45
+ end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini_etl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Gerardo Galindez
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-06-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faker
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.12.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.12.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.71.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.71.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.16.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.16.1
111
+ description:
112
+ email:
113
+ - ggalindezb@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".rubocop.yml"
120
+ - ".rubocop_todo.yml"
121
+ - CHANGELOG.md
122
+ - Gemfile
123
+ - Gemfile.lock
124
+ - README.md
125
+ - Rakefile
126
+ - bin/console
127
+ - bin/setup
128
+ - lib/mini_etl.rb
129
+ - lib/mini_etl/generator.rb
130
+ - lib/mini_etl/process.rb
131
+ - lib/mini_etl/source.rb
132
+ - lib/mini_etl/strategies/csv_strategy.rb
133
+ - lib/mini_etl/strategy.rb
134
+ - lib/mini_etl/util/status.rb
135
+ - lib/mini_etl/version.rb
136
+ - lib/tasks/support/generation.rb
137
+ - mini_etl.gemspec
138
+ homepage: https://github.com/ggalindezb/mini_etl
139
+ licenses: []
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubygems_version: 3.0.3
157
+ signing_key:
158
+ specification_version: 4
159
+ summary: Extract/Transform/Load wrapper
160
+ test_files: []