mini_etl 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bf6d9d9f6c520143a0ed4798dae23ac0dd17d6acd338e4c954a650433c111fc5
4
+ data.tar.gz: 3b2cc9482751be5af528bfa9046eb55dd53a8d8a19eb368f80f775dc6747c72b
5
+ SHA512:
6
+ metadata.gz: acecce229c1db06eef8cabf6e2278bcfd4b0da83976e2e30101c6a3a1d0908cc002932ce292ff7489e430aaff95adf6677426072ebe03151194b696efe3e4286
7
+ data.tar.gz: 159a16e1c228a3b19485534a065b8c84a44bf93e54e1c19316723166d39550f0e3fdfe4a88184e8114854d167e83fa6d225783c97aad10d54e4762f5f894bdaa
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ # Ignore generated docs
2
+ /.yardoc
3
+ /_yardoc
4
+ /doc
5
+ README.html
6
+
7
+ # Ignore generated files
8
+ /.bundle
9
+ /pkg
10
+ /samples
11
+ /tmp
12
+
13
+ # Ignore test generated files
14
+ .rspec
15
+ .rspec_status
16
+ /coverage
17
+ /spec/reports
18
+ spec/examples.txt
data/.rubocop.yml ADDED
@@ -0,0 +1,98 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ AllCops:
4
+ RubyInterpreters:
5
+ - ruby
6
+ # Include common Ruby source files.
7
+ Include:
8
+ - '**/*.rb'
9
+ - '**/*.gemfile'
10
+ - '**/*.gemspec'
11
+ - '**/*.jb'
12
+ - '**/*.jbuilder'
13
+ - '**/*.rake'
14
+ - '**/*.spec'
15
+ Exclude:
16
+ - 'node_modules/**/*'
17
+ - 'vendor/**/*'
18
+ - '.git/**/*'
19
+ # Default formatter will be used if no `-f/--format` option is given.
20
+ DefaultFormatter: progress
21
+ # Cop names are displayed in offense messages by default. Change behavior
22
+ # by overriding DisplayCopNames, or by giving the `--no-display-cop-names`
23
+ # option.
24
+ DisplayCopNames: true
25
+ # Style guide URLs are not displayed in offense messages by default. Change
26
+ # behavior by overriding `DisplayStyleGuide`, or by giving the
27
+ # `-S/--display-style-guide` option.
28
+ DisplayStyleGuide: false
29
+ # When specifying style guide URLs, any paths and/or fragments will be
30
+ # evaluated relative to the base URL.
31
+ StyleGuideBaseURL: https://github.com/rubocop-hq/ruby-style-guide
32
+ # Extra details are not displayed in offense messages by default. Change
33
+ # behavior by overriding ExtraDetails, or by giving the
34
+ # `-E/--extra-details` option.
35
+ ExtraDetails: false
36
+ # Additional cops that do not reference a style guide rule may be enabled by
37
+ # default. Change behavior by overriding `StyleGuideCopsOnly`, or by giving
38
+ # the `--only-guide-cops` option.
39
+ StyleGuideCopsOnly: false
40
+ # All cops except the ones configured `Enabled: false` in this file are enabled by default.
41
+ # Change this behavior by overriding either `DisabledByDefault` or `EnabledByDefault`.
42
+ # When `DisabledByDefault` is `true`, all cops in the default configuration
43
+ # are disabled, and only cops in user configuration are enabled. This makes
44
+ # cops opt-in instead of opt-out. Note that when `DisabledByDefault` is `true`,
45
+ # cops in user configuration will be enabled even if they don't set the
46
+ # Enabled parameter.
47
+ # When `EnabledByDefault` is `true`, all cops, even those configured `Enabled: false`
48
+ # in this file are enabled by default. Cops can still be disabled in user configuration.
49
+ # Note that it is invalid to set both EnabledByDefault and DisabledByDefault
50
+ # to true in the same configuration.
51
+ EnabledByDefault: false
52
+ DisabledByDefault: false
53
+ # Enables the result cache if `true`. Can be overridden by the `--cache` command
54
+ # line option.
55
+ UseCache: true
56
+ # Threshold for how many files can be stored in the result cache before some
57
+ # of the files are automatically removed.
58
+ MaxFilesInCache: 20000
59
+ # The cache will be stored in "rubocop_cache" under this directory. If
60
+ # CacheRootDirectory is ~ (nil), which it is by default, the root will be
61
+ # taken from the environment variable `$XDG_CACHE_HOME` if it is set, or if
62
+ # `$XDG_CACHE_HOME` is not set, it will be `$HOME/.cache/`.
63
+ CacheRootDirectory: ~
64
+ # It is possible for a malicious user to know the location of RuboCop's cache
65
+ # directory by looking at CacheRootDirectory, and create a symlink in its
66
+ # place that could cause RuboCop to overwrite unintended files, or read
67
+ # malicious input. If you are certain that your cache location is secure from
68
+ # this kind of attack, and wish to use a symlinked cache location, set this
69
+ # value to "true".
70
+ AllowSymlinksInCacheRootDirectory: false
71
+ # What MRI version of the Ruby interpreter is the inspected code intended to
72
+ # run on? (If there is more than one, set this to the lowest version.)
73
+ # If a value is specified for TargetRubyVersion then it is used. Acceptable
74
+ # values are specified as a float (e.g. 2.5); the teeny version of Ruby
75
+ # should not be included. If the project specifies a Ruby version in the
76
+ # .ruby-version file, Gemfile or gems.rb file, RuboCop will try to determine
77
+ # the desired version of Ruby by inspecting the .ruby-version file first,
78
+ # followed by the Gemfile.lock or gems.locked file. (Although the Ruby version
79
+ # is specified in the Gemfile or gems.rb file, RuboCop reads the final value
80
+ # from the lock file.) If the Ruby version is still unresolved, RuboCop will
81
+ # use the oldest officially supported Ruby version (currently Ruby 2.3).
82
+ TargetRubyVersion: ~
83
+ # What version of Rails is the inspected code using? If a value is specified
84
+ # for TargetRailsVersion then it is used. Acceptable values are specified
85
+ # as a float (i.e. 5.1); the patch version of Rails should not be included.
86
+ # If TargetRailsVersion is not set, RuboCop will parse the Gemfile.lock or
87
+ # gems.locked file to find the version of Rails that has been bound to the
88
+ # application. If neither of those files exist, RuboCop will use Rails 5.0
89
+ # as the default.
90
+ TargetRailsVersion: ~
91
+
92
+ Style/FrozenStringLiteralComment:
93
+ Enabled: false
94
+ Metrics/LineLength:
95
+ Max: 150
96
+ Metrics/BlockLength:
97
+ Exclude:
98
+ - 'spec/**/*'
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,14 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2019-06-04 10:13:24 -0500 using RuboCop version 0.71.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ Style/Documentation:
11
+ Exclude:
12
+ - 'spec/**/*'
13
+ - 'test/**/*'
14
+ - 'lib/etl.rb'
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2019-06-06
10
+
11
+ ### Added
12
+ - [Task] Improve spec coverage
13
+ - [Base] Introduce Generator for structure generation
14
+ - [Base] Factor out strategies
15
+ - [Base] General extraction strategy, still static
16
+ - [Base] Basic, more useful documentation
17
+ - [Base] CSV Sample strategy: File generation and sourcing
18
+ - [Base] Initial application
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
5
+ ruby '2.6.3'
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,74 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ mini_etl (0.2.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.0)
10
+ coderay (1.1.2)
11
+ concurrent-ruby (1.1.5)
12
+ diff-lcs (1.3)
13
+ docile (1.3.1)
14
+ faker (1.9.3)
15
+ i18n (>= 0.7)
16
+ i18n (1.6.0)
17
+ concurrent-ruby (~> 1.0)
18
+ jaro_winkler (1.5.2)
19
+ json (2.2.0)
20
+ method_source (0.9.2)
21
+ parallel (1.17.0)
22
+ parser (2.6.3.0)
23
+ ast (~> 2.4.0)
24
+ pry (0.12.2)
25
+ coderay (~> 1.1.0)
26
+ method_source (~> 0.9.0)
27
+ rainbow (3.0.0)
28
+ rake (10.5.0)
29
+ rspec (3.8.0)
30
+ rspec-core (~> 3.8.0)
31
+ rspec-expectations (~> 3.8.0)
32
+ rspec-mocks (~> 3.8.0)
33
+ rspec-core (3.8.0)
34
+ rspec-support (~> 3.8.0)
35
+ rspec-expectations (3.8.3)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.8.0)
38
+ rspec-mocks (3.8.0)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.8.0)
41
+ rspec-support (3.8.0)
42
+ rubocop (0.71.0)
43
+ jaro_winkler (~> 1.5.1)
44
+ parallel (~> 1.10)
45
+ parser (>= 2.6)
46
+ rainbow (>= 2.2.2, < 4.0)
47
+ ruby-progressbar (~> 1.7)
48
+ unicode-display_width (>= 1.4.0, < 1.7)
49
+ ruby-progressbar (1.10.1)
50
+ simplecov (0.16.1)
51
+ docile (~> 1.1)
52
+ json (>= 1.8, < 3)
53
+ simplecov-html (~> 0.10.0)
54
+ simplecov-html (0.10.2)
55
+ unicode-display_width (1.6.0)
56
+
57
+ PLATFORMS
58
+ ruby
59
+
60
+ DEPENDENCIES
61
+ bundler (~> 1.16)
62
+ faker (~> 1.9)
63
+ mini_etl!
64
+ pry (~> 0.12.2)
65
+ rake (~> 10.0)
66
+ rspec (~> 3.0)
67
+ rubocop (~> 0.71.0)
68
+ simplecov (~> 0.16.1)
69
+
70
+ RUBY VERSION
71
+ ruby 2.6.3p62
72
+
73
+ BUNDLED WITH
74
+ 1.17.2
data/README.md ADDED
@@ -0,0 +1,145 @@
1
+ # Mini ETL
2
+
3
+ Basic toolkit for Extract/Transform/Load operations. Abstracts the details of
4
+ performing sourcing, intermediate structure generation and data persistence.
5
+
6
+ ## Usage
7
+
8
+ ### Sourcing
9
+
10
+ A `MiniEtl` `Process` is kicked off by configuring a process. For a basic CSV file
11
+ deserialize and bulk load:
12
+
13
+ ```
14
+ process = MiniEtl.create_process do |process|
15
+ process.source.type = :csv
16
+ process.source.location = 'samples/small.csv'
17
+ end
18
+
19
+ process.bootstrap
20
+ ```
21
+
22
+ TODO: Write a strategy for HTTP, use JSON server
23
+
24
+ ```
25
+ process = MiniEtl.create_process do |process|
26
+ process.source.type = :http
27
+ process.source.location = 'localhost:8080/sample'
28
+ end
29
+
30
+ process.bootstrap
31
+ ```
32
+
33
+ Strategies are available for CSV and JSON. If you need something else entirely,
34
+ a manual source can be used instead:
35
+
36
+ ```
37
+ process = MiniEtl.create_process do |process|
38
+ process.source.type = :manual
39
+ process.source.method = Proc.new do
40
+ ...
41
+ end
42
+ end
43
+ ```
44
+
45
+ ### Structure generation
46
+
47
+ Once data sourcing is complete, data can be fetched in-place.
48
+
49
+ ```
50
+ process = MiniEtl.create_process do |process|
51
+ process.source.type = :csv
52
+ process.source.location = 'samples/small.csv'
53
+ end
54
+
55
+ process.bootstrap
56
+ process.generate
57
+
58
+ process.generator.structures # intermediate structure for bulk import
59
+ ```
60
+
61
+ If the data source is too large to process in memory, an iterator can be given
62
+ instead:
63
+
64
+ ```
65
+ process = MiniEtl.create_process do |process|
66
+ process.source.type = :csv
67
+ process.source.location = 'samples/large.csv'
68
+ process.generator.lazy = true
69
+ end
70
+
71
+ process.bootstrap
72
+ process.generator.start do |structures|
73
+ ...
74
+ end
75
+ ```
76
+
77
+ ### Data persistence
78
+
79
+ Finally, once the data is shaped the way you need it, it can be persisted in
80
+ whatever way you need. The receiver class is expected to respond to
81
+ `.create(args)`
82
+
83
+ ```
84
+ process = MiniEtl.create_process do |process|
85
+ process.source.type = :csv
86
+ process.source.location = 'samples/large.csv'
87
+ process.store.type = Person # An active record model
88
+ end
89
+
90
+ process.bootstrap
91
+ process.generate
92
+ process.persist
93
+ ```
94
+
95
+ In this way, any arbitrary store can be created,
96
+
97
+ ```
98
+ class Payroll
99
+ Struct.new(:target, :name, :last_name, ...)
100
+ @@data = []
101
+
102
+ def create(params = {})
103
+ @@data << Struct::Target.new(name: params[:name], last_name: params[:last_name], ...)
104
+ end
105
+ end
106
+
107
+ process = MiniEtl.create_process do |process|
108
+ process.source.type = :csv
109
+ process.source.location = 'samples/small.csv'
110
+ process.store.type = Payroll
111
+ end
112
+
113
+ process.bootstrap
114
+ process.generate
115
+ process.persist
116
+ ```
117
+
118
+ ## Development
119
+
120
+ TODO: Test stuff
121
+ `$ rake`
122
+
123
+ Runs rspec, rubocop, generates coverage report
124
+
125
+ TODO: Explain how to generate csv files and the rest of the samples
126
+ NOTE: This will take ~5.5 mins, super slow, would need a parallel version
127
+ ```
128
+ $ rake sample:csv:all
129
+ ```
130
+
131
+ TODO: Explain how to use JSON Server to provide a fake API
132
+
133
+ ```
134
+ $ npm install -g json-server
135
+ $ rake sample:json:small
136
+ $ json-server --watch samples/small.json --port 3001
137
+ ```
138
+
139
+ API is now available at `localhost:3001/payroll`
140
+
141
+ ...
142
+
143
+ ## Contributing
144
+
145
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ggalindezb/mini_etl.
data/Rakefile ADDED
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+ require_relative 'lib/tasks/support/generation'
6
+
7
+ require 'rubygems'
8
+ require 'bundler'
9
+ Bundler.require(:default, :development)
10
+
11
# Defines the `spec` task that runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# Running plain `rake` runs the specs followed by RuboCop.
task default: :spec_n_cop

desc 'Run the specs, then RuboCop; fail when RuboCop reports a problem'
task :spec_n_cop do
  Rake::Task['spec'].invoke
  require 'rubocop'

  # Pass an explicit, empty argument list: CLI#run falls back to ARGV, which
  # here holds rake's own arguments (task names), not paths to inspect.
  status = RuboCop::CLI.new.run([])

  # CLI#run only returns an exit status; without this check offenses would be
  # printed but the rake task would still succeed.
  abort('RuboCop reported offenses or could not run') unless status.zero?
end
21
+
22
# Tasks that write sample data files through Support::Generation.
namespace :sample do
  # Makes generate_csv / generate_json callable from the task bodies below.
  include Support::Generation

  # Sizes handed to the generators by the `all` tasks, smallest first.
  sample_sizes = %i[small medium large].freeze

  namespace :csv do
    desc 'Generate CSV samples'
    task :all do
      sample_sizes.each { |size| generate_csv(size) }
    end

    desc 'Generate a CSV sample, ~1 MB'
    task :small do
      generate_csv(:small)
    end

    desc 'Generate a CSV sample, ~10 MB'
    task :medium do
      generate_csv(:medium)
    end

    desc 'Generate a CSV sample, ~100 MB'
    task :large do
      generate_csv(:large)
    end
  end

  namespace :json do
    # This task was previously described as generating CSV samples.
    desc 'Generate JSON samples'
    task :all do
      sample_sizes.each { |size| generate_json(size) }
    end

    desc 'Generate a JSON sample, ~2 MB'
    task :small do
      generate_json(:small)
    end

    desc 'Generate a JSON sample, ~20 MB'
    task :medium do
      generate_json(:medium)
    end

    desc 'Generate a JSON sample, ~200 MB'
    task :large do
      generate_json(:large)
    end
  end
end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'mini_etl'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/mini_etl.rb ADDED
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_etl/version'
4
+
5
+ require 'rubygems'
6
+ require 'bundler'
7
+ Bundler.require(:default)
8
+
9
+ require 'mini_etl/util/status'
10
+ require 'mini_etl/strategy'
11
+ require 'mini_etl/strategies/csv_strategy'
12
+ require 'mini_etl/source'
13
+ require 'mini_etl/generator'
14
+ require 'mini_etl/process'
15
+
16
# Top-level namespace of the gem and home of its entry point.
# TODO: place the gem's exception classes here.
module MiniEtl
  # Build a new Process and let the caller configure it.
  #
  # @yieldparam process [MiniEtl::Process] the freshly created process
  # @return [MiniEtl::Process, nil] the configured process, or nil when no
  #   block is given
  def self.create_process(&block)
    return nil if block.nil?

    # tap yields the new process to the block and returns the process itself,
    # whatever the block evaluates to.
    Process.new.tap(&block)
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Turns raw source data into records by delegating to the strategy that
  # matches the source type. Progress is tracked through the Status helpers
  # generated from VALID_STATES.
  class Generator
    VALID_STATES = {
      initialized: 0,
      bootstrapped: 1,
      transformed: 2,
      failed: 3
    }.freeze

    include Status

    attr_accessor :lazy, :data
    attr_writer :type
    attr_reader :payload

    # Start with an empty payload, eager mode, in the initialized state.
    # TODO: This needs to know the type of the receiver
    def initialize
      @lazy = false
      @payload = []
      initialized!
    end

    # Record what to transform and mark the generator as bootstrapped.
    #
    # @param type [Symbol, String] source type used to look up the strategy
    # @param data [Object] raw data handed to the strategy's `generate`
    # @raise [ArgumentError] when either argument is nil
    def bootstrap(type, data)
      raise ArgumentError if type.nil? || data.nil?

      @type = type
      @data = data
      bootstrapped!
    end

    # Run the strategy over the bootstrapped data and store the result in
    # `payload`.
    #
    # The generator is marked failed, and nil is returned, when it has not
    # been bootstrapped or when no strategy exists for the type. The latter
    # previously surfaced as a NoMethodError on nil.
    #
    # @return [Integer, nil] the transformed status value, or nil on failure
    def transform
      unless bootstrapped?
        failed!
        return
      end

      strategy = MiniEtl::Strategy.for(@type)
      if strategy.nil?
        failed!
        return
      end

      @payload = strategy.generate(@data)
      transformed!
    end

    # TODO: This needs to feed a block with source data, to stream a structure
    # generation instead of doing it in place
    # def start
    #   yield @structure.next if @lazy && block_given?
    # end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Drives one ETL run: it owns a Source and a Generator and moves through
  # the states listed in VALID_STATES as each step completes.
  # Open question kept from the original author: should this go all the way?
  # Probably.
  class Process
    VALID_STATES = {
      initialized: 0,
      bootstrapped: 1,
      generated: 2,
      finished: 3,
      failed: 4
    }.freeze

    include Status
    attr_reader :source, :generator

    # Create the collaborators and enter the initialized state.
    def initialize
      @source = Source.new
      @generator = Generator.new

      initialized!
    end

    # Validate and fetch the source, then hand its type and payload to the
    # generator. Marks the process failed when it is not freshly initialized
    # or the source does not validate.
    #
    # @return [Integer] the status value that was just set
    def bootstrap
      return failed! unless initialized? && @source.validate

      @source.fetch
      @generator.bootstrap(@source.type, @source.payload)
      bootstrapped!
    end

    # Ask the generator to transform the sourced data. Marks the process
    # failed when bootstrap has not completed.
    #
    # TODO: Parse it, bear in mind this will be in memory and may need to be split
    # TODO: Transform it into useful bits
    #       This may be done in rails. Provide a useful interface in that case
    # TODO: Load the thing wherever it needs to go
    #
    # @return [Integer] the status value that was just set
    def generate
      return failed! unless bootstrapped?

      @generator.transform
      generated!
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Source data from a given type and location. Validation and fetching are
  # delegated to the strategy registered for the type; progress is tracked
  # through the Status helpers generated from VALID_STATES.
  class Source
    VALID_STATES = {
      initialized: 0,
      validated: 1,
      sourced: 2,
      failed: 3
    }.freeze

    include Status

    attr_accessor :type, :location, :data
    attr_reader :payload
    ACCEPTED_PARAMS = %i[type location data].freeze

    # @param params [Hash] optional initial values; only the keys listed in
    #   ACCEPTED_PARAMS are read, missing ones become nil
    def initialize(params = {})
      ACCEPTED_PARAMS.each do |param|
        instance_variable_set("@#{param}", params[param])
      end
      @payload = []
      initialized!
    end

    # Check the source with the strategy for its type and record the outcome.
    #
    # @return [Object] false when no strategy exists for the type, otherwise
    #   whatever the strategy's `validate` returned
    def validate
      strategy = MiniEtl::Strategy.for(@type)
      if strategy.nil?
        # The previous `return false && failed!` short-circuited on `false`,
        # so the failure was never recorded.
        failed!
        return false
      end

      strategy.validate(self).tap { |valid| valid ? validated! : failed! }
    end

    # Load the payload through the strategy. Requires a prior successful
    # `validate`; otherwise the source is marked failed.
    #
    # @return [Integer] the status value that was just set
    def fetch
      strategy = MiniEtl::Strategy.for(@type)

      if strategy && validated?
        @payload = strategy.fetch(self)
        sourced!
      else
        failed!
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module MiniEtl
  module Strategies
    # Extract/Transform strategy for CSV input stored in a local file.
    class CSVStrategy
      class << self
        # Tell whether the source location can be read as a file.
        #
        # Uses File predicates rather than Pathname, which this file never
        # requires. A nil location and a directory both answer false instead
        # of raising here or later in `fetch`.
        #
        # @param source [#location] object exposing the file path
        # @return [Boolean] true when the location is a readable regular file
        def validate(source)
          location = source.location
          return false if location.nil?

          path = location.to_s
          File.file?(path) && File.readable?(path)
        end

        # Read the whole file at the source location.
        #
        # @param source [#location] object exposing the file path
        # @return [String] raw file contents
        def fetch(source)
          File.read(source.location)
        end

        # Parse raw CSV text.
        #
        # @param data [String] CSV document
        # @return [Array<Array>] one array of fields per row
        def generate(data)
          CSV.parse(data)
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Fetches a Strategy kind class for a given type of data
  # extraction/transformation
  class Strategy
    # Shape of a constant name that is safe to hand to const_defined?.
    CONSTANT_NAME = /\A[A-Z][A-Za-z0-9_]*\z/.freeze

    class << self
      # Look up the strategy class for +type+.
      #
      # The constant name is "<TYPE>Strategy" with the type upcased, so +:csv+
      # resolves to +Strategies::CSVStrategy+. Only constants defined directly
      # in Strategies are considered: without `inherit = false` the lookup
      # also searches Object and could return an unrelated top-level class.
      # Types that do not form a valid constant name answer nil instead of
      # raising NameError.
      #
      # @param type [#to_s, nil] source type, e.g. :csv
      # @return [Class, nil] the strategy class, or nil when there is none
      def for(type)
        strategy_constant = "#{type.to_s.upcase}Strategy"
        return nil unless strategy_constant.match?(CONSTANT_NAME)
        return nil unless Strategies.const_defined?(strategy_constant, false)

        Strategies.const_get(strategy_constant, false)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniEtl
  # Mixin that tracks a status as one of a fixed set of named values.
  #
  # Including it gives the host a `status` reader plus a `<state>?` predicate
  # and a `<state>!` setter for every key of the host's VALID_STATES constant,
  # or of DEFAULT_STATES when the host defines none. VALID_STATES is read at
  # inclusion time, so it must be defined before `include Status`.
  module Status
    DEFAULT_STATES = {
      initialized: 0,
      finished: 1,
      failed: 2
    }.freeze

    # Hook run by `include`; installs the reader and state helpers on +base+.
    #
    # The helpers are defined on the including class, not on this module.
    # Defining them here gave every includer the same method table, so the
    # class loaded last overwrote the values used by the others (for example
    # the number stored by `failed!`) and leaked its states into them.
    #
    # @param base [Module] the class or module including Status
    def self.included(base)
      states = base.const_defined?(:VALID_STATES) ? base.const_get(:VALID_STATES) : DEFAULT_STATES

      base.class_eval do
        attr_reader :status

        states.each do |verb, value|
          define_method("#{verb}?") { @status == value }
          define_method("#{verb}!") { @status = value }
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace of the gem; this file only contributes the version constant.
module MiniEtl
  # Release number of the gem, read by the gemspec.
  VERSION = '0.2.0'
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
require 'csv'
require 'json'

module Support
  # Writes fake sample files under ./samples for manual testing. Record
  # values come from Faker, which the caller is expected to have loaded.
  module Generation
    # COLUMNS = %w[name last_name nationality origin phone bank iban currency segment].freeze
    RECORD_SIZE = {
      small: 8_500,
      medium: 85_000,
      large: 825_000
    }.freeze

    # How many times a record is regenerated after Faker reports that it ran
    # out of unique values, before the error is passed on.
    UNIQUE_RETRY_LIMIT = 3

    # Write samples/<size>.csv with one CSV line per record.
    #
    # Rows go through CSV.generate_line, so each record ends with a newline
    # and values containing commas or quotes are escaped. Previously all
    # records were joined with bare commas onto a single line.
    #
    # @param size [Symbol] one of the RECORD_SIZE keys
    # @raise [ArgumentError] when the size is unknown
    def generate_csv(size)
      count = record_count(size)
      check_dir

      # File.open with a block closes the file on exit.
      File.open("samples/#{size}.csv", 'w') do |sample_file|
        count.times { sample_file.write(CSV.generate_line(dummy_data)) }
      end
    end

    # Write samples/<size>.json as one JSON array of record objects.
    #
    # @param size [Symbol] one of the RECORD_SIZE keys
    # @raise [ArgumentError] when the size is unknown
    def generate_json(size)
      count = record_count(size)
      check_dir

      File.open("samples/#{size}.json", 'w') do |sample_file|
        sample_file.write('[')
        # Every record but the last is followed by a comma.
        count.pred.times { sample_file.write(json_string + ',') }
        sample_file.write(json_string + ']')
      end
    end

    private

    # Number of records for a size; checked before any file is created so an
    # unknown size does not leave an empty sample behind.
    def record_count(size)
      RECORD_SIZE.fetch(size) do
        raise ArgumentError, "unknown sample size #{size.inspect}, expected one of #{RECORD_SIZE.keys.inspect}"
      end
    end

    # Create the output directory on first use.
    def check_dir
      Dir.mkdir('samples') unless Dir.exist?('samples')
    end

    # Keys used for JSON records, in the same order as dummy_data's values.
    def dummy_names
      %i[name last_name nationality capital_city phone_number bank iban currency industry]
    end

    # One record worth of fake values.
    #
    # When Faker runs out of unique values its registry is cleared and the
    # record is built again, so callers always receive an array. The old
    # rescue returned the result of `clear` instead.
    def dummy_data
      attempts = 0
      begin
        [Faker::Name.first_name, Faker::Name.last_name, Faker::Nation.nationality, Faker::Nation.capital_city,
         Faker::PhoneNumber.phone_number_with_country_code, Faker::Bank.name, Faker::Bank.iban, Faker::Currency.code,
         Faker::IndustrySegments.industry]
      rescue Faker::UniqueGenerator::RetryLimitExceeded
        Faker::UniqueGenerator.clear
        attempts += 1
        retry if attempts < UNIQUE_RETRY_LIMIT
        raise
      end
    end

    # One record serialized as a JSON object.
    def json_string
      JSON.dump(Hash[dummy_names.zip(dummy_data)])
    end
  end
end
data/mini_etl.gemspec ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'mini_etl/version'
6
+
7
# Gem definition: identity, packaged files and development tooling.
Gem::Specification.new do |spec|
  spec.name = 'mini_etl'
  spec.version = MiniEtl::VERSION
  spec.authors = ['Gerardo Galindez']
  spec.email = ['ggalindezb@gmail.com']

  spec.summary = 'Extract/Transform/Load wrapper'
  spec.homepage = 'https://github.com/ggalindezb/mini_etl'

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  #   spec.metadata["homepage_uri"] = spec.homepage
  #   spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  #   spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every file tracked by git, listed from the gemspec's own
  # directory, except anything under test/, spec/ or features/.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Tooling needed only to develop and test the gem, registered in this order.
  {
    'bundler' => '~> 1.16',
    'faker' => '~> 1.9',
    'pry' => '~> 0.12.2',
    'rake' => '~> 10.0',
    'rspec' => '~> 3.0',
    'rubocop' => '~> 0.71.0',
    'simplecov' => '~> 0.16.1'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini_etl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Gerardo Galindez
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-06-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faker
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.12.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.12.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.71.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.71.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.16.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.16.1
111
+ description:
112
+ email:
113
+ - ggalindezb@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".rubocop.yml"
120
+ - ".rubocop_todo.yml"
121
+ - CHANGELOG.md
122
+ - Gemfile
123
+ - Gemfile.lock
124
+ - README.md
125
+ - Rakefile
126
+ - bin/console
127
+ - bin/setup
128
+ - lib/mini_etl.rb
129
+ - lib/mini_etl/generator.rb
130
+ - lib/mini_etl/process.rb
131
+ - lib/mini_etl/source.rb
132
+ - lib/mini_etl/strategies/csv_strategy.rb
133
+ - lib/mini_etl/strategy.rb
134
+ - lib/mini_etl/util/status.rb
135
+ - lib/mini_etl/version.rb
136
+ - lib/tasks/support/generation.rb
137
+ - mini_etl.gemspec
138
+ homepage: https://github.com/ggalindezb/mini_etl
139
+ licenses: []
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubygems_version: 3.0.3
157
+ signing_key:
158
+ specification_version: 4
159
+ summary: Extract/Transform/Load wrapper
160
+ test_files: []