chronicle-etl 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ab7fe84d09764034f061236d75fdf7403af9e1835d568831ae4896b90fc39b5
4
- data.tar.gz: c4c7a1ff47ecaf7d364e35d3439eef1345f90fee5a6610735124592109e2c02c
3
+ metadata.gz: 77abdc6f98e01300e0cf0ff4c8737fdc3bfe395754e9ddba1b6d3de86f2d6be8
4
+ data.tar.gz: 4a76565cfe9448b8ee7a6aa253a98923d7beccf02cc1ea7c8bacf3e7f7ab88ab
5
5
  SHA512:
6
- metadata.gz: a5a4b0e3769cd6063cb78843828ab15766df9d1ac1cd5d65fb81145d1415e95e46583c578312418fca91bfe02b05a8db3277a41a8a6b420f12263135deb49022
7
- data.tar.gz: e6d6e23d3c164d6fc5b9283d468fe9c9a450329d2d778e3ad49962f42fbe0316299111cdd02d2abc92eb52dc46823c5cbaf1c195203fc4e78129e9b28528c5ef
6
+ metadata.gz: cc6aaafde633a7316e26a23984f6a6965977e7bba388d2bb09751d3ef8ba0ed6a1ff78e50816af3a9806d5b03720fc524a270e228be0cc2894298acbbc342155
7
+ data.tar.gz: 32dd999d57c307b9e57db3b877362b0e780f8a618d5c573a5295241ec47dee65472b1e98414e4cb0959d978f83cb7cfde1177c9fb1d11c54e5edf1ca5ba4419a
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ EnabledByDefault: true
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+
7
+ Style/MethodCallWithArgsParentheses:
8
+ Enabled: false
@@ -0,0 +1 @@
1
+ --markup=markdown
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- chronicle-etl (0.1.4)
4
+ chronicle-etl (0.2.0)
5
5
  colorize (~> 0.8.1)
6
6
  thor (~> 0.20)
7
7
  tty-progressbar (~> 0.17)
@@ -27,6 +27,8 @@ GEM
27
27
  byebug (~> 11.0)
28
28
  pry (~> 0.13.0)
29
29
  rake (13.0.1)
30
+ redcarpet (3.5.0)
31
+ refinements (7.7.0)
30
32
  rspec (3.9.0)
31
33
  rspec-core (~> 3.9.0)
32
34
  rspec-expectations (~> 3.9.0)
@@ -40,6 +42,9 @@ GEM
40
42
  diff-lcs (>= 1.2.0, < 2.0)
41
43
  rspec-support (~> 3.9.0)
42
44
  rspec-support (3.9.3)
45
+ runcom (6.2.0)
46
+ refinements (~> 7.4)
47
+ xdg (~> 4.0)
43
48
  strings (0.1.8)
44
49
  strings-ansi (~> 0.1)
45
50
  unicode-display_width (~> 1.5)
@@ -62,6 +67,7 @@ GEM
62
67
  tty-screen (~> 0.7)
63
68
  unicode-display_width (1.7.0)
64
69
  unicode_utils (1.4.0)
70
+ xdg (4.2.0)
65
71
 
66
72
  PLATFORMS
67
73
  ruby
@@ -71,7 +77,9 @@ DEPENDENCIES
71
77
  chronicle-etl!
72
78
  pry-byebug (~> 3.9)
73
79
  rake (~> 13.0)
80
+ redcarpet (~> 3.5)
74
81
  rspec (~> 3.9)
82
+ runcom (~> 6.2)
75
83
 
76
84
  BUNDLED WITH
77
85
  2.1.4
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Chronicle::Etl
1
+ # Chronicle::ETL
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
4
4
 
@@ -16,12 +16,24 @@ $ gem install chronicle-etl
16
16
 
17
17
  After installing the gem, `chronicle-etl` is available to run in your shell.
18
18
 
19
+ ```bash
20
+ # Read test.csv and display it as a table
21
+ $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
22
+
23
+ # Display help for the jobs:run command
24
+ $ chronicle-etl jobs help run
19
25
  ```
20
- chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
21
- cat test.csv | chronicle-etl --extractor csv --loader table
26
+
27
+ ## Connectors
28
+
29
+ Connectors are available to read, process, and load data from different formats or external services.
30
+
31
+ ```bash
32
+ # List all available connectors
33
+ $ chronicle-etl connectors:list
22
34
  ```
23
35
 
24
- ## Available importers
36
+ Built in connectors:
25
37
 
26
38
  ### Extractors
27
39
  - `stdin` - (default) Load records from line-separated stdin
@@ -54,17 +66,23 @@ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.ne
54
66
  ```
55
67
  $ chronicle-etl help
56
68
 
57
- Commands:
58
- chronicle-etl help [COMMAND] # Describe available commands or one specific command
59
- chronicle-etl job # Runs an ETL job
60
- chronicle-etl list # List all ETL classes
69
+ ALL COMMANDS
70
+ help # This help menu
71
+ connectors help [COMMAND] # Describe subcommands or one specific subcommand
72
+ connectors:install NAME # Installs connector NAME
73
+ connectors:list # Lists available connectors
74
+ jobs help [COMMAND] # Describe subcommands or one specific subcommand
75
+ jobs:create # Create a job
76
+ jobs:list # List all available jobs
77
+ jobs:run # Start a job
78
+ jobs:show # Show a job
61
79
  ```
62
80
 
63
81
  ### Job options
64
82
 
65
83
  ```
66
84
  Usage:
67
- chronicle-etl job
85
+ chronicle-etl jobs:run
68
86
 
69
87
  Options:
70
88
  -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
@@ -97,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
97
115
 
98
116
  ## Code of Conduct
99
117
 
100
- Everyone interacting in the Chronicle::Etl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
118
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "chronicle-etl"
8
- spec.version = Chronicle::Etl::VERSION
8
+ spec.version = Chronicle::ETL::VERSION
9
9
  spec.authors = ["Andrew Louis"]
10
10
  spec.email = ["andrew@hyfen.net"]
11
11
 
@@ -45,4 +45,6 @@ Gem::Specification.new do |spec|
45
45
  spec.add_development_dependency "rake", "~> 13.0"
46
46
  spec.add_development_dependency "rspec", "~> 3.9"
47
47
  spec.add_development_dependency "pry-byebug", "~> 3.9"
48
+ spec.add_development_dependency 'runcom', '~> 6.2'
49
+ spec.add_development_dependency 'redcarpet', '~> 3.5'
48
50
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require "chronicle/etl/cli/main"
4
4
 
5
- Chronicle::Etl::CLI.start(ARGV)
5
+ Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,6 +1,8 @@
1
1
  require_relative 'etl/catalog'
2
+ require_relative 'etl/config'
2
3
  require_relative 'etl/extractors/extractor'
3
- require_relative 'etl/transformers/transformer'
4
4
  require_relative 'etl/loaders/loader'
5
- require_relative 'etl/utils/progress_bar'
6
5
  require_relative 'etl/runner'
6
+ require_relative 'etl/transformers/transformer'
7
+ require_relative 'etl/utils/progress_bar'
8
+ require_relative 'etl/version'
@@ -1,30 +1,31 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
3
  # Utility methods to catalogue which Extractor, Transformer, and
4
- # Loader classes are available to chronicle-etl
4
+ # Loader connector classes are available to chronicle-etl
5
5
  module Catalog
6
- def self.available_classes
7
- parent_klasses = [
8
- Chronicle::Etl::Extractor,
9
- Chronicle::Etl::Transformer,
10
- Chronicle::Etl::Loader
11
- ]
6
+ PLUGINS = ['email', 'bash']
12
7
 
8
+ # Return which ETL connectors are available, both built in and externally-defined
9
+ def self.available_classes
13
10
  # TODO: have a registry of plugins
14
- plugins = ['email', 'bash']
15
11
 
16
12
  # Attempt to load each chronicle plugin that we might know about so
17
13
  # that we can later search for subclasses to build our list of
18
14
  # available classes
19
- plugins.each do |plugin|
15
+ PLUGINS.each do |plugin|
20
16
  require "chronicle/#{plugin}"
21
17
  rescue LoadError
22
18
  # this will happen if the gem isn't available globally
23
19
  end
24
20
 
21
+ parent_klasses = [
22
+ ::Chronicle::ETL::Extractor,
23
+ ::Chronicle::ETL::Transformer,
24
+ ::Chronicle::ETL::Loader
25
+ ]
25
26
  klasses = []
26
- parent_klasses.each do |parent|
27
- klasses += ObjectSpace.each_object(Class).select { |klass| klass < parent }
27
+ parent_klasses.map do |parent|
28
+ klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
28
29
  end
29
30
 
30
31
  klasses.map do |klass|
@@ -37,21 +38,24 @@ module Chronicle
37
38
  end
38
39
  end
39
40
 
41
+ # Returns which ETL phase (:extractor, :transformer, or :loader) this class belongs to
40
42
  def phase
41
43
  ancestors = self.ancestors
42
- return :extractor if ancestors.include? Chronicle::Etl::Extractor
43
- return :transformer if ancestors.include? Chronicle::Etl::Transformer
44
- return :loader if ancestors.include? Chronicle::Etl::Loader
44
+ return :extractor if ancestors.include? Chronicle::ETL::Extractor
45
+ return :transformer if ancestors.include? Chronicle::ETL::Transformer
46
+ return :loader if ancestors.include? Chronicle::ETL::Loader
45
47
  end
46
48
 
49
+ # Returns which third-party provider this connector is associated with
47
50
  def provider
48
51
  # TODO: needs better convention for a gem reporting its provider name
49
52
  provider = to_s.split('::')[1].downcase
50
53
  provider == 'etl' ? 'chronicle' : provider
51
54
  end
52
55
 
56
+ # Returns whether this connector is a built-in one
53
57
  def built_in?
54
- to_s.include? 'Chronicle::Etl'
58
+ to_s.include? 'Chronicle::ETL'
55
59
  end
56
60
  end
57
61
  end
@@ -0,0 +1,32 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # CLI commands for working with ETL connectors
5
+ class Connectors < SubcommandBase
6
+ default_task 'list'
7
+ namespace :connectors
8
+
9
+ desc "install NAME", "Installs connector NAME"
10
+ def install
11
+ puts "Installing"
12
+ end
13
+
14
+ desc "list", "Lists available connectors"
15
+ # Display all available connectors that chronicle-etl has access to
16
+ def list
17
+ klasses = Chronicle::ETL::Catalog.available_classes
18
+ klasses = klasses.sort_by do |a|
19
+ [a[:built_in].to_s, a[:provider], a[:phase]]
20
+ end
21
+
22
+ headers = klasses.first.keys.map do |key|
23
+ key.to_s.upcase.bold
24
+ end
25
+
26
+ table = TTY::Table.new(headers, klasses.map(&:values))
27
+ puts table.render(indent: 0, padding: [0, 2])
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,111 @@
1
+ require 'pp'
2
+ require 'pry'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module CLI
7
+ # CLI commands for working with ETL jobs
8
+ class Jobs < SubcommandBase
9
+ default_task "start"
10
+ namespace :jobs
11
+
12
+ class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
13
+ class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
14
+ class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
15
+ class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
16
+ class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
17
+ class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
18
+ class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
19
+
20
+ map run: :start # Thor doesn't like `run` as a command name
21
+ desc "run", "Start a job"
22
+ long_desc <<-LONG_DESC
23
+ This will run an ETL job. Each job needs three parts:
24
+
25
+ 1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdout. Other common options including pulling data from an API or reading JSON from a file.
26
+
27
+ 2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
28
+
29
+ 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
30
+
31
+ If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
32
+ LONG_DESC
33
+ # Run an ETL job
34
+ def start
35
+ runner_options = build_runner_options(options)
36
+ runner = Chronicle::ETL::Runner.new(runner_options)
37
+ runner.run!
38
+ end
39
+
40
+ desc "create", "Create a job"
41
+ # Create an ETL job
42
+ def create
43
+ runner_options = build_runner_options(options)
44
+ path = File.join('chronicle', 'etl', 'jobs', options[:job])
45
+ Chronicle::ETL::Config.write(path, runner_options)
46
+ end
47
+
48
+ desc "show", "Show details about a job"
49
+ # Show an ETL job
50
+ def show
51
+ runner_options = build_runner_options(options)
52
+ pp runner_options
53
+ end
54
+
55
+ desc "list", "List all available jobs"
56
+ # List available ETL jobs
57
+ def list
58
+ jobs = Chronicle::ETL::Config.jobs
59
+
60
+ job_details = jobs.map do |job|
61
+ r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
62
+
63
+ extractor = r[:extractor][:name] if r[:extractor]
64
+ transformer = r[:transformer][:name] if r[:transformer]
65
+ loader = r[:loader][:name] if r[:loader]
66
+
67
+ [job, extractor, transformer, loader]
68
+ end
69
+
70
+ headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
71
+
72
+ table = TTY::Table.new(headers, job_details)
73
+ puts table.render(indent: 0, padding: [0, 2])
74
+ end
75
+
76
+ private
77
+
78
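+ # Create runner options by combining flag options with the job config file. NOTE(review): `flag_options.merge(job_options)` lets job-file values win over flags - confirm that is intended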
79
+ def build_runner_options options
80
+ flag_options = process_flag_options(options)
81
+ job_options = load_job(options[:job])
82
+ flag_options.merge(job_options)
83
+ end
84
+
85
+ def load_job job
86
+ yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
87
+ # FIXME: use better trick to deeply symbolize keys
88
+ JSON.parse(yml_config.to_json, symbolize_names: true)
89
+ end
90
+
91
+ # Takes flag options and turns them into a runner config
92
+ def process_flag_options options
93
+ {
94
+ extractor: {
95
+ name: options[:extractor],
96
+ options: options[:'extractor-opts']
97
+ },
98
+ transformer: {
99
+ name: options[:transformer],
100
+ options: options[:'transformer-opts']
101
+ },
102
+ loader: {
103
+ name: options[:loader],
104
+ options: options[:'loader-opts']
105
+ }
106
+ }
107
+ end
108
+ end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,83 @@
1
+ require 'thor'
2
+ require 'chronicle/etl'
3
+ require 'colorize'
4
+
5
+ require 'chronicle/etl/cli/subcommand_base'
6
+ require 'chronicle/etl/cli/connectors'
7
+ require 'chronicle/etl/cli/jobs'
8
+
9
+ module Chronicle
10
+ module ETL
11
+ module CLI
12
+ # Main entrypoint for CLI app
13
+ class Main < Thor
14
+ class_option "verbose", type: :boolean, default: false
15
+ default_task "jobs"
16
+
17
+ desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
18
+ subcommand 'connectors', Connectors
19
+
20
+ desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
21
+ subcommand 'jobs', Jobs
22
+
23
+ # Entrypoint for the CLI
24
+ def self.start(given_args = ARGV, config = {})
25
+ if given_args.none?
26
+ abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
27
+ end
28
+
29
+ # Takes a `subcommand:command` argument and splits it so Thor knows how to hand off to the subcommand class
30
+ if given_args.any? && given_args[0].include?(':')
31
+ commands = given_args.shift.split(':')
32
+ given_args = given_args.unshift(commands).flatten
33
+ end
34
+
35
+ super(given_args, config)
36
+ end
37
+
38
+ # Displays help options for chronicle-etl
39
+ def help(meth = nil, subcommand = false)
40
+ if meth && !respond_to?(meth)
41
+ klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
42
+ klass.start(['-h', task].compact, shell: shell)
43
+ else
44
+ shell.say "ABOUT".bold
45
+ shell.say " #{'chronicle-etl'.italic} is a utility tool for #{'extracting'.underline}, #{'transforming'.underline}, and #{'loading'.underline} personal data."
46
+ shell.say
47
+ shell.say "USAGE".bold
48
+ shell.say " $ chronicle-etl COMMAND"
49
+ shell.say
50
+ shell.say "EXAMPLES".bold
51
+ shell.say " Show available connectors:".italic.light_black
52
+ shell.say " $ chronicle-etl connectors:list"
53
+ shell.say
54
+ shell.say " Run a simple job:".italic.light_black
55
+ shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
56
+ shell.say
57
+ shell.say " Show full job options:".italic.light_black
58
+ shell.say " $ chronicle-etl jobs help start"
59
+
60
+ list = []
61
+
62
+ Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
63
+ list += thor_class.printable_tasks(false)
64
+ end
65
+ list.sort! { |a, b| a[0] <=> b[0] }
66
+ list.unshift ["help", "# This help menu"]
67
+
68
+ shell.say
69
+ shell.say 'ALL COMMANDS'.bold
70
+ shell.print_table(list, indent: 2, truncate: true)
71
+ shell.say
72
+ shell.say "VERSION".bold
73
+ shell.say " #{Chronicle::ETL::VERSION}"
74
+ shell.say
75
+ shell.say "FULL DOCUMENTATION".bold
76
+ shell.say " https://github.com/chronicle-app/chronicle-etl".blue
77
+ shell.say
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,37 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
+ class SubcommandBase < Thor
6
+ # Print usage instructions for a subcommand
7
+ def self.help(shell, subcommand = false)
8
+ list = printable_commands(true, subcommand)
9
+ Thor::Util.thor_classes_in(self).each do |klass|
10
+ list += klass.printable_commands(false)
11
+ end
12
+ list.sort! { |a, b| a[0] <=> b[0] }
13
+
14
+ shell.say "COMMANDS".bold
15
+ shell.print_table(list, indent: 2, truncate: true)
16
+ shell.say
17
+ class_options_help(shell)
18
+ end
19
+
20
+ # Show docs with command:subcommand pattern.
21
+ # For `help` command, don't use colon
22
+ def self.banner(command, namespace = nil, subcommand = false)
23
+ if command.name == 'help'
24
+ "#{subcommand_prefix} #{command.usage}"
25
+ else
26
+ "#{subcommand_prefix}:#{command.usage}"
27
+ end
28
+ end
29
+
30
+ # Use subcommand classname to derive display name for subcommand
31
+ def self.subcommand_prefix
32
+ self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ require 'runcom'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ # Utility methods to read, write, and access config files
6
+ module Config
7
+ # Loads a yml config file
8
+ def self.load(path)
9
+ config = Runcom::Config.new(path)
10
+ # FIXME: hack to deeply symbolize keys
11
+ JSON.parse(config.to_h.to_json, symbolize_names: true)
12
+ end
13
+
14
+ # Writes a hash as a yml config file
15
+ def self.write(path, data)
16
+ config = Runcom::Config.new(path)
17
+ filename = config.all[0].to_s + '.yml'
18
+ File.open(filename, 'w') do |f|
19
+ f << data.to_yaml
20
+ end
21
+ end
22
+
23
+ # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
24
+ def self.jobs
25
+ job_directory = Runcom::Config.new('chronicle/etl/jobs').current
26
+ Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
27
+ File.basename(filename, ".*")
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  require 'csv'
2
- class Chronicle::Etl::CsvExtractor < Chronicle::Etl::Extractor
2
+ class Chronicle::ETL::CsvExtractor < Chronicle::ETL::Extractor
3
3
  DEFAULT_OPTIONS = {
4
4
  headers: true,
5
5
  filename: $stdin
@@ -1,20 +1,26 @@
1
1
  require 'chronicle/etl'
2
2
 
3
3
  module Chronicle
4
- module Etl
4
+ module ETL
5
+ # Abstract class representing an Extractor for an ETL job
5
6
  class Extractor
6
- extend Chronicle::Etl::Catalog
7
-
8
- ETL_PHASE = :extractor
7
+ extend Chronicle::ETL::Catalog
9
8
 
9
+ # Construct a new instance of this extractor. Options are passed in from a Runner
10
+ # == Parameters:
11
+ # options::
12
+ # Options for configuring this Extractor
10
13
  def initialize(options = {})
11
- @options = options.transform_keys!(&:to_sym)
14
+ @options = options.transform_keys!(&:to_sym)
12
15
  end
13
16
 
17
+ # Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
14
18
  def extract
15
19
  raise NotImplementedError
16
20
  end
17
21
 
22
+ # An optional method to calculate how many records there are to extract. Used primarily for
23
+ # building the progress bar
18
24
  def results_count; end
19
25
  end
20
26
  end
@@ -1,8 +1,8 @@
1
1
  require 'pathname'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class FileExtractor < Chronicle::Etl::Extractor
4
+ module ETL
5
+ class FileExtractor < Chronicle::ETL::Extractor
6
6
  def extract
7
7
  if file?
8
8
  extract_file do |data, metadata|
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class StdinExtractor < Chronicle::Etl::Extractor
2
+ module ETL
3
+ class StdinExtractor < Chronicle::ETL::Extractor
4
4
  def extract
5
5
  $stdin.read.each_line do |line|
6
6
  yield line
@@ -1,8 +1,8 @@
1
1
  require 'csv'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class CsvLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class CsvLoader < Chronicle::ETL::Loader
6
6
  def initialize(options={})
7
7
  super(options)
8
8
  @rows = []
@@ -1,18 +1,26 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Loader for an ETL job
3
4
  class Loader
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
7
+ # Construct a new instance of this loader. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Loader
6
11
  def initialize(options = {})
7
12
  @options = options
8
13
  end
9
14
 
15
+ # Called once before processing records
10
16
  def start; end
11
17
 
18
+ # Load a single record
12
19
  def load
13
20
  raise NotImplementedError
14
21
  end
15
22
 
23
+ # Called once there are no more records to process
16
24
  def finish; end
17
25
  end
18
26
  end
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class StdoutLoader < Chronicle::Etl::Loader
2
+ module ETL
3
+ class StdoutLoader < Chronicle::ETL::Loader
4
4
  def load(result)
5
5
  puts result.inspect
6
6
  end
@@ -1,8 +1,8 @@
1
1
  require 'tty/table'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class TableLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class TableLoader < Chronicle::ETL::Loader
6
6
  def initialize(options)
7
7
  super(options)
8
8
  end
@@ -1,4 +1,4 @@
1
- class Chronicle::Etl::Runner
1
+ class Chronicle::ETL::Runner
2
2
  BUILTIN = {
3
3
  extractor: ['stdin', 'json', 'csv', 'file'],
4
4
  transformer: ['null'],
@@ -13,7 +13,7 @@ class Chronicle::Etl::Runner
13
13
 
14
14
  def run!
15
15
  total = @extractor.results_count
16
- progress_bar = Chronicle::Etl::Utils::ProgressBar.new(title: 'Running job', total: total)
16
+ progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
17
17
  count = 0
18
18
 
19
19
  @loader.start
@@ -40,7 +40,7 @@ class Chronicle::Etl::Runner
40
40
 
41
41
  def load_etl_class(phase, x)
42
42
  if BUILTIN[phase].include? x
43
- klass_name = "Chronicle::Etl::#{x.capitalize}#{phase.to_s.capitalize}"
43
+ klass_name = "Chronicle::ETL::#{x.capitalize}#{phase.to_s.capitalize}"
44
44
  else
45
45
  # TODO: come up with syntax for specifying a particular extractor in a provider library
46
46
  provider, name = x.split(":")
@@ -48,7 +48,7 @@ class Chronicle::Etl::Runner
48
48
  begin
49
49
  require "chronicle/#{provider}"
50
50
  rescue LoadError => e
51
- warn("Error loading #{phase} '#{provider}'")
51
+ warn("Error loading #{phase} '#{provider}'".red)
52
52
  warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
53
53
  exit(false)
54
54
  end
@@ -1,8 +1,8 @@
1
1
  require 'json'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class JsonTransformer < Chronicle::Etl::Transformer
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
6
  def transform data
7
7
  return JSON.parse(data)
8
8
  end
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class NullTransformer < Chronicle::Etl::Transformer
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
4
  def transform data
5
5
  return data
6
6
  end
@@ -1,15 +1,31 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
3
4
  class Transformer
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
6
11
  def initialize(options = {})
7
12
  @options = options
8
13
  end
9
14
 
15
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
10
16
  def transform data
11
17
  raise NotImplementedError
12
18
  end
19
+
20
+ # The domain or provider-specific id of the record this transformer is working on.
21
+ # Used for building a cursor so an extractor doesn't have to start again from the beginning
22
+ # of a data source.
23
+ def id; end
24
+
25
+ # The domain or provider-specific timestamp of the record this transformer is working on.
26
+ # Used for building a cursor so an extractor doesn't have to start again from the beginning
27
+ # of a data source.
28
+ def timestamp; end
13
29
  end
14
30
  end
15
31
  end
@@ -2,7 +2,7 @@ require 'tty/progressbar'
2
2
  require 'colorize'
3
3
 
4
4
  module Chronicle
5
- module Etl
5
+ module ETL
6
6
  module Utils
7
7
 
8
8
  class ProgressBar
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.4"
2
+ module ETL
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-18 00:00:00.000000000 Z
11
+ date: 2020-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transformer it, and load it.
127
155
  email:
@@ -133,8 +161,10 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
136
165
  - ".ruby-version"
137
166
  - ".travis.yml"
167
+ - ".yardopts"
138
168
  - CHANGELOG.md
139
169
  - CODE_OF_CONDUCT.md
140
170
  - Gemfile
@@ -148,7 +178,11 @@ files:
148
178
  - exe/chronicle-etl
149
179
  - lib/chronicle/etl.rb
150
180
  - lib/chronicle/etl/catalog.rb
151
- - lib/chronicle/etl/cli.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
152
186
  - lib/chronicle/etl/extractors/csv_extractor.rb
153
187
  - lib/chronicle/etl/extractors/extractor.rb
154
188
  - lib/chronicle/etl/extractors/file_extractor.rb
@@ -1,56 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
- require 'colorize'
4
-
5
- module Chronicle
6
- module Etl
7
- class CLI < Thor
8
- default_task :job
9
-
10
- desc 'job', 'Runs an ETL job'
11
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
12
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
13
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
14
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
15
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
16
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
17
- method_option :job, aliases: '-j', desc: 'Job configuration file'
18
-
19
- def job
20
- runner_options = {
21
- extractor: {
22
- name: options[:extractor],
23
- options: options[:'extractor-opts']
24
- },
25
- transformer: {
26
- name: options[:transformer],
27
- options: options[:'transformer-opts']
28
- },
29
- loader: {
30
- name: options[:loader],
31
- options: options[:'loader-opts']
32
- }
33
- }
34
-
35
- runner = Runner.new(runner_options)
36
- runner.run!
37
- end
38
-
39
- # FIXME: namespace this differently
40
- desc 'list', 'List all ETL classes'
41
- def list
42
- klasses = Chronicle::Etl::Catalog.available_classes
43
- klasses = klasses.sort_by do |a|
44
- [a[:built_in].to_s, a[:provider], a[:phase]]
45
- end
46
-
47
- headers = klasses.first.keys.map do |key|
48
- key.to_s.capitalize.light_white
49
- end
50
-
51
- table = TTY::Table.new(headers, klasses.map(&:values))
52
- puts table.render(padding: [0, 2])
53
- end
54
- end
55
- end
56
- end