chronicle-etl 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ab7fe84d09764034f061236d75fdf7403af9e1835d568831ae4896b90fc39b5
4
- data.tar.gz: c4c7a1ff47ecaf7d364e35d3439eef1345f90fee5a6610735124592109e2c02c
3
+ metadata.gz: 77abdc6f98e01300e0cf0ff4c8737fdc3bfe395754e9ddba1b6d3de86f2d6be8
4
+ data.tar.gz: 4a76565cfe9448b8ee7a6aa253a98923d7beccf02cc1ea7c8bacf3e7f7ab88ab
5
5
  SHA512:
6
- metadata.gz: a5a4b0e3769cd6063cb78843828ab15766df9d1ac1cd5d65fb81145d1415e95e46583c578312418fca91bfe02b05a8db3277a41a8a6b420f12263135deb49022
7
- data.tar.gz: e6d6e23d3c164d6fc5b9283d468fe9c9a450329d2d778e3ad49962f42fbe0316299111cdd02d2abc92eb52dc46823c5cbaf1c195203fc4e78129e9b28528c5ef
6
+ metadata.gz: cc6aaafde633a7316e26a23984f6a6965977e7bba388d2bb09751d3ef8ba0ed6a1ff78e50816af3a9806d5b03720fc524a270e228be0cc2894298acbbc342155
7
+ data.tar.gz: 32dd999d57c307b9e57db3b877362b0e780f8a618d5c573a5295241ec47dee65472b1e98414e4cb0959d978f83cb7cfde1177c9fb1d11c54e5edf1ca5ba4419a
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ EnabledByDefault: true
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+
7
+ Style/MethodCallWithArgsParentheses:
8
+ Enabled: false
@@ -0,0 +1 @@
1
+ --markup=markdown
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- chronicle-etl (0.1.4)
4
+ chronicle-etl (0.2.0)
5
5
  colorize (~> 0.8.1)
6
6
  thor (~> 0.20)
7
7
  tty-progressbar (~> 0.17)
@@ -27,6 +27,8 @@ GEM
27
27
  byebug (~> 11.0)
28
28
  pry (~> 0.13.0)
29
29
  rake (13.0.1)
30
+ redcarpet (3.5.0)
31
+ refinements (7.7.0)
30
32
  rspec (3.9.0)
31
33
  rspec-core (~> 3.9.0)
32
34
  rspec-expectations (~> 3.9.0)
@@ -40,6 +42,9 @@ GEM
40
42
  diff-lcs (>= 1.2.0, < 2.0)
41
43
  rspec-support (~> 3.9.0)
42
44
  rspec-support (3.9.3)
45
+ runcom (6.2.0)
46
+ refinements (~> 7.4)
47
+ xdg (~> 4.0)
43
48
  strings (0.1.8)
44
49
  strings-ansi (~> 0.1)
45
50
  unicode-display_width (~> 1.5)
@@ -62,6 +67,7 @@ GEM
62
67
  tty-screen (~> 0.7)
63
68
  unicode-display_width (1.7.0)
64
69
  unicode_utils (1.4.0)
70
+ xdg (4.2.0)
65
71
 
66
72
  PLATFORMS
67
73
  ruby
@@ -71,7 +77,9 @@ DEPENDENCIES
71
77
  chronicle-etl!
72
78
  pry-byebug (~> 3.9)
73
79
  rake (~> 13.0)
80
+ redcarpet (~> 3.5)
74
81
  rspec (~> 3.9)
82
+ runcom (~> 6.2)
75
83
 
76
84
  BUNDLED WITH
77
85
  2.1.4
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Chronicle::Etl
1
+ # Chronicle::ETL
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
4
4
 
@@ -16,12 +16,24 @@ $ gem install chronicle-etl
16
16
 
17
17
  After installing the gem, `chronicle-etl` is available to run in your shell.
18
18
 
19
+ ```bash
20
+ # read test.csv and display it as a table
21
+ $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
22
+
23
+ # Display help for the jobs:run command
24
+ $ chronicle-etl jobs help run
19
25
  ```
20
- chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
21
- cat test.csv | chronicle-etl --extractor csv --loader table
26
+
27
+ ## Connectors
28
+
29
+ Connectors are available to read, process, and load data from different formats or external services.
30
+
31
+ ```bash
32
+ # List all available connectors
33
+ $ chronicle-etl connectors:list
22
34
  ```
23
35
 
24
- ## Available importers
36
+ Built in connectors:
25
37
 
26
38
  ### Extractors
27
39
  - `stdin` - (default) Load records from line-separated stdin
@@ -54,17 +66,23 @@ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.ne
54
66
  ```
55
67
  $ chronicle-etl help
56
68
 
57
- Commands:
58
- chronicle-etl help [COMMAND] # Describe available commands or one specific command
59
- chronicle-etl job # Runs an ETL job
60
- chronicle-etl list # List all ETL classes
69
+ ALL COMMANDS
70
+ help # This help menu
71
+ connectors help [COMMAND] # Describe subcommands or one specific subcommand
72
+ connectors:install NAME # Installs connector NAME
73
+ connectors:list # Lists available connectors
74
+ jobs help [COMMAND] # Describe subcommands or one specific subcommand
75
+ jobs:create # Create a job
76
+ jobs:list # List all available jobs
77
+ jobs:run # Start a job
78
+ jobs:show # Show a job
61
79
  ```
62
80
 
63
81
  ### Job options
64
82
 
65
83
  ```
66
84
  Usage:
67
- chronicle-etl job
85
+ chronicle-etl jobs:run
68
86
 
69
87
  Options:
70
88
  -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
@@ -97,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
97
115
 
98
116
  ## Code of Conduct
99
117
 
100
- Everyone interacting in the Chronicle::Etl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
118
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "chronicle-etl"
8
- spec.version = Chronicle::Etl::VERSION
8
+ spec.version = Chronicle::ETL::VERSION
9
9
  spec.authors = ["Andrew Louis"]
10
10
  spec.email = ["andrew@hyfen.net"]
11
11
 
@@ -45,4 +45,6 @@ Gem::Specification.new do |spec|
45
45
  spec.add_development_dependency "rake", "~> 13.0"
46
46
  spec.add_development_dependency "rspec", "~> 3.9"
47
47
  spec.add_development_dependency "pry-byebug", "~> 3.9"
48
+ spec.add_development_dependency 'runcom', '~> 6.2'
49
+ spec.add_development_dependency 'redcarpet', '~> 3.5'
48
50
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require "chronicle/etl/cli/main"
4
4
 
5
- Chronicle::Etl::CLI.start(ARGV)
5
+ Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,6 +1,8 @@
1
1
  require_relative 'etl/catalog'
2
+ require_relative 'etl/config'
2
3
  require_relative 'etl/extractors/extractor'
3
- require_relative 'etl/transformers/transformer'
4
4
  require_relative 'etl/loaders/loader'
5
- require_relative 'etl/utils/progress_bar'
6
5
  require_relative 'etl/runner'
6
+ require_relative 'etl/transformers/transformer'
7
+ require_relative 'etl/utils/progress_bar'
8
+ require_relative 'etl/version'
@@ -1,30 +1,31 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
3
  # Utility methods to catalogue which Extractor, Transformer, and
4
- # Loader classes are available to chronicle-etl
4
+ # Loader connector classes are available to chronicle-etl
5
5
  module Catalog
6
- def self.available_classes
7
- parent_klasses = [
8
- Chronicle::Etl::Extractor,
9
- Chronicle::Etl::Transformer,
10
- Chronicle::Etl::Loader
11
- ]
6
+ PLUGINS = ['email', 'bash']
12
7
 
8
+ # Return which ETL connectors are available, both built in and externally-defined
9
+ def self.available_classes
13
10
  # TODO: have a registry of plugins
14
- plugins = ['email', 'bash']
15
11
 
16
12
  # Attempt to load each chronicle plugin that we might know about so
17
13
  # that we can later search for subclasses to build our list of
18
14
  # available classes
19
- plugins.each do |plugin|
15
+ PLUGINS.each do |plugin|
20
16
  require "chronicle/#{plugin}"
21
17
  rescue LoadError
22
18
  # this will happen if the gem isn't available globally
23
19
  end
24
20
 
21
+ parent_klasses = [
22
+ ::Chronicle::ETL::Extractor,
23
+ ::Chronicle::ETL::Transformer,
24
+ ::Chronicle::ETL::Loader
25
+ ]
25
26
  klasses = []
26
- parent_klasses.each do |parent|
27
- klasses += ObjectSpace.each_object(Class).select { |klass| klass < parent }
27
+ parent_klasses.map do |parent|
28
+ klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
28
29
  end
29
30
 
30
31
  klasses.map do |klass|
@@ -37,21 +38,24 @@ module Chronicle
37
38
  end
38
39
  end
39
40
 
41
+ # Returns whether a class is an Extractor, Transformer, or Loader
40
42
  def phase
41
43
  ancestors = self.ancestors
42
- return :extractor if ancestors.include? Chronicle::Etl::Extractor
43
- return :transformer if ancestors.include? Chronicle::Etl::Transformer
44
- return :loader if ancestors.include? Chronicle::Etl::Loader
44
+ return :extractor if ancestors.include? Chronicle::ETL::Extractor
45
+ return :transformer if ancestors.include? Chronicle::ETL::Transformer
46
+ return :loader if ancestors.include? Chronicle::ETL::Loader
45
47
  end
46
48
 
49
+ # Returns which third-party provider this connector is associated with
47
50
  def provider
48
51
  # TODO: needs better convention for a gem reporting its provider name
49
52
  provider = to_s.split('::')[1].downcase
50
53
  provider == 'etl' ? 'chronicle' : provider
51
54
  end
52
55
 
56
+ # Returns whether this connector is a built-in one
53
57
  def built_in?
54
- to_s.include? 'Chronicle::Etl'
58
+ to_s.include? 'Chronicle::ETL'
55
59
  end
56
60
  end
57
61
  end
@@ -0,0 +1,32 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # CLI commands for working with ETL connectors
5
+ class Connectors < SubcommandBase
6
+ default_task 'list'
7
+ namespace :connectors
8
+
9
+ desc "install NAME", "Installs connector NAME"
10
+ def install
11
+ puts "Installing"
12
+ end
13
+
14
+ desc "list", "Lists available connectors"
15
+ # Display all available connectors that chronicle-etl has access to
16
+ def list
17
+ klasses = Chronicle::ETL::Catalog.available_classes
18
+ klasses = klasses.sort_by do |a|
19
+ [a[:built_in].to_s, a[:provider], a[:phase]]
20
+ end
21
+
22
+ headers = klasses.first.keys.map do |key|
23
+ key.to_s.upcase.bold
24
+ end
25
+
26
+ table = TTY::Table.new(headers, klasses.map(&:values))
27
+ puts table.render(indent: 0, padding: [0, 2])
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,111 @@
1
+ require 'pp'
2
+ require 'pry'
3
+
4
+ module Chronicle
5
+ module ETL
6
+ module CLI
7
+ # CLI commands for working with ETL jobs
8
+ class Jobs < SubcommandBase
9
+ default_task "start"
10
+ namespace :jobs
11
+
12
+ class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
13
+ class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
14
+ class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
15
+ class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
16
+ class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
17
+ class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
18
+ class_option :job, aliases: '-j', desc: 'Job configuration name (or filename)'
19
+
20
+ map run: :start # Thor doesn't like `run` as a command name
21
+ desc "run", "Start a job"
22
+ long_desc <<-LONG_DESC
23
+ This will run an ETL job. Each job needs three parts:
24
+
25
+ 1. #{'Extractor'.underline}: pulls data from an external source. By default, this is stdin. Other common options include pulling data from an API or reading JSON from a file.
26
+
27
+ 2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
28
+
29
+ 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
30
+
31
+ If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
32
+ LONG_DESC
33
+ # Run an ETL job
34
+ def start
35
+ runner_options = build_runner_options(options)
36
+ runner = Chronicle::ETL::Runner.new(runner_options)
37
+ runner.run!
38
+ end
39
+
40
+ desc "create", "Create a job"
41
+ # Create an ETL job
42
+ def create
43
+ runner_options = build_runner_options(options)
44
+ path = File.join('chronicle', 'etl', 'jobs', options[:job])
45
+ Chronicle::ETL::Config.write(path, runner_options)
46
+ end
47
+
48
+ desc "show", "Show details about a job"
49
+ # Show an ETL job
50
+ def show
51
+ runner_options = build_runner_options(options)
52
+ pp runner_options
53
+ end
54
+
55
+ desc "list", "List all available jobs"
56
+ # List available ETL jobs
57
+ def list
58
+ jobs = Chronicle::ETL::Config.jobs
59
+
60
+ job_details = jobs.map do |job|
61
+ r = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
62
+
63
+ extractor = r[:extractor][:name] if r[:extractor]
64
+ transformer = r[:transformer][:name] if r[:transformer]
65
+ loader = r[:loader][:name] if r[:loader]
66
+
67
+ [job, extractor, transformer, loader]
68
+ end
69
+
70
+ headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
71
+
72
+ table = TTY::Table.new(headers, job_details)
73
+ puts table.render(indent: 0, padding: [0, 2])
74
+ end
75
+
76
+ private
77
+
78
+ # Create runner options by merging flag options with the job config file; on conflict, config file values win
79
+ def build_runner_options options
80
+ flag_options = process_flag_options(options)
81
+ job_options = load_job(options[:job])
82
+ flag_options.merge(job_options)
83
+ end
84
+
85
+ def load_job job
86
+ yml_config = Chronicle::ETL::Config.load("chronicle/etl/jobs/#{job}.yml")
87
+ # FIXME: use better trick to deeply symbolize keys
88
+ JSON.parse(yml_config.to_json, symbolize_names: true)
89
+ end
90
+
91
+ # Takes flag options and turns them into a runner config
92
+ def process_flag_options options
93
+ {
94
+ extractor: {
95
+ name: options[:extractor],
96
+ options: options[:'extractor-opts']
97
+ },
98
+ transformer: {
99
+ name: options[:transformer],
100
+ options: options[:'transformer-opts']
101
+ },
102
+ loader: {
103
+ name: options[:loader],
104
+ options: options[:'loader-opts']
105
+ }
106
+ }
107
+ end
108
+ end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,83 @@
1
+ require 'thor'
2
+ require 'chronicle/etl'
3
+ require 'colorize'
4
+
5
+ require 'chronicle/etl/cli/subcommand_base'
6
+ require 'chronicle/etl/cli/connectors'
7
+ require 'chronicle/etl/cli/jobs'
8
+
9
+ module Chronicle
10
+ module ETL
11
+ module CLI
12
+ # Main entrypoint for CLI app
13
+ class Main < Thor
14
+ class_option "verbose", type: :boolean, default: false
15
+ default_task "jobs"
16
+
17
+ desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
18
+ subcommand 'connectors', Connectors
19
+
20
+ desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
21
+ subcommand 'jobs', Jobs
22
+
23
+ # Entrypoint for the CLI
24
+ def self.start(given_args = ARGV, config = {})
25
+ if given_args.none?
26
+ abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
27
+ end
28
+
29
+ # Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
30
+ if given_args.any? && given_args[0].include?(':')
31
+ commands = given_args.shift.split(':')
32
+ given_args = given_args.unshift(commands).flatten
33
+ end
34
+
35
+ super(given_args, config)
36
+ end
37
+
38
+ # Displays help options for chronicle-etl
39
+ def help(meth = nil, subcommand = false)
40
+ if meth && !respond_to?(meth)
41
+ klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
42
+ klass.start(['-h', task].compact, shell: shell)
43
+ else
44
+ shell.say "ABOUT".bold
45
+ shell.say " #{'chronicle-etl'.italic} is a utility tool for #{'extracting'.underline}, #{'transforming'.underline}, and #{'loading'.underline} personal data."
46
+ shell.say
47
+ shell.say "USAGE".bold
48
+ shell.say " $ chronicle-etl COMMAND"
49
+ shell.say
50
+ shell.say "EXAMPLES".bold
51
+ shell.say " Show available connectors:".italic.light_black
52
+ shell.say " $ chronicle-etl connectors:list"
53
+ shell.say
54
+ shell.say " Run a simple job:".italic.light_black
55
+ shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
56
+ shell.say
57
+ shell.say " Show full job options:".italic.light_black
58
+ shell.say " $ chronicle-etl jobs help start"
59
+
60
+ list = []
61
+
62
+ Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
63
+ list += thor_class.printable_tasks(false)
64
+ end
65
+ list.sort! { |a, b| a[0] <=> b[0] }
66
+ list.unshift ["help", "# This help menu"]
67
+
68
+ shell.say
69
+ shell.say 'ALL COMMANDS'.bold
70
+ shell.print_table(list, indent: 2, truncate: true)
71
+ shell.say
72
+ shell.say "VERSION".bold
73
+ shell.say " #{Chronicle::ETL::VERSION}"
74
+ shell.say
75
+ shell.say "FULL DOCUMENTATION".bold
76
+ shell.say " https://github.com/chronicle-app/chronicle-etl".blue
77
+ shell.say
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,37 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
+ class SubcommandBase < Thor
6
+ # Print usage instructions for a subcommand
7
+ def self.help(shell, subcommand = false)
8
+ list = printable_commands(true, subcommand)
9
+ Thor::Util.thor_classes_in(self).each do |klass|
10
+ list += klass.printable_commands(false)
11
+ end
12
+ list.sort! { |a, b| a[0] <=> b[0] }
13
+
14
+ shell.say "COMMANDS".bold
15
+ shell.print_table(list, indent: 2, truncate: true)
16
+ shell.say
17
+ class_options_help(shell)
18
+ end
19
+
20
+ # Show docs with command:subcommand pattern.
21
+ # For `help` command, don't use colon
22
+ def self.banner(command, namespace = nil, subcommand = false)
23
+ if command.name == 'help'
24
+ "#{subcommand_prefix} #{command.usage}"
25
+ else
26
+ "#{subcommand_prefix}:#{command.usage}"
27
+ end
28
+ end
29
+
30
+ # Use subcommand classname to derive display name for subcommand
31
+ def self.subcommand_prefix
32
+ self.name.gsub(%r{.*::}, '').gsub(%r{^[A-Z]}) { |match| match[0].downcase }.gsub(%r{[A-Z]}) { |match| "-#{match[0].downcase}" }
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ require 'runcom'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ # Utility methods to read, write, and access config files
6
+ module Config
7
+ # Loads a yml config file
8
+ def self.load(path)
9
+ config = Runcom::Config.new(path)
10
+ # FIXME: hack to deeply symbolize keys
11
+ JSON.parse(config.to_h.to_json, symbolize_names: true)
12
+ end
13
+
14
+ # Writes a hash as a yml config file
15
+ def self.write(path, data)
16
+ config = Runcom::Config.new(path)
17
+ filename = config.all[0].to_s + '.yml'
18
+ File.open(filename, 'w') do |f|
19
+ f << data.to_yaml
20
+ end
21
+ end
22
+
23
+ # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
24
+ def self.jobs
25
+ job_directory = Runcom::Config.new('chronicle/etl/jobs').current
26
+ Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
27
+ File.basename(filename, ".*")
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  require 'csv'
2
- class Chronicle::Etl::CsvExtractor < Chronicle::Etl::Extractor
2
+ class Chronicle::ETL::CsvExtractor < Chronicle::ETL::Extractor
3
3
  DEFAULT_OPTIONS = {
4
4
  headers: true,
5
5
  filename: $stdin
@@ -1,20 +1,26 @@
1
1
  require 'chronicle/etl'
2
2
 
3
3
  module Chronicle
4
- module Etl
4
+ module ETL
5
+ # Abstract class representing an Extractor for an ETL job
5
6
  class Extractor
6
- extend Chronicle::Etl::Catalog
7
-
8
- ETL_PHASE = :extractor
7
+ extend Chronicle::ETL::Catalog
9
8
 
9
+ # Construct a new instance of this extractor. Options are passed in from a Runner
10
+ # == Parameters:
11
+ # options::
12
+ # Options for configuring this Extractor
10
13
  def initialize(options = {})
11
- @options = options.transform_keys!(&:to_sym)
14
+ @options = options.transform_keys!(&:to_sym)
12
15
  end
13
16
 
17
+ # Entrypoint for this Extractor. Called by a Runner. Expects a series of records to be yielded
14
18
  def extract
15
19
  raise NotImplementedError
16
20
  end
17
21
 
22
+ # An optional method to calculate how many records there are to extract. Used primarily for
23
+ # building the progress bar
18
24
  def results_count; end
19
25
  end
20
26
  end
@@ -1,8 +1,8 @@
1
1
  require 'pathname'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class FileExtractor < Chronicle::Etl::Extractor
4
+ module ETL
5
+ class FileExtractor < Chronicle::ETL::Extractor
6
6
  def extract
7
7
  if file?
8
8
  extract_file do |data, metadata|
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class StdinExtractor < Chronicle::Etl::Extractor
2
+ module ETL
3
+ class StdinExtractor < Chronicle::ETL::Extractor
4
4
  def extract
5
5
  $stdin.read.each_line do |line|
6
6
  yield line
@@ -1,8 +1,8 @@
1
1
  require 'csv'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class CsvLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class CsvLoader < Chronicle::ETL::Loader
6
6
  def initialize(options={})
7
7
  super(options)
8
8
  @rows = []
@@ -1,18 +1,26 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Loader for an ETL job
3
4
  class Loader
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
7
+ # Construct a new instance of this loader. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Loader
6
11
  def initialize(options = {})
7
12
  @options = options
8
13
  end
9
14
 
15
+ # Called once before processing records
10
16
  def start; end
11
17
 
18
+ # Load a single record
12
19
  def load
13
20
  raise NotImplementedError
14
21
  end
15
22
 
23
+ # Called once there are no more records to process
16
24
  def finish; end
17
25
  end
18
26
  end
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class StdoutLoader < Chronicle::Etl::Loader
2
+ module ETL
3
+ class StdoutLoader < Chronicle::ETL::Loader
4
4
  def load(result)
5
5
  puts result.inspect
6
6
  end
@@ -1,8 +1,8 @@
1
1
  require 'tty/table'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class TableLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class TableLoader < Chronicle::ETL::Loader
6
6
  def initialize(options)
7
7
  super(options)
8
8
  end
@@ -1,4 +1,4 @@
1
- class Chronicle::Etl::Runner
1
+ class Chronicle::ETL::Runner
2
2
  BUILTIN = {
3
3
  extractor: ['stdin', 'json', 'csv', 'file'],
4
4
  transformer: ['null'],
@@ -13,7 +13,7 @@ class Chronicle::Etl::Runner
13
13
 
14
14
  def run!
15
15
  total = @extractor.results_count
16
- progress_bar = Chronicle::Etl::Utils::ProgressBar.new(title: 'Running job', total: total)
16
+ progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
17
17
  count = 0
18
18
 
19
19
  @loader.start
@@ -40,7 +40,7 @@ class Chronicle::Etl::Runner
40
40
 
41
41
  def load_etl_class(phase, x)
42
42
  if BUILTIN[phase].include? x
43
- klass_name = "Chronicle::Etl::#{x.capitalize}#{phase.to_s.capitalize}"
43
+ klass_name = "Chronicle::ETL::#{x.capitalize}#{phase.to_s.capitalize}"
44
44
  else
45
45
  # TODO: come up with syntax for specifying a particular extractor in a provider library
46
46
  provider, name = x.split(":")
@@ -48,7 +48,7 @@ class Chronicle::Etl::Runner
48
48
  begin
49
49
  require "chronicle/#{provider}"
50
50
  rescue LoadError => e
51
- warn("Error loading #{phase} '#{provider}'")
51
+ warn("Error loading #{phase} '#{provider}'".red)
52
52
  warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
53
53
  exit(false)
54
54
  end
@@ -1,8 +1,8 @@
1
1
  require 'json'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class JsonTransformer < Chronicle::Etl::Transformer
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
6
  def transform data
7
7
  return JSON.parse(data)
8
8
  end
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class NullTransformer < Chronicle::Etl::Transformer
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
4
  def transform data
5
5
  return data
6
6
  end
@@ -1,15 +1,31 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
3
4
  class Transformer
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
6
11
  def initialize(options = {})
7
12
  @options = options
8
13
  end
9
14
 
15
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
10
16
  def transform data
11
17
  raise NotImplementedError
12
18
  end
19
+
20
+ # The domain or provider-specific id of the record this transformer is working on.
21
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
22
+ # data source.
23
+ def id; end
24
+
25
+ # The domain or provider-specific timestamp of the record this transformer is working on.
26
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
27
+ # data source.
28
+ def timestamp; end
13
29
  end
14
30
  end
15
31
  end
@@ -2,7 +2,7 @@ require 'tty/progressbar'
2
2
  require 'colorize'
3
3
 
4
4
  module Chronicle
5
- module Etl
5
+ module ETL
6
6
  module Utils
7
7
 
8
8
  class ProgressBar
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.4"
2
+ module ETL
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-18 00:00:00.000000000 Z
11
+ date: 2020-08-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '3.9'
125
+ - !ruby/object:Gem::Dependency
126
+ name: runcom
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '6.2'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '6.2'
139
+ - !ruby/object:Gem::Dependency
140
+ name: redcarpet
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3.5'
125
153
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
154
  transformer it, and load it.
127
155
  email:
@@ -133,8 +161,10 @@ extra_rdoc_files: []
133
161
  files:
134
162
  - ".gitignore"
135
163
  - ".rspec"
164
+ - ".rubocop.yml"
136
165
  - ".ruby-version"
137
166
  - ".travis.yml"
167
+ - ".yardopts"
138
168
  - CHANGELOG.md
139
169
  - CODE_OF_CONDUCT.md
140
170
  - Gemfile
@@ -148,7 +178,11 @@ files:
148
178
  - exe/chronicle-etl
149
179
  - lib/chronicle/etl.rb
150
180
  - lib/chronicle/etl/catalog.rb
151
- - lib/chronicle/etl/cli.rb
181
+ - lib/chronicle/etl/cli/connectors.rb
182
+ - lib/chronicle/etl/cli/jobs.rb
183
+ - lib/chronicle/etl/cli/main.rb
184
+ - lib/chronicle/etl/cli/subcommand_base.rb
185
+ - lib/chronicle/etl/config.rb
152
186
  - lib/chronicle/etl/extractors/csv_extractor.rb
153
187
  - lib/chronicle/etl/extractors/extractor.rb
154
188
  - lib/chronicle/etl/extractors/file_extractor.rb
@@ -1,56 +0,0 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
- require 'colorize'
4
-
5
- module Chronicle
6
- module Etl
7
- class CLI < Thor
8
- default_task :job
9
-
10
- desc 'job', 'Runs an ETL job'
11
- method_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
12
- method_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
13
- method_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
14
- method_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
15
- method_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
16
- method_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
17
- method_option :job, aliases: '-j', desc: 'Job configuration file'
18
-
19
- def job
20
- runner_options = {
21
- extractor: {
22
- name: options[:extractor],
23
- options: options[:'extractor-opts']
24
- },
25
- transformer: {
26
- name: options[:transformer],
27
- options: options[:'transformer-opts']
28
- },
29
- loader: {
30
- name: options[:loader],
31
- options: options[:'loader-opts']
32
- }
33
- }
34
-
35
- runner = Runner.new(runner_options)
36
- runner.run!
37
- end
38
-
39
- # FIXME: namespace this differently
40
- desc 'list', 'List all ETL classes'
41
- def list
42
- klasses = Chronicle::Etl::Catalog.available_classes
43
- klasses = klasses.sort_by do |a|
44
- [a[:built_in].to_s, a[:provider], a[:phase]]
45
- end
46
-
47
- headers = klasses.first.keys.map do |key|
48
- key.to_s.capitalize.light_white
49
- end
50
-
51
- table = TTY::Table.new(headers, klasses.map(&:values))
52
- puts table.render(padding: [0, 2])
53
- end
54
- end
55
- end
56
- end