chronicle-etl 0.1.2 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +11 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +8 -6
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +6 -2
  12. data/lib/chronicle/etl/catalog.rb +102 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +110 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/exceptions.rb +17 -0
  19. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  20. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  22. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  24. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  25. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  26. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  28. data/lib/chronicle/etl/runner.rb +27 -38
  29. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  30. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  31. data/lib/chronicle/etl/transformers/transformer.rb +28 -11
  32. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  33. data/lib/chronicle/etl/version.rb +2 -2
  34. metadata +68 -29
  35. data/lib/chronicle/etl/cli.rb +0 -38
  36. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  37. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  38. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  39. data/lib/chronicle/etl/loaders/table.rb +0 -22
  40. data/lib/chronicle/etl/transformers/json.rb +0 -13
  41. data/lib/chronicle/etl/transformers/null.rb +0 -11
  42. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a8f688143a1057176324084919170a3351007cf619489d3ab76914c527bf15a
4
- data.tar.gz: 2c7cfeeb279274f66b9eaccbe8a5f17fdf67453b82590d55a0df8f1cde8e748b
3
+ metadata.gz: e1c08bc4f71c807525090abbf1701be19ab72cce08a99cc3bbec9b0db7150a02
4
+ data.tar.gz: 172a5d7e7ba7a9424ef7b5ab4da2b8c44defdb4e0a34c833248ff1b63f40407e
5
5
  SHA512:
6
- metadata.gz: 2882b8daef2dc427fdf0ea517504f35fe286f19a9040319f6884461577d978d963c92aa19c613b92dc55e5a8edc0271396450addaf772dceeb0ff4d5ed38a30f
7
- data.tar.gz: 2f45c5b4d1e896e82a514215032cfb6b0a9802dacd2be0b9e57b3bd27a5f13b40a392e1577081d61aa219aa4c4e5c7e97744d8f371031fb2b871b27f68094dd3
6
+ metadata.gz: 0f671c00928b15f9c0f6fa159ac106ff9c4f65a8bd16048e5d0cab82d680945317f7680e7796e98c665bb5cc757e0657f1a36d773d89e3e1587d9eebc12abdd8
7
+ data.tar.gz: 449d1368e0054f39006c7903218300b9b97ca839d6eff43b6b7bd659e5146d443a31c53325c4769ae7a56db9d42417020ccde17362ae024c01aca2ed63029044
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ EnabledByDefault: true
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+
7
+ Style/MethodCallWithArgsParentheses:
8
+ Enabled: false
@@ -0,0 +1 @@
1
+ 2.7.1
@@ -0,0 +1 @@
1
+ --markup=markdown
@@ -2,6 +2,17 @@
2
2
 
3
3
  This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
4
 
5
+ ## [0.1.4] - 2020-08-18
6
+ ### Updated
7
+ - Better display of available ETL classes
8
+ - Updated documentation
9
+
10
+ ## [0.1.3] - 2020-08-13
11
+ ### Added
12
+ - Ability to list all available ETL classes
13
+ - Refactored E, T, L module and class structure
14
+ - Better progress bar
15
+
5
16
  ## [0.1.2] - 2020-08-02
6
17
  ### Added
7
18
  - This changelog
@@ -1,11 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- chronicle-etl (0.1.2)
4
+ chronicle-etl (0.2.2)
5
5
  colorize (~> 0.8.1)
6
- ruby-progressbar (~> 1.10)
7
- table_print
8
6
  thor (~> 0.20)
7
+ tty-progressbar (~> 0.17)
8
+ tty-table (~> 0.11)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
@@ -14,14 +14,21 @@ GEM
14
14
  coderay (1.1.3)
15
15
  colorize (0.8.1)
16
16
  diff-lcs (1.4.4)
17
+ equatable (0.6.1)
17
18
  method_source (1.0.0)
19
+ necromancer (0.6.0)
20
+ pastel (0.7.4)
21
+ equatable (~> 0.6)
22
+ tty-color (~> 0.5)
18
23
  pry (0.13.1)
19
24
  coderay (~> 1.1)
20
25
  method_source (~> 1.0)
21
26
  pry-byebug (3.9.0)
22
27
  byebug (~> 11.0)
23
28
  pry (~> 0.13.0)
24
- rake (10.5.0)
29
+ rake (13.0.1)
30
+ redcarpet (3.5.0)
31
+ refinements (7.7.0)
25
32
  rspec (3.9.0)
26
33
  rspec-core (~> 3.9.0)
27
34
  rspec-expectations (~> 3.9.0)
@@ -35,19 +42,44 @@ GEM
35
42
  diff-lcs (>= 1.2.0, < 2.0)
36
43
  rspec-support (~> 3.9.0)
37
44
  rspec-support (3.9.3)
38
- ruby-progressbar (1.10.1)
39
- table_print (1.5.7)
45
+ runcom (6.2.0)
46
+ refinements (~> 7.4)
47
+ xdg (~> 4.0)
48
+ strings (0.1.8)
49
+ strings-ansi (~> 0.1)
50
+ unicode-display_width (~> 1.5)
51
+ unicode_utils (~> 1.4)
52
+ strings-ansi (0.1.0)
40
53
  thor (0.20.3)
54
+ tty-color (0.5.2)
55
+ tty-cursor (0.7.1)
56
+ tty-progressbar (0.17.0)
57
+ strings-ansi (~> 0.1.0)
58
+ tty-cursor (~> 0.7)
59
+ tty-screen (~> 0.7)
60
+ unicode-display_width (~> 1.6)
61
+ tty-screen (0.8.1)
62
+ tty-table (0.11.0)
63
+ equatable (~> 0.6)
64
+ necromancer (~> 0.5)
65
+ pastel (~> 0.7.2)
66
+ strings (~> 0.1.5)
67
+ tty-screen (~> 0.7)
68
+ unicode-display_width (1.7.0)
69
+ unicode_utils (1.4.0)
70
+ xdg (4.2.0)
41
71
 
42
72
  PLATFORMS
43
73
  ruby
44
74
 
45
75
  DEPENDENCIES
46
- bundler (~> 1.17)
76
+ bundler (~> 2.1)
47
77
  chronicle-etl!
48
78
  pry-byebug (~> 3.9)
49
- rake (~> 10.0)
50
- rspec (~> 3.0)
79
+ rake (~> 13.0)
80
+ redcarpet (~> 3.5)
81
+ rspec (~> 3.9)
82
+ runcom (~> 6.2)
51
83
 
52
84
  BUNDLED WITH
53
- 1.17.2
85
+ 2.1.4
data/README.md CHANGED
@@ -1,6 +1,10 @@
1
- # Chronicle::Etl
1
+ # Chronicle::ETL
2
2
 
3
- Chronicle ETL is a utility tool for manipulating personal data. You can extract it from a variety of source, transform it, and load it to different APIs or file formats.
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
4
+
5
+ Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
6
+
7
+ This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
4
8
 
5
9
  ## Installation
6
10
 
@@ -8,28 +12,77 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
8
12
  $ gem install chronicle-etl
9
13
  ```
10
14
 
11
- ## Examples
15
+ ## Usage
12
16
 
13
17
  After installing the gem, `chronicle-etl` is available to run in your shell.
14
18
 
19
+ ```bash
20
+ # read test.csv and display it as a table
21
+ $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
22
+
23
+ # Display help for the jobs:run command
24
+ $ chronicle-etl jobs help run
15
25
  ```
16
- chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
17
- cat test.csv | chronicle-etl --extractor csv --loader table
26
+
27
+ ## Connectors
28
+
29
+ Connectors are available to read, process, and load data from different formats or external services.
30
+
31
+ ```bash
32
+ # List all available connectors
33
+ $ chronicle-etl connectors:list
18
34
  ```
19
35
 
20
- ## Full usage
36
+ Built in connectors:
37
+
38
+ ### Extractors
39
+ - `stdin` - (default) Load records from line-separated stdin
40
+ - `csv`
41
+ - `file` - load from a single file or directory (with a glob pattern)
42
+
43
+ ### Transformers
44
+ - `null` - (default) Don't do anything
45
+
46
+ ### Loaders
47
+ - `stdout` - (default) output transformed records to stdout
48
+ - `csv` - Load records to a csv file
49
+ - `table` - Output an ascii table of records. Useful for debugging.
50
+
51
+ ### Provider-specific importers
52
+
53
+ In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
54
+
55
+ - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
56
+ - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
57
+
58
+ To install any of these, run `gem install chronicle-PROVIDER`.
59
+
60
+ If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
61
+
62
+ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
63
+
64
+ ### Full commands
21
65
 
22
66
  ```
23
- Commands:
24
- chronicle-etl help [COMMAND] # Describe available commands or one specific command
25
- chronicle-etl job # Runs an ETL job
67
+ $ chronicle-etl help
68
+
69
+ ALL COMMANDS
70
+ help # This help menu
71
+ connectors help [COMMAND] # Describe subcommands or one specific subcommand
72
+ connectors:install NAME # Installs connector NAME
73
+ connectors:list # Lists available connectors
74
+ jobs help [COMMAND] # Describe subcommands or one specific subcommand
75
+ jobs:create # Create a job
76
+ jobs:list # List all available jobs
77
+ jobs:run # Start a job
78
+ jobs:show # Show a job
26
79
  ```
27
80
 
28
81
  ### Job options
29
82
 
30
83
  ```
31
84
  Usage:
32
- chronicle-etl job
85
+ chronicle-etl jobs:run
33
86
 
34
87
  Options:
35
88
  -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
@@ -62,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
62
115
 
63
116
  ## Code of Conduct
64
117
 
65
- Everyone interacting in the Chronicle::Etl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
118
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
@@ -7,8 +7,20 @@ require "chronicle/etl"
7
7
  # with your gem easier. You can also use a different console, if you like.
8
8
 
9
9
  # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
10
+ require "pry"
11
+ Pry.start
12
+
13
+ def reload!(print = true)
14
+ puts 'Reloading ...' if print
15
+ # Main project directory.
16
+ root_dir = File.expand_path('..', __dir__)
17
+ # Directories within the project that should be reloaded.
18
+ reload_dirs = %w{lib}
19
+ # Loop through and reload every file in all relevant project directories.
20
+ reload_dirs.each do |dir|
21
+ Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
22
+ end
23
+ # Return true when complete.
24
+ true
25
+ end
12
26
 
13
- require "irb"
14
- IRB.start(__FILE__)
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "chronicle-etl"
8
- spec.version = Chronicle::Etl::VERSION
8
+ spec.version = Chronicle::ETL::VERSION
9
9
  spec.authors = ["Andrew Louis"]
10
10
  spec.email = ["andrew@hyfen.net"]
11
11
 
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
38
38
 
39
39
  spec.add_dependency "thor", "~> 0.20"
40
40
  spec.add_dependency "colorize", "~> 0.8.1"
41
- spec.add_dependency "table_print"
42
- spec.add_dependency "ruby-progressbar", "~> 1.10"
41
+ spec.add_dependency "tty-table", "~> 0.11"
42
+ spec.add_dependency "tty-progressbar", "~> 0.17"
43
43
 
44
- spec.add_development_dependency "bundler", "~> 1.17"
45
- spec.add_development_dependency "rake", "~> 10.0"
46
- spec.add_development_dependency "rspec", "~> 3.0"
44
+ spec.add_development_dependency "bundler", "~> 2.1"
45
+ spec.add_development_dependency "rake", "~> 13.0"
46
+ spec.add_development_dependency "rspec", "~> 3.9"
47
47
  spec.add_development_dependency "pry-byebug", "~> 3.9"
48
+ spec.add_development_dependency 'runcom', '~> 6.2'
49
+ spec.add_development_dependency 'redcarpet', '~> 3.5'
48
50
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require "chronicle/etl/cli/main"
4
4
 
5
- Chronicle::Etl::CLI.start(ARGV)
5
+ Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,5 +1,9 @@
1
+ require_relative 'etl/catalog'
2
+ require_relative 'etl/config'
3
+ require_relative 'etl/exceptions'
1
4
  require_relative 'etl/extractors/extractor'
2
- require_relative 'etl/transformers/transformer'
3
5
  require_relative 'etl/loaders/loader'
4
- require_relative 'etl/utils/progress_bar_wrapper'
5
6
  require_relative 'etl/runner'
7
+ require_relative 'etl/transformers/transformer'
8
+ require_relative 'etl/utils/progress_bar'
9
+ require_relative 'etl/version'
@@ -0,0 +1,102 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Utility methods to catalogue which Extractor, Transformer, and
4
+ # Loader connector classes are available to chronicle-etl
5
+ module Catalog
6
+ PLUGINS = ['email', 'bash']
7
+ BUILTIN = {
8
+ extractor: ['stdin', 'json', 'csv', 'file'],
9
+ transformer: ['null'],
10
+ loader: ['stdout', 'csv', 'table', 'rest']
11
+ }.freeze
12
+
13
+ # Return which ETL connectors are available, both built in and externally-defined
14
+ def self.available_classes
15
+ # TODO: have a registry of plugins
16
+
17
+ # Attempt to load each chronicle plugin that we might know about so
18
+ # that we can later search for subclasses to build our list of
19
+ # available classes
20
+ PLUGINS.each do |plugin|
21
+ require "chronicle/#{plugin}"
22
+ rescue LoadError
23
+ # this will happen if the gem isn't available globally
24
+ end
25
+
26
+ parent_klasses = [
27
+ ::Chronicle::ETL::Extractor,
28
+ ::Chronicle::ETL::Transformer,
29
+ ::Chronicle::ETL::Loader
30
+ ]
31
+ klasses = []
32
+ parent_klasses.map do |parent|
33
+ klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
34
+ end
35
+
36
+ klasses.map do |klass|
37
+ {
38
+ name: klass.name,
39
+ built_in: klass.built_in?,
40
+ provider: klass.provider,
41
+ phase: klass.phase
42
+ }
43
+ end
44
+ end
45
+
46
+ # For a given connector identifier, return the class (either builtin, or from an
47
+ # external chronicle gem)
48
+ def self.identifier_to_klass(identifier:, phase:)
49
+ if BUILTIN[phase].include? identifier
50
+ load_builtin_klass(name: identifier, phase: phase)
51
+ else
52
+ provider, name = identifier.split(':')
53
+ name ||= ''
54
+ load_provider_klass(provider: provider, name: name, phase: phase)
55
+ end
56
+ end
57
+
58
+ # Returns which phase a class belongs to: :extractor, :transformer, or :loader
59
+ def phase
60
+ ancestors = self.ancestors
61
+ return :extractor if ancestors.include? Chronicle::ETL::Extractor
62
+ return :transformer if ancestors.include? Chronicle::ETL::Transformer
63
+ return :loader if ancestors.include? Chronicle::ETL::Loader
64
+ end
65
+
66
+ # Returns which third-party provider this connector is associated with
67
+ def provider
68
+ # TODO: needs better convention for a gem reporting its provider name
69
+ provider = to_s.split('::')[1].downcase
70
+ provider == 'etl' ? 'chronicle' : provider
71
+ end
72
+
73
+ # Returns whether this connector is a built-in one
74
+ def built_in?
75
+ to_s.include? 'Chronicle::ETL'
76
+ end
77
+
78
+ private
79
+
80
+ def self.load_builtin_klass(name:, phase:)
81
+ klass_str = "Chronicle::ETL::#{name.capitalize}#{phase.capitalize}"
82
+ begin
83
+ Object.const_get(klass_str)
84
+ rescue NameError => e
85
+ raise ConnectorNotAvailableError.new("Connector not found", name: name)
86
+ end
87
+ end
88
+
89
+ def self.load_provider_klass(name: '', phase:, provider:)
90
+ begin
91
+ require "chronicle/#{provider}"
92
+ klass_str = "Chronicle::#{provider.capitalize}::#{name.capitalize}#{phase.capitalize}"
93
+ Object.const_get(klass_str)
94
+ rescue LoadError => e
95
+ raise ProviderNotAvailableError.new("Provider '#{provider.capitalize}' could not be loaded", provider: provider)
96
+ rescue NameError => e
97
+ raise ProviderConnectorNotAvailableError.new("Connector '#{name}' in '#{provider}' could not be found", provider: provider, name: name)
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,32 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # CLI commands for working with ETL connectors
5
+ class Connectors < SubcommandBase
6
+ default_task 'list'
7
+ namespace :connectors
8
+
9
+ desc "install NAME", "Installs connector NAME"
10
+ def install
11
+ puts "Installing"
12
+ end
13
+
14
+ desc "list", "Lists available connectors"
15
+ # Display all available connectors that chronicle-etl has access to
16
+ def list
17
+ klasses = Chronicle::ETL::Catalog.available_classes
18
+ klasses = klasses.sort_by do |a|
19
+ [a[:built_in].to_s, a[:provider], a[:phase]]
20
+ end
21
+
22
+ headers = klasses.first.keys.map do |key|
23
+ key.to_s.upcase.bold
24
+ end
25
+
26
+ table = TTY::Table.new(headers, klasses.map(&:values))
27
+ puts table.render(indent: 0, padding: [0, 2])
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end