chronicle-etl 0.1.2 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.ruby-version +1 -0
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +11 -0
  6. data/Gemfile.lock +42 -10
  7. data/README.md +64 -11
  8. data/bin/console +16 -4
  9. data/chronicle-etl.gemspec +8 -6
  10. data/exe/chronicle-etl +2 -2
  11. data/lib/chronicle/etl.rb +6 -2
  12. data/lib/chronicle/etl/catalog.rb +102 -0
  13. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  14. data/lib/chronicle/etl/cli/jobs.rb +110 -0
  15. data/lib/chronicle/etl/cli/main.rb +83 -0
  16. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  17. data/lib/chronicle/etl/config.rb +32 -0
  18. data/lib/chronicle/etl/exceptions.rb +17 -0
  19. data/lib/chronicle/etl/extractors/{csv.rb → csv_extractor.rb} +3 -3
  20. data/lib/chronicle/etl/extractors/extractor.rb +23 -12
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +52 -0
  22. data/lib/chronicle/etl/extractors/stdin_extractor.rb +11 -0
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +29 -0
  24. data/lib/chronicle/etl/loaders/loader.rb +23 -16
  25. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  26. data/lib/chronicle/etl/loaders/stdout_loader.rb +9 -0
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -0
  28. data/lib/chronicle/etl/runner.rb +27 -38
  29. data/lib/chronicle/etl/transformers/json_transformer.rb +11 -0
  30. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -0
  31. data/lib/chronicle/etl/transformers/transformer.rb +28 -11
  32. data/lib/chronicle/etl/utils/progress_bar.rb +76 -0
  33. data/lib/chronicle/etl/version.rb +2 -2
  34. metadata +68 -29
  35. data/lib/chronicle/etl/cli.rb +0 -38
  36. data/lib/chronicle/etl/extractors/stdin.rb +0 -13
  37. data/lib/chronicle/etl/loaders/csv.rb +0 -31
  38. data/lib/chronicle/etl/loaders/stdout.rb +0 -11
  39. data/lib/chronicle/etl/loaders/table.rb +0 -22
  40. data/lib/chronicle/etl/transformers/json.rb +0 -13
  41. data/lib/chronicle/etl/transformers/null.rb +0 -11
  42. data/lib/chronicle/etl/utils/progress_bar_wrapper.rb +0 -43
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a8f688143a1057176324084919170a3351007cf619489d3ab76914c527bf15a
4
- data.tar.gz: 2c7cfeeb279274f66b9eaccbe8a5f17fdf67453b82590d55a0df8f1cde8e748b
3
+ metadata.gz: e1c08bc4f71c807525090abbf1701be19ab72cce08a99cc3bbec9b0db7150a02
4
+ data.tar.gz: 172a5d7e7ba7a9424ef7b5ab4da2b8c44defdb4e0a34c833248ff1b63f40407e
5
5
  SHA512:
6
- metadata.gz: 2882b8daef2dc427fdf0ea517504f35fe286f19a9040319f6884461577d978d963c92aa19c613b92dc55e5a8edc0271396450addaf772dceeb0ff4d5ed38a30f
7
- data.tar.gz: 2f45c5b4d1e896e82a514215032cfb6b0a9802dacd2be0b9e57b3bd27a5f13b40a392e1577081d61aa219aa4c4e5c7e97744d8f371031fb2b871b27f68094dd3
6
+ metadata.gz: 0f671c00928b15f9c0f6fa159ac106ff9c4f65a8bd16048e5d0cab82d680945317f7680e7796e98c665bb5cc757e0657f1a36d773d89e3e1587d9eebc12abdd8
7
+ data.tar.gz: 449d1368e0054f39006c7903218300b9b97ca839d6eff43b6b7bd659e5146d443a31c53325c4769ae7a56db9d42417020ccde17362ae024c01aca2ed63029044
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ EnabledByDefault: true
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+
7
+ Style/MethodCallWithArgsParentheses:
8
+ Enabled: false
@@ -0,0 +1 @@
1
+ 2.7.1
@@ -0,0 +1 @@
1
+ --markup=markdown
@@ -2,6 +2,17 @@
2
2
 
3
3
  This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
4
 
5
+ ## [0.1.4] - 2020-08-18
6
+ ### Updated
7
+ - Better display of available ETL classes
8
+ - Updated documentation
9
+
10
+ ## [0.1.3] - 2020-08-13
11
+ ### Added
12
+ - Ability to list all available ETL classes
13
+ - Refactored E, T, L module and class structure
14
+ - Better progress bar
15
+
5
16
  ## [0.1.2] - 2020-08-02
6
17
  ### Added
7
18
  - This changelog
@@ -1,11 +1,11 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- chronicle-etl (0.1.2)
4
+ chronicle-etl (0.2.2)
5
5
  colorize (~> 0.8.1)
6
- ruby-progressbar (~> 1.10)
7
- table_print
8
6
  thor (~> 0.20)
7
+ tty-progressbar (~> 0.17)
8
+ tty-table (~> 0.11)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
@@ -14,14 +14,21 @@ GEM
14
14
  coderay (1.1.3)
15
15
  colorize (0.8.1)
16
16
  diff-lcs (1.4.4)
17
+ equatable (0.6.1)
17
18
  method_source (1.0.0)
19
+ necromancer (0.6.0)
20
+ pastel (0.7.4)
21
+ equatable (~> 0.6)
22
+ tty-color (~> 0.5)
18
23
  pry (0.13.1)
19
24
  coderay (~> 1.1)
20
25
  method_source (~> 1.0)
21
26
  pry-byebug (3.9.0)
22
27
  byebug (~> 11.0)
23
28
  pry (~> 0.13.0)
24
- rake (10.5.0)
29
+ rake (13.0.1)
30
+ redcarpet (3.5.0)
31
+ refinements (7.7.0)
25
32
  rspec (3.9.0)
26
33
  rspec-core (~> 3.9.0)
27
34
  rspec-expectations (~> 3.9.0)
@@ -35,19 +42,44 @@ GEM
35
42
  diff-lcs (>= 1.2.0, < 2.0)
36
43
  rspec-support (~> 3.9.0)
37
44
  rspec-support (3.9.3)
38
- ruby-progressbar (1.10.1)
39
- table_print (1.5.7)
45
+ runcom (6.2.0)
46
+ refinements (~> 7.4)
47
+ xdg (~> 4.0)
48
+ strings (0.1.8)
49
+ strings-ansi (~> 0.1)
50
+ unicode-display_width (~> 1.5)
51
+ unicode_utils (~> 1.4)
52
+ strings-ansi (0.1.0)
40
53
  thor (0.20.3)
54
+ tty-color (0.5.2)
55
+ tty-cursor (0.7.1)
56
+ tty-progressbar (0.17.0)
57
+ strings-ansi (~> 0.1.0)
58
+ tty-cursor (~> 0.7)
59
+ tty-screen (~> 0.7)
60
+ unicode-display_width (~> 1.6)
61
+ tty-screen (0.8.1)
62
+ tty-table (0.11.0)
63
+ equatable (~> 0.6)
64
+ necromancer (~> 0.5)
65
+ pastel (~> 0.7.2)
66
+ strings (~> 0.1.5)
67
+ tty-screen (~> 0.7)
68
+ unicode-display_width (1.7.0)
69
+ unicode_utils (1.4.0)
70
+ xdg (4.2.0)
41
71
 
42
72
  PLATFORMS
43
73
  ruby
44
74
 
45
75
  DEPENDENCIES
46
- bundler (~> 1.17)
76
+ bundler (~> 2.1)
47
77
  chronicle-etl!
48
78
  pry-byebug (~> 3.9)
49
- rake (~> 10.0)
50
- rspec (~> 3.0)
79
+ rake (~> 13.0)
80
+ redcarpet (~> 3.5)
81
+ rspec (~> 3.9)
82
+ runcom (~> 6.2)
51
83
 
52
84
  BUNDLED WITH
53
- 1.17.2
85
+ 2.1.4
data/README.md CHANGED
@@ -1,6 +1,10 @@
1
- # Chronicle::Etl
1
+ # Chronicle::ETL
2
2
 
3
- Chronicle ETL is a utility tool for manipulating personal data. You can extract it from a variety of source, transform it, and load it to different APIs or file formats.
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
4
+
5
+ Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
6
+
7
+ This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
4
8
 
5
9
  ## Installation
6
10
 
@@ -8,28 +12,77 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
8
12
  $ gem install chronicle-etl
9
13
  ```
10
14
 
11
- ## Examples
15
+ ## Usage
12
16
 
13
17
  After installing the gem, `chronicle-etl` is available to run in your shell.
14
18
 
19
+ ```bash
20
+ # read test.csv and display it as a table
21
+ $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
22
+
23
+ # Display help for the jobs:run command
24
+ $ chronicle-etl jobs help run
15
25
  ```
16
- chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
17
- cat test.csv | chronicle-etl --extractor csv --loader table
26
+
27
+ ## Connectors
28
+
29
+ Connectors are available to read, process, and load data from different formats or external services.
30
+
31
+ ```bash
32
+ # List all available connectors
33
+ $ chronicle-etl connectors:list
18
34
  ```
19
35
 
20
- ## Full usage
36
+ Built in connectors:
37
+
38
+ ### Extractors
39
+ - `stdin` - (default) Load records from line-separated stdin
40
+ - `csv`
41
+ - `file` - load from a single file or directory (with a glob pattern)
42
+
43
+ ### Transformers
44
+ - `null` - (default) Don't do anything
45
+
46
+ ### Loaders
47
+ - `stdout` - (default) output transformed records to stdout
48
+ - `csv` - Load records to a csv file
49
+ - `table` - Output an ascii table of records. Useful for debugging.
50
+
51
+ ### Provider-specific importers
52
+
53
+ In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
54
+
55
+ - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
56
+ - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
57
+
58
+ To install any of these, run `gem install chronicle-PROVIDER`.
59
+
60
+ If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
61
+
62
+ I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
63
+
64
+ ### Full commands
21
65
 
22
66
  ```
23
- Commands:
24
- chronicle-etl help [COMMAND] # Describe available commands or one specific command
25
- chronicle-etl job # Runs an ETL job
67
+ $ chronicle-etl help
68
+
69
+ ALL COMMANDS
70
+ help # This help menu
71
+ connectors help [COMMAND] # Describe subcommands or one specific subcommand
72
+ connectors:install NAME # Installs connector NAME
73
+ connectors:list # Lists available connectors
74
+ jobs help [COMMAND] # Describe subcommands or one specific subcommand
75
+ jobs:create # Create a job
76
+ jobs:list # List all available jobs
77
+ jobs:run # Start a job
78
+ jobs:show # Show a job
26
79
  ```
27
80
 
28
81
  ### Job options
29
82
 
30
83
  ```
31
84
  Usage:
32
- chronicle-etl job
85
+ chronicle-etl jobs:run
33
86
 
34
87
  Options:
35
88
  -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
@@ -62,4 +115,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
62
115
 
63
116
  ## Code of Conduct
64
117
 
65
- Everyone interacting in the Chronicle::Etl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
118
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
@@ -7,8 +7,20 @@ require "chronicle/etl"
7
7
  # with your gem easier. You can also use a different console, if you like.
8
8
 
9
9
  # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
10
+ require "pry"
11
+ Pry.start
12
+
13
+ def reload!(print = true)
14
+ puts 'Reloading ...' if print
15
+ # Main project directory.
16
+ root_dir = File.expand_path('..', __dir__)
17
+ # Directories within the project that should be reloaded.
18
+ reload_dirs = %w{lib}
19
+ # Loop through and reload every file in all relevant project directories.
20
+ reload_dirs.each do |dir|
21
+ Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
22
+ end
23
+ # Return true when complete.
24
+ true
25
+ end
12
26
 
13
- require "irb"
14
- IRB.start(__FILE__)
@@ -5,7 +5,7 @@ require "chronicle/etl/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "chronicle-etl"
8
- spec.version = Chronicle::Etl::VERSION
8
+ spec.version = Chronicle::ETL::VERSION
9
9
  spec.authors = ["Andrew Louis"]
10
10
  spec.email = ["andrew@hyfen.net"]
11
11
 
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
38
38
 
39
39
  spec.add_dependency "thor", "~> 0.20"
40
40
  spec.add_dependency "colorize", "~> 0.8.1"
41
- spec.add_dependency "table_print"
42
- spec.add_dependency "ruby-progressbar", "~> 1.10"
41
+ spec.add_dependency "tty-table", "~> 0.11"
42
+ spec.add_dependency "tty-progressbar", "~> 0.17"
43
43
 
44
- spec.add_development_dependency "bundler", "~> 1.17"
45
- spec.add_development_dependency "rake", "~> 10.0"
46
- spec.add_development_dependency "rspec", "~> 3.0"
44
+ spec.add_development_dependency "bundler", "~> 2.1"
45
+ spec.add_development_dependency "rake", "~> 13.0"
46
+ spec.add_development_dependency "rspec", "~> 3.9"
47
47
  spec.add_development_dependency "pry-byebug", "~> 3.9"
48
+ spec.add_development_dependency 'runcom', '~> 6.2'
49
+ spec.add_development_dependency 'redcarpet', '~> 3.5'
48
50
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require "chronicle/etl/cli/main"
4
4
 
5
- Chronicle::Etl::CLI.start(ARGV)
5
+ Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,5 +1,9 @@
1
+ require_relative 'etl/catalog'
2
+ require_relative 'etl/config'
3
+ require_relative 'etl/exceptions'
1
4
  require_relative 'etl/extractors/extractor'
2
- require_relative 'etl/transformers/transformer'
3
5
  require_relative 'etl/loaders/loader'
4
- require_relative 'etl/utils/progress_bar_wrapper'
5
6
  require_relative 'etl/runner'
7
+ require_relative 'etl/transformers/transformer'
8
+ require_relative 'etl/utils/progress_bar'
9
+ require_relative 'etl/version'
@@ -0,0 +1,102 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Utility methods to catalogue which Extractor, Transformer, and
4
+ # Loader connector classes are available to chronicle-etl
5
+ module Catalog
6
+ PLUGINS = ['email', 'bash']
7
+ BUILTIN = {
8
+ extractor: ['stdin', 'json', 'csv', 'file'],
9
+ transformer: ['null'],
10
+ loader: ['stdout', 'csv', 'table', 'rest']
11
+ }.freeze
12
+
13
+ # Return which ETL connectors are available, both built in and externally-defined
14
+ def self.available_classes
15
+ # TODO: have a registry of plugins
16
+
17
+ # Attempt to load each chronicle plugin that we might know about so
18
+ # that we can later search for subclasses to build our list of
19
+ # available classes
20
+ PLUGINS.each do |plugin|
21
+ require "chronicle/#{plugin}"
22
+ rescue LoadError
23
+ # this will happen if the gem isn't available globally
24
+ end
25
+
26
+ parent_klasses = [
27
+ ::Chronicle::ETL::Extractor,
28
+ ::Chronicle::ETL::Transformer,
29
+ ::Chronicle::ETL::Loader
30
+ ]
31
+ klasses = []
32
+ parent_klasses.map do |parent|
33
+ klasses += ::ObjectSpace.each_object(::Class).select { |klass| klass < parent }
34
+ end
35
+
36
+ klasses.map do |klass|
37
+ {
38
+ name: klass.name,
39
+ built_in: klass.built_in?,
40
+ provider: klass.provider,
41
+ phase: klass.phase
42
+ }
43
+ end
44
+ end
45
+
46
+ # For a given connector identifier, return the class (either builtin, or from an
47
+ # external chronicle gem)
48
+ def self.identifier_to_klass(identifier:, phase:)
49
+ if BUILTIN[phase].include? identifier
50
+ load_builtin_klass(name: identifier, phase: phase)
51
+ else
52
+ provider, name = identifier.split(':')
53
+ name ||= ''
54
+ load_provider_klass(provider: provider, name: name, phase: phase)
55
+ end
56
+ end
57
+
58
+ # Returns whether a class is an Extractor, Transformer, or Loader
59
+ def phase
60
+ ancestors = self.ancestors
61
+ return :extractor if ancestors.include? Chronicle::ETL::Extractor
62
+ return :transformer if ancestors.include? Chronicle::ETL::Transformer
63
+ return :loader if ancestors.include? Chronicle::ETL::Loader
64
+ end
65
+
66
+ # Returns which third-party provider this connector is associated with
67
+ def provider
68
+ # TODO: needs better convention for a gem reporting its provider name
69
+ provider = to_s.split('::')[1].downcase
70
+ provider == 'etl' ? 'chronicle' : provider
71
+ end
72
+
73
+ # Returns whether this connector is a built-in one
74
+ def built_in?
75
+ to_s.include? 'Chronicle::ETL'
76
+ end
77
+
78
+ private
79
+
80
+ def self.load_builtin_klass(name:, phase:)
81
+ klass_str = "Chronicle::ETL::#{name.capitalize}#{phase.capitalize}"
82
+ begin
83
+ Object.const_get(klass_str)
84
+ rescue NameError => e
85
+ raise ConnectorNotAvailableError.new("Connector not found", name: name)
86
+ end
87
+ end
88
+
89
+ def self.load_provider_klass(name: '', phase:, provider:)
90
+ begin
91
+ require "chronicle/#{provider}"
92
+ klass_str = "Chronicle::#{provider.capitalize}::#{name.capitalize}#{phase.capitalize}"
93
+ Object.const_get(klass_str)
94
+ rescue LoadError => e
95
+ raise ProviderNotAvailableError.new("Provider '#{provider.capitalize}' could not be loaded", provider: provider)
96
+ rescue NameError => e
97
+ raise ProviderConnectorNotAvailableError.new("Connector '#{name}' in '#{provider}' could not be found", provider: provider, name: name)
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,32 @@
1
+ module Chronicle
2
+ module ETL
3
+ module CLI
4
+ # CLI commands for working with ETL connectors
5
+ class Connectors < SubcommandBase
6
+ default_task 'list'
7
+ namespace :connectors
8
+
9
+ desc "install NAME", "Installs connector NAME"
10
+ def install
11
+ puts "Installing"
12
+ end
13
+
14
+ desc "list", "Lists available connectors"
15
+ # Display all available connectors that chronicle-etl has access to
16
+ def list
17
+ klasses = Chronicle::ETL::Catalog.available_classes
18
+ klasses = klasses.sort_by do |a|
19
+ [a[:built_in].to_s, a[:provider], a[:phase]]
20
+ end
21
+
22
+ headers = klasses.first.keys.map do |key|
23
+ key.to_s.upcase.bold
24
+ end
25
+
26
+ table = TTY::Table.new(headers, klasses.map(&:values))
27
+ puts table.render(indent: 0, padding: [0, 2])
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end