chronicle-etl 0.2.4 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.gitignore +3 -0
  4. data/.rubocop.yml +31 -1
  5. data/Guardfile +7 -0
  6. data/README.md +21 -14
  7. data/Rakefile +4 -2
  8. data/chronicle-etl.gemspec +18 -10
  9. data/exe/chronicle-etl +1 -1
  10. data/lib/chronicle/etl/cli/connectors.rb +53 -7
  11. data/lib/chronicle/etl/cli/jobs.rb +59 -24
  12. data/lib/chronicle/etl/cli/main.rb +18 -16
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +7 -0
  15. data/lib/chronicle/etl/config.rb +1 -1
  16. data/lib/chronicle/etl/configurable.rb +150 -0
  17. data/lib/chronicle/etl/exceptions.rb +14 -1
  18. data/lib/chronicle/etl/extraction.rb +12 -0
  19. data/lib/chronicle/etl/extractors/csv_extractor.rb +32 -31
  20. data/lib/chronicle/etl/extractors/extractor.rb +25 -13
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +17 -32
  22. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
  23. data/lib/chronicle/etl/extractors/json_extractor.rb +37 -0
  24. data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
  25. data/lib/chronicle/etl/job.rb +30 -29
  26. data/lib/chronicle/etl/job_definition.rb +45 -7
  27. data/lib/chronicle/etl/job_log.rb +10 -0
  28. data/lib/chronicle/etl/job_logger.rb +23 -20
  29. data/lib/chronicle/etl/loaders/csv_loader.rb +5 -1
  30. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  31. data/lib/chronicle/etl/loaders/rest_loader.rb +9 -5
  32. data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
  33. data/lib/chronicle/etl/loaders/table_loader.rb +51 -7
  34. data/lib/chronicle/etl/logger.rb +48 -0
  35. data/lib/chronicle/etl/models/attachment.rb +14 -0
  36. data/lib/chronicle/etl/models/base.rb +23 -7
  37. data/lib/chronicle/etl/models/entity.rb +9 -3
  38. data/lib/chronicle/etl/registry/connector_registration.rb +62 -0
  39. data/lib/chronicle/etl/registry/registry.rb +52 -0
  40. data/lib/chronicle/etl/registry/self_registering.rb +25 -0
  41. data/lib/chronicle/etl/runner.rb +58 -7
  42. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
  43. data/lib/chronicle/etl/serializers/serializer.rb +27 -0
  44. data/lib/chronicle/etl/transformers/image_file_transformer.rb +247 -0
  45. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
  46. data/lib/chronicle/etl/transformers/transformer.rb +41 -10
  47. data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
  48. data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
  49. data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
  50. data/lib/chronicle/etl/version.rb +1 -1
  51. data/lib/chronicle/etl.rb +8 -2
  52. metadata +146 -34
  53. data/.ruby-version +0 -1
  54. data/Gemfile.lock +0 -91
  55. data/lib/chronicle/etl/catalog.rb +0 -108
  56. data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7a02a2377d0e8d4135f3b931bc73641eac28058d736d9c1dba0a97107c1d4c0e
4
- data.tar.gz: 810d5bff80e852fa08ef9824ed6b313aa309bb69e84228bc1fbb7595069e043b
3
+ metadata.gz: 5fd411a9a41a645b85780230c79b09f361e121d0e8ca7f3270ca8eba55a76ca8
4
+ data.tar.gz: c09053715910ab4f027fbdc3a5b7d10c042eee962f7fa93c6571ce8359f51009
5
5
  SHA512:
6
- metadata.gz: 0d5fbea3c63349bb3f566e6137755f6cc8a4060d0e401abf5a0e7d8b44a4c4278089c10ffb8bb9cf2d783a238449140e5e54d90f3ad158aa362c6335eedca5aa
7
- data.tar.gz: bf6fa83b1d5e55760e62d3cc090bf09bb69a7c761ae4a9358fb4d82192c7efc7500b6db361f39adac3581982862654aa4603a78dfbb3aed53b51d01137ffd736
6
+ metadata.gz: 2c9ec14b6c0a51f1c5ec77ee8d9a7f016d16bdc35db5634f9fa5d38aabc30dec201cd4b8bef06a31b86773a0c1cda2d271d7008dcb247a86d956c094919f3c0f
7
+ data.tar.gz: 0dca41e1654e5b2b98a148f853492a67126cdac767000b3c5f97c5c8ff88b77464e17a2fab38b72c1f014f3515c911e5f3f391eaf68d64e73dcfcff5d8e6cb6a
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ master ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -7,6 +7,9 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
 
10
+ # https://yehudakatz.com/2010/12/16/clarifying-the-roles-of-the-gemspec-and-gemfile/
11
+ Gemfile.lock
12
+
10
13
  # rspec failure tracking
11
14
  .rspec_status
12
15
  .DS_Store
data/.rubocop.yml CHANGED
@@ -1,8 +1,38 @@
1
1
  AllCops:
2
2
  EnabledByDefault: true
3
+ TargetRubyVersion: 2.7
4
+
5
+ Style/FrozenStringLiteralComment:
6
+ SafeAutoCorrect: true
3
7
 
4
8
  Style/StringLiterals:
5
9
  Enabled: false
6
10
 
11
+ Layout/MultilineAssignmentLayout:
12
+ Enabled: false
13
+
14
+ Layout/RedundantLineBreak:
15
+ Enabled: false
16
+
7
17
  Style/MethodCallWithArgsParentheses:
8
- Enabled: false
18
+ Enabled: false
19
+
20
+ Style/MethodCalledOnDoEndBlock:
21
+ Exclude:
22
+ - 'spec/**/*'
23
+
24
+ Style/OpenStructUse:
25
+ Enabled: false
26
+
27
+ Style/Copyright:
28
+ Enabled: false
29
+
30
+ Style/SymbolArray:
31
+ EnforcedStyle: brackets
32
+
33
+ Style/WordArray:
34
+ EnforcedStyle: brackets
35
+
36
+ Lint/ConstantResolution:
37
+ Enabled: false
38
+
data/Guardfile ADDED
@@ -0,0 +1,7 @@
1
+ guard :rspec, cmd: "bundle exec rspec" do
2
+ require "guard/rspec/dsl"
3
+
4
+ watch(%r{^spec/.+_spec\.rb$})
5
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
+ watch('spec/spec_helper.rb') { "spec" }
7
+ end
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Chronicle::ETL
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
4
4
 
5
5
  Chronicle ETL is a utility that helps you archive and process personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
6
6
 
@@ -31,6 +31,9 @@ Connectors are available to read, process, and load data from different formats
31
31
  ```bash
32
32
  # List all available connectors
33
33
  $ chronicle-etl connectors:list
34
+
35
+ # Install a connector
36
+ $ chronicle-etl connectors:install imessage
34
37
  ```
35
38
 
36
39
  Built in connectors:
@@ -44,16 +47,18 @@ Built in connectors:
44
47
  - `null` - (default) Don't do anything
45
48
 
46
49
  ### Loaders
47
- - `stdout` - (default) output transformed records to stdount
50
+ - `stdout` - (default) output records to stdout serialized as JSON
48
51
  - `csv` - Load records to a csv file
52
+ - `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
49
53
  - `table` - Output an ascii table of records. Useful for debugging.
50
54
 
51
55
  ### Provider-specific importers
52
56
 
53
57
  In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
54
58
 
55
- - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files. Transformers for chronicle schema
56
- - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
59
+ - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
60
+ - [shell](https://github.com/chronicle-app/chronicle-shell). Extract shell history from Bash or Zsh
61
+ - [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
57
62
 
58
63
  To install any of these, run `gem install chronicle-PROVIDER`.
59
64
 
@@ -61,7 +66,7 @@ If you don't want to use the available rubygem importers, `chronicle-etl` can us
61
66
 
62
67
  I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
63
68
 
64
- ### Full commands
69
+ ## Full commands
65
70
 
66
71
  ```
67
72
  $ chronicle-etl help
@@ -75,26 +80,28 @@ ALL COMMANDS
75
80
  jobs:create # Create a job
76
81
  jobs:list # List all available jobs
77
82
  jobs:run # Start a job
78
- jobs:show # Show a job
83
+ jobs:show # Show details about a job
79
84
  ```
80
85
 
81
- ### Job options
86
+ ### Running a job
82
87
 
83
88
  ```
84
89
  Usage:
85
90
  chronicle-etl jobs:run
86
91
 
87
92
  Options:
88
- -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
89
- # Default: stdin
93
+ [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
94
+ # Default: info
95
+ -v, [--verbose], [--no-verbose] # Set log level to verbose
96
+ [--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
97
+ -e, [--extractor=extractor-name] # Extractor class. Default: stdin
90
98
  [--extractor-opts=key:value] # Extractor options
91
- -t, [--transformer=transformer-name] # Transformer class (available: null)
92
- # Default: null
99
+ -t, [--transformer=transformer-name] # Transformer class. Default: null
93
100
  [--transformer-opts=key:value] # Transformer options
94
- -l, [--loader=loader-name] # Loader class (available: stdout, csv, table)
95
- # Default: stdout
101
+ -l, [--loader=loader-name] # Loader class. Default: stdout
96
102
  [--loader-opts=key:value] # Loader options
97
- -j, [--job=JOB] # Job configuration file
103
+ -j, [--name=NAME] # Job configuration name
104
+
98
105
 
99
106
  Runs an ETL job
100
107
  ```
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rspec/core/rake_task"
3
-
4
3
  RSpec::Core::RakeTask.new(:spec)
5
4
 
6
- task :default => :spec
5
+ require 'yard'
6
+ YARD::Rake::YardocTask.new
7
+
8
+ task default: :spec
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
17
17
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
18
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
19
  if spec.respond_to?(:metadata)
20
- # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
21
 
22
22
  spec.metadata["homepage_uri"] = spec.homepage
23
23
  spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
24
+ spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
25
25
  else
26
26
  raise "RubyGems 2.0 or newer is required to protect against " \
27
27
  "public gem pushes."
@@ -35,19 +35,27 @@ Gem::Specification.new do |spec|
35
35
  spec.bindir = "exe"
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
+ spec.required_ruby_version = ">= 2.7"
38
39
 
39
- spec.add_dependency "thor", "~> 0.20"
40
+ spec.add_dependency "activesupport", "~> 7.0"
41
+ spec.add_dependency "chronic_duration", "~> 0.10.6"
40
42
  spec.add_dependency "colorize", "~> 0.8.1"
41
- spec.add_dependency "tty-table", "~> 0.11"
43
+ spec.add_dependency "marcel", "~> 1.0.2"
44
+ spec.add_dependency "mini_exiftool", "~> 2.10"
45
+ spec.add_dependency "nokogiri", "~> 1.13"
46
+ spec.add_dependency "runcom", ">= 6.0"
47
+ spec.add_dependency "sequel", "~> 5.35"
48
+ spec.add_dependency "sqlite3", "~> 1.4"
49
+ spec.add_dependency "thor", "~> 1.2"
42
50
  spec.add_dependency "tty-progressbar", "~> 0.17"
43
- spec.add_dependency 'sequel', '~> 5.35'
44
- spec.add_dependency 'deep_merge', '~> 1.2'
51
+ spec.add_dependency "tty-table", "~> 0.11"
45
52
 
46
53
  spec.add_development_dependency "bundler", "~> 2.1"
54
+ spec.add_development_dependency "pry-byebug", "~> 3.9"
47
55
  spec.add_development_dependency "rake", "~> 13.0"
48
56
  spec.add_development_dependency "rspec", "~> 3.9"
49
- spec.add_development_dependency "pry-byebug", "~> 3.9"
50
- spec.add_development_dependency 'runcom', '~> 6.2'
51
- spec.add_development_dependency 'redcarpet', '~> 3.5'
52
- spec.add_development_dependency 'sqlite3', '~> 1.4'
57
+ spec.add_development_dependency "simplecov", "~> 0.21"
58
+ spec.add_development_dependency "guard-rspec", "~> 4.7.3"
59
+ spec.add_development_dependency "yard", "~> 0.9.7"
60
+ spec.add_development_dependency "rubocop", "~> 1.25.1"
53
61
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli/main"
3
+ require "chronicle/etl/cli"
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  module CLI
@@ -7,23 +9,67 @@ module Chronicle
7
9
  namespace :connectors
8
10
 
9
11
  desc "install NAME", "Installs connector NAME"
10
- def install
11
- puts "Installing"
12
+ def install(name)
13
+ Chronicle::ETL::Registry.install_connector(name)
12
14
  end
13
15
 
14
16
  desc "list", "Lists available connectors"
15
17
  # Display all available connectors that chronicle-etl has access to
16
18
  def list
17
- klasses = Chronicle::ETL::Catalog.available_classes
18
- klasses = klasses.sort_by do |a|
19
- [a[:built_in].to_s, a[:provider], a[:phase]]
19
+ Chronicle::ETL::Registry.load_all!
20
+
21
+ connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
22
+ {
23
+ identifier: connector_registration.identifier,
24
+ phase: connector_registration.phase,
25
+ description: connector_registration.descriptive_phrase,
26
+ provider: connector_registration.provider,
27
+ core: connector_registration.built_in? ? '✓' : '',
28
+ class: connector_registration.klass_name
29
+ }
30
+ end
31
+
32
+ connector_info = connector_info.sort_by do |a|
33
+ [a[:core].to_s, a[:provider], a[:phase], a[:identifier]]
20
34
  end
21
35
 
22
- headers = klasses.first.keys.map do |key|
36
+ headers = connector_info.first.keys.map do |key|
23
37
  key.to_s.upcase.bold
24
38
  end
25
39
 
26
- table = TTY::Table.new(headers, klasses.map(&:values))
40
+ table = TTY::Table.new(headers, connector_info.map(&:values))
41
+ puts table.render(indent: 0, padding: [0, 2])
42
+ end
43
+
44
+ desc "show PHASE IDENTIFIER", "Show information about a connector"
45
+ def show(phase, identifier)
46
+ unless ['extractor', 'transformer', 'loader'].include?(phase)
47
+ puts "phase argument must be one of: [extractor, transformer, loader]"
48
+ return
49
+ end
50
+
51
+ begin
52
+ connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
53
+ rescue Chronicle::ETL::ConnectorNotAvailableError
54
+ puts "Could not find #{phase} #{identifier}"
55
+ return
56
+ end
57
+
58
+ puts connector.klass.to_s.bold
59
+ puts " #{connector.descriptive_phrase}"
60
+ puts
61
+ puts "OPTIONS"
62
+
63
+ headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
64
+
65
+ settings = connector.klass.settings.map do |name, setting|
66
+ [
67
+ name,
68
+ setting.default,
69
+ setting.required ? 'yes' : 'no'
70
+ ]
71
+ end
72
+ table = TTY::Table.new(headers, settings)
27
73
  puts table.render(indent: 0, padding: [0, 2])
28
74
  end
29
75
  end
@@ -1,4 +1,5 @@
1
1
  require 'pp'
2
+
2
3
  module Chronicle
3
4
  module ETL
4
5
  module CLI
@@ -7,16 +8,33 @@ module Chronicle
7
8
  default_task "start"
8
9
  namespace :jobs
9
10
 
10
- class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
11
+ class_option :name, aliases: '-j', desc: 'Job configuration name'
12
+
13
+ class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
11
14
  class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
15
+ class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
13
16
  class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
17
+ class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
15
18
  class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- class_option :name, aliases: '-j', desc: 'Job configuration name'
17
19
 
18
- map run: :start # Thor doesn't like `run` as a command name
20
+ # This is an array to deal with shell globbing
21
+ class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
22
+ class_option :since, desc: "Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options", banner: 'DATE'
23
+ class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
24
+ class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
25
+
26
+ class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
27
+ class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
28
+
29
+ class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
30
+ class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
31
+
32
+ # Thor doesn't like `run` as a command name
33
+ map run: :start
19
34
  desc "run", "Start a job"
35
+ option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
36
+ option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
37
+ option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
20
38
  long_desc <<-LONG_DESC
21
39
  This will run an ETL job. Each job needs three parts:
22
40
 
@@ -24,23 +42,17 @@ module Chronicle
24
42
 
25
43
  2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
26
44
 
27
- 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
45
+ 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
28
46
 
29
47
  If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--name NAME`
30
48
  LONG_DESC
31
49
  # Run an ETL job
32
50
  def start
51
+ setup_log_level
33
52
  job_definition = build_job_definition(options)
34
53
  job = Chronicle::ETL::Job.new(job_definition)
35
54
  runner = Chronicle::ETL::Runner.new(job)
36
55
  runner.run!
37
- rescue Chronicle::ETL::ProviderNotAvailableError => e
38
- warn(e.message.red)
39
- warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
40
- exit(false)
41
- rescue Chronicle::ETL::ConnectorNotAvailableError => e
42
- warn(e.message.red)
43
- exit(false)
44
56
  end
45
57
 
46
58
  desc "create", "Create a job"
@@ -48,14 +60,13 @@ LONG_DESC
48
60
  def create
49
61
  job_definition = build_job_definition(options)
50
62
  path = File.join('chronicle', 'etl', 'jobs', options[:name])
51
- Chronicle::ETL::Config.write(path, job_definition)
63
+ Chronicle::ETL::Config.write(path, job_definition.definition)
52
64
  end
53
65
 
54
66
  desc "show", "Show details about a job"
55
67
  # Show an ETL job
56
68
  def show
57
- job_config = build_job_definition(options)
58
- pp job_config
69
+ puts Chronicle::ETL::Job.new(build_job_definition(options))
59
70
  end
60
71
 
61
72
  desc "list", "List all available jobs"
@@ -73,7 +84,7 @@ LONG_DESC
73
84
  [job, extractor, transformer, loader]
74
85
  end
75
86
 
76
- headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
87
+ headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
77
88
 
78
89
  table = TTY::Table.new(headers, job_details)
79
90
  puts table.render(indent: 0, padding: [0, 2])
@@ -81,11 +92,20 @@ LONG_DESC
81
92
 
82
93
  private
83
94
 
95
+ def setup_log_level
96
+ if options[:verbose]
97
+ Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
98
+ elsif options[:log_level]
99
+ level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
100
+ Chronicle::ETL::Logger.log_level = level
101
+ end
102
+ end
103
+
84
104
  # Create job definition by reading config file and then overwriting with flag options
85
105
  def build_job_definition(options)
86
106
  definition = Chronicle::ETL::JobDefinition.new
87
- definition.add_config(process_flag_options(options))
88
107
  definition.add_config(load_job_config(options[:name]))
108
+ definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
89
109
  definition
90
110
  end
91
111
 
@@ -95,19 +115,34 @@ LONG_DESC
95
115
 
96
116
  # Takes flag options and turns them into a runner config
97
117
  def process_flag_options options
118
+ extractor_options = options[:'extractor-opts'].merge({
119
+ filename: (options[:input] if options[:input].any?),
120
+ since: options[:since],
121
+ until: options[:until],
122
+ limit: options[:limit],
123
+ }.compact)
124
+
125
+ transformer_options = options[:'transformer-opts']
126
+
127
+ loader_options = options[:'loader-opts'].merge({
128
+ output: options[:output],
129
+ fields: options[:fields]
130
+ }.compact)
131
+
98
132
  {
133
+ dry_run: options[:dry_run],
99
134
  extractor: {
100
135
  name: options[:extractor],
101
- options: options[:'extractor-opts']
102
- },
136
+ options: extractor_options
137
+ }.compact,
103
138
  transformer: {
104
139
  name: options[:transformer],
105
- options: options[:'transformer-opts']
106
- },
140
+ options: transformer_options
141
+ }.compact,
107
142
  loader: {
108
143
  name: options[:loader],
109
- options: options[:'loader-opts']
110
- }
144
+ options: loader_options
145
+ }.compact
111
146
  }
112
147
  end
113
148
  end
@@ -1,17 +1,10 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
1
  require 'colorize'
4
2
 
5
- require 'chronicle/etl/cli/subcommand_base'
6
- require 'chronicle/etl/cli/connectors'
7
- require 'chronicle/etl/cli/jobs'
8
-
9
3
  module Chronicle
10
4
  module ETL
11
5
  module CLI
12
6
  # Main entrypoint for CLI app
13
- class Main < Thor
14
- class_option "verbose", type: :boolean, default: false
7
+ class Main < ::Thor
15
8
  default_task "jobs"
16
9
 
17
10
  desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
@@ -22,10 +15,6 @@ module Chronicle
22
15
 
23
16
  # Entrypoint for the CLI
24
17
  def self.start(given_args = ARGV, config = {})
25
- if given_args.none?
26
- abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
27
- end
28
-
29
18
  # Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
30
19
  if given_args.any? && given_args[0].include?(':')
31
20
  commands = given_args.shift.split(':')
@@ -35,10 +24,20 @@ module Chronicle
35
24
  super(given_args, config)
36
25
  end
37
26
 
27
+ def self.exit_on_failure?
28
+ true
29
+ end
30
+
31
+ desc "version", "Show version"
32
+ map %w(--version -v) => :version
33
+ def version
34
+ shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
35
+ end
36
+
38
37
  # Displays help options for chronicle-etl
39
38
  def help(meth = nil, subcommand = false)
40
39
  if meth && !respond_to?(meth)
41
- klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
40
+ klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
42
41
  klass.start(['-h', task].compact, shell: shell)
43
42
  else
44
43
  shell.say "ABOUT".bold
@@ -52,14 +51,14 @@ module Chronicle
52
51
  shell.say " $ chronicle-etl connectors:list"
53
52
  shell.say
54
53
  shell.say " Run a simple job:".italic.light_black
55
- shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
54
+ shell.say " $ chronicle-etl jobs:run --extractor stdin --transformer null --loader stdout"
56
55
  shell.say
57
56
  shell.say " Show full job options:".italic.light_black
58
- shell.say " $ chronicle-etl jobs help start"
57
+ shell.say " $ chronicle-etl jobs help run"
59
58
 
60
59
  list = []
61
60
 
62
- Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
61
+ ::Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
63
62
  list += thor_class.printable_tasks(false)
64
63
  end
65
64
  list.sort! { |a, b| a[0] <=> b[0] }
@@ -72,6 +71,9 @@ module Chronicle
72
71
  shell.say "VERSION".bold
73
72
  shell.say " #{Chronicle::ETL::VERSION}"
74
73
  shell.say
74
+ shell.say " Display current version:".italic.light_black
75
+ shell.say " $ chronicle-etl --version"
76
+ shell.say
75
77
  shell.say "FULL DOCUMENTATION".bold
76
78
  shell.say " https://github.com/chronicle-app/chronicle-etl".blue
77
79
  shell.say
@@ -2,11 +2,11 @@ module Chronicle
2
2
  module ETL
3
3
  module CLI
4
4
  # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
- class SubcommandBase < Thor
5
+ class SubcommandBase < ::Thor
6
6
  # Print usage instructions for a subcommand
7
7
  def self.help(shell, subcommand = false)
8
8
  list = printable_commands(true, subcommand)
9
- Thor::Util.thor_classes_in(self).each do |klass|
9
+ ::Thor::Util.thor_classes_in(self).each do |klass|
10
10
  list += klass.printable_commands(false)
11
11
  end
12
12
  list.sort! { |a, b| a[0] <=> b[0] }
@@ -0,0 +1,7 @@
1
+ require 'thor'
2
+ require 'chronicle/etl'
3
+
4
+ require 'chronicle/etl/cli/subcommand_base'
5
+ require 'chronicle/etl/cli/connectors'
6
+ require 'chronicle/etl/cli/jobs'
7
+ require 'chronicle/etl/cli/main'
@@ -30,7 +30,7 @@ module Chronicle
30
30
  end
31
31
  end
32
32
 
33
- # Returns all available credentials available in ~/.config/chronilce/etl/credenetials/*.yml
33
+ # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
34
34
  def available_credentials
35
35
  job_directory = Runcom::Config.new('chronicle/etl/credentials').current
36
36
  Dir.glob(File.join(job_directory, "*.yml")).map do |filename|