chronicle-etl 0.2.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.gitignore +3 -0
  4. data/.rubocop.yml +31 -1
  5. data/Guardfile +7 -0
  6. data/README.md +21 -14
  7. data/Rakefile +4 -2
  8. data/chronicle-etl.gemspec +18 -10
  9. data/exe/chronicle-etl +1 -1
  10. data/lib/chronicle/etl/cli/connectors.rb +53 -7
  11. data/lib/chronicle/etl/cli/jobs.rb +59 -24
  12. data/lib/chronicle/etl/cli/main.rb +18 -16
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +7 -0
  15. data/lib/chronicle/etl/config.rb +1 -1
  16. data/lib/chronicle/etl/configurable.rb +150 -0
  17. data/lib/chronicle/etl/exceptions.rb +14 -1
  18. data/lib/chronicle/etl/extraction.rb +12 -0
  19. data/lib/chronicle/etl/extractors/csv_extractor.rb +32 -31
  20. data/lib/chronicle/etl/extractors/extractor.rb +25 -13
  21. data/lib/chronicle/etl/extractors/file_extractor.rb +17 -32
  22. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +104 -0
  23. data/lib/chronicle/etl/extractors/json_extractor.rb +37 -0
  24. data/lib/chronicle/etl/extractors/stdin_extractor.rb +6 -1
  25. data/lib/chronicle/etl/job.rb +30 -29
  26. data/lib/chronicle/etl/job_definition.rb +45 -7
  27. data/lib/chronicle/etl/job_log.rb +10 -0
  28. data/lib/chronicle/etl/job_logger.rb +23 -20
  29. data/lib/chronicle/etl/loaders/csv_loader.rb +5 -1
  30. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  31. data/lib/chronicle/etl/loaders/rest_loader.rb +9 -5
  32. data/lib/chronicle/etl/loaders/stdout_loader.rb +6 -1
  33. data/lib/chronicle/etl/loaders/table_loader.rb +51 -7
  34. data/lib/chronicle/etl/logger.rb +48 -0
  35. data/lib/chronicle/etl/models/attachment.rb +14 -0
  36. data/lib/chronicle/etl/models/base.rb +23 -7
  37. data/lib/chronicle/etl/models/entity.rb +9 -3
  38. data/lib/chronicle/etl/registry/connector_registration.rb +62 -0
  39. data/lib/chronicle/etl/registry/registry.rb +52 -0
  40. data/lib/chronicle/etl/registry/self_registering.rb +25 -0
  41. data/lib/chronicle/etl/runner.rb +58 -7
  42. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +25 -0
  43. data/lib/chronicle/etl/serializers/serializer.rb +27 -0
  44. data/lib/chronicle/etl/transformers/image_file_transformer.rb +247 -0
  45. data/lib/chronicle/etl/transformers/null_transformer.rb +10 -1
  46. data/lib/chronicle/etl/transformers/transformer.rb +41 -10
  47. data/lib/chronicle/etl/utils/binary_attachments.rb +21 -0
  48. data/lib/chronicle/etl/utils/progress_bar.rb +3 -1
  49. data/lib/chronicle/etl/utils/text_recognition.rb +15 -0
  50. data/lib/chronicle/etl/version.rb +1 -1
  51. data/lib/chronicle/etl.rb +8 -2
  52. metadata +146 -34
  53. data/.ruby-version +0 -1
  54. data/Gemfile.lock +0 -91
  55. data/lib/chronicle/etl/catalog.rb +0 -108
  56. data/lib/chronicle/etl/utils/jsonapi.rb +0 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7a02a2377d0e8d4135f3b931bc73641eac28058d736d9c1dba0a97107c1d4c0e
4
- data.tar.gz: 810d5bff80e852fa08ef9824ed6b313aa309bb69e84228bc1fbb7595069e043b
3
+ metadata.gz: 5fd411a9a41a645b85780230c79b09f361e121d0e8ca7f3270ca8eba55a76ca8
4
+ data.tar.gz: c09053715910ab4f027fbdc3a5b7d10c042eee962f7fa93c6571ce8359f51009
5
5
  SHA512:
6
- metadata.gz: 0d5fbea3c63349bb3f566e6137755f6cc8a4060d0e401abf5a0e7d8b44a4c4278089c10ffb8bb9cf2d783a238449140e5e54d90f3ad158aa362c6335eedca5aa
7
- data.tar.gz: bf6fa83b1d5e55760e62d3cc090bf09bb69a7c761ae4a9358fb4d82192c7efc7500b6db361f39adac3581982862654aa4603a78dfbb3aed53b51d01137ffd736
6
+ metadata.gz: 2c9ec14b6c0a51f1c5ec77ee8d9a7f016d16bdc35db5634f9fa5d38aabc30dec201cd4b8bef06a31b86773a0c1cda2d271d7008dcb247a86d956c094919f3c0f
7
+ data.tar.gz: 0dca41e1654e5b2b98a148f853492a67126cdac767000b3c5f97c5c8ff88b77464e17a2fab38b72c1f014f3515c911e5f3f391eaf68d64e73dcfcff5d8e6cb6a
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ master ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -7,6 +7,9 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
 
10
+ # https://yehudakatz.com/2010/12/16/clarifying-the-roles-of-the-gemspec-and-gemfile/
11
+ Gemfile.lock
12
+
10
13
  # rspec failure tracking
11
14
  .rspec_status
12
15
  .DS_Store
data/.rubocop.yml CHANGED
@@ -1,8 +1,38 @@
1
1
  AllCops:
2
2
  EnabledByDefault: true
3
+ TargetRubyVersion: 2.7
4
+
5
+ Style/FrozenStringLiteralComment:
6
+ SafeAutoCorrect: true
3
7
 
4
8
  Style/StringLiterals:
5
9
  Enabled: false
6
10
 
11
+ Layout/MultilineAssignmentLayout:
12
+ Enabled: false
13
+
14
+ Layout/RedundantLineBreak:
15
+ Enabled: false
16
+
7
17
  Style/MethodCallWithArgsParentheses:
8
- Enabled: false
18
+ Enabled: false
19
+
20
+ Style/MethodCalledOnDoEndBlock:
21
+ Exclude:
22
+ - 'spec/**/*'
23
+
24
+ Style/OpenStructUse:
25
+ Enabled: false
26
+
27
+ Style/Copyright:
28
+ Enabled: false
29
+
30
+ Style/SymbolArray:
31
+ EnforcedStyle: brackets
32
+
33
+ Style/WordArray:
34
+ EnforcedStyle: brackets
35
+
36
+ Lint/ConstantResolution:
37
+ Enabled: false
38
+
data/Guardfile ADDED
@@ -0,0 +1,7 @@
1
+ guard :rspec, cmd: "bundle exec rspec" do
2
+ require "guard/rspec/dsl"
3
+
4
+ watch(%r{^spec/.+_spec\.rb$})
5
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
+ watch('spec/spec_helper.rb') { "spec" }
7
+ end
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Chronicle::ETL
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
4
4
 
5
5
  Chronicle ETL is a utility that helps you archive and process personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
6
6
 
@@ -31,6 +31,9 @@ Connectors are available to read, process, and load data from different formats
31
31
  ```bash
32
32
  # List all available connectors
33
33
  $ chronicle-etl connectors:list
34
+
35
+ # Install a connector
36
+ $ chronicle-etl connectors:install imessage
34
37
  ```
35
38
 
36
39
  Built in connectors:
@@ -44,16 +47,18 @@ Built in connectors:
44
47
  - `null` - (default) Don't do anything
45
48
 
46
49
  ### Loaders
47
- - `stdout` - (default) output transformed records to stdount
50
+ - `stdout` - (default) output records to stdout serialized as JSON
48
51
  - `csv` - Load records to a csv file
52
+ - `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
49
53
  - `table` - Output an ascii table of records. Useful for debugging.
50
54
 
51
55
  ### Provider-specific importers
52
56
 
53
57
  In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
54
58
 
55
- - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files. Transformers for chronicle schema
56
- - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
59
+ - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
60
+ - [shell](https://github.com/chronicle-app/chronicle-shell). Extract shell history from Bash or Zsh
61
+ - [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
57
62
 
58
63
  To install any of these, run `gem install chronicle-PROVIDER`.
59
64
 
@@ -61,7 +66,7 @@ If you don't want to use the available rubygem importers, `chronicle-etl` can us
61
66
 
62
67
  I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
63
68
 
64
- ### Full commands
69
+ ## Full commands
65
70
 
66
71
  ```
67
72
  $ chronicle-etl help
@@ -75,26 +80,28 @@ ALL COMMANDS
75
80
  jobs:create # Create a job
76
81
  jobs:list # List all available jobs
77
82
  jobs:run # Start a job
78
- jobs:show # Show a job
83
+ jobs:show # Show details about a job
79
84
  ```
80
85
 
81
- ### Job options
86
+ ### Running a job
82
87
 
83
88
  ```
84
89
  Usage:
85
90
  chronicle-etl jobs:run
86
91
 
87
92
  Options:
88
- -e, [--extractor=extractor-name] # Extractor class (available: stdin, csv, file)
89
- # Default: stdin
93
+ [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
94
+ # Default: info
95
+ -v, [--verbose], [--no-verbose] # Set log level to verbose
96
+ [--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
97
+ -e, [--extractor=extractor-name] # Extractor class. Default: stdin
90
98
  [--extractor-opts=key:value] # Extractor options
91
- -t, [--transformer=transformer-name] # Transformer class (available: null)
92
- # Default: null
99
+ -t, [--transformer=transformer-name] # Transformer class. Default: null
93
100
  [--transformer-opts=key:value] # Transformer options
94
- -l, [--loader=loader-name] # Loader class (available: stdout, csv, table)
95
- # Default: stdout
101
+ -l, [--loader=loader-name] # Loader class. Default: stdout
96
102
  [--loader-opts=key:value] # Loader options
97
- -j, [--job=JOB] # Job configuration file
103
+ -j, [--name=NAME] # Job configuration name
104
+
98
105
 
99
106
  Runs an ETL job
100
107
  ```
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rspec/core/rake_task"
3
-
4
3
  RSpec::Core::RakeTask.new(:spec)
5
4
 
6
- task :default => :spec
5
+ require 'yard'
6
+ YARD::Rake::YardocTask.new
7
+
8
+ task default: :spec
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
17
17
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
18
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
19
  if spec.respond_to?(:metadata)
20
- # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
21
 
22
22
  spec.metadata["homepage_uri"] = spec.homepage
23
23
  spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
24
+ spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
25
25
  else
26
26
  raise "RubyGems 2.0 or newer is required to protect against " \
27
27
  "public gem pushes."
@@ -35,19 +35,27 @@ Gem::Specification.new do |spec|
35
35
  spec.bindir = "exe"
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
+ spec.required_ruby_version = ">= 2.7"
38
39
 
39
- spec.add_dependency "thor", "~> 0.20"
40
+ spec.add_dependency "activesupport", "~> 7.0"
41
+ spec.add_dependency "chronic_duration", "~> 0.10.6"
40
42
  spec.add_dependency "colorize", "~> 0.8.1"
41
- spec.add_dependency "tty-table", "~> 0.11"
43
+ spec.add_dependency "marcel", "~> 1.0.2"
44
+ spec.add_dependency "mini_exiftool", "~> 2.10"
45
+ spec.add_dependency "nokogiri", "~> 1.13"
46
+ spec.add_dependency "runcom", ">= 6.0"
47
+ spec.add_dependency "sequel", "~> 5.35"
48
+ spec.add_dependency "sqlite3", "~> 1.4"
49
+ spec.add_dependency "thor", "~> 1.2"
42
50
  spec.add_dependency "tty-progressbar", "~> 0.17"
43
- spec.add_dependency 'sequel', '~> 5.35'
44
- spec.add_dependency 'deep_merge', '~> 1.2'
51
+ spec.add_dependency "tty-table", "~> 0.11"
45
52
 
46
53
  spec.add_development_dependency "bundler", "~> 2.1"
54
+ spec.add_development_dependency "pry-byebug", "~> 3.9"
47
55
  spec.add_development_dependency "rake", "~> 13.0"
48
56
  spec.add_development_dependency "rspec", "~> 3.9"
49
- spec.add_development_dependency "pry-byebug", "~> 3.9"
50
- spec.add_development_dependency 'runcom', '~> 6.2'
51
- spec.add_development_dependency 'redcarpet', '~> 3.5'
52
- spec.add_development_dependency 'sqlite3', '~> 1.4'
57
+ spec.add_development_dependency "simplecov", "~> 0.21"
58
+ spec.add_development_dependency "guard-rspec", "~> 4.7.3"
59
+ spec.add_development_dependency "yard", "~> 0.9.7"
60
+ spec.add_development_dependency "rubocop", "~> 1.25.1"
53
61
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli/main"
3
+ require "chronicle/etl/cli"
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  module CLI
@@ -7,23 +9,67 @@ module Chronicle
7
9
  namespace :connectors
8
10
 
9
11
  desc "install NAME", "Installs connector NAME"
10
- def install
11
- puts "Installing"
12
+ def install(name)
13
+ Chronicle::ETL::Registry.install_connector(name)
12
14
  end
13
15
 
14
16
  desc "list", "Lists available connectors"
15
17
  # Display all available connectors that chronicle-etl has access to
16
18
  def list
17
- klasses = Chronicle::ETL::Catalog.available_classes
18
- klasses = klasses.sort_by do |a|
19
- [a[:built_in].to_s, a[:provider], a[:phase]]
19
+ Chronicle::ETL::Registry.load_all!
20
+
21
+ connector_info = Chronicle::ETL::Registry.connectors.map do |connector_registration|
22
+ {
23
+ identifier: connector_registration.identifier,
24
+ phase: connector_registration.phase,
25
+ description: connector_registration.descriptive_phrase,
26
+ provider: connector_registration.provider,
27
+ core: connector_registration.built_in? ? '✓' : '',
28
+ class: connector_registration.klass_name
29
+ }
30
+ end
31
+
32
+ connector_info = connector_info.sort_by do |a|
33
+ [a[:core].to_s, a[:provider], a[:phase], a[:identifier]]
20
34
  end
21
35
 
22
- headers = klasses.first.keys.map do |key|
36
+ headers = connector_info.first.keys.map do |key|
23
37
  key.to_s.upcase.bold
24
38
  end
25
39
 
26
- table = TTY::Table.new(headers, klasses.map(&:values))
40
+ table = TTY::Table.new(headers, connector_info.map(&:values))
41
+ puts table.render(indent: 0, padding: [0, 2])
42
+ end
43
+
44
+ desc "show PHASE IDENTIFIER", "Show information about a connector"
45
+ def show(phase, identifier)
46
+ unless ['extractor', 'transformer', 'loader'].include?(phase)
47
+ puts "phase argument must be one of: [extractor, transformer, loader]"
48
+ return
49
+ end
50
+
51
+ begin
52
+ connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
53
+ rescue Chronicle::ETL::ConnectorNotAvailableError
54
+ puts "Could not find #{phase} #{identifier}"
55
+ return
56
+ end
57
+
58
+ puts connector.klass.to_s.bold
59
+ puts " #{connector.descriptive_phrase}"
60
+ puts
61
+ puts "OPTIONS"
62
+
63
+ headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
64
+
65
+ settings = connector.klass.settings.map do |name, setting|
66
+ [
67
+ name,
68
+ setting.default,
69
+ setting.required ? 'yes' : 'no'
70
+ ]
71
+ end
72
+ table = TTY::Table.new(headers, settings)
27
73
  puts table.render(indent: 0, padding: [0, 2])
28
74
  end
29
75
  end
@@ -1,4 +1,5 @@
1
1
  require 'pp'
2
+
2
3
  module Chronicle
3
4
  module ETL
4
5
  module CLI
@@ -7,16 +8,33 @@ module Chronicle
7
8
  default_task "start"
8
9
  namespace :jobs
9
10
 
10
- class_option :extractor, aliases: '-e', desc: 'Extractor class (available: stdin, csv, file)', default: 'stdin', banner: 'extractor-name'
11
+ class_option :name, aliases: '-j', desc: 'Job configuration name'
12
+
13
+ class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
11
14
  class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- class_option :transformer, aliases: '-t', desc: 'Transformer class (available: null)', default: 'null', banner: 'transformer-name'
15
+ class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
13
16
  class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- class_option :loader, aliases: '-l', desc: 'Loader class (available: stdout, csv, table)', default: 'stdout', banner: 'loader-name'
17
+ class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
15
18
  class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- class_option :name, aliases: '-j', desc: 'Job configuration name'
17
19
 
18
- map run: :start # Thor doesn't like `run` as a command name
20
+ # This is an array to deal with shell globbing
21
+ class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
22
+ class_option :since, desc: "Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options", banner: 'DATE'
23
+ class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
24
+ class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
25
+
26
+ class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
27
+ class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
28
+
29
+ class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
30
+ class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
31
+
32
+ # Thor doesn't like `run` as a command name
33
+ map run: :start
19
34
  desc "run", "Start a job"
35
+ option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
36
+ option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
37
+ option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
20
38
  long_desc <<-LONG_DESC
21
39
  This will run an ETL job. Each job needs three parts:
22
40
 
@@ -24,23 +42,17 @@ module Chronicle
24
42
 
25
43
  2. #{'Transformer'.underline}: transforms data into a new format. If none is specified, we use the `null` transformer which does nothing to the data.
26
44
 
27
- 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout.
45
+ 3. #{'Loader'.underline}: takes that transformed data and loads it externally. This can be an API, flat files, (or by default), stdout. With the --dry-run option, this step won't be run.
28
46
 
29
47
  If you do not want to use the command line flags, you can also configure a job with a .yml config file. You can either specify the path to this file or use the filename and place the file in ~/.config/chronicle/etl/jobs/NAME.yml and call it with `--job NAME`
30
48
  LONG_DESC
31
49
  # Run an ETL job
32
50
  def start
51
+ setup_log_level
33
52
  job_definition = build_job_definition(options)
34
53
  job = Chronicle::ETL::Job.new(job_definition)
35
54
  runner = Chronicle::ETL::Runner.new(job)
36
55
  runner.run!
37
- rescue Chronicle::ETL::ProviderNotAvailableError => e
38
- warn(e.message.red)
39
- warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{e.provider}`")
40
- exit(false)
41
- rescue Chronicle::ETL::ConnectorNotAvailableError => e
42
- warn(e.message.red)
43
- exit(false)
44
56
  end
45
57
 
46
58
  desc "create", "Create a job"
@@ -48,14 +60,13 @@ LONG_DESC
48
60
  def create
49
61
  job_definition = build_job_definition(options)
50
62
  path = File.join('chronicle', 'etl', 'jobs', options[:name])
51
- Chronicle::ETL::Config.write(path, job_definition)
63
+ Chronicle::ETL::Config.write(path, job_definition.definition)
52
64
  end
53
65
 
54
66
  desc "show", "Show details about a job"
55
67
  # Show an ETL job
56
68
  def show
57
- job_config = build_job_definition(options)
58
- pp job_config
69
+ puts Chronicle::ETL::Job.new(build_job_definition(options))
59
70
  end
60
71
 
61
72
  desc "list", "List all available jobs"
@@ -73,7 +84,7 @@ LONG_DESC
73
84
  [job, extractor, transformer, loader]
74
85
  end
75
86
 
76
- headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
87
+ headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
77
88
 
78
89
  table = TTY::Table.new(headers, job_details)
79
90
  puts table.render(indent: 0, padding: [0, 2])
@@ -81,11 +92,20 @@ LONG_DESC
81
92
 
82
93
  private
83
94
 
95
+ def setup_log_level
96
+ if options[:verbose]
97
+ Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
98
+ elsif options[:log_level]
99
+ level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
100
+ Chronicle::ETL::Logger.log_level = level
101
+ end
102
+ end
103
+
84
104
  # Create job definition by reading config file and then overwriting with flag options
85
105
  def build_job_definition(options)
86
106
  definition = Chronicle::ETL::JobDefinition.new
87
- definition.add_config(process_flag_options(options))
88
107
  definition.add_config(load_job_config(options[:name]))
108
+ definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
89
109
  definition
90
110
  end
91
111
 
@@ -95,19 +115,34 @@ LONG_DESC
95
115
 
96
116
  # Takes flag options and turns them into a runner config
97
117
  def process_flag_options options
118
+ extractor_options = options[:'extractor-opts'].merge({
119
+ filename: (options[:input] if options[:input].any?),
120
+ since: options[:since],
121
+ until: options[:until],
122
+ limit: options[:limit],
123
+ }.compact)
124
+
125
+ transformer_options = options[:'transformer-opts']
126
+
127
+ loader_options = options[:'loader-opts'].merge({
128
+ output: options[:output],
129
+ fields: options[:fields]
130
+ }.compact)
131
+
98
132
  {
133
+ dry_run: options[:dry_run],
99
134
  extractor: {
100
135
  name: options[:extractor],
101
- options: options[:'extractor-opts']
102
- },
136
+ options: extractor_options
137
+ }.compact,
103
138
  transformer: {
104
139
  name: options[:transformer],
105
- options: options[:'transformer-opts']
106
- },
140
+ options: transformer_options
141
+ }.compact,
107
142
  loader: {
108
143
  name: options[:loader],
109
- options: options[:'loader-opts']
110
- }
144
+ options: loader_options
145
+ }.compact
111
146
  }
112
147
  end
113
148
  end
@@ -1,17 +1,10 @@
1
- require 'thor'
2
- require 'chronicle/etl'
3
1
  require 'colorize'
4
2
 
5
- require 'chronicle/etl/cli/subcommand_base'
6
- require 'chronicle/etl/cli/connectors'
7
- require 'chronicle/etl/cli/jobs'
8
-
9
3
  module Chronicle
10
4
  module ETL
11
5
  module CLI
12
6
  # Main entrypoint for CLI app
13
- class Main < Thor
14
- class_option "verbose", type: :boolean, default: false
7
+ class Main < ::Thor
15
8
  default_task "jobs"
16
9
 
17
10
  desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
@@ -22,10 +15,6 @@ module Chronicle
22
15
 
23
16
  # Entrypoint for the CLI
24
17
  def self.start(given_args = ARGV, config = {})
25
- if given_args.none?
26
- abort "No command entered or job specified. To see commands, run `chronicle-etl help`".red
27
- end
28
-
29
18
  # Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
30
19
  if given_args.any? && given_args[0].include?(':')
31
20
  commands = given_args.shift.split(':')
@@ -35,10 +24,20 @@ module Chronicle
35
24
  super(given_args, config)
36
25
  end
37
26
 
27
+ def self.exit_on_failure?
28
+ true
29
+ end
30
+
31
+ desc "version", "Show version"
32
+ map %w(--version -v) => :version
33
+ def version
34
+ shell.say "chronicle-etl #{Chronicle::ETL::VERSION}"
35
+ end
36
+
38
37
  # Displays help options for chronicle-etl
39
38
  def help(meth = nil, subcommand = false)
40
39
  if meth && !respond_to?(meth)
41
- klass, task = Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
40
+ klass, task = ::Thor::Util.find_class_and_task_by_namespace("#{meth}:#{meth}")
42
41
  klass.start(['-h', task].compact, shell: shell)
43
42
  else
44
43
  shell.say "ABOUT".bold
@@ -52,14 +51,14 @@ module Chronicle
52
51
  shell.say " $ chronicle-etl connectors:list"
53
52
  shell.say
54
53
  shell.say " Run a simple job:".italic.light_black
55
- shell.say " $ chronicle-etl jobs:start --extractor stdin --transformer null --loader stdout"
54
+ shell.say " $ chronicle-etl jobs:run --extractor stdin --transformer null --loader stdout"
56
55
  shell.say
57
56
  shell.say " Show full job options:".italic.light_black
58
- shell.say " $ chronicle-etl jobs help start"
57
+ shell.say " $ chronicle-etl jobs help run"
59
58
 
60
59
  list = []
61
60
 
62
- Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
61
+ ::Thor::Util.thor_classes_in(Chronicle::ETL::CLI).each do |thor_class|
63
62
  list += thor_class.printable_tasks(false)
64
63
  end
65
64
  list.sort! { |a, b| a[0] <=> b[0] }
@@ -72,6 +71,9 @@ module Chronicle
72
71
  shell.say "VERSION".bold
73
72
  shell.say " #{Chronicle::ETL::VERSION}"
74
73
  shell.say
74
+ shell.say " Display current version:".italic.light_black
75
+ shell.say " $ chronicle-etl --version"
76
+ shell.say
75
77
  shell.say "FULL DOCUMENTATION".bold
76
78
  shell.say " https://github.com/chronicle-app/chronicle-etl".blue
77
79
  shell.say
@@ -2,11 +2,11 @@ module Chronicle
2
2
  module ETL
3
3
  module CLI
4
4
  # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
- class SubcommandBase < Thor
5
+ class SubcommandBase < ::Thor
6
6
  # Print usage instructions for a subcommand
7
7
  def self.help(shell, subcommand = false)
8
8
  list = printable_commands(true, subcommand)
9
- Thor::Util.thor_classes_in(self).each do |klass|
9
+ ::Thor::Util.thor_classes_in(self).each do |klass|
10
10
  list += klass.printable_commands(false)
11
11
  end
12
12
  list.sort! { |a, b| a[0] <=> b[0] }
@@ -0,0 +1,7 @@
1
+ require 'thor'
2
+ require 'chronicle/etl'
3
+
4
+ require 'chronicle/etl/cli/subcommand_base'
5
+ require 'chronicle/etl/cli/connectors'
6
+ require 'chronicle/etl/cli/jobs'
7
+ require 'chronicle/etl/cli/main'
@@ -30,7 +30,7 @@ module Chronicle
30
30
  end
31
31
  end
32
32
 
33
- # Returns all available credentials available in ~/.config/chronilce/etl/credenetials/*.yml
33
+ # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
34
34
  def available_credentials
35
35
  job_directory = Runcom::Config.new('chronicle/etl/credentials').current
36
36
  Dir.glob(File.join(job_directory, "*.yml")).map do |filename|