chronicle-etl 0.4.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/.rubocop.yml +3 -0
- data/README.md +156 -81
- data/chronicle-etl.gemspec +3 -0
- data/lib/chronicle/etl/cli/cli_base.rb +31 -0
- data/lib/chronicle/etl/cli/connectors.rb +4 -11
- data/lib/chronicle/etl/cli/jobs.rb +49 -22
- data/lib/chronicle/etl/cli/main.rb +32 -1
- data/lib/chronicle/etl/cli/plugins.rb +62 -0
- data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
- data/lib/chronicle/etl/cli.rb +3 -0
- data/lib/chronicle/etl/config.rb +7 -4
- data/lib/chronicle/etl/configurable.rb +15 -2
- data/lib/chronicle/etl/exceptions.rb +29 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
- data/lib/chronicle/etl/extractors/extractor.rb +5 -5
- data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
- data/lib/chronicle/etl/job.rb +7 -1
- data/lib/chronicle/etl/job_definition.rb +32 -6
- data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
- data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +24 -1
- data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
- data/lib/chronicle/etl/logger.rb +6 -2
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
- data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
- data/lib/chronicle/etl/registry/registry.rb +27 -14
- data/lib/chronicle/etl/runner.rb +35 -17
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +11 -4
- metadata +53 -6
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2b6fdca3723ec52287c070a0dd08d0cfaf825f5e8f46da0d5a34172c0008573
|
4
|
+
data.tar.gz: e15181ba7edc1698404af8ff8c05d5367786ea809360393e825ca5ee5eef6c75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5508dcfc5e1367122ebbc191dc60a76cc7b1f088ae9ebd52bae41e07420f54ea7977a35f2b25839933b06d4673e2a120feecbd958efee07e0d313eaa7a5d167
|
7
|
+
data.tar.gz: adcb90549af364189c5ae3b0811c039277aba7dc6fbf2fbd6de8b89c572948d0dc00ba8da59db03252f4770737423e62c3a1175ef018742ef8bd7aee14837f63
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -1,125 +1,200 @@
|
|
1
|
-
|
1
|
+
## A CLI toolkit for extracting and working with your digital history
|
2
|
+
|
3
|
+

|
2
4
|
|
3
5
|
[](https://badge.fury.io/rb/chronicle-etl) [](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
|
4
6
|
|
5
|
-
|
7
|
+
Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
|
6
8
|
|
7
|
-
|
9
|
+
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this project is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
|
8
10
|
|
9
|
-
|
11
|
+
**`chronicle-etl` is a CLI tool that gives you a unified interface for accessing your personal data.** It uses the ETL pattern to *extract* it from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), *transform* it (into a given schema), and *load* it to a destination (e.g. a CSV file, JSON, external API).
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
+
## What does `chronicle-etl` give you?
|
14
|
+
* **CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
|
15
|
+
* **Plugins for many third-party providers**. A plugin system allows you to access data from third-party providers and hook it into the shared CLI infrastructure.
|
16
|
+
* **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are stored in a common schema. Don’t want to use the schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
```sh
|
20
|
+
# Install chronicle-etl
|
21
|
+
gem install chronicle-etl
|
13
22
|
```
|
14
23
|
|
15
|
-
|
24
|
+
After installation, the `chronicle-etl` command will be available in your shell. Homebrew support [is coming soon](https://github.com/chronicle-app/chronicle-etl/issues/13).
|
16
25
|
|
17
|
-
|
26
|
+
## Basic usage and running jobs
|
18
27
|
|
19
|
-
```
|
20
|
-
#
|
21
|
-
$ chronicle-etl
|
28
|
+
```sh
|
29
|
+
# Display help
|
30
|
+
$ chronicle-etl help
|
22
31
|
|
23
|
-
#
|
24
|
-
$ chronicle-etl
|
32
|
+
# Basic job usage
|
33
|
+
$ chronicle-etl --extractor NAME --transformer NAME --loader NAME
|
34
|
+
|
35
|
+
# Read data.csv and display it to stdout as a table
|
36
|
+
$ chronicle-etl --extractor csv --input ./data.csv --loader table
|
25
37
|
```
|
26
38
|
|
27
|
-
|
39
|
+
### Common options
|
40
|
+
```sh
|
41
|
+
Options:
|
42
|
+
-j, [--name=NAME] # Job configuration name
|
43
|
+
-e, [--extractor=EXTRACTOR-NAME] # Extractor class. Default: stdin
|
44
|
+
[--extractor-opts=key:value] # Extractor options
|
45
|
+
-t, [--transformer=TRANSFORMER-NAME] # Transformer class. Default: null
|
46
|
+
[--transformer-opts=key:value] # Transformer options
|
47
|
+
-l, [--loader=LOADER-NAME] # Loader class. Default: table
|
48
|
+
[--loader-opts=key:value] # Loader options
|
49
|
+
-i, [--input=FILENAME] # Input filename or directory
|
50
|
+
[--since=DATE] # Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options
|
51
|
+
[--until=DATE] # Load records UNTIL this date
|
52
|
+
[--limit=N] # Only extract the first LIMIT records
|
53
|
+
-o, [--output=OUTPUT] # Output filename
|
54
|
+
[--fields=field1 field2 ...] # Output only these fields
|
55
|
+
[--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
|
56
|
+
# Default: info
|
57
|
+
-v, [--verbose], [--no-verbose] # Set log level to verbose
|
58
|
+
[--silent], [--no-silent] # Silence all output
|
59
|
+
```
|
28
60
|
|
61
|
+
## Connectors
|
29
62
|
Connectors are available to read, process, and load data from different formats or external services.
|
30
63
|
|
31
|
-
```
|
64
|
+
```sh
|
32
65
|
# List all available connectors
|
33
66
|
$ chronicle-etl connectors:list
|
34
|
-
|
35
|
-
# Install a connector
|
36
|
-
$ chronicle-etl connectors:install imessage
|
37
67
|
```
|
38
68
|
|
39
|
-
Built
|
40
|
-
|
41
|
-
### Extractors
|
42
|
-
- `stdin` - (default) Load records from line-separated stdin
|
43
|
-
- `csv`
|
44
|
-
- `file` - load from a single file or directory (with a glob pattern)
|
45
|
-
|
46
|
-
### Transformers
|
47
|
-
- `null` - (default) Don't do anything
|
48
|
-
|
49
|
-
### Loaders
|
50
|
-
- `stdout` - (default) output records to stdout serialized as JSON
|
51
|
-
- `csv` - Load records to a csv file
|
52
|
-
- `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
53
|
-
- `table` - Output an ascii table of records. Useful for debugging.
|
69
|
+
### Built-in Connectors
|
70
|
+
`chronicle-etl` comes with several built-in connectors for common formats and sources.
|
54
71
|
|
55
|
-
|
72
|
+
#### Extractors
|
73
|
+
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
|
74
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
|
75
|
+
- [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
|
56
76
|
|
57
|
-
|
77
|
+
#### Transformers
|
78
|
+
- [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
|
58
79
|
|
59
|
-
|
60
|
-
- [
|
61
|
-
- [
|
80
|
+
#### Loaders
|
81
|
+
- [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
|
82
|
+
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
|
83
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
|
84
|
+
- [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
62
85
|
|
63
|
-
|
86
|
+
### Plugins
|
87
|
+
Plugins provide access to data from third-party platforms, services, or formats.
|
64
88
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
## Full commands
|
89
|
+
```bash
|
90
|
+
# Install a plugin
|
91
|
+
$ chronicle-etl plugins:install NAME
|
70
92
|
|
71
|
-
|
72
|
-
$ chronicle-etl
|
73
|
-
|
74
|
-
ALL COMMANDS
|
75
|
-
help # This help menu
|
76
|
-
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
77
|
-
connectors:install NAME # Installs connector NAME
|
78
|
-
connectors:list # Lists available connectors
|
79
|
-
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
80
|
-
jobs:create # Create a job
|
81
|
-
jobs:list # List all available jobs
|
82
|
-
jobs:run # Start a job
|
83
|
-
jobs:show # Show details about a job
|
84
|
-
```
|
93
|
+
# Install the imessage plugin
|
94
|
+
$ chronicle-etl plugins:install imessage
|
85
95
|
|
86
|
-
|
96
|
+
# List installed plugins
|
97
|
+
$ chronicle-etl plugins:list
|
87
98
|
|
99
|
+
# Uninstall a plugin
|
100
|
+
$ chronicle-etl plugins:uninstall NAME
|
88
101
|
```
|
89
|
-
Usage:
|
90
|
-
chronicle-etl jobs:run
|
91
102
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
103
|
+
A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and they’re being ported over to the Chronicle system. This table shows what’s available now and what’s coming. Rows are sorted in very rough order of priority.
|
104
|
+
|
105
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
106
|
+
|
107
|
+
| Name | Description | Availability |
|
108
|
+
|-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
|
109
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
|
110
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (zsh support pending) |
|
111
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available (imap support pending) |
|
112
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | Bookmarks and tags | Available |
|
113
|
+
| [safari](https://github.com/chronicle-app/chronicle-safari) | Browser history from local sqlite db | Available |
|
114
|
+
| github | Github user and repo activity | In progress |
|
115
|
+
| chrome | Browser history from local sqlite db | Needs porting |
|
116
|
+
| whatsapp | Messaging history (via individual chat exports) or reverse-engineered local desktop install | Unstarted |
|
117
|
+
| anki | Studying and card creation history | Needs porting |
|
118
|
+
| facebook | Messaging and history posting via data export files | Needs porting |
|
119
|
+
| twitter | History via API or export data files | Needs porting |
|
120
|
+
| foursquare | Location history via API | Needs porting |
|
121
|
+
| goodreads | Reading history via export csv (RIP goodreads API) | Needs porting |
|
122
|
+
| lastfm | Listening history via API | Needs porting |
|
123
|
+
| images | Process image files | Needs porting |
|
124
|
+
| arc | Location history from synced icloud backup files | Needs porting |
|
125
|
+
| firefox | Browser history from local sqlite db | Needs porting |
|
126
|
+
| fitbit | Personal analytics via API | Needs porting |
|
127
|
+
| git | Commit history on a repo | Needs porting |
|
128
|
+
| google-calendar | Calendar events via API | Needs porting |
|
129
|
+
| instagram | Posting and messaging history via export data | Needs porting |
|
130
|
+
| shazam | Song tags via reverse-engineered API | Needs porting |
|
131
|
+
| slack | Messaging history via API | Needs rethinking |
|
132
|
+
| strava | Activity history via API | Needs porting |
|
133
|
+
| things | Task activity via local sqlite db | Needs porting |
|
134
|
+
| bear | Note taking activity via local sqlite db | Needs porting |
|
135
|
+
| youtube | Video activity via takeout data and API | Needs porting |
|
136
|
+
|
137
|
+
### Writing your own connector
|
138
|
+
|
139
|
+
Additional connectors are packaged as separate ruby gems. You can view the [iMessage plugin](https://github.com/chronicle-app/chronicle-imessage) for an example.
|
140
|
+
|
141
|
+
If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
|
142
|
+
|
143
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
144
|
+
|
145
|
+
#### Sample custom Extractor class
|
146
|
+
```ruby
|
147
|
+
module Chronicle
|
148
|
+
module FooService
|
149
|
+
class FooExtractor < Chronicle::ETL::Extractor
|
150
|
+
register_connector do |r|
|
151
|
+
r.identifier = 'foo'
|
152
|
+
r.description = 'From foo.com'
|
153
|
+
end
|
154
|
+
|
155
|
+
setting :access_token, required: true
|
156
|
+
|
157
|
+
def prepare
|
158
|
+
@records = # load from somewhere
|
159
|
+
end
|
160
|
+
|
161
|
+
def extract
|
162
|
+
@records.each do |record|
|
163
|
+
yield Chronicle::ETL::Extraction.new(data: record.to_h)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
107
169
|
```
|
108
170
|
|
109
171
|
## Development
|
110
|
-
|
111
172
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
112
173
|
|
113
174
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
114
175
|
|
115
|
-
|
176
|
+
### Additional development commands
|
177
|
+
```bash
|
178
|
+
# run tests
|
179
|
+
bundle exec rake spec
|
180
|
+
|
181
|
+
# generate docs
|
182
|
+
bundle exec rake yard
|
183
|
+
|
184
|
+
# use Guard to run specs automatically
|
185
|
+
bundle exec guard
|
186
|
+
```
|
116
187
|
|
188
|
+
## Get in touch
|
189
|
+
- [@hyfen](https://twitter.com/hyfen) on Twitter
|
190
|
+
- [@hyfen](https://github.com/hyfen) on Github
|
191
|
+
- Email: andrew@hyfen.net
|
192
|
+
|
193
|
+
## Contributing
|
117
194
|
Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
118
195
|
|
119
196
|
## License
|
120
|
-
|
121
197
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
122
198
|
|
123
199
|
## Code of Conduct
|
124
|
-
|
125
|
-
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
200
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/main/CODE_OF_CONDUCT.md).
|
data/chronicle-etl.gemspec
CHANGED
@@ -47,8 +47,11 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.add_dependency "sequel", "~> 5.35"
|
48
48
|
spec.add_dependency "sqlite3", "~> 1.4"
|
49
49
|
spec.add_dependency "thor", "~> 1.2"
|
50
|
+
spec.add_dependency "thor-hollaback", "~> 0.2"
|
50
51
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
52
|
+
spec.add_dependency "tty-spinner"
|
51
53
|
spec.add_dependency "tty-table", "~> 0.11"
|
54
|
+
spec.add_dependency "tty-prompt", "~> 0.23"
|
52
55
|
|
53
56
|
spec.add_development_dependency "bundler", "~> 2.1"
|
54
57
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Chronicle
  module ETL
    module CLI
      # Base class for CLI commands.
      #
      # Adds helpers for ending the process with a logged message and an
      # exit code. They are wrapped in `no_commands` so Thor does not expose
      # them as CLI commands.
      class CLIBase < ::Thor
        no_commands do
          # Shorthand for cli_exit(status: :failure)
          #
          # @param message [String, nil] message to log before exiting
          # @param exception [Exception, nil] exception whose full message is
          #   logged at debug level
          def cli_fail(message: nil, exception: nil)
            cli_exit(status: :failure, message: message, exception: exception)
          end

          # Exit from CLI
          #
          # Logs the exception (if any) at debug level, then the message (if
          # any) at info level on success or fatal level otherwise, and
          # finally terminates the process via `exit`.
          #
          # @param status [Symbol] either :success or :failure; any value other
          #   than :success is treated as a failure (exit code 1)
          # @param message [String, nil] message to print; shown in red on failure
          # @param exception [Exception, nil] its stacktrace is logged if
          #   log_level is set to debug
          def cli_exit(status: :success, message: nil, exception: nil)
            succeeded = status == :success
            exit_code = succeeded ? 0 : 1
            log_level = succeeded ? :info : :fatal

            # `message` is optional, so only colourise it when present;
            # calling `.red` on nil would raise instead of exiting cleanly.
            # NOTE(review): `String#red` is presumably provided by the colorize gem.
            message = message.red if message && !succeeded

            Chronicle::ETL::Logger.debug(exception.full_message) if exception
            Chronicle::ETL::Logger.send(log_level, message) if message
            exit(exit_code)
          end
        end
      end
    end
  end
end
|
@@ -8,11 +8,6 @@ module Chronicle
|
|
8
8
|
default_task 'list'
|
9
9
|
namespace :connectors
|
10
10
|
|
11
|
-
desc "install NAME", "Installs connector NAME"
|
12
|
-
def install(name)
|
13
|
-
Chronicle::ETL::Registry.install_connector(name)
|
14
|
-
end
|
15
|
-
|
16
11
|
desc "list", "Lists available connectors"
|
17
12
|
# Display all available connectors that chronicle-etl has access to
|
18
13
|
def list
|
@@ -44,21 +39,19 @@ module Chronicle
|
|
44
39
|
desc "show PHASE IDENTIFIER", "Show information about a connector"
|
45
40
|
def show(phase, identifier)
|
46
41
|
unless ['extractor', 'transformer', 'loader'].include?(phase)
|
47
|
-
|
48
|
-
return
|
42
|
+
cli_fail(message: "Phase argument must be one of: [extractor, transformer, loader]")
|
49
43
|
end
|
50
44
|
|
51
45
|
begin
|
52
46
|
connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
|
53
|
-
rescue Chronicle::ETL::ConnectorNotAvailableError
|
54
|
-
|
55
|
-
return
|
47
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError, Chronicle::ETL::PluginError => e
|
48
|
+
cli_fail(message: "Could not find #{phase} #{identifier}", exception: e)
|
56
49
|
end
|
57
50
|
|
58
51
|
puts connector.klass.to_s.bold
|
59
52
|
puts " #{connector.descriptive_phrase}"
|
60
53
|
puts
|
61
|
-
puts "
|
54
|
+
puts "Settings:"
|
62
55
|
|
63
56
|
headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
|
64
57
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pp'
|
2
|
+
require 'tty-prompt'
|
2
3
|
|
3
4
|
module Chronicle
|
4
5
|
module ETL
|
@@ -10,30 +11,26 @@ module Chronicle
|
|
10
11
|
|
11
12
|
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
12
13
|
|
13
|
-
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: '
|
14
|
+
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'NAME'
|
14
15
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
15
|
-
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: '
|
16
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'NAME'
|
16
17
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
17
|
-
class_option :loader, aliases: '-l', desc: 'Loader class. Default:
|
18
|
+
class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
|
18
19
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
19
20
|
|
20
21
|
# This is an array to deal with shell globbing
|
21
22
|
class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
|
22
|
-
class_option :since, desc: "Load records SINCE this date
|
23
|
+
class_option :since, desc: "Load records SINCE this date", banner: 'DATE'
|
23
24
|
class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
|
24
25
|
class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
|
25
26
|
|
26
27
|
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
27
28
|
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
28
|
-
|
29
|
-
class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
30
|
-
class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
29
|
+
class_option :header_row, desc: 'Output the header row of tabular output', type: 'boolean'
|
31
30
|
|
32
31
|
# Thor doesn't like `run` as a command name
|
33
32
|
map run: :start
|
34
33
|
desc "run", "Start a job"
|
35
|
-
option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
36
|
-
option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
37
34
|
option :dry_run, desc: 'Only run the extraction and transform steps, not the loading', type: :boolean
|
38
35
|
long_desc <<-LONG_DESC
|
39
36
|
This will run an ETL job. Each job needs three parts:
|
@@ -48,25 +45,41 @@ module Chronicle
|
|
48
45
|
LONG_DESC
|
49
46
|
# Run an ETL job
|
50
47
|
def start
|
51
|
-
setup_log_level
|
52
48
|
job_definition = build_job_definition(options)
|
53
|
-
|
54
|
-
|
55
|
-
|
49
|
+
|
50
|
+
if job_definition.plugins_missing?
|
51
|
+
missing_plugins = job_definition.errors[:plugins]
|
52
|
+
.select { |error| error.is_a?(Chronicle::ETL::PluginLoadError) }
|
53
|
+
.map(&:name)
|
54
|
+
.uniq
|
55
|
+
install_missing_plugins(missing_plugins)
|
56
|
+
end
|
57
|
+
|
58
|
+
run_job(job_definition)
|
59
|
+
rescue Chronicle::ETL::JobDefinitionError => e
|
60
|
+
cli_fail(message: "Error running job.\n#{job_definition.errors}", exception: e)
|
56
61
|
end
|
57
62
|
|
58
63
|
desc "create", "Create a job"
|
59
64
|
# Create an ETL job
|
60
65
|
def create
|
61
66
|
job_definition = build_job_definition(options)
|
67
|
+
job_definition.validate!
|
68
|
+
|
62
69
|
path = File.join('chronicle', 'etl', 'jobs', options[:name])
|
63
70
|
Chronicle::ETL::Config.write(path, job_definition.definition)
|
71
|
+
rescue Chronicle::ETL::JobDefinitionError => e
|
72
|
+
cli_fail(message: "Job definition error", exception: e)
|
64
73
|
end
|
65
74
|
|
66
75
|
desc "show", "Show details about a job"
|
67
76
|
# Show an ETL job
|
68
77
|
def show
|
69
|
-
|
78
|
+
job_definition = build_job_definition(options)
|
79
|
+
job_definition.validate!
|
80
|
+
puts Chronicle::ETL::Job.new(job_definition)
|
81
|
+
rescue Chronicle::ETL::JobDefinitionError => e
|
82
|
+
cli_fail(message: "Job definition error", exception: e)
|
70
83
|
end
|
71
84
|
|
72
85
|
desc "list", "List all available jobs"
|
@@ -86,19 +99,32 @@ LONG_DESC
|
|
86
99
|
|
87
100
|
headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
|
88
101
|
|
102
|
+
puts "Available jobs:"
|
89
103
|
table = TTY::Table.new(headers, job_details)
|
90
104
|
puts table.render(indent: 0, padding: [0, 2])
|
105
|
+
rescue Chronicle::ETL::ConfigError => e
|
106
|
+
cli_fail(message: "Config error. #{e.message}", exception: e)
|
91
107
|
end
|
92
108
|
|
93
109
|
private
|
94
110
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
111
|
+
# Wrap the job definition in a Job, hand it to a Runner and execute it.
#
# @param job_definition the definition used to construct the Job
# @return whatever Runner#run! returns
def run_job(job_definition)
  Chronicle::ETL::Runner
    .new(Chronicle::ETL::Job.new(job_definition))
    .run!
end
|
116
|
+
|
117
|
+
# TODO: probably could merge this with something in cli/plugin
|
118
|
+
# Ask the user whether the plugins required by the job should be installed.
# On confirmation, installs them through the Plugins CLI command; otherwise
# ends via cli_fail.
#
# @param missing_plugins [Array<String>] plugin names without the
#   "chronicle-" prefix
def install_missing_plugins(missing_plugins)
  plural_suffix = missing_plugins.count > 1 ? 's' : ''
  gem_names = missing_plugins.map { |name| "chronicle-#{name}".bold }.join(", ")
  question = "Plugin#{plural_suffix} specified by job not installed.\n" \
             "Do you want to install #{gem_names} and start the job?"

  unless TTY::Prompt.new.yes?(question)
    cli_fail(message: "Must install #{missing_plugins.join(", ")} plugin to run job")
  end

  Chronicle::ETL::CLI::Plugins.new.install(*missing_plugins)
end
|
103
129
|
|
104
130
|
# Create job definition by reading config file and then overwriting with flag options
|
@@ -116,7 +142,7 @@ LONG_DESC
|
|
116
142
|
# Takes flag options and turns them into a runner config
|
117
143
|
def process_flag_options options
|
118
144
|
extractor_options = options[:'extractor-opts'].merge({
|
119
|
-
|
145
|
+
input: (options[:input] if options[:input].any?),
|
120
146
|
since: options[:since],
|
121
147
|
until: options[:until],
|
122
148
|
limit: options[:limit],
|
@@ -126,6 +152,7 @@ LONG_DESC
|
|
126
152
|
|
127
153
|
loader_options = options[:'loader-opts'].merge({
|
128
154
|
output: options[:output],
|
155
|
+
header_row: options[:header_row],
|
129
156
|
fields: options[:fields]
|
130
157
|
}.compact)
|
131
158
|
|
@@ -4,7 +4,15 @@ module Chronicle
|
|
4
4
|
module ETL
|
5
5
|
module CLI
|
6
6
|
# Main entrypoint for CLI app
|
7
|
-
class Main < ::
|
7
|
+
class Main < Chronicle::ETL::CLI::CLIBase
|
8
|
+
class_before :set_log_level
|
9
|
+
class_before :set_color_output
|
10
|
+
|
11
|
+
class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal, silent)', default: 'info'
|
12
|
+
class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
13
|
+
class_option :silent, desc: 'Silence all output', type: :boolean
|
14
|
+
class_option :'no-color', desc: 'Disable colour output', type: :boolean
|
15
|
+
|
8
16
|
default_task "jobs"
|
9
17
|
|
10
18
|
desc 'connectors:COMMAND', 'Connectors available for ETL jobs', hide: true
|
@@ -13,6 +21,9 @@ module Chronicle
|
|
13
21
|
desc 'jobs:COMMAND', 'Configure and run jobs', hide: true
|
14
22
|
subcommand 'jobs', Jobs
|
15
23
|
|
24
|
+
desc 'plugins:COMMAND', 'Configure plugins', hide: true
|
25
|
+
subcommand 'plugins', Plugins
|
26
|
+
|
16
27
|
# Entrypoint for the CLI
|
17
28
|
def self.start(given_args = ARGV, config = {})
|
18
29
|
# Takes a subcommand:command argument and splits it so Thor knows how to hand off to the subcommand class
|
@@ -79,6 +90,26 @@ module Chronicle
|
|
79
90
|
shell.say
|
80
91
|
end
|
81
92
|
end
|
93
|
+
|
94
|
+
no_commands do
  # NOTE(review): prints "hi" and is not referenced anywhere in the visible
  # code; looks like leftover debugging scaffolding. Kept so the class
  # interface is unchanged; confirm it can be removed.
  def testb
    puts "hi"
  end

  # Turn off coloured output when --no-color is passed or the NO_COLOR
  # environment variable is set.
  # NOTE(review): `String.disable_colorization` is presumably provided by
  # the colorize gem.
  def set_color_output
    String.disable_colorization(true) if options[:'no-color'] || ENV['NO_COLOR']
  end

  # Configure the logger from the CLI flags. Precedence: --silent, then
  # --verbose (debug level), then --log-level.
  #
  # The --log-level value is checked against the levels listed in the
  # option's description before being turned into a constant. Passing
  # arbitrary user input to `const_get` would raise NameError for unknown
  # names, or resolve an unrelated constant.
  def set_log_level
    if options[:silent]
      Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::SILENT
    elsif options[:verbose]
      Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
    elsif options[:log_level]
      known_levels = %w[debug info warn error fatal silent]
      requested = options[:log_level].to_s.downcase

      unless known_levels.include?(requested)
        cli_fail(message: "Log level must be one of: [#{known_levels.join(', ')}]")
      end

      Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger.const_get(requested.upcase)
    end
  end
end
|
82
113
|
end
|
83
114
|
end
|
84
115
|
end
|