chronicle-etl 0.3.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/.rubocop.yml +28 -1
- data/Guardfile +7 -0
- data/README.md +149 -85
- data/Rakefile +4 -2
- data/chronicle-etl.gemspec +10 -5
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/cli/connectors.rb +34 -0
- data/lib/chronicle/etl/cli/jobs.rb +44 -12
- data/lib/chronicle/etl/cli/main.rb +13 -19
- data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
- data/lib/chronicle/etl/cli.rb +7 -0
- data/lib/chronicle/etl/configurable.rb +158 -0
- data/lib/chronicle/etl/exceptions.rb +7 -1
- data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
- data/lib/chronicle/etl/extractors/extractor.rb +23 -19
- data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
- data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
- data/lib/chronicle/etl/job.rb +1 -1
- data/lib/chronicle/etl/job_definition.rb +1 -1
- data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
- data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
- data/lib/chronicle/etl/loaders/loader.rb +5 -2
- data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
- data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
- data/lib/chronicle/etl/logger.rb +1 -0
- data/lib/chronicle/etl/models/base.rb +3 -0
- data/lib/chronicle/etl/models/entity.rb +8 -2
- data/lib/chronicle/etl/models/raw.rb +26 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
- data/lib/chronicle/etl/runner.rb +6 -4
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
- data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
- data/lib/chronicle/etl/serializers/serializer.rb +2 -1
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
- data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
- data/lib/chronicle/etl/transformers/transformer.rb +3 -2
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +12 -4
- metadata +80 -19
- data/.ruby-version +0 -1
- data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
- data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
- data/lib/chronicle/etl/models/generic.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a267de435b41b579e36128b7392729ef499eb37f05fabaead7811f089938ddb
|
4
|
+
data.tar.gz: d4af2f62f3f5de926bdfbb0e3d6dbe2c952ec286c07317af4dca8d98f665d6da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c78080cce008340f0b2795be46da2b5eb6562b2bffd97728150960343870f2bea4699e4efa07905710dd0e2eba7aaa1e803d8c0f727196f5d9d655b28a04f02e
|
7
|
+
data.tar.gz: cae3a3ffb6527f5c0b3ff89c75dc98d9cd66157ee6230c9db797f4683f90e2146daadf291108e55d3090d0120d3c9e25135cb21c4e9078bcaf4d1edf2172c930
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ main ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ main ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.rubocop.yml
CHANGED
@@ -1,11 +1,38 @@
|
|
1
1
|
AllCops:
|
2
2
|
EnabledByDefault: true
|
3
|
+
TargetRubyVersion: 2.7
|
4
|
+
|
5
|
+
Style/FrozenStringLiteralComment:
|
6
|
+
SafeAutoCorrect: true
|
3
7
|
|
4
8
|
Style/StringLiterals:
|
5
9
|
Enabled: false
|
6
10
|
|
11
|
+
Layout/MultilineAssignmentLayout:
|
12
|
+
Enabled: false
|
13
|
+
|
14
|
+
Layout/RedundantLineBreak:
|
15
|
+
Enabled: false
|
16
|
+
|
7
17
|
Style/MethodCallWithArgsParentheses:
|
8
18
|
Enabled: false
|
9
19
|
|
20
|
+
Style/MethodCalledOnDoEndBlock:
|
21
|
+
Exclude:
|
22
|
+
- 'spec/**/*'
|
23
|
+
|
24
|
+
Style/OpenStructUse:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Style/Copyright:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/SymbolArray:
|
31
|
+
EnforcedStyle: brackets
|
32
|
+
|
33
|
+
Style/WordArray:
|
34
|
+
EnforcedStyle: brackets
|
35
|
+
|
10
36
|
Lint/ConstantResolution:
|
11
|
-
Enabled: false
|
37
|
+
Enabled: false
|
38
|
+
|
data/Guardfile
ADDED
data/README.md
CHANGED
@@ -1,125 +1,189 @@
|
|
1
|
-
|
1
|
+
## A CLI toolkit for extracting and working with your digital history
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
|
4
4
|
|
5
|
-
|
5
|
+
Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
|
6
6
|
|
7
|
-
|
7
|
+
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this project is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
|
8
8
|
|
9
|
-
|
9
|
+
`chronicle-etl` is a CLI tool that gives you the ability to easily access your personal data. It uses the ETL pattern to **extract** it from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), **transform** it (into a given schema), and **load** it to a destination (e.g. a CSV file, JSON, external API).
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
## What does `chronicle-etl` give you?
|
12
|
+
* **CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
|
13
|
+
* **Plugins for many third-party providers**. A plugin system allows you to access data from third-party providers and hook it into the shared CLI infrastructure.
|
14
|
+
* **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are stored in a common schema. Don’t want to use the schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
```sh
|
18
|
+
# Install chronicle-etl
|
19
|
+
gem install chronicle-etl
|
13
20
|
```
|
14
21
|
|
15
|
-
|
22
|
+
After installation, the `chronicle-etl` command will be available in your shell. Homebrew support [is coming soon](https://github.com/chronicle-app/chronicle-etl/issues/13).
|
16
23
|
|
17
|
-
|
24
|
+
## Basic usage and running jobs
|
18
25
|
|
19
|
-
```
|
20
|
-
#
|
21
|
-
$ chronicle-etl
|
26
|
+
```sh
|
27
|
+
# Display help
|
28
|
+
$ chronicle-etl help
|
22
29
|
|
23
|
-
#
|
24
|
-
$ chronicle-etl
|
30
|
+
# Basic job usage
|
31
|
+
$ chronicle-etl --extractor NAME --transformer NAME --loader NAME
|
32
|
+
|
33
|
+
# Read data.csv and display it on stdout as a table
|
34
|
+
$ chronicle-etl --extractor csv --input ./data.csv --loader table
|
25
35
|
```
|
26
36
|
|
27
|
-
|
37
|
+
### Common options
|
38
|
+
```sh
|
39
|
+
Options:
|
40
|
+
-j, [--name=NAME] # Job configuration name
|
41
|
+
-e, [--extractor=EXTRACTOR-NAME] # Extractor class. Default: stdin
|
42
|
+
[--extractor-opts=key:value] # Extractor options
|
43
|
+
-t, [--transformer=TRANSFORMER-NAME] # Transformer class. Default: null
|
44
|
+
[--transformer-opts=key:value] # Transformer options
|
45
|
+
-l, [--loader=LOADER-NAME] # Loader class. Default: table
|
46
|
+
[--loader-opts=key:value] # Loader options
|
47
|
+
-i, [--input=FILENAME] # Input filename or directory
|
48
|
+
[--since=DATE] # Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options
|
49
|
+
[--until=DATE] # Load records UNTIL this date
|
50
|
+
[--limit=N] # Only extract the first LIMIT records
|
51
|
+
-o, [--output=OUTPUT] # Output filename
|
52
|
+
[--fields=field1 field2 ...] # Output only these fields
|
53
|
+
[--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
|
54
|
+
# Default: info
|
55
|
+
-v, [--verbose], [--no-verbose] # Set log level to verbose
|
56
|
+
[--silent], [--no-silent] # Silence all output
|
57
|
+
```
|
28
58
|
|
59
|
+
## Connectors
|
29
60
|
Connectors are available to read, process, and load data from different formats or external services.
|
30
61
|
|
31
|
-
```
|
62
|
+
```sh
|
32
63
|
# List all available connectors
|
33
64
|
$ chronicle-etl connectors:list
|
34
|
-
|
35
|
-
# Install a connector
|
36
|
-
$ chronicle-etl connectors:install imessage
|
37
65
|
```
|
38
66
|
|
39
|
-
Built
|
40
|
-
|
41
|
-
### Extractors
|
42
|
-
- `stdin` - (default) Load records from line-separated stdin
|
43
|
-
- `csv`
|
44
|
-
- `file` - load from a single file or directory (with a glob pattern)
|
45
|
-
|
46
|
-
### Transformers
|
47
|
-
- `null` - (default) Don't do anything
|
48
|
-
|
49
|
-
### Loaders
|
50
|
-
- `stdout` - (default) output records to stdout serialized as JSON
|
51
|
-
- `csv` - Load records to a csv file
|
52
|
-
- `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
53
|
-
- `table` - Output an ascii table of records. Useful for debugging.
|
54
|
-
|
55
|
-
### Provider-specific importers
|
56
|
-
|
57
|
-
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
67
|
+
### Built-in Connectors
|
68
|
+
`chronicle-etl` comes with several built-in connectors for common formats and sources.
|
58
69
|
|
59
|
-
|
60
|
-
- [
|
61
|
-
- [
|
70
|
+
#### Extractors
|
71
|
+
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
|
72
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
|
73
|
+
- [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - Load from a single file or directory (with a glob pattern)
|
62
74
|
|
63
|
-
|
75
|
+
#### Transformers
|
76
|
+
- [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
|
64
77
|
|
65
|
-
|
78
|
+
#### Loaders
|
79
|
+
- [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
|
80
|
+
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
|
81
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
|
82
|
+
- [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
66
83
|
|
67
|
-
|
68
|
-
|
69
|
-
## Full commands
|
70
|
-
|
71
|
-
```
|
72
|
-
$ chronicle-etl help
|
73
|
-
|
74
|
-
ALL COMMANDS
|
75
|
-
help # This help menu
|
76
|
-
connectors help [COMMAND] # Describe subcommands or one specific subcommand
|
77
|
-
connectors:install NAME # Installs connector NAME
|
78
|
-
connectors:list # Lists available connectors
|
79
|
-
jobs help [COMMAND] # Describe subcommands or one specific subcommand
|
80
|
-
jobs:create # Create a job
|
81
|
-
jobs:list # List all available jobs
|
82
|
-
jobs:run # Start a job
|
83
|
-
jobs:show # Show details about a job
|
84
|
-
```
|
85
|
-
|
86
|
-
### Running a job
|
84
|
+
### Plugins
|
85
|
+
Plugins provide access to data from third-party platforms, services, or formats.
|
87
86
|
|
87
|
+
```bash
|
88
|
+
# Install a plugin
|
89
|
+
$ chronicle-etl connectors:install NAME
|
88
90
|
```
|
89
|
-
Usage:
|
90
|
-
chronicle-etl jobs:run
|
91
91
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
92
|
+
A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and they’re being ported over to the Chronicle system. This table shows what’s available now and what’s coming. Rows are sorted in very rough order of priority.
|
93
|
+
|
94
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
95
|
+
|
96
|
+
| Name | Description | Availability |
|
97
|
+
|-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
|
98
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
|
99
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (zsh support pending) |
|
100
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available (imap support pending) |
|
101
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | Bookmarks and tags | Available |
|
102
|
+
| github | Github user and repo activity | In progress |
|
103
|
+
| safari | Browser history from local sqlite db | Needs porting |
|
104
|
+
| chrome | Browser history from local sqlite db | Needs porting |
|
105
|
+
| whatsapp | Messaging history (via individual chat exports) or reverse-engineered local desktop install | Unstarted |
|
106
|
+
| anki | Studying and card creation history | Needs porting |
|
107
|
+
| facebook | Messaging and posting history via data export files | Needs porting |
|
108
|
+
| twitter | History via API or export data files | Needs porting |
|
109
|
+
| foursquare | Location history via API | Needs porting |
|
110
|
+
| goodreads | Reading history via export csv (RIP goodreads API) | Needs porting |
|
111
|
+
| lastfm | Listening history via API | Needs porting |
|
112
|
+
| images | Process image files | Needs porting |
|
113
|
+
| arc | Location history from synced icloud backup files | Needs porting |
|
114
|
+
| firefox | Browser history from local sqlite db | Needs porting |
|
115
|
+
| fitbit | Personal analytics via API | Needs porting |
|
116
|
+
| git | Commit history on a repo | Needs porting |
|
117
|
+
| google-calendar | Calendar events via API | Needs porting |
|
118
|
+
| instagram | Posting and messaging history via export data | Needs porting |
|
119
|
+
| shazam | Song tags via reverse-engineered API | Needs porting |
|
120
|
+
| slack | Messaging history via API | Needs rethinking |
|
121
|
+
| strava | Activity history via API | Needs porting |
|
122
|
+
| things | Task activity via local sqlite db | Needs porting |
|
123
|
+
| bear | Note taking activity via local sqlite db | Needs porting |
|
124
|
+
| youtube | Video activity via takeout data and API | Needs porting |
|
125
|
+
|
126
|
+
### Writing your own connector
|
127
|
+
|
128
|
+
Additional connectors are packaged as separate ruby gems. You can view the [iMessage plugin](https://github.com/chronicle-app/chronicle-imessage) for an example.
|
129
|
+
|
130
|
+
If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
|
131
|
+
|
132
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
133
|
+
|
134
|
+
#### Sample custom Extractor class
|
135
|
+
```ruby
|
136
|
+
module Chronicle
|
137
|
+
module FooService
|
138
|
+
class FooExtractor < Chronicle::ETL::Extractor
|
139
|
+
register_connector do |r|
|
140
|
+
r.identifier = 'foo'
|
141
|
+
r.description = 'From foo.com'
|
142
|
+
end
|
143
|
+
|
144
|
+
setting :access_token, required: true
|
145
|
+
|
146
|
+
def prepare
|
147
|
+
@records = # load from somewhere
|
148
|
+
end
|
149
|
+
|
150
|
+
def extract
|
151
|
+
@records.each do |record|
|
152
|
+
yield Chronicle::ETL::Extraction.new(data: record.to_h)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
107
158
|
```
|
108
159
|
|
109
160
|
## Development
|
110
|
-
|
111
161
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
112
162
|
|
113
163
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
114
164
|
|
115
|
-
|
165
|
+
### Additional development commands
|
166
|
+
```bash
|
167
|
+
# run tests
|
168
|
+
bundle exec rake spec
|
169
|
+
|
170
|
+
# generate docs
|
171
|
+
bundle exec rake yard
|
172
|
+
|
173
|
+
# use Guard to run specs automatically
|
174
|
+
bundle exec guard
|
175
|
+
```
|
116
176
|
|
177
|
+
## Get in touch
|
178
|
+
- [@hyfen](https://twitter.com/hyfen) on Twitter
|
179
|
+
- [@hyfen](https://github.com/hyfen) on Github
|
180
|
+
- Email: andrew@hyfen.net
|
181
|
+
|
182
|
+
## Contributing
|
117
183
|
Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
118
184
|
|
119
185
|
## License
|
120
|
-
|
121
186
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
122
187
|
|
123
188
|
## Code of Conduct
|
124
|
-
|
125
|
-
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
189
|
+
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/chronicle-etl.gemspec
CHANGED
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
|
|
17
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
19
|
if spec.respond_to?(:metadata)
|
20
|
-
|
20
|
+
spec.metadata['allowed_push_host'] = "https://rubygems.org"
|
21
21
|
|
22
22
|
spec.metadata["homepage_uri"] = spec.homepage
|
23
23
|
spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
|
24
|
-
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
|
25
25
|
else
|
26
26
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
27
|
"public gem pushes."
|
@@ -35,17 +35,18 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.bindir = "exe"
|
36
36
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
37
|
spec.require_paths = ["lib"]
|
38
|
+
spec.required_ruby_version = ">= 2.7"
|
38
39
|
|
39
|
-
spec.add_dependency "activesupport"
|
40
|
+
spec.add_dependency "activesupport", "~> 7.0"
|
40
41
|
spec.add_dependency "chronic_duration", "~> 0.10.6"
|
41
42
|
spec.add_dependency "colorize", "~> 0.8.1"
|
42
43
|
spec.add_dependency "marcel", "~> 1.0.2"
|
43
44
|
spec.add_dependency "mini_exiftool", "~> 2.10"
|
44
45
|
spec.add_dependency "nokogiri", "~> 1.13"
|
45
|
-
spec.add_dependency "runcom", "
|
46
|
+
spec.add_dependency "runcom", ">= 6.0"
|
46
47
|
spec.add_dependency "sequel", "~> 5.35"
|
47
48
|
spec.add_dependency "sqlite3", "~> 1.4"
|
48
|
-
spec.add_dependency "thor", "~>
|
49
|
+
spec.add_dependency "thor", "~> 1.2"
|
49
50
|
spec.add_dependency "tty-progressbar", "~> 0.17"
|
50
51
|
spec.add_dependency "tty-table", "~> 0.11"
|
51
52
|
|
@@ -53,4 +54,8 @@ Gem::Specification.new do |spec|
|
|
53
54
|
spec.add_development_dependency "pry-byebug", "~> 3.9"
|
54
55
|
spec.add_development_dependency "rake", "~> 13.0"
|
55
56
|
spec.add_development_dependency "rspec", "~> 3.9"
|
57
|
+
spec.add_development_dependency "simplecov", "~> 0.21"
|
58
|
+
spec.add_development_dependency "guard-rspec", "~> 4.7.3"
|
59
|
+
spec.add_development_dependency "yard", "~> 0.9.7"
|
60
|
+
spec.add_development_dependency "rubocop", "~> 1.25.1"
|
56
61
|
end
|
data/exe/chronicle-etl
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Chronicle
|
2
4
|
module ETL
|
3
5
|
module CLI
|
@@ -38,6 +40,38 @@ module Chronicle
|
|
38
40
|
table = TTY::Table.new(headers, connector_info.map(&:values))
|
39
41
|
puts table.render(indent: 0, padding: [0, 2])
|
40
42
|
end
|
43
|
+
|
44
|
+
desc "show PHASE IDENTIFIER", "Show information about a connector"
|
45
|
+
def show(phase, identifier)
|
46
|
+
unless ['extractor', 'transformer', 'loader'].include?(phase)
|
47
|
+
puts "phase argument must be one of: [extractor, transformer, loader]"
|
48
|
+
return
|
49
|
+
end
|
50
|
+
|
51
|
+
begin
|
52
|
+
connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
|
53
|
+
rescue Chronicle::ETL::ConnectorNotAvailableError
|
54
|
+
puts "Could not find #{phase} #{identifier}"
|
55
|
+
return
|
56
|
+
end
|
57
|
+
|
58
|
+
puts connector.klass.to_s.bold
|
59
|
+
puts " #{connector.descriptive_phrase}"
|
60
|
+
puts
|
61
|
+
puts "OPTIONS"
|
62
|
+
|
63
|
+
headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
|
64
|
+
|
65
|
+
settings = connector.klass.settings.map do |name, setting|
|
66
|
+
[
|
67
|
+
name,
|
68
|
+
setting.default,
|
69
|
+
setting.required ? 'yes' : 'no'
|
70
|
+
]
|
71
|
+
end
|
72
|
+
table = TTY::Table.new(headers, settings)
|
73
|
+
puts table.render(indent: 0, padding: [0, 2])
|
74
|
+
end
|
41
75
|
end
|
42
76
|
end
|
43
77
|
end
|
@@ -1,21 +1,37 @@
|
|
1
1
|
require 'pp'
|
2
|
+
|
2
3
|
module Chronicle
|
3
4
|
module ETL
|
4
5
|
module CLI
|
5
6
|
# CLI commands for working with ETL jobs
|
6
7
|
class Jobs < SubcommandBase
|
7
8
|
default_task "start"
|
8
|
-
namespace :jobs
|
9
|
+
namespace :jobs
|
10
|
+
|
11
|
+
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
9
12
|
|
10
|
-
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: '
|
13
|
+
class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'NAME'
|
11
14
|
class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
|
12
|
-
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: '
|
15
|
+
class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'NAME'
|
13
16
|
class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
|
14
|
-
class_option :loader, aliases: '-l', desc: 'Loader class. Default:
|
17
|
+
class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
|
15
18
|
class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
|
16
|
-
class_option :name, aliases: '-j', desc: 'Job configuration name'
|
17
19
|
|
18
|
-
|
20
|
+
# This is an array to deal with shell globbing
|
21
|
+
class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
|
22
|
+
class_option :since, desc: "Load records SINCE this date", banner: 'DATE'
|
23
|
+
class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
|
24
|
+
class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
|
25
|
+
|
26
|
+
class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
|
27
|
+
class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
|
28
|
+
|
29
|
+
class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
30
|
+
class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
31
|
+
class_option :silent, desc: 'Silence all output', type: :boolean
|
32
|
+
|
33
|
+
# Thor doesn't like `run` as a command name
|
34
|
+
map run: :start
|
19
35
|
desc "run", "Start a job"
|
20
36
|
option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
|
21
37
|
option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
|
@@ -69,7 +85,7 @@ LONG_DESC
|
|
69
85
|
[job, extractor, transformer, loader]
|
70
86
|
end
|
71
87
|
|
72
|
-
headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
|
88
|
+
headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
|
73
89
|
|
74
90
|
table = TTY::Table.new(headers, job_details)
|
75
91
|
puts table.render(indent: 0, padding: [0, 2])
|
@@ -78,7 +94,9 @@ LONG_DESC
|
|
78
94
|
private
|
79
95
|
|
80
96
|
def setup_log_level
|
81
|
-
if options[:
|
97
|
+
if options[:silent]
|
98
|
+
Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::SILENT
|
99
|
+
elsif options[:verbose]
|
82
100
|
Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
|
83
101
|
elsif options[:log_level]
|
84
102
|
level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
|
@@ -90,7 +108,7 @@ LONG_DESC
|
|
90
108
|
def build_job_definition(options)
|
91
109
|
definition = Chronicle::ETL::JobDefinition.new
|
92
110
|
definition.add_config(load_job_config(options[:name]))
|
93
|
-
definition.add_config(process_flag_options(options))
|
111
|
+
definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
|
94
112
|
definition
|
95
113
|
end
|
96
114
|
|
@@ -100,19 +118,33 @@ LONG_DESC
|
|
100
118
|
|
101
119
|
# Takes flag options and turns them into a runner config
|
102
120
|
def process_flag_options options
|
121
|
+
extractor_options = options[:'extractor-opts'].merge({
|
122
|
+
input: (options[:input] if options[:input].any?),
|
123
|
+
since: options[:since],
|
124
|
+
until: options[:until],
|
125
|
+
limit: options[:limit],
|
126
|
+
}.compact)
|
127
|
+
|
128
|
+
transformer_options = options[:'transformer-opts']
|
129
|
+
|
130
|
+
loader_options = options[:'loader-opts'].merge({
|
131
|
+
output: options[:output],
|
132
|
+
fields: options[:fields]
|
133
|
+
}.compact)
|
134
|
+
|
103
135
|
{
|
104
136
|
dry_run: options[:dry_run],
|
105
137
|
extractor: {
|
106
138
|
name: options[:extractor],
|
107
|
-
options:
|
139
|
+
options: extractor_options
|
108
140
|
}.compact,
|
109
141
|
transformer: {
|
110
142
|
name: options[:transformer],
|
111
|
-
options:
|
143
|
+
options: transformer_options
|
112
144
|
}.compact,
|
113
145
|
loader: {
|
114
146
|
name: options[:loader],
|
115
|
-
options:
|
147
|
+
options: loader_options
|
116
148
|
}.compact
|
117
149
|
}
|
118
150
|
end
|