chronicle-etl 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +28 -1
  4. data/Guardfile +7 -0
  5. data/README.md +149 -85
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +10 -5
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -0
  10. data/lib/chronicle/etl/cli/jobs.rb +44 -12
  11. data/lib/chronicle/etl/cli/main.rb +13 -19
  12. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  13. data/lib/chronicle/etl/cli.rb +7 -0
  14. data/lib/chronicle/etl/configurable.rb +158 -0
  15. data/lib/chronicle/etl/exceptions.rb +7 -1
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  17. data/lib/chronicle/etl/extractors/extractor.rb +23 -19
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  21. data/lib/chronicle/etl/job.rb +1 -1
  22. data/lib/chronicle/etl/job_definition.rb +1 -1
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
  24. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  25. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
  28. data/lib/chronicle/etl/logger.rb +1 -0
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
  33. data/lib/chronicle/etl/runner.rb +6 -4
  34. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  35. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  36. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  37. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  38. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  39. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +12 -4
  42. metadata +80 -19
  43. data/.ruby-version +0 -1
  44. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  45. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  46. data/lib/chronicle/etl/models/generic.rb +0 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bfd4e081bfeda7d097a5a5eee6ccf28baf0a9b3878968d74c9a604013d0b55a6
4
- data.tar.gz: 003ebd2ffe2b1220c7f43a4875043cb5500aa1b7a6327b84c9be10f04e0e8d40
3
+ metadata.gz: 8a267de435b41b579e36128b7392729ef499eb37f05fabaead7811f089938ddb
4
+ data.tar.gz: d4af2f62f3f5de926bdfbb0e3d6dbe2c952ec286c07317af4dca8d98f665d6da
5
5
  SHA512:
6
- metadata.gz: 3d786fb4acf8d0b03e65262209def310ca25b92646847f6e96791e6491e9b159ab11db7fa35f785f6782fbc0b9e3daebb625e2353fce2422f7fc79aed7a4d6bc
7
- data.tar.gz: 87771745b9df2160966299f1d73eb568b46080ed217a1952e1dd938fff7758432f2cc6f449036d24951660b405e18083f1adac26d00243a0fab9003a96eb569d
6
+ metadata.gz: c78080cce008340f0b2795be46da2b5eb6562b2bffd97728150960343870f2bea4699e4efa07905710dd0e2eba7aaa1e803d8c0f727196f5d9d655b28a04f02e
7
+ data.tar.gz: cae3a3ffb6527f5c0b3ff89c75dc98d9cd66157ee6230c9db797f4683f90e2146daadf291108e55d3090d0120d3c9e25135cb21c4e9078bcaf4d1edf2172c930
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ main ]
13
+ pull_request:
14
+ branches: [ main ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.rubocop.yml CHANGED
@@ -1,11 +1,38 @@
1
1
  AllCops:
2
2
  EnabledByDefault: true
3
+ TargetRubyVersion: 2.7
4
+
5
+ Style/FrozenStringLiteralComment:
6
+ SafeAutoCorrect: true
3
7
 
4
8
  Style/StringLiterals:
5
9
  Enabled: false
6
10
 
11
+ Layout/MultilineAssignmentLayout:
12
+ Enabled: false
13
+
14
+ Layout/RedundantLineBreak:
15
+ Enabled: false
16
+
7
17
  Style/MethodCallWithArgsParentheses:
8
18
  Enabled: false
9
19
 
20
+ Style/MethodCalledOnDoEndBlock:
21
+ Exclude:
22
+ - 'spec/**/*'
23
+
24
+ Style/OpenStructUse:
25
+ Enabled: false
26
+
27
+ Style/Copyright:
28
+ Enabled: false
29
+
30
+ Style/SymbolArray:
31
+ EnforcedStyle: brackets
32
+
33
+ Style/WordArray:
34
+ EnforcedStyle: brackets
35
+
10
36
  Lint/ConstantResolution:
11
- Enabled: false
37
+ Enabled: false
38
+
data/Guardfile ADDED
@@ -0,0 +1,7 @@
1
+ guard :rspec, cmd: "bundle exec rspec" do
2
+ require "guard/rspec/dsl"
3
+
4
+ watch(%r{^spec/.+_spec\.rb$})
5
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
+ watch('spec/spec_helper.rb') { "spec" }
7
+ end
data/README.md CHANGED
@@ -1,125 +1,189 @@
1
- # Chronicle::ETL
1
+ ## A CLI toolkit for extracting and working with your digital history
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
4
4
 
5
- Chronicle ETL is a utility that helps you archive and processes personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
5
+ Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
6
6
 
7
- This tool is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex) and the dozens of existing importers are being migrated to Chronicle.
7
+ If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this project is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
8
8
 
9
- ## Installation
9
+ `chronicle-etl` is a CLI tool that gives you the ability to easily access your personal data. It uses the ETL pattern to **extract** it from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), **transform** it (into a given schema), and **load** it to a destination (e.g. a CSV file, JSON, external API).
10
10
 
11
- ```bash
12
- $ gem install chronicle-etl
11
+ ## What does `chronicle-etl` give you?
12
+ * **CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
13
+ * **Plugins for many third-party providers**. A plugin system allows you to access data from third-party providers and hook it into the shared CLI infrastructure.
14
+ * **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are stored in a common schema. Don’t want to use the schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.
15
+
16
+ ## Installation
17
+ ```sh
18
+ # Install chronicle-etl
19
+ gem install chronicle-etl
13
20
  ```
14
21
 
15
- ## Usage
22
+ After installation, the `chronicle-etl` command will be available in your shell. Homebrew support [is coming soon](https://github.com/chronicle-app/chronicle-etl/issues/13).
16
23
 
17
- After installing the gem, `chronicle-etl` is available to run in your shell.
24
+ ## Basic usage and running jobs
18
25
 
19
- ```bash
20
- # read test.csv and display it as a table
21
- $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
26
+ ```sh
27
+ # Display help
28
+ $ chronicle-etl help
22
29
 
23
- # Display help for the jobs:run command
24
- $ chronicle-etl jobs help run
30
+ # Basic job usage
31
+ $ chronicle-etl --extractor NAME --transformer NAME --loader NAME
32
+
33
+ # Read data.csv and display it to stdout as a table
34
+ $ chronicle-etl --extractor csv --input ./data.csv --loader table
25
35
  ```
26
36
 
27
- ## Connectors
37
+ ### Common options
38
+ ```sh
39
+ Options:
40
+ -j, [--name=NAME] # Job configuration name
41
+ -e, [--extractor=EXTRACTOR-NAME] # Extractor class. Default: stdin
42
+ [--extractor-opts=key:value] # Extractor options
43
+ -t, [--transformer=TRANSFORMER-NAME] # Transformer class. Default: null
44
+ [--transformer-opts=key:value] # Transformer options
45
+ -l, [--loader=LOADER-NAME] # Loader class. Default: table
46
+ [--loader-opts=key:value] # Loader options
47
+ -i, [--input=FILENAME] # Input filename or directory
48
+ [--since=DATE] # Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options
49
+ [--until=DATE] # Load records UNTIL this date
50
+ [--limit=N] # Only extract the first LIMIT records
51
+ -o, [--output=OUTPUT] # Output filename
52
+ [--fields=field1 field2 ...] # Output only these fields
53
+ [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
54
+ # Default: info
55
+ -v, [--verbose], [--no-verbose] # Set log level to verbose
56
+ [--silent], [--no-silent] # Silence all output
57
+ ```
28
58
 
59
+ ## Connectors
29
60
  Connectors are available to read, process, and load data from different formats or external services.
30
61
 
31
- ```bash
62
+ ```sh
32
63
  # List all available connectors
33
64
  $ chronicle-etl connectors:list
34
-
35
- # Install a connector
36
- $ chronicle-etl connectors:install imessage
37
65
  ```
38
66
 
39
- Built in connectors:
40
-
41
- ### Extractors
42
- - `stdin` - (default) Load records from line-separated stdin
43
- - `csv`
44
- - `file` - load from a single file or directory (with a glob pattern)
45
-
46
- ### Transformers
47
- - `null` - (default) Don't do anything
48
-
49
- ### Loaders
50
- - `stdout` - (default) output records to stdout serialized as JSON
51
- - `csv` - Load records to a csv file
52
- - `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
53
- - `table` - Output an ascii table of records. Useful for debugging.
54
-
55
- ### Provider-specific importers
56
-
57
- In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
67
+ ### Built-in Connectors
68
+ `chronicle-etl` comes with several built-in connectors for common formats and sources.
58
69
 
59
- - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
60
- - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`
61
- - [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
70
+ #### Extractors
71
+ - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
72
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
73
+ - [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
62
74
 
63
- To install any of these, run `gem install chronicle-PROVIDER`.
75
+ #### Transformers
76
+ - [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
64
77
 
65
- If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
78
+ #### Loaders
79
+ - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
80
+ - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
81
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
82
+ - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
66
83
 
67
- I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
68
-
69
- ## Full commands
70
-
71
- ```
72
- $ chronicle-etl help
73
-
74
- ALL COMMANDS
75
- help # This help menu
76
- connectors help [COMMAND] # Describe subcommands or one specific subcommand
77
- connectors:install NAME # Installs connector NAME
78
- connectors:list # Lists available connectors
79
- jobs help [COMMAND] # Describe subcommands or one specific subcommand
80
- jobs:create # Create a job
81
- jobs:list # List all available jobs
82
- jobs:run # Start a job
83
- jobs:show # Show details about a job
84
- ```
85
-
86
- ### Running a job
84
+ ### Plugins
85
+ Plugins provide access to data from third-party platforms, services, or formats.
87
86
 
87
+ ```bash
88
+ # Install a plugin
89
+ $ chronicle-etl connectors:install NAME
88
90
  ```
89
- Usage:
90
- chronicle-etl jobs:run
91
91
 
92
- Options:
93
- [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
94
- # Default: info
95
- -v, [--verbose], [--no-verbose] # Set log level to verbose
96
- [--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
97
- -e, [--extractor=extractor-name] # Extractor class. Default: stdin
98
- [--extractor-opts=key:value] # Extractor options
99
- -t, [--transformer=transformer-name] # Transformer class. Default: null
100
- [--transformer-opts=key:value] # Transformer options
101
- -l, [--loader=loader-name] # Loader class. Default: stdout
102
- [--loader-opts=key:value] # Loader options
103
- -j, [--name=NAME] # Job configuration name
104
-
105
-
106
- Runs an ETL job
92
+ A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and they’re being ported over to the Chronicle system. This table shows what’s available now and what’s coming. Rows are sorted in very rough order of priority.
93
+
94
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
95
+
96
+ | Name | Description | Availability |
97
+ |-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
98
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
99
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (zsh support pending) |
100
+ | [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available (imap support pending) |
101
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | Bookmarks and tags | Available |
102
+ | github | Github user and repo activity | In progress |
103
+ | safari | Browser history from local sqlite db | Needs porting |
104
+ | chrome | Browser history from local sqlite db | Needs porting |
105
+ | whatsapp | Messaging history (via individual chat exports) or reverse-engineered local desktop install | Unstarted |
106
+ | anki | Studying and card creation history | Needs porting |
107
+ | facebook | Messaging and posting history via data export files | Needs porting |
108
+ | twitter | History via API or export data files | Needs porting |
109
+ | foursquare | Location history via API | Needs porting |
110
+ | goodreads | Reading history via export csv (RIP goodreads API) | Needs porting |
111
+ | lastfm | Listening history via API | Needs porting |
112
+ | images | Process image files | Needs porting |
113
+ | arc | Location history from synced icloud backup files | Needs porting |
114
+ | firefox | Browser history from local sqlite db | Needs porting |
115
+ | fitbit | Personal analytics via API | Needs porting |
116
+ | git | Commit history on a repo | Needs porting |
117
+ | google-calendar | Calendar events via API | Needs porting |
118
+ | instagram | Posting and messaging history via export data | Needs porting |
119
+ | shazam | Song tags via reverse-engineered API | Needs porting |
120
+ | slack | Messaging history via API | Needs rethinking |
121
+ | strava | Activity history via API | Needs porting |
122
+ | things | Task activity via local sqlite db | Needs porting |
123
+ | bear | Note taking activity via local sqlite db | Needs porting |
124
+ | youtube | Video activity via takeout data and API | Needs porting |
125
+
126
+ ### Writing your own connector
127
+
128
+ Additional connectors are packaged as separate ruby gems. You can view the [iMessage plugin](https://github.com/chronicle-app/chronicle-imessage) for an example.
129
+
130
+ If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
131
+
132
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
133
+
134
+ #### Sample custom Extractor class
135
+ ```ruby
136
+ module Chronicle
137
+ module FooService
138
+ class FooExtractor < Chronicle::ETL::Extractor
139
+ register_connector do |r|
140
+ r.identifier = 'foo'
141
+ r.description = 'From foo.com'
142
+ end
143
+
144
+ setting :access_token, required: true
145
+
146
+ def prepare
147
+ @records = # load from somewhere
148
+ end
149
+
150
+ def extract
151
+ @records.each do |record|
152
+ yield Chronicle::ETL::Extraction.new(data: record.to_h)
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
107
158
  ```
108
159
 
109
160
  ## Development
110
-
111
161
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
112
162
 
113
163
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
114
164
 
115
- ## Contributing
165
+ ### Additional development commands
166
+ ```bash
167
+ # run tests
168
+ bundle exec rake spec
169
+
170
+ # generate docs
171
+ bundle exec rake yard
172
+
173
+ # use Guard to run specs automatically
174
+ bundle exec guard
175
+ ```
116
176
 
177
+ ## Get in touch
178
+ - [@hyfen](https://twitter.com/hyfen) on Twitter
179
+ - [@hyfen](https://github.com/hyfen) on Github
180
+ - Email: andrew@hyfen.net
181
+
182
+ ## Contributing
117
183
  Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
118
184
 
119
185
  ## License
120
-
121
186
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
122
187
 
123
188
  ## Code of Conduct
124
-
125
- Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
189
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rspec/core/rake_task"
3
-
4
3
  RSpec::Core::RakeTask.new(:spec)
5
4
 
6
- task :default => :spec
5
+ require 'yard'
6
+ YARD::Rake::YardocTask.new
7
+
8
+ task default: :spec
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
17
17
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
18
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
19
  if spec.respond_to?(:metadata)
20
- # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
21
 
22
22
  spec.metadata["homepage_uri"] = spec.homepage
23
23
  spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
24
+ spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
25
25
  else
26
26
  raise "RubyGems 2.0 or newer is required to protect against " \
27
27
  "public gem pushes."
@@ -35,17 +35,18 @@ Gem::Specification.new do |spec|
35
35
  spec.bindir = "exe"
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
+ spec.required_ruby_version = ">= 2.7"
38
39
 
39
- spec.add_dependency "activesupport"
40
+ spec.add_dependency "activesupport", "~> 7.0"
40
41
  spec.add_dependency "chronic_duration", "~> 0.10.6"
41
42
  spec.add_dependency "colorize", "~> 0.8.1"
42
43
  spec.add_dependency "marcel", "~> 1.0.2"
43
44
  spec.add_dependency "mini_exiftool", "~> 2.10"
44
45
  spec.add_dependency "nokogiri", "~> 1.13"
45
- spec.add_dependency "runcom", "~> 6.2"
46
+ spec.add_dependency "runcom", ">= 6.0"
46
47
  spec.add_dependency "sequel", "~> 5.35"
47
48
  spec.add_dependency "sqlite3", "~> 1.4"
48
- spec.add_dependency "thor", "~> 0.20"
49
+ spec.add_dependency "thor", "~> 1.2"
49
50
  spec.add_dependency "tty-progressbar", "~> 0.17"
50
51
  spec.add_dependency "tty-table", "~> 0.11"
51
52
 
@@ -53,4 +54,8 @@ Gem::Specification.new do |spec|
53
54
  spec.add_development_dependency "pry-byebug", "~> 3.9"
54
55
  spec.add_development_dependency "rake", "~> 13.0"
55
56
  spec.add_development_dependency "rspec", "~> 3.9"
57
+ spec.add_development_dependency "simplecov", "~> 0.21"
58
+ spec.add_development_dependency "guard-rspec", "~> 4.7.3"
59
+ spec.add_development_dependency "yard", "~> 0.9.7"
60
+ spec.add_development_dependency "rubocop", "~> 1.25.1"
56
61
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli/main"
3
+ require "chronicle/etl/cli"
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  module CLI
@@ -38,6 +40,38 @@ module Chronicle
38
40
  table = TTY::Table.new(headers, connector_info.map(&:values))
39
41
  puts table.render(indent: 0, padding: [0, 2])
40
42
  end
43
+
44
+ desc "show PHASE IDENTIFIER", "Show information about a connector"
45
+ def show(phase, identifier)
46
+ unless ['extractor', 'transformer', 'loader'].include?(phase)
47
+ puts "phase argument must be one of: [extractor, transformer, loader]"
48
+ return
49
+ end
50
+
51
+ begin
52
+ connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
53
+ rescue Chronicle::ETL::ConnectorNotAvailableError
54
+ puts "Could not find #{phase} #{identifier}"
55
+ return
56
+ end
57
+
58
+ puts connector.klass.to_s.bold
59
+ puts " #{connector.descriptive_phrase}"
60
+ puts
61
+ puts "OPTIONS"
62
+
63
+ headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
64
+
65
+ settings = connector.klass.settings.map do |name, setting|
66
+ [
67
+ name,
68
+ setting.default,
69
+ setting.required ? 'yes' : 'no'
70
+ ]
71
+ end
72
+ table = TTY::Table.new(headers, settings)
73
+ puts table.render(indent: 0, padding: [0, 2])
74
+ end
41
75
  end
42
76
  end
43
77
  end
@@ -1,21 +1,37 @@
1
1
  require 'pp'
2
+
2
3
  module Chronicle
3
4
  module ETL
4
5
  module CLI
5
6
  # CLI commands for working with ETL jobs
6
7
  class Jobs < SubcommandBase
7
8
  default_task "start"
8
- namespace :jobs
9
+ namespace :jobs
10
+
11
+ class_option :name, aliases: '-j', desc: 'Job configuration name'
9
12
 
10
- class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
13
+ class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'NAME'
11
14
  class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
15
+ class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'NAME'
13
16
  class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
17
+ class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
15
18
  class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- class_option :name, aliases: '-j', desc: 'Job configuration name'
17
19
 
18
- map run: :start # Thor doesn't like `run` as a command name
20
+ # This is an array to deal with shell globbing
21
+ class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
22
+ class_option :since, desc: "Load records SINCE this date", banner: 'DATE'
23
+ class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
24
+ class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
25
+
26
+ class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
27
+ class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
28
+
29
+ class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
30
+ class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
31
+ class_option :silent, desc: 'Silence all output', type: :boolean
32
+
33
+ # Thor doesn't like `run` as a command name
34
+ map run: :start
19
35
  desc "run", "Start a job"
20
36
  option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
21
37
  option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
@@ -69,7 +85,7 @@ LONG_DESC
69
85
  [job, extractor, transformer, loader]
70
86
  end
71
87
 
72
- headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
88
+ headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
73
89
 
74
90
  table = TTY::Table.new(headers, job_details)
75
91
  puts table.render(indent: 0, padding: [0, 2])
@@ -78,7 +94,9 @@ LONG_DESC
78
94
  private
79
95
 
80
96
  def setup_log_level
81
- if options[:verbose]
97
+ if options[:silent]
98
+ Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::SILENT
99
+ elsif options[:verbose]
82
100
  Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
83
101
  elsif options[:log_level]
84
102
  level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
@@ -90,7 +108,7 @@ LONG_DESC
90
108
  def build_job_definition(options)
91
109
  definition = Chronicle::ETL::JobDefinition.new
92
110
  definition.add_config(load_job_config(options[:name]))
93
- definition.add_config(process_flag_options(options))
111
+ definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
94
112
  definition
95
113
  end
96
114
 
@@ -100,19 +118,33 @@ LONG_DESC
100
118
 
101
119
  # Takes flag options and turns them into a runner config
102
120
  def process_flag_options options
121
+ extractor_options = options[:'extractor-opts'].merge({
122
+ input: (options[:input] if options[:input].any?),
123
+ since: options[:since],
124
+ until: options[:until],
125
+ limit: options[:limit],
126
+ }.compact)
127
+
128
+ transformer_options = options[:'transformer-opts']
129
+
130
+ loader_options = options[:'loader-opts'].merge({
131
+ output: options[:output],
132
+ fields: options[:fields]
133
+ }.compact)
134
+
103
135
  {
104
136
  dry_run: options[:dry_run],
105
137
  extractor: {
106
138
  name: options[:extractor],
107
- options: options[:'extractor-opts']
139
+ options: extractor_options
108
140
  }.compact,
109
141
  transformer: {
110
142
  name: options[:transformer],
111
- options: options[:'transformer-opts']
143
+ options: transformer_options
112
144
  }.compact,
113
145
  loader: {
114
146
  name: options[:loader],
115
- options: options[:'loader-opts']
147
+ options: loader_options
116
148
  }.compact
117
149
  }
118
150
  end