chronicle-etl 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +28 -1
  4. data/Guardfile +7 -0
  5. data/README.md +149 -85
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +10 -5
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -0
  10. data/lib/chronicle/etl/cli/jobs.rb +44 -12
  11. data/lib/chronicle/etl/cli/main.rb +13 -19
  12. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  13. data/lib/chronicle/etl/cli.rb +7 -0
  14. data/lib/chronicle/etl/configurable.rb +158 -0
  15. data/lib/chronicle/etl/exceptions.rb +7 -1
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  17. data/lib/chronicle/etl/extractors/extractor.rb +23 -19
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  21. data/lib/chronicle/etl/job.rb +1 -1
  22. data/lib/chronicle/etl/job_definition.rb +1 -1
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +1 -1
  24. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  25. data/lib/chronicle/etl/loaders/loader.rb +5 -2
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  27. data/lib/chronicle/etl/loaders/table_loader.rb +21 -24
  28. data/lib/chronicle/etl/logger.rb +1 -0
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +1 -0
  33. data/lib/chronicle/etl/runner.rb +6 -4
  34. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  35. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  36. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  37. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  38. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  39. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +12 -4
  42. metadata +80 -19
  43. data/.ruby-version +0 -1
  44. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  45. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  46. data/lib/chronicle/etl/models/generic.rb +0 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bfd4e081bfeda7d097a5a5eee6ccf28baf0a9b3878968d74c9a604013d0b55a6
4
- data.tar.gz: 003ebd2ffe2b1220c7f43a4875043cb5500aa1b7a6327b84c9be10f04e0e8d40
3
+ metadata.gz: 8a267de435b41b579e36128b7392729ef499eb37f05fabaead7811f089938ddb
4
+ data.tar.gz: d4af2f62f3f5de926bdfbb0e3d6dbe2c952ec286c07317af4dca8d98f665d6da
5
5
  SHA512:
6
- metadata.gz: 3d786fb4acf8d0b03e65262209def310ca25b92646847f6e96791e6491e9b159ab11db7fa35f785f6782fbc0b9e3daebb625e2353fce2422f7fc79aed7a4d6bc
7
- data.tar.gz: 87771745b9df2160966299f1d73eb568b46080ed217a1952e1dd938fff7758432f2cc6f449036d24951660b405e18083f1adac26d00243a0fab9003a96eb569d
6
+ metadata.gz: c78080cce008340f0b2795be46da2b5eb6562b2bffd97728150960343870f2bea4699e4efa07905710dd0e2eba7aaa1e803d8c0f727196f5d9d655b28a04f02e
7
+ data.tar.gz: cae3a3ffb6527f5c0b3ff89c75dc98d9cd66157ee6230c9db797f4683f90e2146daadf291108e55d3090d0120d3c9e25135cb21c4e9078bcaf4d1edf2172c930
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ main ]
13
+ pull_request:
14
+ branches: [ main ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.rubocop.yml CHANGED
@@ -1,11 +1,38 @@
1
1
  AllCops:
2
2
  EnabledByDefault: true
3
+ TargetRubyVersion: 2.7
4
+
5
+ Style/FrozenStringLiteralComment:
6
+ SafeAutoCorrect: true
3
7
 
4
8
  Style/StringLiterals:
5
9
  Enabled: false
6
10
 
11
+ Layout/MultilineAssignmentLayout:
12
+ Enabled: false
13
+
14
+ Layout/RedundantLineBreak:
15
+ Enabled: false
16
+
7
17
  Style/MethodCallWithArgsParentheses:
8
18
  Enabled: false
9
19
 
20
+ Style/MethodCalledOnDoEndBlock:
21
+ Exclude:
22
+ - 'spec/**/*'
23
+
24
+ Style/OpenStructUse:
25
+ Enabled: false
26
+
27
+ Style/Copyright:
28
+ Enabled: false
29
+
30
+ Style/SymbolArray:
31
+ EnforcedStyle: brackets
32
+
33
+ Style/WordArray:
34
+ EnforcedStyle: brackets
35
+
10
36
  Lint/ConstantResolution:
11
- Enabled: false
37
+ Enabled: false
38
+
data/Guardfile ADDED
@@ -0,0 +1,7 @@
1
+ guard :rspec, cmd: "bundle exec rspec" do
2
+ require "guard/rspec/dsl"
3
+
4
+ watch(%r{^spec/.+_spec\.rb$})
5
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
+ watch('spec/spec_helper.rb') { "spec" }
7
+ end
data/README.md CHANGED
@@ -1,125 +1,189 @@
1
- # Chronicle::ETL
1
+ ## A CLI toolkit for extracting and working with your digital history
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
3
+ [![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl) [![Ruby](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml/badge.svg)](https://github.com/chronicle-app/chronicle-etl/actions/workflows/ruby.yml)
4
4
 
5
- Chronicle ETL is a utility that helps you archive and processes personal data. You can *extract* it from a variety of sources, *transform* it, and *load* it to an external API, file, or stdout.
5
+ Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
6
6
 
7
- This tool is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex) and the dozens of existing importers are being migrated to Chronicle.
7
+ If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this project is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
8
8
 
9
- ## Installation
9
+ `chronicle-etl` is a CLI tool that gives you the ability to easily access your personal data. It uses the ETL pattern to **extract** it from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), **transform** it (into a given schema), and **load** it to a destination (e.g. a CSV file, JSON, external API).
10
10
 
11
- ```bash
12
- $ gem install chronicle-etl
11
+ ## What does `chronicle-etl` give you?
12
+ * **CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
13
+ * **Plugins for many third-party providers**. A plugin system allows you to access data from third-party providers and hook it into the shared CLI infrastructure.
14
+ * **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are stored in a common schema. Don’t want to use the schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.
15
+
16
+ ## Installation
17
+ ```sh
18
+ # Install chronicle-etl
19
+ gem install chronicle-etl
13
20
  ```
14
21
 
15
- ## Usage
22
+ After installation, the `chronicle-etl` command will be available in your shell. Homebrew support [is coming soon](https://github.com/chronicle-app/chronicle-etl/issues/13).
16
23
 
17
- After installing the gem, `chronicle-etl` is available to run in your shell.
24
+ ## Basic usage and running jobs
18
25
 
19
- ```bash
20
- # read test.csv and display it as a table
21
- $ chronicle-etl jobs:run --extractor csv --extractor-opts filename:test.csv --loader table
26
+ ```sh
27
+ # Display help
28
+ $ chronicle-etl help
22
29
 
23
- # Display help for the jobs:run command
24
- $ chronicle-etl jobs help run
30
+ # Basic job usage
31
+ $ chronicle-etl --extractor NAME --transformer NAME --loader NAME
32
+
33
+ # Read test.csv and display it to stdout as a table
34
+ $ chronicle-etl --extractor csv --input ./data.csv --loader table
25
35
  ```
26
36
 
27
- ## Connectors
37
+ ### Common options
38
+ ```sh
39
+ Options:
40
+ -j, [--name=NAME] # Job configuration name
41
+ -e, [--extractor=EXTRACTOR-NAME] # Extractor class. Default: stdin
42
+ [--extractor-opts=key:value] # Extractor options
43
+ -t, [--transformer=TRANSFORMER-NAME] # Transformer class. Default: null
44
+ [--transformer-opts=key:value] # Transformer options
45
+ -l, [--loader=LOADER-NAME] # Loader class. Default: table
46
+ [--loader-opts=key:value] # Loader options
47
+ -i, [--input=FILENAME] # Input filename or directory
48
+ [--since=DATE] # Load records SINCE this date. Overrides job's `load_since` configuration option in extractor's options
49
+ [--until=DATE] # Load records UNTIL this date
50
+ [--limit=N] # Only extract the first LIMIT records
51
+ -o, [--output=OUTPUT] # Output filename
52
+ [--fields=field1 field2 ...] # Output only these fields
53
+ [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
54
+ # Default: info
55
+ -v, [--verbose], [--no-verbose] # Set log level to verbose
56
+ [--silent], [--no-silent] # Silence all output
57
+ ```
28
58
 
59
+ ## Connectors
29
60
  Connectors are available to read, process, and load data from different formats or external services.
30
61
 
31
- ```bash
62
+ ```sh
32
63
  # List all available connectors
33
64
  $ chronicle-etl connectors:list
34
-
35
- # Install a connector
36
- $ chronicle-etl connectors:install imessage
37
65
  ```
38
66
 
39
- Built in connectors:
40
-
41
- ### Extractors
42
- - `stdin` - (default) Load records from line-separated stdin
43
- - `csv`
44
- - `file` - load from a single file or directory (with a glob pattern)
45
-
46
- ### Transformers
47
- - `null` - (default) Don't do anything
48
-
49
- ### Loaders
50
- - `stdout` - (default) output records to stdout serialized as JSON
51
- - `csv` - Load records to a csv file
52
- - `rest` - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
53
- - `table` - Output an ascii table of records. Useful for debugging.
54
-
55
- ### Provider-specific importers
56
-
57
- In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
67
+ ### Built-in Connectors
68
+ `chronicle-etl` comes with several built-in connectors for common formats and sources.
58
69
 
59
- - [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` and other email files
60
- - [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`
61
- - [imessage](https://github.com/chronicle-app/chronicle-imessage). Extract iMessage messages from a local macOS installation
70
+ #### Extractors
71
+ - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
72
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
73
+ - [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
62
74
 
63
- To install any of these, run `gem install chronicle-PROVIDER`.
75
+ #### Transformers
76
+ - [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
64
77
 
65
- If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
78
+ #### Loaders
79
+ - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
80
+ - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
81
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
82
+ - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
66
83
 
67
- I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
68
-
69
- ## Full commands
70
-
71
- ```
72
- $ chronicle-etl help
73
-
74
- ALL COMMANDS
75
- help # This help menu
76
- connectors help [COMMAND] # Describe subcommands or one specific subcommand
77
- connectors:install NAME # Installs connector NAME
78
- connectors:list # Lists available connectors
79
- jobs help [COMMAND] # Describe subcommands or one specific subcommand
80
- jobs:create # Create a job
81
- jobs:list # List all available jobs
82
- jobs:run # Start a job
83
- jobs:show # Show details about a job
84
- ```
85
-
86
- ### Running a job
84
+ ### Plugins
85
+ Plugins provide access to data from third-party platforms, services, or formats.
87
86
 
87
+ ```bash
88
+ # Install a plugin
89
+ $ chronicle-etl connectors:install NAME
88
90
  ```
89
- Usage:
90
- chronicle-etl jobs:run
91
91
 
92
- Options:
93
- [--log-level=LOG_LEVEL] # Log level (debug, info, warn, error, fatal)
94
- # Default: info
95
- -v, [--verbose], [--no-verbose] # Set log level to verbose
96
- [--dry-run], [--no-dry-run] # Only run the extraction and transform steps, not the loading
97
- -e, [--extractor=extractor-name] # Extractor class. Default: stdin
98
- [--extractor-opts=key:value] # Extractor options
99
- -t, [--transformer=transformer-name] # Transformer class. Default: null
100
- [--transformer-opts=key:value] # Transformer options
101
- -l, [--loader=loader-name] # Loader class. Default: stdout
102
- [--loader-opts=key:value] # Loader options
103
- -j, [--name=NAME] # Job configuration name
104
-
105
-
106
- Runs an ETL job
92
+ A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and they’re being ported over to the Chronicle system. This table shows what’s available now and what’s coming. Rows are sorted in very rough order of priority.
93
+
94
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
95
+
96
+ | Name | Description | Availability |
97
+ |-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
98
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
99
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (zsh support pending) |
100
+ | [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available (imap support pending) |
101
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | Bookmarks and tags | Available |
102
+ | github | Github user and repo activity | In progress |
103
+ | safari | Browser history from local sqlite db | Needs porting |
104
+ | chrome | Browser history from local sqlite db | Needs porting |
105
+ | whatsapp | Messaging history (via individual chat exports) or reverse-engineered local desktop install | Unstarted |
106
+ | anki | Studying and card creation history | Needs porting |
107
+ | facebook | Messaging and posting history via data export files | Needs porting |
108
+ | twitter | History via API or export data files | Needs porting |
109
+ | foursquare | Location history via API | Needs porting |
110
+ | goodreads | Reading history via export csv (RIP goodreads API) | Needs porting |
111
+ | lastfm | Listening history via API | Needs porting |
112
+ | images | Process image files | Needs porting |
113
+ | arc | Location history from synced icloud backup files | Needs porting |
114
+ | firefox | Browser history from local sqlite db | Needs porting |
115
+ | fitbit | Personal analytics via API | Needs porting |
116
+ | git | Commit history on a repo | Needs porting |
117
+ | google-calendar | Calendar events via API | Needs porting |
118
+ | instagram | Posting and messaging history via export data | Needs porting |
119
+ | shazam | Song tags via reverse-engineered API | Needs porting |
120
+ | slack | Messaging history via API | Needs rethinking |
121
+ | strava | Activity history via API | Needs porting |
122
+ | things | Task activity via local sqlite db | Needs porting |
123
+ | bear | Note taking activity via local sqlite db | Needs porting |
124
+ | youtube | Video activity via takeout data and API | Needs porting |
125
+
126
+ ### Writing your own connector
127
+
128
+ Additional connectors are packaged as separate ruby gems. You can view the [iMessage plugin](https://github.com/chronicle-app/chronicle-imessage) for an example.
129
+
130
+ If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
131
+
132
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
133
+
134
+ #### Sample custom Extractor class
135
+ ```ruby
136
+ module Chronicle
137
+ module FooService
138
+ class FooExtractor < Chronicle::ETL::Extractor
139
+ register_connector do |r|
140
+ r.identifier = 'foo'
141
+ r.description = 'From foo.com'
142
+ end
143
+
144
+ setting :access_token, required: true
145
+
146
+ def prepare
147
+ @records = # load from somewhere
148
+ end
149
+
150
+ def extract
151
+ @records.each do |record|
152
+ yield Chronicle::ETL::Extraction.new(data: record.to_h)
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
107
158
  ```
108
159
 
109
160
  ## Development
110
-
111
161
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
112
162
 
113
163
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
114
164
 
115
- ## Contributing
165
+ ### Additional development commands
166
+ ```bash
167
+ # run tests
168
+ bundle exec rake spec
169
+
170
+ # generate docs
171
+ bundle exec rake yard
172
+
173
+ # use Guard to run specs automatically
174
+ bundle exec guard
175
+ ```
116
176
 
177
+ ## Get in touch
178
+ - [@hyfen](https://twitter.com/hyfen) on Twitter
179
+ - [@hyfen](https://github.com/hyfen) on Github
180
+ - Email: andrew@hyfen.net
181
+
182
+ ## Contributing
117
183
  Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
118
184
 
119
185
  ## License
120
-
121
186
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
122
187
 
123
188
  ## Code of Conduct
124
-
125
- Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
189
+ Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rspec/core/rake_task"
3
-
4
3
  RSpec::Core::RakeTask.new(:spec)
5
4
 
6
- task :default => :spec
5
+ require 'yard'
6
+ YARD::Rake::YardocTask.new
7
+
8
+ task default: :spec
@@ -17,11 +17,11 @@ Gem::Specification.new do |spec|
17
17
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
18
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
19
  if spec.respond_to?(:metadata)
20
- # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
20
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
21
 
22
22
  spec.metadata["homepage_uri"] = spec.homepage
23
23
  spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/blob/master/CHANGELOG.md"
24
+ spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
25
25
  else
26
26
  raise "RubyGems 2.0 or newer is required to protect against " \
27
27
  "public gem pushes."
@@ -35,17 +35,18 @@ Gem::Specification.new do |spec|
35
35
  spec.bindir = "exe"
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
+ spec.required_ruby_version = ">= 2.7"
38
39
 
39
- spec.add_dependency "activesupport"
40
+ spec.add_dependency "activesupport", "~> 7.0"
40
41
  spec.add_dependency "chronic_duration", "~> 0.10.6"
41
42
  spec.add_dependency "colorize", "~> 0.8.1"
42
43
  spec.add_dependency "marcel", "~> 1.0.2"
43
44
  spec.add_dependency "mini_exiftool", "~> 2.10"
44
45
  spec.add_dependency "nokogiri", "~> 1.13"
45
- spec.add_dependency "runcom", "~> 6.2"
46
+ spec.add_dependency "runcom", ">= 6.0"
46
47
  spec.add_dependency "sequel", "~> 5.35"
47
48
  spec.add_dependency "sqlite3", "~> 1.4"
48
- spec.add_dependency "thor", "~> 0.20"
49
+ spec.add_dependency "thor", "~> 1.2"
49
50
  spec.add_dependency "tty-progressbar", "~> 0.17"
50
51
  spec.add_dependency "tty-table", "~> 0.11"
51
52
 
@@ -53,4 +54,8 @@ Gem::Specification.new do |spec|
53
54
  spec.add_development_dependency "pry-byebug", "~> 3.9"
54
55
  spec.add_development_dependency "rake", "~> 13.0"
55
56
  spec.add_development_dependency "rspec", "~> 3.9"
57
+ spec.add_development_dependency "simplecov", "~> 0.21"
58
+ spec.add_development_dependency "guard-rspec", "~> 4.7.3"
59
+ spec.add_development_dependency "yard", "~> 0.9.7"
60
+ spec.add_development_dependency "rubocop", "~> 1.25.1"
56
61
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli/main"
3
+ require "chronicle/etl/cli"
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Chronicle
2
4
  module ETL
3
5
  module CLI
@@ -38,6 +40,38 @@ module Chronicle
38
40
  table = TTY::Table.new(headers, connector_info.map(&:values))
39
41
  puts table.render(indent: 0, padding: [0, 2])
40
42
  end
43
+
44
+ desc "show PHASE IDENTIFIER", "Show information about a connector"
45
+ def show(phase, identifier)
46
+ unless ['extractor', 'transformer', 'loader'].include?(phase)
47
+ puts "phase argument must be one of: [extractor, transformer, loader]"
48
+ return
49
+ end
50
+
51
+ begin
52
+ connector = Chronicle::ETL::Registry.find_by_phase_and_identifier(phase.to_sym, identifier)
53
+ rescue Chronicle::ETL::ConnectorNotAvailableError
54
+ puts "Could not find #{phase} #{identifier}"
55
+ return
56
+ end
57
+
58
+ puts connector.klass.to_s.bold
59
+ puts " #{connector.descriptive_phrase}"
60
+ puts
61
+ puts "OPTIONS"
62
+
63
+ headers = ['name', 'default', 'required'].map{ |h| h.to_s.upcase.bold }
64
+
65
+ settings = connector.klass.settings.map do |name, setting|
66
+ [
67
+ name,
68
+ setting.default,
69
+ setting.required ? 'yes' : 'no'
70
+ ]
71
+ end
72
+ table = TTY::Table.new(headers, settings)
73
+ puts table.render(indent: 0, padding: [0, 2])
74
+ end
41
75
  end
42
76
  end
43
77
  end
@@ -1,21 +1,37 @@
1
1
  require 'pp'
2
+
2
3
  module Chronicle
3
4
  module ETL
4
5
  module CLI
5
6
  # CLI commands for working with ETL jobs
6
7
  class Jobs < SubcommandBase
7
8
  default_task "start"
8
- namespace :jobs
9
+ namespace :jobs
10
+
11
+ class_option :name, aliases: '-j', desc: 'Job configuration name'
9
12
 
10
- class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'extractor-name'
13
+ class_option :extractor, aliases: '-e', desc: "Extractor class. Default: stdin", banner: 'NAME'
11
14
  class_option :'extractor-opts', desc: 'Extractor options', type: :hash, default: {}
12
- class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'transformer-name'
15
+ class_option :transformer, aliases: '-t', desc: 'Transformer class. Default: null', banner: 'NAME'
13
16
  class_option :'transformer-opts', desc: 'Transformer options', type: :hash, default: {}
14
- class_option :loader, aliases: '-l', desc: 'Loader class. Default: stdout', banner: 'loader-name'
17
+ class_option :loader, aliases: '-l', desc: 'Loader class. Default: table', banner: 'NAME'
15
18
  class_option :'loader-opts', desc: 'Loader options', type: :hash, default: {}
16
- class_option :name, aliases: '-j', desc: 'Job configuration name'
17
19
 
18
- map run: :start # Thor doesn't like `run` as a command name
20
+ # This is an array to deal with shell globbing
21
+ class_option :input, aliases: '-i', desc: 'Input filename or directory', default: [], type: 'array', banner: 'FILENAME'
22
+ class_option :since, desc: "Load records SINCE this date", banner: 'DATE'
23
+ class_option :until, desc: "Load records UNTIL this date", banner: 'DATE'
24
+ class_option :limit, desc: "Only extract the first LIMIT records", banner: 'N'
25
+
26
+ class_option :output, aliases: '-o', desc: 'Output filename', type: 'string'
27
+ class_option :fields, desc: 'Output only these fields', type: 'array', banner: 'field1 field2 ...'
28
+
29
+ class_option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
30
+ class_option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
31
+ class_option :silent, desc: 'Silence all output', type: :boolean
32
+
33
+ # Thor doesn't like `run` as a command name
34
+ map run: :start
19
35
  desc "run", "Start a job"
20
36
  option :log_level, desc: 'Log level (debug, info, warn, error, fatal)', default: 'info'
21
37
  option :verbose, aliases: '-v', desc: 'Set log level to verbose', type: :boolean
@@ -69,7 +85,7 @@ LONG_DESC
69
85
  [job, extractor, transformer, loader]
70
86
  end
71
87
 
72
- headers = ['name', 'extractor', 'transformer', 'loader'].map{|h| h.upcase.bold }
88
+ headers = ['name', 'extractor', 'transformer', 'loader'].map { |h| h.upcase.bold }
73
89
 
74
90
  table = TTY::Table.new(headers, job_details)
75
91
  puts table.render(indent: 0, padding: [0, 2])
@@ -78,7 +94,9 @@ LONG_DESC
78
94
  private
79
95
 
80
96
  def setup_log_level
81
- if options[:verbose]
97
+ if options[:silent]
98
+ Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::SILENT
99
+ elsif options[:verbose]
82
100
  Chronicle::ETL::Logger.log_level = Chronicle::ETL::Logger::DEBUG
83
101
  elsif options[:log_level]
84
102
  level = Chronicle::ETL::Logger.const_get(options[:log_level].upcase)
@@ -90,7 +108,7 @@ LONG_DESC
90
108
  def build_job_definition(options)
91
109
  definition = Chronicle::ETL::JobDefinition.new
92
110
  definition.add_config(load_job_config(options[:name]))
93
- definition.add_config(process_flag_options(options))
111
+ definition.add_config(process_flag_options(options).transform_keys(&:to_sym))
94
112
  definition
95
113
  end
96
114
 
@@ -100,19 +118,33 @@ LONG_DESC
100
118
 
101
119
  # Takes flag options and turns them into a runner config
102
120
  def process_flag_options options
121
+ extractor_options = options[:'extractor-opts'].merge({
122
+ input: (options[:input] if options[:input].any?),
123
+ since: options[:since],
124
+ until: options[:until],
125
+ limit: options[:limit],
126
+ }.compact)
127
+
128
+ transformer_options = options[:'transformer-opts']
129
+
130
+ loader_options = options[:'loader-opts'].merge({
131
+ output: options[:output],
132
+ fields: options[:fields]
133
+ }.compact)
134
+
103
135
  {
104
136
  dry_run: options[:dry_run],
105
137
  extractor: {
106
138
  name: options[:extractor],
107
- options: options[:'extractor-opts']
139
+ options: extractor_options
108
140
  }.compact,
109
141
  transformer: {
110
142
  name: options[:transformer],
111
- options: options[:'transformer-opts']
143
+ options: transformer_options
112
144
  }.compact,
113
145
  loader: {
114
146
  name: options[:loader],
115
- options: options[:'loader-opts']
147
+ options: loader_options
116
148
  }.compact
117
149
  }
118
150
  end