chronicle-etl 0.5.5 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +75 -68
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +51 -49
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +8 -6
- data/lib/chronicle/etl/cli/connectors.rb +7 -7
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +14 -15
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +18 -8
- data/lib/chronicle/etl/configurable.rb +20 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +38 -26
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +2 -2
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +93 -36
- data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
- data/lib/chronicle/etl/registry/plugins.rb +27 -19
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +4 -4
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +49 -47
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4059afe6aae8af3e2ef626b3313836f8cd937504b79ee13062bd98be9d7dc197
|
4
|
+
data.tar.gz: abcfaefbfbc2aac98a9c30fbdb3e66500a480b3ddc0904b5a75c4deab6d3e62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2fca383a80cce8efa6c6dcf8411807931e0e98f2a1cf491df916e96db0c78a70b50ea51d1209236e661be92ebb51d790e645d7c8a915aecdcc2dc5340a1efdc
|
7
|
+
data.tar.gz: 501359f4e01d3e34de89dd7489bf8254e3d4002b754536737fc78be513776c1e05b8731166a32e8a957f398f7792e5f3b23280b9fb4c5e6cb8a78c44312f31f4
|
data/.github/workflows/ruby.yml
CHANGED
@@ -1,35 +1,25 @@
|
|
1
|
-
# This workflow uses actions that are not certified by GitHub.
|
2
|
-
# They are provided by a third-party and are governed by
|
3
|
-
# separate terms of service, privacy policy, and support
|
4
|
-
# documentation.
|
5
|
-
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
-
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
-
|
8
1
|
name: Ruby
|
9
2
|
|
10
3
|
on:
|
11
4
|
push:
|
12
|
-
branches: [
|
5
|
+
branches: [main]
|
13
6
|
pull_request:
|
14
|
-
branches: [
|
7
|
+
branches: [main]
|
15
8
|
|
16
9
|
jobs:
|
17
|
-
|
18
|
-
|
10
|
+
build:
|
19
11
|
runs-on: ubuntu-latest
|
20
|
-
strategy:
|
21
|
-
matrix:
|
22
|
-
ruby-version: ['2.7', '3.0']
|
23
12
|
|
24
13
|
steps:
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
|
16
|
+
- name: Set up Ruby
|
17
|
+
uses: ruby/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: 3.2
|
20
|
+
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
|
24
|
+
- name: Run tests
|
25
|
+
run: bundle exec rake
|
data/.rubocop.yml
CHANGED
@@ -1,44 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
TargetRubyVersion: 2.7
|
4
|
-
|
5
|
-
Style/FrozenStringLiteralComment:
|
6
|
-
SafeAutoCorrect: true
|
7
|
-
|
8
|
-
Style/StringLiterals:
|
9
|
-
Enabled: false
|
10
|
-
|
11
|
-
Layout/MultilineAssignmentLayout:
|
12
|
-
Enabled: false
|
13
|
-
|
14
|
-
Layout/MultilineMethodCallIndentation:
|
15
|
-
EnforcedStyle: indented
|
16
|
-
|
17
|
-
Layout/RedundantLineBreak:
|
18
|
-
Enabled: false
|
19
|
-
|
20
|
-
Style/MethodCallWithArgsParentheses:
|
21
|
-
Enabled: false
|
22
|
-
|
23
|
-
Style/MethodCalledOnDoEndBlock:
|
24
|
-
Exclude:
|
25
|
-
- 'spec/**/*'
|
26
|
-
|
27
|
-
Style/OpenStructUse:
|
28
|
-
Enabled: false
|
29
|
-
|
30
|
-
Style/Copyright:
|
31
|
-
Enabled: false
|
32
|
-
|
33
|
-
Style/MissingElse:
|
34
|
-
Enabled: false
|
35
|
-
|
36
|
-
Style/SymbolArray:
|
37
|
-
EnforcedStyle: brackets
|
38
|
-
|
39
|
-
Style/WordArray:
|
40
|
-
EnforcedStyle: brackets
|
41
|
-
|
42
|
-
Lint/ConstantResolution:
|
43
|
-
Enabled: false
|
44
|
-
|
1
|
+
inherit_gem:
|
2
|
+
chronicle-core: .rubocop.yml
|
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
3
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
4
|
|
5
5
|
# Specify your gem's dependencies in chronicle-etl.gemspec
|
6
6
|
gemspec
|
data/Guardfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
guard :rspec, cmd:
|
2
|
-
require
|
1
|
+
guard :rspec, cmd: 'bundle exec rspec' do
|
2
|
+
require 'guard/rspec/dsl'
|
3
3
|
|
4
4
|
watch(%r{^spec/.+_spec\.rb$})
|
5
5
|
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
6
|
-
watch('spec/spec_helper.rb') {
|
6
|
+
watch('spec/spec_helper.rb') { 'spec' }
|
7
7
|
end
|
data/README.md
CHANGED
@@ -6,14 +6,15 @@
|
|
6
6
|
|
7
7
|
Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
|
8
8
|
|
9
|
-
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing
|
9
|
+
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing export data, this tool is for you! (_If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues)._)
|
10
10
|
|
11
|
-
**`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to
|
11
|
+
**`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to _extract_ data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), _transform_ it (into a given schema), and _load_ it to a destination (e.g. a CSV file, JSON, external API).
|
12
12
|
|
13
13
|
## What does `chronicle-etl` give you?
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
|
15
|
+
- **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
|
16
|
+
- **Plugins for many third-party sources** (see [list](#available-plugins-and-connectors)). This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
|
17
|
+
- **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
|
17
18
|
|
18
19
|
## Chronicle-ETL in action
|
19
20
|
|
@@ -26,15 +27,19 @@ If you don’t want to spend all your time writing scrapers, reverse-engineering
|
|
26
27
|
## Installation
|
27
28
|
|
28
29
|
Using homebrew:
|
30
|
+
|
29
31
|
```sh
|
30
32
|
$ brew install chronicle-app/etl/chronicle-etl
|
31
33
|
```
|
34
|
+
|
32
35
|
Using rubygems:
|
36
|
+
|
33
37
|
```sh
|
34
38
|
$ gem install chronicle-etl
|
35
39
|
```
|
36
40
|
|
37
41
|
Confirm it installed successfully:
|
42
|
+
|
38
43
|
```sh
|
39
44
|
$ chronicle-etl --version
|
40
45
|
```
|
@@ -45,18 +50,18 @@ $ chronicle-etl --version
|
|
45
50
|
# Display help
|
46
51
|
$ chronicle-etl help
|
47
52
|
|
48
|
-
# Run a basic job
|
53
|
+
# Run a basic job
|
49
54
|
$ chronicle-etl --extractor NAME --transformer NAME --loader NAME
|
50
55
|
|
51
|
-
# Read test.csv and display it to stdout as a table
|
56
|
+
# Read data.csv and display it to stdout as a table
|
52
57
|
$ chronicle-etl --extractor csv --input data.csv --loader table
|
53
58
|
|
54
59
|
# Show available plugins and install one
|
55
60
|
$ chronicle-etl plugins:list
|
56
|
-
$ chronicle-etl plugins:install
|
61
|
+
$ chronicle-etl plugins:install imessage
|
57
62
|
|
58
|
-
# Retrieve
|
59
|
-
$ chronicle-etl -e
|
63
|
+
# Retrieve imessage messages from the last 5 hours
|
64
|
+
$ chronicle-etl -e imessage --since 5h
|
60
65
|
|
61
66
|
# Get email senders from an .mbox email archive file
|
62
67
|
$ chronicle-etl --extractor email:mbox -i sample-email-archive.mbox -t email --fields actor.slug
|
@@ -68,18 +73,23 @@ $ chronicle-etl -e pinboard --since 1mo # Used automatically based on plugin nam
|
|
68
73
|
```
|
69
74
|
|
70
75
|
### Common options
|
76
|
+
|
71
77
|
```sh
|
72
78
|
Options:
|
73
79
|
-e, [--extractor=NAME] # Extractor class. Default: stdin
|
74
80
|
[--extractor-opts=key:value] # Extractor options
|
75
81
|
-t, [--transformer=NAME] # Transformer class. Default: null
|
76
82
|
[--transformer-opts=key:value] # Transformer options
|
77
|
-
-l, [--loader=NAME] # Loader class. Default:
|
83
|
+
-l, [--loader=NAME] # Loader class. Default: json
|
78
84
|
[--loader-opts=key:value] # Loader options
|
79
85
|
-i, [--input=FILENAME] # Input filename or directory
|
80
86
|
[--since=DATE] # Load records SINCE this date (or fuzzy time duration)
|
81
87
|
[--until=DATE] # Load records UNTIL this date (or fuzzy time duration)
|
82
88
|
[--limit=N] # Only extract the first LIMIT records
|
89
|
+
[--schema=SCHEMA_NAME] # Which Schema to transform
|
90
|
+
# Possible values: chronicle, activitystream, schemaorg, chronobase
|
91
|
+
[--format=SCHEMA_NAME] # How to serialize results
|
92
|
+
# Possible values: jsonapi, jsonld
|
83
93
|
-o, [--output=OUTPUT] # Output filename
|
84
94
|
[--fields=field1 field2 ...] # Output only these fields
|
85
95
|
[--header-row], [--no-header-row] # Output the header row of tabular output
|
@@ -91,6 +101,7 @@ Options:
|
|
91
101
|
```
|
92
102
|
|
93
103
|
### Saving a job
|
104
|
+
|
94
105
|
You can save details about a job to a local config file (saved by default in `~/.config/chronicle/etl/jobs/JOB_NAME.yml`) to save yourself the trouble specifying options each time.
|
95
106
|
|
96
107
|
```sh
|
@@ -103,13 +114,16 @@ $ chronicle-etl jobs:run sample
|
|
103
114
|
# Show details about the job
|
104
115
|
$ chronicle-etl jobs:show sample
|
105
116
|
|
117
|
+
# Edit a job definition with default editor ($EDITOR)
|
118
|
+
$ chronicle-etl jobs:edit sample
|
119
|
+
|
106
120
|
# Show all saved jobs
|
107
121
|
$ chronicle-etl jobs:list
|
108
122
|
```
|
109
123
|
|
110
124
|
## Connectors and plugins
|
111
125
|
|
112
|
-
Connectors let you work with different data formats or third-party
|
126
|
+
Connectors let you work with different data formats or third-party sources.
|
113
127
|
|
114
128
|
### Built-in Connectors
|
115
129
|
|
@@ -121,18 +135,24 @@ $ chronicle-etl connectors:list
|
|
121
135
|
```
|
122
136
|
|
123
137
|
#### Extractors
|
138
|
+
|
124
139
|
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
|
125
140
|
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
|
126
141
|
- [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
|
127
142
|
|
128
143
|
#### Transformers
|
144
|
+
|
129
145
|
- [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
|
146
|
+
- [`sampler`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sampler_transformer.rb) - Sample `percent` records from the extraction
|
147
|
+
- [`sort`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sort_transformer.rb) - Sort extracted results by `key` and `direction`
|
148
|
+
|
130
149
|
|
131
150
|
#### Loaders
|
132
|
-
|
151
|
+
|
152
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - (default) Load records serialized as JSON
|
153
|
+
- [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - Output an ascii table of records. Useful for exploring data.
|
133
154
|
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
|
134
|
-
- [`
|
135
|
-
- [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
|
155
|
+
- [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Send JSON to a REST API
|
136
156
|
|
137
157
|
### Chronicle Plugins for third-party services
|
138
158
|
|
@@ -148,41 +168,43 @@ $ chronicle-etl plugins:list
|
|
148
168
|
$ chronicle-etl plugins:install NAME
|
149
169
|
|
150
170
|
# Use a plugin
|
151
|
-
$ chronicle-etl plugins:install
|
152
|
-
$ chronicle-etl --extractor
|
171
|
+
$ chronicle-etl plugins:install imessage
|
172
|
+
$ chronicle-etl --extractor imessage --limit 10
|
153
173
|
|
154
174
|
# Uninstall a plugin
|
155
175
|
$ chronicle-etl plugins:uninstall NAME
|
156
176
|
```
|
177
|
+
|
157
178
|
#### Available plugins and connectors
|
158
179
|
|
159
180
|
The following is the officially supported list of plugins and their available connectors:
|
160
181
|
|
161
|
-
| Plugin
|
162
|
-
|
163
|
-
| [
|
164
|
-
| [
|
165
|
-
| [email](https://github.com/chronicle-app/chronicle-email)
|
166
|
-
| [
|
167
|
-
| [
|
168
|
-
| [
|
169
|
-
| [
|
170
|
-
| [
|
171
|
-
| [
|
172
|
-
| [
|
173
|
-
| [
|
174
|
-
| [
|
175
|
-
| [
|
176
|
-
| [
|
177
|
-
| [
|
178
|
-
| [
|
179
|
-
| [spotify](https://github.com/chronicle-app/chronicle-spotify)
|
180
|
-
| [spotify](https://github.com/chronicle-app/chronicle-spotify)
|
181
|
-
| [spotify](https://github.com/chronicle-app/chronicle-spotify)
|
182
|
-
| [spotify](https://github.com/chronicle-app/chronicle-spotify)
|
183
|
-
| [
|
184
|
-
| [
|
185
|
-
|
182
|
+
| Plugin | Type | Identifier | Description |
|
183
|
+
| --------------------------------------------------------------------------- | ----------- | ---------------- | -------------------------------------------- |
|
184
|
+
| [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | extractor | listens | listening history of podcast episodes |
|
185
|
+
| [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | transformer | listen | a podcast episode listen to Chronicle Schema |
|
186
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | extractor | imap | emails over an IMAP connection |
|
187
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | extractor | mbox | emails from an .mbox file |
|
188
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | transformer | email | email to Chronicle Schema |
|
189
|
+
| [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | extractor | checkins | Foursquare visits |
|
190
|
+
| [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | transformer | checkin | checkin to Chronicle Schema |
|
191
|
+
| [github](https://github.com/chronicle-app/chronicle-github) | extractor | activity | user activity stream |
|
192
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | extractor | messages | imessages from local macOS |
|
193
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | transformer | message | imessage to Chronicle Schema |
|
194
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | extractor | bookmarks | Pinboard.in bookmarks |
|
195
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | transformer | bookmark | bookmark to Chronicle Schema |
|
196
|
+
| [safari](https://github.com/chronicle-app/chronicle-safari) | extractor | browser-history | browser history |
|
197
|
+
| [safari](https://github.com/chronicle-app/chronicle-safari) | transformer | browser-history | browser history to Chronicle Schema |
|
198
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | extractor | history | shell command history (bash / zsh) |
|
199
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | transformer | command | command to Chronicle Schema |
|
200
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | liked-tracks | liked tracks |
|
201
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | saved-albums | saved albums |
|
202
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | listens | recently listened tracks (last 50 tracks) |
|
203
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | like | like to Chronicle Schema |
|
204
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | listen | listen to Chronicle Schema |
|
205
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | authorizer | | OAuth authorizer |
|
206
|
+
| [zulip](https://github.com/chronicle-app/chronicle-zulip) | extractor | private-messages | private messages |
|
207
|
+
| [zulip](https://github.com/chronicle-app/chronicle-zulip) | transformer | message | message to Chronicle Schema |
|
186
208
|
|
187
209
|
### Coming soon
|
188
210
|
|
@@ -191,7 +213,7 @@ A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and
|
|
191
213
|
If you don't see a plugin for a third-party provider or data source that you're interested in using with `chronicle-etl`, [please open an issue](https://github.com/chronicle-app/chronicle-etl/issues/new). If you want to work together on a plugin, please [get in touch](#get-in-touch)!
|
192
214
|
|
193
215
|
In summary, the following **are coming soon**:
|
194
|
-
anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, things, twitter, whatsapp, youtube.
|
216
|
+
anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, timing, things, twitter, whatsapp, youtube.
|
195
217
|
|
196
218
|
### Writing your own plugin
|
197
219
|
|
@@ -199,38 +221,17 @@ Additional connectors are packaged as separate ruby gems. You can view the [iMes
|
|
199
221
|
|
200
222
|
If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
|
201
223
|
|
202
|
-
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
224
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
203
225
|
|
204
226
|
#### Sample custom Extractor class
|
227
|
+
|
205
228
|
```ruby
|
206
|
-
|
207
|
-
module FooService
|
208
|
-
class FooExtractor < Chronicle::ETL::Extractor
|
209
|
-
register_connector do |r|
|
210
|
-
r.identifier = 'foo'
|
211
|
-
r.description = 'from foo.com'
|
212
|
-
end
|
213
|
-
|
214
|
-
setting :access_token, required: true
|
215
|
-
|
216
|
-
def prepare
|
217
|
-
@records = # load from somewhere
|
218
|
-
end
|
219
|
-
|
220
|
-
def extract
|
221
|
-
@records.each do |record|
|
222
|
-
yield Chronicle::ETL::Extraction.new(data: row.to_h)
|
223
|
-
end
|
224
|
-
end
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
229
|
+
# TODO
|
228
230
|
```
|
229
231
|
|
230
|
-
|
231
232
|
## Secrets Management
|
232
233
|
|
233
|
-
If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
|
234
|
+
If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
|
234
235
|
|
235
236
|
Secrets are organized in namespaces. Typically, you use one namespace per plugin (`pinboard` secrets for the `pinboard` plugin). When you run a job that uses the `pinboard` plugin extractor, for example, the secrets from that namespace will automatically be included in the extractor's options. To override which secrets get included, set `secrets: ALT-NAMESPACE` in the connector options.
|
236
237
|
|
@@ -267,11 +268,13 @@ $ chronicle-etl secrets:unset pinboard access_token
|
|
267
268
|
- **Add documentation for Chronicle Schema**. It's found throughout this project but never explained.
|
268
269
|
|
269
270
|
## Development
|
271
|
+
|
270
272
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
271
273
|
|
272
274
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
273
275
|
|
274
276
|
### Additional development commands
|
277
|
+
|
275
278
|
```bash
|
276
279
|
# run tests
|
277
280
|
bundle exec rake spec
|
@@ -284,15 +287,19 @@ bundle exec guard
|
|
284
287
|
```
|
285
288
|
|
286
289
|
## Get in touch
|
290
|
+
|
287
291
|
- [@hyfen](https://twitter.com/hyfen) on Twitter
|
288
292
|
- [@hyfen](https://github.com/hyfen) on GitHub
|
289
293
|
- Email: andrew@hyfen.net
|
290
294
|
|
291
295
|
## Contributing
|
296
|
+
|
292
297
|
Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
293
298
|
|
294
299
|
## License
|
300
|
+
|
295
301
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
296
302
|
|
297
303
|
## Code of Conduct
|
304
|
+
|
298
305
|
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'chronicle/etl'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
require
|
10
|
+
require 'pry'
|
11
11
|
Pry.start
|
12
12
|
|
13
13
|
def reload!(print = true)
|
@@ -15,7 +15,7 @@ def reload!(print = true)
|
|
15
15
|
# Main project directory.
|
16
16
|
root_dir = File.expand_path('..', __dir__)
|
17
17
|
# Directories within the project that should be reloaded.
|
18
|
-
reload_dirs = %w
|
18
|
+
reload_dirs = %w[lib]
|
19
19
|
# Loop through and reload every file in all relevant project directories.
|
20
20
|
reload_dirs.each do |dir|
|
21
21
|
Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
|
@@ -23,4 +23,3 @@ def reload!(print = true)
|
|
23
23
|
# Return true when complete.
|
24
24
|
true
|
25
25
|
end
|
26
|
-
|
data/chronicle-etl.gemspec
CHANGED
@@ -1,71 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
lib = File.expand_path(
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
5
|
+
require 'chronicle/etl/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'chronicle-etl'
|
8
9
|
spec.version = Chronicle::ETL::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Andrew Louis']
|
11
|
+
spec.email = ['andrew@hyfen.net']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
13
|
+
spec.summary = 'ETL tool for personal data'
|
14
|
+
spec.description = 'Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it.'
|
15
|
+
spec.homepage = 'https://github.com/chronicle-app'
|
16
|
+
spec.license = 'MIT'
|
16
17
|
|
17
18
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
19
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
20
|
if spec.respond_to?(:metadata)
|
20
|
-
spec.metadata['allowed_push_host'] =
|
21
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
21
22
|
|
22
|
-
spec.metadata[
|
23
|
-
spec.metadata[
|
24
|
-
spec.metadata[
|
23
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
24
|
+
spec.metadata['source_code_uri'] = 'https://github.com/chronicle-app/chronicle-etl'
|
25
|
+
spec.metadata['changelog_uri'] = 'https://github.com/chronicle-app/chronicle-etl/releases'
|
25
26
|
else
|
26
|
-
raise
|
27
|
-
|
27
|
+
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
28
|
+
'public gem pushes.'
|
28
29
|
end
|
29
30
|
|
30
31
|
# Specify which files should be added to the gem when it is released.
|
31
32
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
32
|
-
spec.files
|
33
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
33
34
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
34
35
|
end
|
35
|
-
spec.bindir =
|
36
|
+
spec.bindir = 'exe'
|
36
37
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
|
-
spec.require_paths = [
|
38
|
-
spec.required_ruby_version =
|
38
|
+
spec.require_paths = ['lib']
|
39
|
+
spec.required_ruby_version = '>= 3.1'
|
40
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
39
41
|
|
40
|
-
spec.add_dependency
|
41
|
-
spec.add_dependency
|
42
|
-
spec.add_dependency
|
43
|
-
spec.add_dependency
|
44
|
-
spec.add_dependency
|
45
|
-
spec.add_dependency
|
46
|
-
spec.add_dependency
|
47
|
-
spec.add_dependency
|
48
|
-
spec.add_dependency
|
49
|
-
spec.add_dependency
|
50
|
-
spec.add_dependency
|
51
|
-
spec.add_dependency
|
52
|
-
spec.add_dependency
|
53
|
-
spec.add_dependency
|
54
|
-
spec.add_dependency
|
55
|
-
spec.add_dependency
|
56
|
-
spec.add_dependency
|
57
|
-
spec.add_dependency
|
58
|
-
spec.add_dependency "xdg", ">= 4.0"
|
42
|
+
spec.add_dependency 'activesupport', '~> 7.0'
|
43
|
+
spec.add_dependency 'chronic_duration', '~> 0.10.6'
|
44
|
+
spec.add_dependency 'chronicle-core', '~> 0.3'
|
45
|
+
spec.add_dependency 'colorize', '~> 0.8.1'
|
46
|
+
spec.add_dependency 'gems', '>= 1'
|
47
|
+
spec.add_dependency 'launchy'
|
48
|
+
spec.add_dependency 'marcel', '~> 1.0.2'
|
49
|
+
spec.add_dependency 'omniauth', '~> 2'
|
50
|
+
spec.add_dependency 'sequel', '~> 5.35'
|
51
|
+
spec.add_dependency 'sinatra', '~> 2'
|
52
|
+
spec.add_dependency 'sqlite3', '~> 1.4'
|
53
|
+
spec.add_dependency 'thor', '~> 1.2'
|
54
|
+
spec.add_dependency 'thor-hollaback', '~> 0.2'
|
55
|
+
spec.add_dependency 'tty-progressbar', '~> 0.17'
|
56
|
+
spec.add_dependency 'tty-prompt', '~> 0.23'
|
57
|
+
spec.add_dependency 'tty-spinner'
|
58
|
+
spec.add_dependency 'tty-table', '~> 0.12'
|
59
|
+
spec.add_dependency 'xdg', '>= 4.0'
|
59
60
|
|
60
|
-
spec.add_development_dependency
|
61
|
-
spec.add_development_dependency
|
62
|
-
spec.add_development_dependency
|
63
|
-
spec.add_development_dependency
|
64
|
-
spec.add_development_dependency
|
65
|
-
spec.add_development_dependency
|
66
|
-
spec.add_development_dependency
|
67
|
-
spec.add_development_dependency
|
68
|
-
spec.add_development_dependency
|
69
|
-
spec.add_development_dependency
|
70
|
-
spec.add_development_dependency
|
61
|
+
spec.add_development_dependency 'bundler', '~> 2.1'
|
62
|
+
spec.add_development_dependency 'fakefs', '~> 1.4'
|
63
|
+
spec.add_development_dependency 'guard-rspec', '~> 4.7.3'
|
64
|
+
spec.add_development_dependency 'pry-byebug', '~> 3.9'
|
65
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
66
|
+
spec.add_development_dependency 'rspec', '~> 3.9'
|
67
|
+
spec.add_development_dependency 'rubocop', '~> 1.57'
|
68
|
+
spec.add_development_dependency 'simplecov', '~> 0.21'
|
69
|
+
spec.add_development_dependency 'vcr', '~> 6.1'
|
70
|
+
spec.add_development_dependency 'webmock', '~> 3'
|
71
|
+
spec.add_development_dependency 'webrick', '~> 1.7'
|
72
|
+
spec.add_development_dependency 'yard', '~> 0.9.7'
|
71
73
|
end
|
data/exe/chronicle-etl
CHANGED