chronicle-etl 0.5.4 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +15 -25
- data/.rubocop.yml +2 -44
- data/Gemfile +2 -2
- data/Guardfile +3 -3
- data/README.md +98 -73
- data/Rakefile +2 -2
- data/bin/console +4 -5
- data/chronicle-etl.gemspec +50 -45
- data/exe/chronicle-etl +1 -1
- data/lib/chronicle/etl/authorizer.rb +3 -4
- data/lib/chronicle/etl/cli/authorizations.rb +10 -8
- data/lib/chronicle/etl/cli/connectors.rb +9 -9
- data/lib/chronicle/etl/cli/jobs.rb +130 -53
- data/lib/chronicle/etl/cli/main.rb +29 -29
- data/lib/chronicle/etl/cli/plugins.rb +29 -26
- data/lib/chronicle/etl/cli/secrets.rb +14 -12
- data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
- data/lib/chronicle/etl/config.rb +20 -7
- data/lib/chronicle/etl/configurable.rb +24 -9
- data/lib/chronicle/etl/exceptions.rb +3 -3
- data/lib/chronicle/etl/extraction.rb +12 -2
- data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
- data/lib/chronicle/etl/extractors/extractor.rb +15 -2
- data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
- data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
- data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
- data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
- data/lib/chronicle/etl/job.rb +35 -17
- data/lib/chronicle/etl/job_definition.rb +39 -27
- data/lib/chronicle/etl/job_log.rb +14 -16
- data/lib/chronicle/etl/job_logger.rb +4 -4
- data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
- data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
- data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
- data/lib/chronicle/etl/loaders/loader.rb +0 -17
- data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
- data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
- data/lib/chronicle/etl/logger.rb +3 -3
- data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
- data/lib/chronicle/etl/record.rb +15 -0
- data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
- data/lib/chronicle/etl/registry/connectors.rb +117 -0
- data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
- data/lib/chronicle/etl/registry/plugins.rb +171 -0
- data/lib/chronicle/etl/registry/registry.rb +3 -52
- data/lib/chronicle/etl/registry/self_registering.rb +1 -1
- data/lib/chronicle/etl/runner.rb +158 -128
- data/lib/chronicle/etl/secrets.rb +5 -5
- data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
- data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
- data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
- data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
- data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
- data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
- data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
- data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
- data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
- data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
- data/lib/chronicle/etl/transformers/transformer.rb +63 -41
- data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
- data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
- data/lib/chronicle/etl/version.rb +1 -1
- data/lib/chronicle/etl.rb +6 -8
- metadata +91 -45
- data/lib/chronicle/etl/models/activity.rb +0 -15
- data/lib/chronicle/etl/models/attachment.rb +0 -14
- data/lib/chronicle/etl/models/base.rb +0 -122
- data/lib/chronicle/etl/models/entity.rb +0 -29
- data/lib/chronicle/etl/models/raw.rb +0 -26
- data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
- data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
- data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
- data/lib/chronicle/etl/serializers/serializer.rb +0 -28
- data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
- data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
- data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4059afe6aae8af3e2ef626b3313836f8cd937504b79ee13062bd98be9d7dc197
|
4
|
+
data.tar.gz: abcfaefbfbc2aac98a9c30fbdb3e66500a480b3ddc0904b5a75c4deab6d3e62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2fca383a80cce8efa6c6dcf8411807931e0e98f2a1cf491df916e96db0c78a70b50ea51d1209236e661be92ebb51d790e645d7c8a915aecdcc2dc5340a1efdc
|
7
|
+
data.tar.gz: 501359f4e01d3e34de89dd7489bf8254e3d4002b754536737fc78be513776c1e05b8731166a32e8a957f398f7792e5f3b23280b9fb4c5e6cb8a78c44312f31f4
|
data/.github/workflows/ruby.yml
CHANGED
@@ -1,35 +1,25 @@
|
|
1
|
-
# This workflow uses actions that are not certified by GitHub.
|
2
|
-
# They are provided by a third-party and are governed by
|
3
|
-
# separate terms of service, privacy policy, and support
|
4
|
-
# documentation.
|
5
|
-
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
-
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
-
|
8
1
|
name: Ruby
|
9
2
|
|
10
3
|
on:
|
11
4
|
push:
|
12
|
-
branches: [
|
5
|
+
branches: [main]
|
13
6
|
pull_request:
|
14
|
-
branches: [
|
7
|
+
branches: [main]
|
15
8
|
|
16
9
|
jobs:
|
17
|
-
|
18
|
-
|
10
|
+
build:
|
19
11
|
runs-on: ubuntu-latest
|
20
|
-
strategy:
|
21
|
-
matrix:
|
22
|
-
ruby-version: ['2.7', '3.0']
|
23
12
|
|
24
13
|
steps:
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
|
16
|
+
- name: Set up Ruby
|
17
|
+
uses: ruby/setup-ruby@v1
|
18
|
+
with:
|
19
|
+
ruby-version: 3.2
|
20
|
+
|
21
|
+
- name: Install dependencies
|
22
|
+
run: bundle install
|
23
|
+
|
24
|
+
- name: Run tests
|
25
|
+
run: bundle exec rake
|
data/.rubocop.yml
CHANGED
@@ -1,44 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
TargetRubyVersion: 2.7
|
4
|
-
|
5
|
-
Style/FrozenStringLiteralComment:
|
6
|
-
SafeAutoCorrect: true
|
7
|
-
|
8
|
-
Style/StringLiterals:
|
9
|
-
Enabled: false
|
10
|
-
|
11
|
-
Layout/MultilineAssignmentLayout:
|
12
|
-
Enabled: false
|
13
|
-
|
14
|
-
Layout/MultilineMethodCallIndentation:
|
15
|
-
EnforcedStyle: indented
|
16
|
-
|
17
|
-
Layout/RedundantLineBreak:
|
18
|
-
Enabled: false
|
19
|
-
|
20
|
-
Style/MethodCallWithArgsParentheses:
|
21
|
-
Enabled: false
|
22
|
-
|
23
|
-
Style/MethodCalledOnDoEndBlock:
|
24
|
-
Exclude:
|
25
|
-
- 'spec/**/*'
|
26
|
-
|
27
|
-
Style/OpenStructUse:
|
28
|
-
Enabled: false
|
29
|
-
|
30
|
-
Style/Copyright:
|
31
|
-
Enabled: false
|
32
|
-
|
33
|
-
Style/MissingElse:
|
34
|
-
Enabled: false
|
35
|
-
|
36
|
-
Style/SymbolArray:
|
37
|
-
EnforcedStyle: brackets
|
38
|
-
|
39
|
-
Style/WordArray:
|
40
|
-
EnforcedStyle: brackets
|
41
|
-
|
42
|
-
Lint/ConstantResolution:
|
43
|
-
Enabled: false
|
44
|
-
|
1
|
+
inherit_gem:
|
2
|
+
chronicle-core: .rubocop.yml
|
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
3
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
4
|
|
5
5
|
# Specify your gem's dependencies in chronicle-etl.gemspec
|
6
6
|
gemspec
|
data/Guardfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
guard :rspec, cmd:
|
2
|
-
require
|
1
|
+
guard :rspec, cmd: 'bundle exec rspec' do
|
2
|
+
require 'guard/rspec/dsl'
|
3
3
|
|
4
4
|
watch(%r{^spec/.+_spec\.rb$})
|
5
5
|
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
6
|
-
watch('spec/spec_helper.rb') {
|
6
|
+
watch('spec/spec_helper.rb') { 'spec' }
|
7
7
|
end
|
data/README.md
CHANGED
@@ -6,14 +6,15 @@
|
|
6
6
|
|
7
7
|
Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
|
8
8
|
|
9
|
-
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing
|
9
|
+
If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing export data, this tool is for you! (_If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues)._)
|
10
10
|
|
11
|
-
**`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to
|
11
|
+
**`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to _extract_ data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), _transform_ it (into a given schema), and _load_ it to a destination (e.g. a CSV file, JSON, external API).
|
12
12
|
|
13
13
|
## What does `chronicle-etl` give you?
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
|
15
|
+
- **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
|
16
|
+
- **Plugins for many third-party sources** (see [list](#available-plugins-and-connectors)). This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
|
17
|
+
- **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
|
17
18
|
|
18
19
|
## Chronicle-ETL in action
|
19
20
|
|
@@ -26,15 +27,19 @@ If you don’t want to spend all your time writing scrapers, reverse-engineering
|
|
26
27
|
## Installation
|
27
28
|
|
28
29
|
Using homebrew:
|
30
|
+
|
29
31
|
```sh
|
30
32
|
$ brew install chronicle-app/etl/chronicle-etl
|
31
33
|
```
|
34
|
+
|
32
35
|
Using rubygems:
|
36
|
+
|
33
37
|
```sh
|
34
38
|
$ gem install chronicle-etl
|
35
39
|
```
|
36
40
|
|
37
41
|
Confirm it installed successfully:
|
42
|
+
|
38
43
|
```sh
|
39
44
|
$ chronicle-etl --version
|
40
45
|
```
|
@@ -45,14 +50,18 @@ $ chronicle-etl --version
|
|
45
50
|
# Display help
|
46
51
|
$ chronicle-etl help
|
47
52
|
|
48
|
-
# Run a basic job
|
53
|
+
# Run a basic job
|
49
54
|
$ chronicle-etl --extractor NAME --transformer NAME --loader NAME
|
50
55
|
|
51
|
-
# Read test.csv and display it to stdout as a table
|
56
|
+
# Read test.csv and display it to stdout as a table
|
52
57
|
$ chronicle-etl --extractor csv --input data.csv --loader table
|
53
58
|
|
54
|
-
#
|
55
|
-
$ chronicle-etl
|
59
|
+
# Show available plugins and install one
|
60
|
+
$ chronicle-etl plugins:list
|
61
|
+
$ chronicle-etl plugins:install imessage
|
62
|
+
|
63
|
+
# Retrieve imessage messages from the last 5 hours
|
64
|
+
$ chronicle-etl -e imessage --since 5h
|
56
65
|
|
57
66
|
# Get email senders from an .mbox email archive file
|
58
67
|
$ chronicle-etl --extractor email:mbox -i sample-email-archive.mbox -t email --fields actor.slug
|
@@ -64,18 +73,23 @@ $ chronicle-etl -e pinboard --since 1mo # Used automatically based on plugin nam
|
|
64
73
|
```
|
65
74
|
|
66
75
|
### Common options
|
76
|
+
|
67
77
|
```sh
|
68
78
|
Options:
|
69
79
|
-e, [--extractor=NAME] # Extractor class. Default: stdin
|
70
80
|
[--extractor-opts=key:value] # Extractor options
|
71
81
|
-t, [--transformer=NAME] # Transformer class. Default: null
|
72
82
|
[--transformer-opts=key:value] # Transformer options
|
73
|
-
-l, [--loader=NAME] # Loader class. Default:
|
83
|
+
-l, [--loader=NAME] # Loader class. Default: json
|
74
84
|
[--loader-opts=key:value] # Loader options
|
75
85
|
-i, [--input=FILENAME] # Input filename or directory
|
76
86
|
[--since=DATE] # Load records SINCE this date (or fuzzy time duration)
|
77
87
|
[--until=DATE] # Load records UNTIL this date (or fuzzy time duration)
|
78
88
|
[--limit=N] # Only extract the first LIMIT records
|
89
|
+
[--schema=SCHEMA_NAME] # Which Schema to transform
|
90
|
+
# Possible values: chronicle, activitystream, schemaorg, chronobase
|
91
|
+
[--format=SCHEMA_NAME] # How to serialize results
|
92
|
+
# Possible values: jsonapi, jsonld
|
79
93
|
-o, [--output=OUTPUT] # Output filename
|
80
94
|
[--fields=field1 field2 ...] # Output only these fields
|
81
95
|
[--header-row], [--no-header-row] # Output the header row of tabular output
|
@@ -86,7 +100,7 @@ Options:
|
|
86
100
|
[--silent], [--no-silent] # Silence all output
|
87
101
|
```
|
88
102
|
|
89
|
-
### Saving
|
103
|
+
### Saving a job
|
90
104
|
|
91
105
|
You can save details about a job to a local config file (saved by default in `~/.config/chronicle/etl/jobs/JOB_NAME.yml`) to save yourself the trouble specifying options each time.
|
92
106
|
|
@@ -94,86 +108,112 @@ You can save details about a job to a local config file (saved by default in `~/
|
|
94
108
|
# Save a job named 'sample' to ~/.config/chronicle/etl/jobs/sample.yml
|
95
109
|
$ chronicle-etl jobs:save sample --extractor pinboard --since 10d
|
96
110
|
|
111
|
+
# Run the job
|
112
|
+
$ chronicle-etl jobs:run sample
|
113
|
+
|
97
114
|
# Show details about the job
|
98
115
|
$ chronicle-etl jobs:show sample
|
99
116
|
|
100
|
-
#
|
101
|
-
$ chronicle-etl jobs:
|
117
|
+
# Edit a job definition with default editor ($EDITOR)
|
118
|
+
$ chronicle-etl jobs:edit sample
|
102
119
|
|
103
120
|
# Show all saved jobs
|
104
121
|
$ chronicle-etl jobs:list
|
105
122
|
```
|
106
123
|
|
107
|
-
## Connectors
|
108
|
-
|
124
|
+
## Connectors and plugins
|
125
|
+
|
126
|
+
Connectors let you work with different data formats or third-party sources.
|
127
|
+
|
128
|
+
### Built-in Connectors
|
129
|
+
|
130
|
+
`chronicle-etl` comes with several built-in connectors for common formats and sources.
|
109
131
|
|
110
132
|
```sh
|
111
133
|
# List all available connectors
|
112
134
|
$ chronicle-etl connectors:list
|
113
135
|
```
|
114
136
|
|
115
|
-
### Built-in Connectors
|
116
|
-
`chronicle-etl` comes with several built-in connectors for common formats and sources.
|
117
|
-
|
118
137
|
#### Extractors
|
138
|
+
|
119
139
|
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
|
120
140
|
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
|
121
141
|
- [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
|
122
142
|
|
123
143
|
#### Transformers
|
144
|
+
|
124
145
|
- [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
|
146
|
+
- [`sampler`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sampler_transformer.rb) - Sample `percent` records from the extraction
|
147
|
+
- [`sort`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sort_transformer.rb) - sort extracted results by `key` and `direction`
|
148
|
+
|
125
149
|
|
126
150
|
#### Loaders
|
127
|
-
|
151
|
+
|
152
|
+
- [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - (default) Load records serialized as JSON
|
153
|
+
- [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - Output an ascii table of records. Useful for exploring data.
|
128
154
|
- [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
|
129
|
-
- [`
|
130
|
-
|
155
|
+
- [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Send JSON to a REST API
|
156
|
+
|
157
|
+
### Chronicle Plugins for third-party services
|
131
158
|
|
132
|
-
|
133
|
-
Plugins provide access to data from third-party platforms, services, or formats. Plugins are packaged as separate rubygems and can be installed through the CLI (under the hood, it's a `gem install chronicle-PLUGINNAME`)
|
159
|
+
Plugins provide access to data from third-party platforms, services, or formats. Plugins are packaged as separate gems and can be installed through the CLI (under the hood, it's a `gem install chronicle-PLUGINNAME`)
|
134
160
|
|
135
|
-
|
161
|
+
#### Plugin usage
|
136
162
|
|
137
163
|
```bash
|
164
|
+
# List available plugins
|
165
|
+
$ chronicle-etl plugins:list
|
166
|
+
|
138
167
|
# Install a plugin
|
139
168
|
$ chronicle-etl plugins:install NAME
|
140
169
|
|
141
|
-
# List installed plugins
|
142
|
-
$ chronicle-etl plugins:list
|
143
|
-
|
144
170
|
# Use a plugin
|
145
|
-
$ chronicle-etl plugins:install
|
146
|
-
$ chronicle-etl --extractor
|
171
|
+
$ chronicle-etl plugins:install imessage
|
172
|
+
$ chronicle-etl --extractor imessage --limit 10
|
147
173
|
|
148
174
|
# Uninstall a plugin
|
149
175
|
$ chronicle-etl plugins:uninstall NAME
|
150
176
|
```
|
151
177
|
|
152
|
-
|
178
|
+
#### Available plugins and connectors
|
179
|
+
|
180
|
+
The following is the officially supported list of plugins and their available connectors:
|
181
|
+
|
182
|
+
| Plugin | Type | Identifier | Description |
|
183
|
+
| --------------------------------------------------------------------------- | ----------- | ---------------- | -------------------------------------------- |
|
184
|
+
| [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | extractor | listens | listening history of podcast episodes |
|
185
|
+
| [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | transformer | listen | a podcast episode listen to Chronicle Schema |
|
186
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | extractor | imap | emails over an IMAP connection |
|
187
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | extractor | mbox | emails from an .mbox file |
|
188
|
+
| [email](https://github.com/chronicle-app/chronicle-email) | transformer | email | email to Chronicle Schema |
|
189
|
+
| [foursquare](https://github.com/chronicle-app/chronicle-foursquare)         | extractor   | checkins         | Foursquare visits                            |
|
190
|
+
| [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | transformer | checkin | checkin to Chronicle Schema |
|
191
|
+
| [github](https://github.com/chronicle-app/chronicle-github) | extractor | activity | user activity stream |
|
192
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | extractor | messages | imessages from local macOS |
|
193
|
+
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | transformer | message | imessage to Chronicle Schema |
|
194
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | extractor | bookmarks | Pinboard.in bookmarks |
|
195
|
+
| [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | transformer | bookmark | bookmark to Chronicle Schema |
|
196
|
+
| [safari](https://github.com/chronicle-app/chronicle-safari) | extractor | browser-history | browser history |
|
197
|
+
| [safari](https://github.com/chronicle-app/chronicle-safari)                 | transformer | browser-history  | browser history to Chronicle Schema          |
|
198
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | extractor | history | shell command history (bash / zsh) |
|
199
|
+
| [shell](https://github.com/chronicle-app/chronicle-shell) | transformer | command | command to Chronicle Schema |
|
200
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | liked-tracks | liked tracks |
|
201
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | saved-albums | saved albums |
|
202
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | listens | recently listened tracks (last 50 tracks) |
|
203
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | like | like to Chronicle Schema |
|
204
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | listen | listen to Chronicle Schema |
|
205
|
+
| [spotify](https://github.com/chronicle-app/chronicle-spotify) | authorizer | | OAuth authorizer |
|
206
|
+
| [zulip](https://github.com/chronicle-app/chronicle-zulip) | extractor | private-messages | private messages |
|
207
|
+
| [zulip](https://github.com/chronicle-app/chronicle-zulip) | transformer | message | message to Chronicle Schema |
|
208
|
+
|
209
|
+
### Coming soon
|
153
210
|
|
154
211
|
A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and I'm porting them over to the Chronicle system. The [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1/views/1) lets you keep track of what's available and what's coming soon.
|
155
212
|
|
156
213
|
If you don't see a plugin for a third-party provider or data source that you're interested in using with `chronicle-etl`, [please open an issue](https://github.com/chronicle-app/chronicle-etl/issues/new). If you want to work together on a plugin, please [get in touch](#get-in-touch)!
|
157
214
|
|
158
|
-
#### Currently available
|
159
|
-
|
160
|
-
| Name | Description | Availability |
|
161
|
-
|-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
|
162
|
-
| [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available |
|
163
|
-
| [github](https://github.com/chronicle-app/chronicle-github) | Github activity stream | Available |
|
164
|
-
| [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
|
165
|
-
| [pinboard](https://github.com/chronicle-app/chronicle-email) | Bookmarks and tags | Available |
|
166
|
-
| [safari](https://github.com/chronicle-app/chronicle-safari) | Browser history from local sqlite db | Available |
|
167
|
-
| [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (still needs zsh support) |
|
168
|
-
| [zulip](https://github.com/chronicle-app/chronicle-zulip) | Zulip message history | Available (for private messages) |
|
169
|
-
|
170
|
-
|
171
|
-
#### Coming soon
|
172
|
-
|
173
215
|
In summary, the following **are coming soon**:
|
174
|
-
anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, things, twitter, whatsapp, youtube.
|
175
|
-
|
176
|
-
Please check the [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1/views/1) for details.
|
216
|
+
anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, timing, things, twitter, whatsapp, youtube.
|
177
217
|
|
178
218
|
### Writing your own plugin
|
179
219
|
|
@@ -181,37 +221,17 @@ Additional connectors are packaged as separate ruby gems. You can view the [iMes
|
|
181
221
|
|
182
222
|
If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
|
183
223
|
|
184
|
-
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
224
|
+
If you want to work together on a connector, please [get in touch](#get-in-touch)!
|
185
225
|
|
186
226
|
#### Sample custom Extractor class
|
227
|
+
|
187
228
|
```ruby
|
188
|
-
|
189
|
-
module FooService
|
190
|
-
class FooExtractor < Chronicle::ETL::Extractor
|
191
|
-
register_connector do |r|
|
192
|
-
r.identifier = 'foo'
|
193
|
-
r.description = 'from foo.com'
|
194
|
-
end
|
195
|
-
|
196
|
-
setting :access_token, required: true
|
197
|
-
|
198
|
-
def prepare
|
199
|
-
@records = # load from somewhere
|
200
|
-
end
|
201
|
-
|
202
|
-
def extract
|
203
|
-
@records.each do |record|
|
204
|
-
yield Chronicle::ETL::Extraction.new(data: row.to_h)
|
205
|
-
end
|
206
|
-
end
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|
229
|
+
# TODO
|
210
230
|
```
|
211
231
|
|
212
232
|
## Secrets Management
|
213
233
|
|
214
|
-
If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
|
234
|
+
If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
|
215
235
|
|
216
236
|
Secrets are organized in namespaces. Typically, you use one namespace per plugin (`pinboard` secrets for the `pinboard` plugin). When you run a job that uses the `pinboard` plugin extractor, for example, the secrets from that namespace will automatically be included in the extractor's options. To override which secrets get included, you can do it in the connector options with `secrets: ALT-NAMESPACE`.
|
217
237
|
|
@@ -243,17 +263,18 @@ $ chronicle-etl secrets:unset pinboard access_token
|
|
243
263
|
## Roadmap
|
244
264
|
|
245
265
|
- Keep tackling **new plugins**. See: [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1)
|
246
|
-
- Add an **OAuth2 authorizer** for services that require this type of authorization ([#48](https://github.com/chronicle-app/chronicle-etl/issues/48))
|
247
266
|
- Add support for **incremental extractions** ([#37](https://github.com/chronicle-app/chronicle-etl/issues/37))
|
248
267
|
- **Improve stdin extractor and shell command transformer** so that users can easily integrate their own scripts/languages/tools into jobs ([#5](https://github.com/chronicle-app/chronicle-etl/issues/5))
|
249
268
|
- **Add documentation for Chronicle Schema**. It's found throughout this project but never explained.
|
250
269
|
|
251
270
|
## Development
|
271
|
+
|
252
272
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
253
273
|
|
254
274
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
255
275
|
|
256
276
|
### Additional development commands
|
277
|
+
|
257
278
|
```bash
|
258
279
|
# run tests
|
259
280
|
bundle exec rake spec
|
@@ -266,15 +287,19 @@ bundle exec guard
|
|
266
287
|
```
|
267
288
|
|
268
289
|
## Get in touch
|
290
|
+
|
269
291
|
- [@hyfen](https://twitter.com/hyfen) on Twitter
|
270
292
|
- [@hyfen](https://github.com/hyfen) on Github
|
271
293
|
- Email: andrew@hyfen.net
|
272
294
|
|
273
295
|
## Contributing
|
296
|
+
|
274
297
|
Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
275
298
|
|
276
299
|
## License
|
300
|
+
|
277
301
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
278
302
|
|
279
303
|
## Code of Conduct
|
304
|
+
|
280
305
|
Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'chronicle/etl'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
require
|
10
|
+
require 'pry'
|
11
11
|
Pry.start
|
12
12
|
|
13
13
|
def reload!(print = true)
|
@@ -15,7 +15,7 @@ def reload!(print = true)
|
|
15
15
|
# Main project directory.
|
16
16
|
root_dir = File.expand_path('..', __dir__)
|
17
17
|
# Directories within the project that should be reloaded.
|
18
|
-
reload_dirs = %w
|
18
|
+
reload_dirs = %w[lib]
|
19
19
|
# Loop through and reload every file in all relevant project directories.
|
20
20
|
reload_dirs.each do |dir|
|
21
21
|
Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
|
@@ -23,4 +23,3 @@ def reload!(print = true)
|
|
23
23
|
# Return true when complete.
|
24
24
|
true
|
25
25
|
end
|
26
|
-
|
data/chronicle-etl.gemspec
CHANGED
@@ -1,68 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
lib = File.expand_path(
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
5
|
+
require 'chronicle/etl/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'chronicle-etl'
|
8
9
|
spec.version = Chronicle::ETL::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Andrew Louis']
|
11
|
+
spec.email = ['andrew@hyfen.net']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
13
|
+
spec.summary = 'ETL tool for personal data'
|
14
|
+
spec.description = 'Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it.'
|
15
|
+
spec.homepage = 'https://github.com/chronicle-app'
|
16
|
+
spec.license = 'MIT'
|
16
17
|
|
17
18
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
18
19
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
20
|
if spec.respond_to?(:metadata)
|
20
|
-
spec.metadata['allowed_push_host'] =
|
21
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
21
22
|
|
22
|
-
spec.metadata[
|
23
|
-
spec.metadata[
|
24
|
-
spec.metadata[
|
23
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
24
|
+
spec.metadata['source_code_uri'] = 'https://github.com/chronicle-app/chronicle-etl'
|
25
|
+
spec.metadata['changelog_uri'] = 'https://github.com/chronicle-app/chronicle-etl/releases'
|
25
26
|
else
|
26
|
-
raise
|
27
|
-
|
27
|
+
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
28
|
+
'public gem pushes.'
|
28
29
|
end
|
29
30
|
|
30
31
|
# Specify which files should be added to the gem when it is released.
|
31
32
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
32
|
-
spec.files
|
33
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
33
34
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
34
35
|
end
|
35
|
-
spec.bindir =
|
36
|
+
spec.bindir = 'exe'
|
36
37
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
37
|
-
spec.require_paths = [
|
38
|
-
spec.required_ruby_version =
|
38
|
+
spec.require_paths = ['lib']
|
39
|
+
spec.required_ruby_version = '>= 3.1'
|
40
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
39
41
|
|
40
|
-
spec.add_dependency
|
41
|
-
spec.add_dependency
|
42
|
-
spec.add_dependency
|
42
|
+
spec.add_dependency 'activesupport', '~> 7.0'
|
43
|
+
spec.add_dependency 'chronic_duration', '~> 0.10.6'
|
44
|
+
spec.add_dependency 'chronicle-core', '~> 0.3'
|
45
|
+
spec.add_dependency 'colorize', '~> 0.8.1'
|
46
|
+
spec.add_dependency 'gems', '>= 1'
|
43
47
|
spec.add_dependency 'launchy'
|
44
|
-
spec.add_dependency
|
45
|
-
spec.add_dependency
|
46
|
-
spec.add_dependency
|
47
|
-
spec.add_dependency '
|
48
|
-
spec.add_dependency
|
49
|
-
spec.add_dependency '
|
50
|
-
spec.add_dependency
|
51
|
-
spec.add_dependency
|
52
|
-
spec.add_dependency
|
53
|
-
spec.add_dependency
|
54
|
-
spec.add_dependency
|
55
|
-
spec.add_dependency
|
56
|
-
spec.add_dependency "tty-table", "~> 0.11"
|
57
|
-
spec.add_dependency "xdg", ">= 4.0"
|
48
|
+
spec.add_dependency 'marcel', '~> 1.0.2'
|
49
|
+
spec.add_dependency 'omniauth', '~> 2'
|
50
|
+
spec.add_dependency 'sequel', '~> 5.35'
|
51
|
+
spec.add_dependency 'sinatra', '~> 2'
|
52
|
+
spec.add_dependency 'sqlite3', '~> 1.4'
|
53
|
+
spec.add_dependency 'thor', '~> 1.2'
|
54
|
+
spec.add_dependency 'thor-hollaback', '~> 0.2'
|
55
|
+
spec.add_dependency 'tty-progressbar', '~> 0.17'
|
56
|
+
spec.add_dependency 'tty-prompt', '~> 0.23'
|
57
|
+
spec.add_dependency 'tty-spinner'
|
58
|
+
spec.add_dependency 'tty-table', '~> 0.12'
|
59
|
+
spec.add_dependency 'xdg', '>= 4.0'
|
58
60
|
|
59
|
-
spec.add_development_dependency
|
60
|
-
spec.add_development_dependency
|
61
|
-
spec.add_development_dependency
|
62
|
-
spec.add_development_dependency
|
63
|
-
spec.add_development_dependency
|
64
|
-
spec.add_development_dependency
|
65
|
-
spec.add_development_dependency
|
66
|
-
spec.add_development_dependency
|
67
|
-
spec.add_development_dependency
|
61
|
+
spec.add_development_dependency 'bundler', '~> 2.1'
|
62
|
+
spec.add_development_dependency 'fakefs', '~> 1.4'
|
63
|
+
spec.add_development_dependency 'guard-rspec', '~> 4.7.3'
|
64
|
+
spec.add_development_dependency 'pry-byebug', '~> 3.9'
|
65
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
66
|
+
spec.add_development_dependency 'rspec', '~> 3.9'
|
67
|
+
spec.add_development_dependency 'rubocop', '~> 1.57'
|
68
|
+
spec.add_development_dependency 'simplecov', '~> 0.21'
|
69
|
+
spec.add_development_dependency 'vcr', '~> 6.1'
|
70
|
+
spec.add_development_dependency 'webmock', '~> 3'
|
71
|
+
spec.add_development_dependency 'webrick', '~> 1.7'
|
72
|
+
spec.add_development_dependency 'yard', '~> 0.9.7'
|
68
73
|
end
|
data/exe/chronicle-etl
CHANGED