chronicle-etl 0.5.4 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +98 -73
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +50 -45
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +10 -8
  13. data/lib/chronicle/etl/cli/connectors.rb +9 -9
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +29 -26
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +20 -7
  20. data/lib/chronicle/etl/configurable.rb +24 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +39 -27
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +3 -3
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -10
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +117 -0
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +19 -0
  45. data/lib/chronicle/etl/registry/plugins.rb +171 -0
  46. data/lib/chronicle/etl/registry/registry.rb +3 -52
  47. data/lib/chronicle/etl/registry/self_registering.rb +1 -1
  48. data/lib/chronicle/etl/runner.rb +158 -128
  49. data/lib/chronicle/etl/secrets.rb +5 -5
  50. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  51. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  52. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  53. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  54. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  55. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  56. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  57. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  58. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  60. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  61. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  62. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  63. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  64. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  65. data/lib/chronicle/etl/version.rb +1 -1
  66. data/lib/chronicle/etl.rb +6 -8
  67. metadata +91 -45
  68. data/lib/chronicle/etl/models/activity.rb +0 -15
  69. data/lib/chronicle/etl/models/attachment.rb +0 -14
  70. data/lib/chronicle/etl/models/base.rb +0 -122
  71. data/lib/chronicle/etl/models/entity.rb +0 -29
  72. data/lib/chronicle/etl/models/raw.rb +0 -26
  73. data/lib/chronicle/etl/registry/plugin_registry.rb +0 -95
  74. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  75. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  76. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  77. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  78. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  79. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8f6b4272cd7f2cfcc12e6327324b5d2f11e76036dcf2442de1fe9ad08e041bb2
4
- data.tar.gz: 4558bf48b7de7c64b691e8ef6403304c864e818360bc7c741a290f7650a7eb8c
3
+ metadata.gz: 4059afe6aae8af3e2ef626b3313836f8cd937504b79ee13062bd98be9d7dc197
4
+ data.tar.gz: abcfaefbfbc2aac98a9c30fbdb3e66500a480b3ddc0904b5a75c4deab6d3e62c
5
5
  SHA512:
6
- metadata.gz: 6f7d7f4fd89d284a3a7ad5bffc05cf50ba6ea4e909457585dccfe9071ee91fb36bea505b3fa559a9b6ddab8f37845cf45651c1d1abc4045282c95c99ca9a5944
7
- data.tar.gz: e2cf8a277c463d3b8ddef811e398fbae2e2649fb8227d14874443969b659ee68e74603f438ef4294b5caa59289f26dd5674dd28f9befe537b86b72ecf2ce7b40
6
+ metadata.gz: c2fca383a80cce8efa6c6dcf8411807931e0e98f2a1cf491df916e96db0c78a70b50ea51d1209236e661be92ebb51d790e645d7c8a915aecdcc2dc5340a1efdc
7
+ data.tar.gz: 501359f4e01d3e34de89dd7489bf8254e3d4002b754536737fc78be513776c1e05b8731166a32e8a957f398f7792e5f3b23280b9fb4c5e6cb8a78c44312f31f4
@@ -1,35 +1,25 @@
1
- # This workflow uses actions that are not certified by GitHub.
2
- # They are provided by a third-party and are governed by
3
- # separate terms of service, privacy policy, and support
4
- # documentation.
5
- # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
- # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
-
8
1
  name: Ruby
9
2
 
10
3
  on:
11
4
  push:
12
- branches: [ main ]
5
+ branches: [main]
13
6
  pull_request:
14
- branches: [ main ]
7
+ branches: [main]
15
8
 
16
9
  jobs:
17
- test:
18
-
10
+ build:
19
11
  runs-on: ubuntu-latest
20
- strategy:
21
- matrix:
22
- ruby-version: ['2.7', '3.0']
23
12
 
24
13
  steps:
25
- - uses: actions/checkout@v2
26
- - name: Set up Ruby
27
- # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
- # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
- # uses: ruby/setup-ruby@v1
30
- uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
- with:
32
- ruby-version: ${{ matrix.ruby-version }}
33
- bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
- - name: Run tests
35
- run: bundle exec rake
14
+ - uses: actions/checkout@v2
15
+
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: 3.2
20
+
21
+ - name: Install dependencies
22
+ run: bundle install
23
+
24
+ - name: Run tests
25
+ run: bundle exec rake
data/.rubocop.yml CHANGED
@@ -1,44 +1,2 @@
1
- AllCops:
2
- EnabledByDefault: true
3
- TargetRubyVersion: 2.7
4
-
5
- Style/FrozenStringLiteralComment:
6
- SafeAutoCorrect: true
7
-
8
- Style/StringLiterals:
9
- Enabled: false
10
-
11
- Layout/MultilineAssignmentLayout:
12
- Enabled: false
13
-
14
- Layout/MultilineMethodCallIndentation:
15
- EnforcedStyle: indented
16
-
17
- Layout/RedundantLineBreak:
18
- Enabled: false
19
-
20
- Style/MethodCallWithArgsParentheses:
21
- Enabled: false
22
-
23
- Style/MethodCalledOnDoEndBlock:
24
- Exclude:
25
- - 'spec/**/*'
26
-
27
- Style/OpenStructUse:
28
- Enabled: false
29
-
30
- Style/Copyright:
31
- Enabled: false
32
-
33
- Style/MissingElse:
34
- Enabled: false
35
-
36
- Style/SymbolArray:
37
- EnforcedStyle: brackets
38
-
39
- Style/WordArray:
40
- EnforcedStyle: brackets
41
-
42
- Lint/ConstantResolution:
43
- Enabled: false
44
-
1
+ inherit_gem:
2
+ chronicle-core: .rubocop.yml
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
- source "https://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in chronicle-etl.gemspec
6
6
  gemspec
data/Guardfile CHANGED
@@ -1,7 +1,7 @@
1
- guard :rspec, cmd: "bundle exec rspec" do
2
- require "guard/rspec/dsl"
1
+ guard :rspec, cmd: 'bundle exec rspec' do
2
+ require 'guard/rspec/dsl'
3
3
 
4
4
  watch(%r{^spec/.+_spec\.rb$})
5
5
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
- watch('spec/spec_helper.rb') { "spec" }
6
+ watch('spec/spec_helper.rb') { 'spec' }
7
7
  end
data/README.md CHANGED
@@ -6,14 +6,15 @@
6
6
 
7
7
  Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
8
8
 
9
- If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this tool is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
9
+ If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing export data, this tool is for you! (_If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues)._)
10
10
 
11
- **`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to *extract* data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), *transform* it (into a given schema), and *load* it to a destination (e.g. a CSV file, JSON, external API).
11
+ **`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to _extract_ data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), _transform_ it (into a given schema), and _load_ it to a destination (e.g. a CSV file, JSON, external API).
12
12
 
13
13
  ## What does `chronicle-etl` give you?
14
- * **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
15
- * **Plugins for many third-party providers**. This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
16
- * **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
14
+
15
+ - **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
16
+ - **Plugins for many third-party sources** (see [list](#available-plugins-and-connectors)). This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
17
+ - **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
17
18
 
18
19
  ## Chronicle-ETL in action
19
20
 
@@ -26,15 +27,19 @@ If you don’t want to spend all your time writing scrapers, reverse-engineering
26
27
  ## Installation
27
28
 
28
29
  Using homebrew:
30
+
29
31
  ```sh
30
32
  $ brew install chronicle-app/etl/chronicle-etl
31
33
  ```
34
+
32
35
  Using rubygems:
36
+
33
37
  ```sh
34
38
  $ gem install chronicle-etl
35
39
  ```
36
40
 
37
41
  Confirm it installed successfully:
42
+
38
43
  ```sh
39
44
  $ chronicle-etl --version
40
45
  ```
@@ -45,14 +50,18 @@ $ chronicle-etl --version
45
50
  # Display help
46
51
  $ chronicle-etl help
47
52
 
48
- # Run a basic job
53
+ # Run a basic job
49
54
  $ chronicle-etl --extractor NAME --transformer NAME --loader NAME
50
55
 
51
- # Read test.csv and display it to stdout as a table
56
+ # Read test.csv and display it to stdout as a table
52
57
  $ chronicle-etl --extractor csv --input data.csv --loader table
53
58
 
54
- # Retrieve shell commands run in the last 5 hours
55
- $ chronicle-etl -e shell --since 5h
59
+ # Show available plugins and install one
60
+ $ chronicle-etl plugins:list
61
+ $ chronicle-etl plugins:install imessage
62
+
63
+ # Retrieve imessage messages from the last 5 hours
64
+ $ chronicle-etl -e imessage --since 5h
56
65
 
57
66
  # Get email senders from an .mbox email archive file
58
67
  $ chronicle-etl --extractor email:mbox -i sample-email-archive.mbox -t email --fields actor.slug
@@ -64,18 +73,23 @@ $ chronicle-etl -e pinboard --since 1mo # Used automatically based on plugin nam
64
73
  ```
65
74
 
66
75
  ### Common options
76
+
67
77
  ```sh
68
78
  Options:
69
79
  -e, [--extractor=NAME] # Extractor class. Default: stdin
70
80
  [--extractor-opts=key:value] # Extractor options
71
81
  -t, [--transformer=NAME] # Transformer class. Default: null
72
82
  [--transformer-opts=key:value] # Transformer options
73
- -l, [--loader=NAME] # Loader class. Default: table
83
+ -l, [--loader=NAME] # Loader class. Default: json
74
84
  [--loader-opts=key:value] # Loader options
75
85
  -i, [--input=FILENAME] # Input filename or directory
76
86
  [--since=DATE] # Load records SINCE this date (or fuzzy time duration)
77
87
  [--until=DATE] # Load records UNTIL this date (or fuzzy time duration)
78
88
  [--limit=N] # Only extract the first LIMIT records
89
+ [--schema=SCHEMA_NAME] # Which Schema to transform
90
+ # Possible values: chronicle, activitystream, schemaorg, chronobase
91
+ [--format=SCHEMA_NAME] # How to serialize results
92
+ # Possible values: jsonapi, jsonld
79
93
  -o, [--output=OUTPUT] # Output filename
80
94
  [--fields=field1 field2 ...] # Output only these fields
81
95
  [--header-row], [--no-header-row] # Output the header row of tabular output
@@ -86,7 +100,7 @@ Options:
86
100
  [--silent], [--no-silent] # Silence all output
87
101
  ```
88
102
 
89
- ### Saving jobs
103
+ ### Saving a job
90
104
 
91
105
  You can save details about a job to a local config file (saved by default in `~/.config/chronicle/etl/jobs/JOB_NAME.yml`) to save yourself the trouble specifying options each time.
92
106
 
@@ -94,86 +108,112 @@ You can save details about a job to a local config file (saved by default in `~/
94
108
  # Save a job named 'sample' to ~/.config/chronicle/etl/jobs/sample.yml
95
109
  $ chronicle-etl jobs:save sample --extractor pinboard --since 10d
96
110
 
111
+ # Run the job
112
+ $ chronicle-etl jobs:run sample
113
+
97
114
  # Show details about the job
98
115
  $ chronicle-etl jobs:show sample
99
116
 
100
- # Run the job
101
- $ chronicle-etl jobs:run sample
117
+ # Edit a job definition with default editor ($EDITOR)
118
+ $ chronicle-etl jobs:edit sample
102
119
 
103
120
  # Show all saved jobs
104
121
  $ chronicle-etl jobs:list
105
122
  ```
106
123
 
107
- ## Connectors
108
- Connectors are available to read, process, and load data from different formats or external services.
124
+ ## Connectors and plugins
125
+
126
+ Connectors let you work with different data formats or third-party sources.
127
+
128
+ ### Built-in Connectors
129
+
130
+ `chronicle-etl` comes with several built-in connectors for common formats and sources.
109
131
 
110
132
  ```sh
111
133
  # List all available connectors
112
134
  $ chronicle-etl connectors:list
113
135
  ```
114
136
 
115
- ### Built-in Connectors
116
- `chronicle-etl` comes with several built-in connectors for common formats and sources.
117
-
118
137
  #### Extractors
138
+
119
139
  - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
120
140
  - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
121
141
  - [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
122
142
 
123
143
  #### Transformers
144
+
124
145
  - [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
146
+ - [`sampler`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sampler_transformer.rb) - Sample `percent` records from the extraction
147
+ - [`sort`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sort_transformer.rb) - sort extracted results by `key` and `direction`
148
+
125
149
 
126
150
  #### Loaders
127
- - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
151
+
152
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - (default) Load records serialized as JSON
153
+ - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - Output an ascii table of records. Useful for exploring data.
128
154
  - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
129
- - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
130
- - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
155
+ - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Send JSON to a REST API
156
+
157
+ ### Chronicle Plugins for third-party services
131
158
 
132
- ## Chronicle Plugins
133
- Plugins provide access to data from third-party platforms, services, or formats. Plugins are packaged as separate rubygems and can be installed through the CLI (under the hood, it's a `gem install chronicle-PLUGINNAME`)
159
+ Plugins provide access to data from third-party platforms, services, or formats. Plugins are packaged as separate gems and can be installed through the CLI (under the hood, it's a `gem install chronicle-PLUGINNAME`)
134
160
 
135
- ### Plugin usage
161
+ #### Plugin usage
136
162
 
137
163
  ```bash
164
+ # List available plugins
165
+ $ chronicle-etl plugins:list
166
+
138
167
  # Install a plugin
139
168
  $ chronicle-etl plugins:install NAME
140
169
 
141
- # List installed plugins
142
- $ chronicle-etl plugins:list
143
-
144
170
  # Use a plugin
145
- $ chronicle-etl plugins:install shell
146
- $ chronicle-etl --extractor shell:history --limit 10
171
+ $ chronicle-etl plugins:install imessage
172
+ $ chronicle-etl --extractor imessage --limit 10
147
173
 
148
174
  # Uninstall a plugin
149
175
  $ chronicle-etl plugins:uninstall NAME
150
176
  ```
151
177
 
152
- ### Status
178
+ #### Available plugins and connectors
179
+
180
+ The following is the officially supported list of plugins and their available connectors:
181
+
182
+ | Plugin | Type | Identifier | Description |
183
+ | --------------------------------------------------------------------------- | ----------- | ---------------- | -------------------------------------------- |
184
+ | [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | extractor | listens | listening history of podcast episodes |
185
+ | [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | transformer | listen | a podcast episode listen to Chronicle Schema |
186
+ | [email](https://github.com/chronicle-app/chronicle-email) | extractor | imap | emails over an IMAP connection |
187
+ | [email](https://github.com/chronicle-app/chronicle-email) | extractor | mbox | emails from an .mbox file |
188
+ | [email](https://github.com/chronicle-app/chronicle-email) | transformer | email | email to Chronicle Schema |
189
+ | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | extractor | checkins | Foursquare visits |
190
+ | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | transformer | checkin | checkin to Chronicle Schema |
191
+ | [github](https://github.com/chronicle-app/chronicle-github) | extractor | activity | user activity stream |
192
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | extractor | messages | imessages from local macOS |
193
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | transformer | message | imessage to Chronicle Schema |
194
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | extractor | bookmarks | Pinboard.in bookmarks |
195
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | transformer | bookmark | bookmark to Chronicle Schema |
196
+ | [safari](https://github.com/chronicle-app/chronicle-safari) | extractor | browser-history | browser history |
197
+ | [safari](https://github.com/chronicle-app/chronicle-safari) | transformer | browser-history | browser history to Chronicle Schema |
198
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | extractor | history | shell command history (bash / zsh) |
199
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | transformer | command | command to Chronicle Schema |
200
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | liked-tracks | liked tracks |
201
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | saved-albums | saved albums |
202
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | listens | recently listened tracks (last 50 tracks) |
203
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | like | like to Chronicle Schema |
204
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | listen | listen to Chronicle Schema |
205
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | authorizer | | OAuth authorizer |
206
+ | [zulip](https://github.com/chronicle-app/chronicle-zulip) | extractor | private-messages | private messages |
207
+ | [zulip](https://github.com/chronicle-app/chronicle-zulip) | transformer | message | message to Chronicle Schema |
208
+
209
+ ### Coming soon
153
210
 
154
211
  A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and I'm porting them over to the Chronicle system. The [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1/views/1) lets you keep track of what's available and what's coming soon.
155
212
 
156
213
  If you don't see a plugin for a third-party provider or data source that you're interested in using with `chronicle-etl`, [please open an issue](https://github.com/chronicle-app/chronicle-etl/issues/new). If you want to work together on a plugin, please [get in touch](#get-in-touch)!
157
214
 
158
- #### Currently available
159
-
160
- | Name | Description | Availability |
161
- |-----------------------------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------|
162
- | [email](https://github.com/chronicle-app/chronicle-email) | Emails and attachments from IMAP or .mbox files | Available |
163
- | [github](https://github.com/chronicle-app/chronicle-github) | Github activity stream | Available |
164
- | [imessage](https://github.com/chronicle-app/chronicle-imessage) | iMessage messages and attachments | Available |
165
- | [pinboard](https://github.com/chronicle-app/chronicle-email) | Bookmarks and tags | Available |
166
- | [safari](https://github.com/chronicle-app/chronicle-safari) | Browser history from local sqlite db | Available |
167
- | [shell](https://github.com/chronicle-app/chronicle-shell) | Shell command history | Available (still needs zsh support) |
168
- | [zulip](https://github.com/chronicle-app/chronicle-zulip) | Zulip message history | Available (for private messages) |
169
-
170
-
171
- #### Coming soon
172
-
173
215
  In summary, the following **are coming soon**:
174
- anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, things, twitter, whatsapp, youtube.
175
-
176
- Please check the [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1/views/1) for details.
216
+ anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, timing, things, twitter, whatsapp, youtube.
177
217
 
178
218
  ### Writing your own plugin
179
219
 
@@ -181,37 +221,17 @@ Additional connectors are packaged as separate ruby gems. You can view the [iMes
181
221
 
182
222
  If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
183
223
 
184
- If you want to work together on a connector, please [get in touch](#get-in-touch)!
224
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
185
225
 
186
226
  #### Sample custom Extractor class
227
+
187
228
  ```ruby
188
- module Chronicle
189
- module FooService
190
- class FooExtractor < Chronicle::ETL::Extractor
191
- register_connector do |r|
192
- r.identifier = 'foo'
193
- r.description = 'from foo.com'
194
- end
195
-
196
- setting :access_token, required: true
197
-
198
- def prepare
199
- @records = # load from somewhere
200
- end
201
-
202
- def extract
203
- @records.each do |record|
204
- yield Chronicle::ETL::Extraction.new(data: row.to_h)
205
- end
206
- end
207
- end
208
- end
209
- end
229
+ # TODO
210
230
  ```
211
231
 
212
232
  ## Secrets Management
213
233
 
214
- If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
234
+ If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
215
235
 
216
236
  Secrets are organized in namespaces. Typically, you use one namespace per plugin (`pinboard` secrets for the `pinboard` plugin). When you run a job that uses the `pinboard` plugin extractor, for example, the secrets from that namespace will automatically be included in the extractor's options. To override which secrets get included, you can do so in the connector options with `secrets: ALT-NAMESPACE`.
217
237
 
@@ -243,17 +263,18 @@ $ chronicle-etl secrets:unset pinboard access_token
243
263
  ## Roadmap
244
264
 
245
265
  - Keep tackling **new plugins**. See: [Chronicle Plugin Tracker](https://github.com/orgs/chronicle-app/projects/1)
246
- - Add an **OAuth2 authorizer** for services that require this type of authorization ([#48](https://github.com/chronicle-app/chronicle-etl/issues/48))
247
266
  - Add support for **incremental extractions** ([#37](https://github.com/chronicle-app/chronicle-etl/issues/37))
248
267
  - **Improve stdin extractor and shell command transformer** so that users can easily integrate their own scripts/languages/tools into jobs ([#5](https://github.com/chronicle-app/chronicle-etl/issues/48))
249
268
  - **Add documentation for Chronicle Schema**. It's found throughout this project but never explained.
250
269
 
251
270
  ## Development
271
+
252
272
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
253
273
 
254
274
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
255
275
 
256
276
  ### Additional development commands
277
+
257
278
  ```bash
258
279
  # run tests
259
280
  bundle exec rake spec
@@ -266,15 +287,19 @@ bundle exec guard
266
287
  ```
267
288
 
268
289
  ## Get in touch
290
+
269
291
  - [@hyfen](https://twitter.com/hyfen) on Twitter
270
292
  - [@hyfen](https://github.com/hyfen) on Github
271
293
  - Email: andrew@hyfen.net
272
294
 
273
295
  ## Contributing
296
+
274
297
  Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
275
298
 
276
299
  ## License
300
+
277
301
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
278
302
 
279
303
  ## Code of Conduct
304
+
280
305
  Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
  RSpec::Core::RakeTask.new(:spec)
4
4
 
5
5
  require 'yard'
data/bin/console CHANGED
@@ -1,13 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "chronicle/etl"
3
+ require 'bundler/setup'
4
+ require 'chronicle/etl'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
8
8
 
9
9
  # (If you use this, don't forget to add pry to your Gemfile!)
10
- require "pry"
10
+ require 'pry'
11
11
  Pry.start
12
12
 
13
13
  def reload!(print = true)
@@ -15,7 +15,7 @@ def reload!(print = true)
15
15
  # Main project directory.
16
16
  root_dir = File.expand_path('..', __dir__)
17
17
  # Directories within the project that should be reloaded.
18
- reload_dirs = %w{lib}
18
+ reload_dirs = %w[lib]
19
19
  # Loop through and reload every file in all relevant project directories.
20
20
  reload_dirs.each do |dir|
21
21
  Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
@@ -23,4 +23,3 @@ def reload!(print = true)
23
23
  # Return true when complete.
24
24
  true
25
25
  end
26
-
data/chronicle-etl.gemspec CHANGED
@@ -1,68 +1,73 @@
1
+ # frozen_string_literal: true
1
2
 
2
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "chronicle/etl/version"
5
+ require 'chronicle/etl/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
- spec.name = "chronicle-etl"
8
+ spec.name = 'chronicle-etl'
8
9
  spec.version = Chronicle::ETL::VERSION
9
- spec.authors = ["Andrew Louis"]
10
- spec.email = ["andrew@hyfen.net"]
10
+ spec.authors = ['Andrew Louis']
11
+ spec.email = ['andrew@hyfen.net']
11
12
 
12
- spec.summary = "ETL tool for personal data"
13
- spec.description = "Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it."
14
- spec.homepage = "https://github.com/chronicle-app"
15
- spec.license = "MIT"
13
+ spec.summary = 'ETL tool for personal data'
14
+ spec.description = 'Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it.'
15
+ spec.homepage = 'https://github.com/chronicle-app'
16
+ spec.license = 'MIT'
16
17
 
17
18
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
19
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
20
  if spec.respond_to?(:metadata)
20
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
22
 
22
- spec.metadata["homepage_uri"] = spec.homepage
23
- spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
23
+ spec.metadata['homepage_uri'] = spec.homepage
24
+ spec.metadata['source_code_uri'] = 'https://github.com/chronicle-app/chronicle-etl'
25
+ spec.metadata['changelog_uri'] = 'https://github.com/chronicle-app/chronicle-etl/releases'
25
26
  else
26
- raise "RubyGems 2.0 or newer is required to protect against " \
27
- "public gem pushes."
27
+ raise 'RubyGems 2.0 or newer is required to protect against ' \
28
+ 'public gem pushes.'
28
29
  end
29
30
 
30
31
  # Specify which files should be added to the gem when it is released.
31
32
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
32
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
33
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
33
34
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
34
35
  end
35
- spec.bindir = "exe"
36
+ spec.bindir = 'exe'
36
37
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
- spec.require_paths = ["lib"]
38
- spec.required_ruby_version = ">= 2.7"
38
+ spec.require_paths = ['lib']
39
+ spec.required_ruby_version = '>= 3.1'
40
+ spec.metadata['rubygems_mfa_required'] = 'true'
39
41
 
40
- spec.add_dependency "activesupport", "~> 7.0"
41
- spec.add_dependency "chronic_duration", "~> 0.10.6"
42
- spec.add_dependency "colorize", "~> 0.8.1"
42
+ spec.add_dependency 'activesupport', '~> 7.0'
43
+ spec.add_dependency 'chronic_duration', '~> 0.10.6'
44
+ spec.add_dependency 'chronicle-core', '~> 0.3'
45
+ spec.add_dependency 'colorize', '~> 0.8.1'
46
+ spec.add_dependency 'gems', '>= 1'
43
47
  spec.add_dependency 'launchy'
44
- spec.add_dependency "marcel", "~> 1.0.2"
45
- spec.add_dependency "mini_exiftool", "~> 2.10"
46
- spec.add_dependency "nokogiri", "~> 1.13"
47
- spec.add_dependency 'omniauth', "~> 2"
48
- spec.add_dependency "sequel", "~> 5.35"
49
- spec.add_dependency 'sinatra', "~> 2"
50
- spec.add_dependency "sqlite3", "~> 1.4"
51
- spec.add_dependency "thor", "~> 1.2"
52
- spec.add_dependency "thor-hollaback", "~> 0.2"
53
- spec.add_dependency "tty-progressbar", "~> 0.17"
54
- spec.add_dependency "tty-prompt", "~> 0.23"
55
- spec.add_dependency "tty-spinner"
56
- spec.add_dependency "tty-table", "~> 0.11"
57
- spec.add_dependency "xdg", ">= 4.0"
48
+ spec.add_dependency 'marcel', '~> 1.0.2'
49
+ spec.add_dependency 'omniauth', '~> 2'
50
+ spec.add_dependency 'sequel', '~> 5.35'
51
+ spec.add_dependency 'sinatra', '~> 2'
52
+ spec.add_dependency 'sqlite3', '~> 1.4'
53
+ spec.add_dependency 'thor', '~> 1.2'
54
+ spec.add_dependency 'thor-hollaback', '~> 0.2'
55
+ spec.add_dependency 'tty-progressbar', '~> 0.17'
56
+ spec.add_dependency 'tty-prompt', '~> 0.23'
57
+ spec.add_dependency 'tty-spinner'
58
+ spec.add_dependency 'tty-table', '~> 0.12'
59
+ spec.add_dependency 'xdg', '>= 4.0'
58
60
 
59
- spec.add_development_dependency "bundler", "~> 2.1"
60
- spec.add_development_dependency "fakefs"
61
- spec.add_development_dependency "guard-rspec", "~> 4.7.3"
62
- spec.add_development_dependency "pry-byebug", "~> 3.9"
63
- spec.add_development_dependency "rake", "~> 13.0"
64
- spec.add_development_dependency "rspec", "~> 3.9"
65
- spec.add_development_dependency "rubocop", "~> 1.25.1"
66
- spec.add_development_dependency "simplecov", "~> 0.21"
67
- spec.add_development_dependency "yard", "~> 0.9.7"
61
+ spec.add_development_dependency 'bundler', '~> 2.1'
62
+ spec.add_development_dependency 'fakefs', '~> 1.4'
63
+ spec.add_development_dependency 'guard-rspec', '~> 4.7.3'
64
+ spec.add_development_dependency 'pry-byebug', '~> 3.9'
65
+ spec.add_development_dependency 'rake', '~> 13.0'
66
+ spec.add_development_dependency 'rspec', '~> 3.9'
67
+ spec.add_development_dependency 'rubocop', '~> 1.57'
68
+ spec.add_development_dependency 'simplecov', '~> 0.21'
69
+ spec.add_development_dependency 'vcr', '~> 6.1'
70
+ spec.add_development_dependency 'webmock', '~> 3'
71
+ spec.add_development_dependency 'webrick', '~> 1.7'
72
+ spec.add_development_dependency 'yard', '~> 0.9.7'
68
73
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require 'chronicle/etl/cli'
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)