chronicle-etl 0.5.5 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +15 -25
  3. data/.rubocop.yml +2 -44
  4. data/Gemfile +2 -2
  5. data/Guardfile +3 -3
  6. data/README.md +75 -68
  7. data/Rakefile +2 -2
  8. data/bin/console +4 -5
  9. data/chronicle-etl.gemspec +51 -49
  10. data/exe/chronicle-etl +1 -1
  11. data/lib/chronicle/etl/authorizer.rb +3 -4
  12. data/lib/chronicle/etl/cli/authorizations.rb +8 -6
  13. data/lib/chronicle/etl/cli/connectors.rb +7 -7
  14. data/lib/chronicle/etl/cli/jobs.rb +130 -53
  15. data/lib/chronicle/etl/cli/main.rb +29 -29
  16. data/lib/chronicle/etl/cli/plugins.rb +14 -15
  17. data/lib/chronicle/etl/cli/secrets.rb +14 -12
  18. data/lib/chronicle/etl/cli/subcommand_base.rb +5 -3
  19. data/lib/chronicle/etl/config.rb +18 -8
  20. data/lib/chronicle/etl/configurable.rb +20 -9
  21. data/lib/chronicle/etl/exceptions.rb +3 -3
  22. data/lib/chronicle/etl/extraction.rb +12 -2
  23. data/lib/chronicle/etl/extractors/csv_extractor.rb +9 -0
  24. data/lib/chronicle/etl/extractors/extractor.rb +15 -2
  25. data/lib/chronicle/etl/extractors/file_extractor.rb +5 -3
  26. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +2 -2
  27. data/lib/chronicle/etl/extractors/json_extractor.rb +14 -4
  28. data/lib/chronicle/etl/extractors/stdin_extractor.rb +3 -0
  29. data/lib/chronicle/etl/job.rb +35 -17
  30. data/lib/chronicle/etl/job_definition.rb +38 -26
  31. data/lib/chronicle/etl/job_log.rb +14 -16
  32. data/lib/chronicle/etl/job_logger.rb +4 -4
  33. data/lib/chronicle/etl/loaders/csv_loader.rb +17 -4
  34. data/lib/chronicle/etl/loaders/helpers/stdout_helper.rb +4 -0
  35. data/lib/chronicle/etl/loaders/json_loader.rb +30 -10
  36. data/lib/chronicle/etl/loaders/loader.rb +0 -17
  37. data/lib/chronicle/etl/loaders/rest_loader.rb +7 -7
  38. data/lib/chronicle/etl/loaders/table_loader.rb +37 -12
  39. data/lib/chronicle/etl/logger.rb +2 -2
  40. data/lib/chronicle/etl/oauth_authorizer.rb +8 -8
  41. data/lib/chronicle/etl/record.rb +15 -0
  42. data/lib/chronicle/etl/registry/connector_registration.rb +15 -23
  43. data/lib/chronicle/etl/registry/connectors.rb +93 -36
  44. data/lib/chronicle/etl/registry/plugin_registration.rb +1 -1
  45. data/lib/chronicle/etl/registry/plugins.rb +27 -19
  46. data/lib/chronicle/etl/runner.rb +158 -128
  47. data/lib/chronicle/etl/secrets.rb +4 -4
  48. data/lib/chronicle/etl/transformers/buffer_transformer.rb +29 -0
  49. data/lib/chronicle/etl/transformers/chronicle_transformer.rb +32 -0
  50. data/lib/chronicle/etl/transformers/chronobase_transformer.rb +100 -0
  51. data/lib/chronicle/etl/transformers/fields_limit_transformer.rb +23 -0
  52. data/lib/chronicle/etl/transformers/filter_fields_transformer.rb +60 -0
  53. data/lib/chronicle/etl/transformers/filter_transformer.rb +30 -0
  54. data/lib/chronicle/etl/transformers/format_transformer.rb +32 -0
  55. data/lib/chronicle/etl/transformers/merge_meta_transformer.rb +19 -0
  56. data/lib/chronicle/etl/transformers/multiply_transformer.rb +21 -0
  57. data/lib/chronicle/etl/transformers/null_transformer.rb +5 -7
  58. data/lib/chronicle/etl/transformers/sampler_transformer.rb +21 -0
  59. data/lib/chronicle/etl/transformers/sort_transformer.rb +31 -0
  60. data/lib/chronicle/etl/transformers/transformer.rb +63 -41
  61. data/lib/chronicle/etl/utils/binary_attachments.rb +1 -1
  62. data/lib/chronicle/etl/utils/progress_bar.rb +2 -3
  63. data/lib/chronicle/etl/version.rb +1 -1
  64. data/lib/chronicle/etl.rb +6 -8
  65. metadata +49 -47
  66. data/lib/chronicle/etl/models/activity.rb +0 -15
  67. data/lib/chronicle/etl/models/attachment.rb +0 -14
  68. data/lib/chronicle/etl/models/base.rb +0 -122
  69. data/lib/chronicle/etl/models/entity.rb +0 -29
  70. data/lib/chronicle/etl/models/raw.rb +0 -26
  71. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +0 -31
  72. data/lib/chronicle/etl/serializers/raw_serializer.rb +0 -10
  73. data/lib/chronicle/etl/serializers/serializer.rb +0 -28
  74. data/lib/chronicle/etl/transformers/image_file_transformer.rb +0 -247
  75. data/lib/chronicle/etl/utils/hash_utilities.rb +0 -19
  76. data/lib/chronicle/etl/utils/text_recognition.rb +0 -15
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2de46efc3c5fbdc7ac120137bef56e13a138c8a95c8dd7d0a3542a65be65959
4
- data.tar.gz: e8e3e9ae236e270b2926037419d5349170f85b8597c640c0b7b899552257fdb9
3
+ metadata.gz: 4059afe6aae8af3e2ef626b3313836f8cd937504b79ee13062bd98be9d7dc197
4
+ data.tar.gz: abcfaefbfbc2aac98a9c30fbdb3e66500a480b3ddc0904b5a75c4deab6d3e62c
5
5
  SHA512:
6
- metadata.gz: 58b293d45d1a7f4589aee080d0fb1348e45d76753a8d48bb33e694d6a2b6ea123d1495a12a42e98ed6d3e65926ad48bd7de1fb98dac9bc7bac9225ef00d32fb3
7
- data.tar.gz: e73c73b67b4e3790347da3df22ed7a11357fdf43e91b734925503f8d0c189aabd07e26f51ed25cf1409fd68ba74dd72e1e7e01c113f773832c509172f0b4ee84
6
+ metadata.gz: c2fca383a80cce8efa6c6dcf8411807931e0e98f2a1cf491df916e96db0c78a70b50ea51d1209236e661be92ebb51d790e645d7c8a915aecdcc2dc5340a1efdc
7
+ data.tar.gz: 501359f4e01d3e34de89dd7489bf8254e3d4002b754536737fc78be513776c1e05b8731166a32e8a957f398f7792e5f3b23280b9fb4c5e6cb8a78c44312f31f4
@@ -1,35 +1,25 @@
1
- # This workflow uses actions that are not certified by GitHub.
2
- # They are provided by a third-party and are governed by
3
- # separate terms of service, privacy policy, and support
4
- # documentation.
5
- # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
- # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
-
8
1
  name: Ruby
9
2
 
10
3
  on:
11
4
  push:
12
- branches: [ main ]
5
+ branches: [main]
13
6
  pull_request:
14
- branches: [ main ]
7
+ branches: [main]
15
8
 
16
9
  jobs:
17
- test:
18
-
10
+ build:
19
11
  runs-on: ubuntu-latest
20
- strategy:
21
- matrix:
22
- ruby-version: ['2.7', '3.0']
23
12
 
24
13
  steps:
25
- - uses: actions/checkout@v2
26
- - name: Set up Ruby
27
- # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
- # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
- # uses: ruby/setup-ruby@v1
30
- uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
- with:
32
- ruby-version: ${{ matrix.ruby-version }}
33
- bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
- - name: Run tests
35
- run: bundle exec rake
14
+ - uses: actions/checkout@v2
15
+
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: 3.2
20
+
21
+ - name: Install dependencies
22
+ run: bundle install
23
+
24
+ - name: Run tests
25
+ run: bundle exec rake
data/.rubocop.yml CHANGED
@@ -1,44 +1,2 @@
1
- AllCops:
2
- EnabledByDefault: true
3
- TargetRubyVersion: 2.7
4
-
5
- Style/FrozenStringLiteralComment:
6
- SafeAutoCorrect: true
7
-
8
- Style/StringLiterals:
9
- Enabled: false
10
-
11
- Layout/MultilineAssignmentLayout:
12
- Enabled: false
13
-
14
- Layout/MultilineMethodCallIndentation:
15
- EnforcedStyle: indented
16
-
17
- Layout/RedundantLineBreak:
18
- Enabled: false
19
-
20
- Style/MethodCallWithArgsParentheses:
21
- Enabled: false
22
-
23
- Style/MethodCalledOnDoEndBlock:
24
- Exclude:
25
- - 'spec/**/*'
26
-
27
- Style/OpenStructUse:
28
- Enabled: false
29
-
30
- Style/Copyright:
31
- Enabled: false
32
-
33
- Style/MissingElse:
34
- Enabled: false
35
-
36
- Style/SymbolArray:
37
- EnforcedStyle: brackets
38
-
39
- Style/WordArray:
40
- EnforcedStyle: brackets
41
-
42
- Lint/ConstantResolution:
43
- Enabled: false
44
-
1
+ inherit_gem:
2
+ chronicle-core: .rubocop.yml
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
- source "https://rubygems.org"
1
+ source 'https://rubygems.org'
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in chronicle-etl.gemspec
6
6
  gemspec
data/Guardfile CHANGED
@@ -1,7 +1,7 @@
1
- guard :rspec, cmd: "bundle exec rspec" do
2
- require "guard/rspec/dsl"
1
+ guard :rspec, cmd: 'bundle exec rspec' do
2
+ require 'guard/rspec/dsl'
3
3
 
4
4
  watch(%r{^spec/.+_spec\.rb$})
5
5
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
6
- watch('spec/spec_helper.rb') { "spec" }
6
+ watch('spec/spec_helper.rb') { 'spec' }
7
7
  end
data/README.md CHANGED
@@ -6,14 +6,15 @@
6
6
 
7
7
  Are you trying to archive your digital history or incorporate it into your own projects? You’ve probably discovered how frustrating it is to get machine-readable access to your own data. While [building a memex](https://hyfen.net/memex/), I learned first-hand what great efforts must be made before you can begin using the data in interesting ways.
8
8
 
9
- If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing takeout data, this tool is for you! (*If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues).*)
9
+ If you don’t want to spend all your time writing scrapers, reverse-engineering APIs, or parsing export data, this tool is for you! (_If you do enjoy these things, please see the [open issues](https://github.com/chronicle-app/chronicle-etl/issues)._)
10
10
 
11
- **`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to *extract* data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), *transform* it (into a given schema), and *load* it to a destination (e.g. a CSV file, JSON, external API).
11
+ **`chronicle-etl` is a CLI tool that gives you a unified interface to your personal data.** It uses the ETL pattern to _extract_ data from a source (e.g. your local browser history, a directory of images, goodreads.com reading history), _transform_ it (into a given schema), and _load_ it to a destination (e.g. a CSV file, JSON, external API).
12
12
 
13
13
  ## What does `chronicle-etl` give you?
14
- * **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
15
- * **Plugins for many third-party providers** (see [list](#available-plugins-and-connectors)). This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
16
- * **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
14
+
15
+ - **A CLI tool for working with personal data**. You can monitor progress of exports, manipulate the output, set up recurring jobs, manage credentials, and more.
16
+ - **Plugins for many third-party sources** (see [list](#available-plugins-and-connectors)). This plugin system allows you to access data from dozens of third-party services, all accessible through a common CLI interface.
17
+ - **A common, opinionated schema**: You can normalize different datasets into a single schema so that, for example, all your iMessages and emails are represented in a common schema. (Don’t want to use this schema? `chronicle-etl` always allows you to fall back on working with the raw extraction data.)
17
18
 
18
19
  ## Chronicle-ETL in action
19
20
 
@@ -26,15 +27,19 @@ If you don’t want to spend all your time writing scrapers, reverse-engineering
26
27
  ## Installation
27
28
 
28
29
  Using homebrew:
30
+
29
31
  ```sh
30
32
  $ brew install chronicle-app/etl/chronicle-etl
31
33
  ```
34
+
32
35
  Using rubygems:
36
+
33
37
  ```sh
34
38
  $ gem install chronicle-etl
35
39
  ```
36
40
 
37
41
  Confirm it installed successfully:
42
+
38
43
  ```sh
39
44
  $ chronicle-etl --version
40
45
  ```
@@ -45,18 +50,18 @@ $ chronicle-etl --version
45
50
  # Display help
46
51
  $ chronicle-etl help
47
52
 
48
- # Run a basic job
53
+ # Run a basic job
49
54
  $ chronicle-etl --extractor NAME --transformer NAME --loader NAME
50
55
 
51
- # Read test.csv and display it to stdout as a table
56
+ # Read data.csv and display it to stdout as a table
52
57
  $ chronicle-etl --extractor csv --input data.csv --loader table
53
58
 
54
59
  # Show available plugins and install one
55
60
  $ chronicle-etl plugins:list
56
- $ chronicle-etl plugins:install shell
61
+ $ chronicle-etl plugins:install imessage
57
62
 
58
- # Retrieve shell commands run in the last 5 hours
59
- $ chronicle-etl -e shell --since 5h
63
+ # Retrieve imessage messages from the last 5 hours
64
+ $ chronicle-etl -e imessage --since 5h
60
65
 
61
66
  # Get email senders from an .mbox email archive file
62
67
  $ chronicle-etl --extractor email:mbox -i sample-email-archive.mbox -t email --fields actor.slug
@@ -68,18 +73,23 @@ $ chronicle-etl -e pinboard --since 1mo # Used automatically based on plugin nam
68
73
  ```
69
74
 
70
75
  ### Common options
76
+
71
77
  ```sh
72
78
  Options:
73
79
  -e, [--extractor=NAME] # Extractor class. Default: stdin
74
80
  [--extractor-opts=key:value] # Extractor options
75
81
  -t, [--transformer=NAME] # Transformer class. Default: null
76
82
  [--transformer-opts=key:value] # Transformer options
77
- -l, [--loader=NAME] # Loader class. Default: table
83
+ -l, [--loader=NAME] # Loader class. Default: json
78
84
  [--loader-opts=key:value] # Loader options
79
85
  -i, [--input=FILENAME] # Input filename or directory
80
86
  [--since=DATE] # Load records SINCE this date (or fuzzy time duration)
81
87
  [--until=DATE] # Load records UNTIL this date (or fuzzy time duration)
82
88
  [--limit=N] # Only extract the first LIMIT records
89
+ [--schema=SCHEMA_NAME] # Which Schema to transform
90
+ # Possible values: chronicle, activitystream, schemaorg, chronobase
91
+ [--format=SCHEMA_NAME] # How to serialize results
92
+ # Possible values: jsonapi, jsonld
83
93
  -o, [--output=OUTPUT] # Output filename
84
94
  [--fields=field1 field2 ...] # Output only these fields
85
95
  [--header-row], [--no-header-row] # Output the header row of tabular output
@@ -91,6 +101,7 @@ Options:
91
101
  ```
92
102
 
93
103
  ### Saving a job
104
+
94
105
  You can save details about a job to a local config file (saved by default in `~/.config/chronicle/etl/jobs/JOB_NAME.yml`) to save yourself the trouble of specifying options each time.
95
106
 
96
107
  ```sh
@@ -103,13 +114,16 @@ $ chronicle-etl jobs:run sample
103
114
  # Show details about the job
104
115
  $ chronicle-etl jobs:show sample
105
116
 
117
+ # Edit a job definition with default editor ($EDITOR)
118
+ $ chronicle-etl jobs:edit sample
119
+
106
120
  # Show all saved jobs
107
121
  $ chronicle-etl jobs:list
108
122
  ```
109
123
 
110
124
  ## Connectors and plugins
111
125
 
112
- Connectors let you work with different data formats or third-party providers.
126
+ Connectors let you work with different data formats or third-party sources.
113
127
 
114
128
  ### Built-in Connectors
115
129
 
@@ -121,18 +135,24 @@ $ chronicle-etl connectors:list
121
135
  ```
122
136
 
123
137
  #### Extractors
138
+
124
139
  - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/csv_extractor.rb) - Load records from CSV files or stdin
125
140
  - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/json_extractor.rb) - Load JSON (either [line-separated objects](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON) or one object)
126
141
  - [`file`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/extractors/file_extractor.rb) - load from a single file or directory (with a glob pattern)
127
142
 
128
143
  #### Transformers
144
+
129
145
  - [`null`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/null_transformer.rb) - (default) Don’t do anything and pass on raw extraction data
146
+ - [`sampler`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sampler_transformer.rb) - Sample `percent` records from the extraction
147
+ - [`sort`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/transformers/sort_transformer.rb) - sort extracted results by `key` and `direction`
148
+
130
149
 
131
150
  #### Loaders
132
- - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - (default) Output an ascii table of records. Useful for exploring data.
151
+
152
+ - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - (default) Load records serialized as JSON
153
+ - [`table`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/table_loader.rb) - Output an ascii table of records. Useful for exploring data.
133
154
  - [`csv`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/csv_loader.rb) - Load records to CSV
134
- - [`json`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/json_loader.rb) - Load records serialized as JSON
135
- - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Serialize records with [JSONAPI](https://jsonapi.org/) and send to a REST API
155
+ - [`rest`](https://github.com/chronicle-app/chronicle-etl/blob/main/lib/chronicle/etl/loaders/rest_loader.rb) - Send JSON to a REST API
136
156
 
137
157
  ### Chronicle Plugins for third-party services
138
158
 
@@ -148,41 +168,43 @@ $ chronicle-etl plugins:list
148
168
  $ chronicle-etl plugins:install NAME
149
169
 
150
170
  # Use a plugin
151
- $ chronicle-etl plugins:install shell
152
- $ chronicle-etl --extractor shell:history --limit 10
171
+ $ chronicle-etl plugins:install imessage
172
+ $ chronicle-etl --extractor imessage --limit 10
153
173
 
154
174
  # Uninstall a plugin
155
175
  $ chronicle-etl plugins:uninstall NAME
156
176
  ```
177
+
157
178
  #### Available plugins and connectors
158
179
 
159
180
  The following is the officially supported list of plugins and their available connectors:
160
181
 
161
- | Plugin | Type | Identifier | Description | Description |
162
- |---------------------------------------------------------------------|-------------|------------------|-------------------------------------------|-------------------------------------------|
163
- | [email](https://github.com/chronicle-app/chronicle-email) | extractor | imap | emails over an IMAP connection | emails over an IMAP connection |
164
- | [email](https://github.com/chronicle-app/chronicle-email) | extractor | mbox | emails from an .mbox file | emails from an .mbox file |
165
- | [email](https://github.com/chronicle-app/chronicle-email) | transformer | email | email to Chronicle Schema | email to Chronicle Schema |
166
- | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | extractor | checkins | Foursqure visits | Foursqure visits |
167
- | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | transformer | checkin | checkin to Chronicle Schema | checkin to Chronicle Schema |
168
- | [github](https://github.com/chronicle-app/chronicle-github) | extractor | activity | user activity stream | user activity stream |
169
- | [imessage](https://github.com/chronicle-app/chronicle-imessage) | extractor | messages | imessages from local macOS | imessages from local macOS |
170
- | [imessage](https://github.com/chronicle-app/chronicle-imessage) | transformer | message | imessage to Chronicle Schema | imessage to Chronicle Schema |
171
- | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | extractor | bookmarks | Pinboard.in bookmarks | Pinboard.in bookmarks |
172
- | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | transformer | bookmark | bookmark to Chronicle Schema | bookmark to Chronicle Schema |
173
- | [safari](https://github.com/chronicle-app/chronicle-safari) | extractor | browser-history | browser history | browser history |
174
- | [safari ](https://github.com/chronicle-app/chronicle-safari ) | transformer | browser-history | browser history to Chronicle Schema | browser history to Chronicle Schema |
175
- | [shell](https://github.com/chronicle-app/chronicle-shell) | extractor | history | shell command history (bash / zsh) | shell command history (bash / zsh) |
176
- | [shell](https://github.com/chronicle-app/chronicle-shell) | transformer | command | command to Chronicle Schema | command to Chronicle Schema |
177
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | liked-tracks | liked tracks | liked tracks |
178
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | saved-albums | saved albums | saved albums |
179
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | listens | recently listened tracks (last 50 tracks) | recently listened tracks (last 50 tracks) |
180
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | like | like to Chronicle Schema | like to Chronicle Schema |
181
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | listen | listen to Chronicle Schema | listen to Chronicle Schema |
182
- | [spotify](https://github.com/chronicle-app/chronicle-spotify) | authorizer | | OAuth authorizer | OAuth authorizer |
183
- | [zulip](https://github.com/chronicle-app/chronicle-zulip) | extractor | private-messages | private messages | private messages |
184
- | [zulip](https://github.com/chronicle-app/chronicle-zulip) | transformer | message | message to Chronicle Schema | message to Chronicle Schema |
185
-
182
+ | Plugin | Type | Identifier | Description |
183
+ | --------------------------------------------------------------------------- | ----------- | ---------------- | -------------------------------------------- |
184
+ | [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | extractor | listens | listening history of podcast episodes |
185
+ | [apple-podcasts](https://github.com/chronicle-app/chronicle-apple-podcasts) | transformer | listen | a podcast episode listen to Chronicle Schema |
186
+ | [email](https://github.com/chronicle-app/chronicle-email) | extractor | imap | emails over an IMAP connection |
187
+ | [email](https://github.com/chronicle-app/chronicle-email) | extractor | mbox | emails from an .mbox file |
188
+ | [email](https://github.com/chronicle-app/chronicle-email) | transformer | email | email to Chronicle Schema |
189
+ | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | extractor | checkins | Foursquare visits |
190
+ | [foursquare](https://github.com/chronicle-app/chronicle-foursquare) | transformer | checkin | checkin to Chronicle Schema |
191
+ | [github](https://github.com/chronicle-app/chronicle-github) | extractor | activity | user activity stream |
192
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | extractor | messages | imessages from local macOS |
193
+ | [imessage](https://github.com/chronicle-app/chronicle-imessage) | transformer | message | imessage to Chronicle Schema |
194
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | extractor | bookmarks | Pinboard.in bookmarks |
195
+ | [pinboard](https://github.com/chronicle-app/chronicle-pinboard) | transformer | bookmark | bookmark to Chronicle Schema |
196
+ | [safari](https://github.com/chronicle-app/chronicle-safari) | extractor | browser-history | browser history |
197
+ | [safari](https://github.com/chronicle-app/chronicle-safari) | transformer | browser-history | browser history to Chronicle Schema |
198
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | extractor | history | shell command history (bash / zsh) |
199
+ | [shell](https://github.com/chronicle-app/chronicle-shell) | transformer | command | command to Chronicle Schema |
200
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | liked-tracks | liked tracks |
201
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | saved-albums | saved albums |
202
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | extractor | listens | recently listened tracks (last 50 tracks) |
203
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | like | like to Chronicle Schema |
204
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | transformer | listen | listen to Chronicle Schema |
205
+ | [spotify](https://github.com/chronicle-app/chronicle-spotify) | authorizer | | OAuth authorizer |
206
+ | [zulip](https://github.com/chronicle-app/chronicle-zulip) | extractor | private-messages | private messages |
207
+ | [zulip](https://github.com/chronicle-app/chronicle-zulip) | transformer | message | message to Chronicle Schema |
186
208
 
187
209
  ### Coming soon
188
210
 
@@ -191,7 +213,7 @@ A few dozen importers exist [in my Memex project](https://hyfen.net/memex/) and
191
213
  If you don't see a plugin for a third-party provider or data source that you're interested in using with `chronicle-etl`, [please open an issue](https://github.com/chronicle-app/chronicle-etl/issues/new). If you want to work together on a plugin, please [get in touch](#get-in-touch)!
192
214
 
193
215
  In summary, the following **are coming soon**:
194
- anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, things, twitter, whatsapp, youtube.
216
+ anki, arc, bear, chrome, facebook, firefox, fitbit, foursquare, git, github, goodreads, google-calendar, images, instagram, lastfm, shazam, slack, strava, timing, things, twitter, whatsapp, youtube.
195
217
 
196
218
  ### Writing your own plugin
197
219
 
@@ -199,38 +221,17 @@ Additional connectors are packaged as separate ruby gems. You can view the [iMes
199
221
 
200
222
  If you want to load a custom connector without creating a gem, you can help by [completing this issue](https://github.com/chronicle-app/chronicle-etl/issues/23).
201
223
 
202
- If you want to work together on a connector, please [get in touch](#get-in-touch)!
224
+ If you want to work together on a connector, please [get in touch](#get-in-touch)!
203
225
 
204
226
  #### Sample custom Extractor class
227
+
205
228
  ```ruby
206
- module Chronicle
207
- module FooService
208
- class FooExtractor < Chronicle::ETL::Extractor
209
- register_connector do |r|
210
- r.identifier = 'foo'
211
- r.description = 'from foo.com'
212
- end
213
-
214
- setting :access_token, required: true
215
-
216
- def prepare
217
- @records = # load from somewhere
218
- end
219
-
220
- def extract
221
- @records.each do |record|
222
- yield Chronicle::ETL::Extraction.new(data: row.to_h)
223
- end
224
- end
225
- end
226
- end
227
- end
229
+ # TODO
228
230
  ```
229
231
 
230
-
231
232
  ## Secrets Management
232
233
 
233
- If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
234
+ If your job needs secrets such as access tokens or passwords, `chronicle-etl` has a built-in secret management system.
234
235
 
235
236
  Secrets are organized in namespaces. Typically, you use one namespace per plugin (`pinboard` secrets for the `pinboard` plugin). When you run a job that uses the `pinboard` plugin extractor, for example, the secrets from that namespace will automatically be included in the extractor's options. To override which secrets get included, you can do so in the connector options with `secrets: ALT-NAMESPACE`.
236
237
 
@@ -267,11 +268,13 @@ $ chronicle-etl secrets:unset pinboard access_token
267
268
  - **Add documentation for Chronicle Schema**. It's found throughout this project but never explained.
268
269
 
269
270
  ## Development
271
+
270
272
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
271
273
 
272
274
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
273
275
 
274
276
  ### Additional development commands
277
+
275
278
  ```bash
276
279
  # run tests
277
280
  bundle exec rake spec
@@ -284,15 +287,19 @@ bundle exec guard
284
287
  ```
285
288
 
286
289
  ## Get in touch
290
+
287
291
  - [@hyfen](https://twitter.com/hyfen) on Twitter
288
292
  - [@hyfen](https://github.com/hyfen) on Github
289
293
  - Email: andrew@hyfen.net
290
294
 
291
295
  ## Contributing
296
+
292
297
  Bug reports and pull requests are welcome on GitHub at https://github.com/chronicle-app/chronicle-etl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
293
298
 
294
299
  ## License
300
+
295
301
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
296
302
 
297
303
  ## Code of Conduct
304
+
298
305
  Everyone interacting in the Chronicle::ETL project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/chronicle-app/chronicle-etl/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,5 +1,5 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
  RSpec::Core::RakeTask.new(:spec)
4
4
 
5
5
  require 'yard'
data/bin/console CHANGED
@@ -1,13 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "chronicle/etl"
3
+ require 'bundler/setup'
4
+ require 'chronicle/etl'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
8
8
 
9
9
  # (If you use this, don't forget to add pry to your Gemfile!)
10
- require "pry"
10
+ require 'pry'
11
11
  Pry.start
12
12
 
13
13
  def reload!(print = true)
@@ -15,7 +15,7 @@ def reload!(print = true)
15
15
  # Main project directory.
16
16
  root_dir = File.expand_path('..', __dir__)
17
17
  # Directories within the project that should be reloaded.
18
- reload_dirs = %w{lib}
18
+ reload_dirs = %w[lib]
19
19
  # Loop through and reload every file in all relevant project directories.
20
20
  reload_dirs.each do |dir|
21
21
  Dir.glob("#{root_dir}/#{dir}/**/*.rb").each { |f| load(f) }
@@ -23,4 +23,3 @@ def reload!(print = true)
23
23
  # Return true when complete.
24
24
  true
25
25
  end
26
-
@@ -1,71 +1,73 @@
1
+ # frozen_string_literal: true
1
2
 
2
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require "chronicle/etl/version"
5
+ require 'chronicle/etl/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
- spec.name = "chronicle-etl"
8
+ spec.name = 'chronicle-etl'
8
9
  spec.version = Chronicle::ETL::VERSION
9
- spec.authors = ["Andrew Louis"]
10
- spec.email = ["andrew@hyfen.net"]
10
+ spec.authors = ['Andrew Louis']
11
+ spec.email = ['andrew@hyfen.net']
11
12
 
12
- spec.summary = "ETL tool for personal data"
13
- spec.description = "Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it."
14
- spec.homepage = "https://github.com/chronicle-app"
15
- spec.license = "MIT"
13
+ spec.summary = 'ETL tool for personal data'
14
+ spec.description = 'Chronicle-ETL allows you to extract personal data from a variety of services, transformer it, and load it.'
15
+ spec.homepage = 'https://github.com/chronicle-app'
16
+ spec.license = 'MIT'
16
17
 
17
18
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
18
19
  # to allow pushing to a single host or delete this section to allow pushing to any host.
19
20
  if spec.respond_to?(:metadata)
20
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
21
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
21
22
 
22
- spec.metadata["homepage_uri"] = spec.homepage
23
- spec.metadata["source_code_uri"] = "https://github.com/chronicle-app/chronicle-etl"
24
- spec.metadata["changelog_uri"] = "https://github.com/chronicle-app/chronicle-etl/releases"
23
+ spec.metadata['homepage_uri'] = spec.homepage
24
+ spec.metadata['source_code_uri'] = 'https://github.com/chronicle-app/chronicle-etl'
25
+ spec.metadata['changelog_uri'] = 'https://github.com/chronicle-app/chronicle-etl/releases'
25
26
  else
26
- raise "RubyGems 2.0 or newer is required to protect against " \
27
- "public gem pushes."
27
+ raise 'RubyGems 2.0 or newer is required to protect against ' \
28
+ 'public gem pushes.'
28
29
  end
29
30
 
30
31
  # Specify which files should be added to the gem when it is released.
31
32
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
32
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
33
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
33
34
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
34
35
  end
35
- spec.bindir = "exe"
36
+ spec.bindir = 'exe'
36
37
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
- spec.require_paths = ["lib"]
38
- spec.required_ruby_version = ">= 2.7"
38
+ spec.require_paths = ['lib']
39
+ spec.required_ruby_version = '>= 3.1'
40
+ spec.metadata['rubygems_mfa_required'] = 'true'
39
41
 
40
- spec.add_dependency "activesupport", "~> 7.0"
41
- spec.add_dependency "chronic_duration", "~> 0.10.6"
42
- spec.add_dependency "colorize", "~> 0.8.1"
43
- spec.add_dependency "gems", ">= 1"
44
- spec.add_dependency "launchy"
45
- spec.add_dependency "marcel", "~> 1.0.2"
46
- spec.add_dependency "mini_exiftool", "~> 2.10"
47
- spec.add_dependency "nokogiri", "~> 1.13"
48
- spec.add_dependency "omniauth", "~> 2"
49
- spec.add_dependency "sequel", "~> 5.35"
50
- spec.add_dependency "sinatra", "~> 2"
51
- spec.add_dependency "sqlite3", "~> 1.4"
52
- spec.add_dependency "thor", "~> 1.2"
53
- spec.add_dependency "thor-hollaback", "~> 0.2"
54
- spec.add_dependency "tty-progressbar", "~> 0.17"
55
- spec.add_dependency "tty-prompt", "~> 0.23"
56
- spec.add_dependency "tty-spinner"
57
- spec.add_dependency "tty-table", "~> 0.11"
58
- spec.add_dependency "xdg", ">= 4.0"
42
+ spec.add_dependency 'activesupport', '~> 7.0'
43
+ spec.add_dependency 'chronic_duration', '~> 0.10.6'
44
+ spec.add_dependency 'chronicle-core', '~> 0.3'
45
+ spec.add_dependency 'colorize', '~> 0.8.1'
46
+ spec.add_dependency 'gems', '>= 1'
47
+ spec.add_dependency 'launchy'
48
+ spec.add_dependency 'marcel', '~> 1.0.2'
49
+ spec.add_dependency 'omniauth', '~> 2'
50
+ spec.add_dependency 'sequel', '~> 5.35'
51
+ spec.add_dependency 'sinatra', '~> 2'
52
+ spec.add_dependency 'sqlite3', '~> 1.4'
53
+ spec.add_dependency 'thor', '~> 1.2'
54
+ spec.add_dependency 'thor-hollaback', '~> 0.2'
55
+ spec.add_dependency 'tty-progressbar', '~> 0.17'
56
+ spec.add_dependency 'tty-prompt', '~> 0.23'
57
+ spec.add_dependency 'tty-spinner'
58
+ spec.add_dependency 'tty-table', '~> 0.12'
59
+ spec.add_dependency 'xdg', '>= 4.0'
59
60
 
60
- spec.add_development_dependency "bundler", "~> 2.1"
61
- spec.add_development_dependency "fakefs", "~> 1.4"
62
- spec.add_development_dependency "guard-rspec", "~> 4.7.3"
63
- spec.add_development_dependency "pry-byebug", "~> 3.9"
64
- spec.add_development_dependency "rake", "~> 13.0"
65
- spec.add_development_dependency "rspec", "~> 3.9"
66
- spec.add_development_dependency "rubocop", "~> 1.25.1"
67
- spec.add_development_dependency "simplecov", "~> 0.21"
68
- spec.add_development_dependency "vcr", "~> 6.1"
69
- spec.add_development_dependency "webmock", "~> 3"
70
- spec.add_development_dependency "yard", "~> 0.9.7"
61
+ spec.add_development_dependency 'bundler', '~> 2.1'
62
+ spec.add_development_dependency 'fakefs', '~> 1.4'
63
+ spec.add_development_dependency 'guard-rspec', '~> 4.7.3'
64
+ spec.add_development_dependency 'pry-byebug', '~> 3.9'
65
+ spec.add_development_dependency 'rake', '~> 13.0'
66
+ spec.add_development_dependency 'rspec', '~> 3.9'
67
+ spec.add_development_dependency 'rubocop', '~> 1.57'
68
+ spec.add_development_dependency 'simplecov', '~> 0.21'
69
+ spec.add_development_dependency 'vcr', '~> 6.1'
70
+ spec.add_development_dependency 'webmock', '~> 3'
71
+ spec.add_development_dependency 'webrick', '~> 1.7'
72
+ spec.add_development_dependency 'yard', '~> 0.9.7'
71
73
  end
data/exe/chronicle-etl CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "chronicle/etl/cli"
3
+ require 'chronicle/etl/cli'
4
4
 
5
5
  Chronicle::ETL::CLI::Main.start(ARGV)