chronicle-etl 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/README.md +36 -3
- data/lib/chronicle/etl/catalog.rb +1 -1
- data/lib/chronicle/etl/cli.rb +10 -2
- data/lib/chronicle/etl/loaders/loader.rb +2 -4
- data/lib/chronicle/etl/loaders/table_loader.rb +4 -8
- data/lib/chronicle/etl/runner.rb +8 -13
- data/lib/chronicle/etl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ab7fe84d09764034f061236d75fdf7403af9e1835d568831ae4896b90fc39b5
|
4
|
+
data.tar.gz: c4c7a1ff47ecaf7d364e35d3439eef1345f90fee5a6610735124592109e2c02c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5a4b0e3769cd6063cb78843828ab15766df9d1ac1cd5d65fb81145d1415e95e46583c578312418fca91bfe02b05a8db3277a41a8a6b420f12263135deb49022
|
7
|
+
data.tar.gz: e6d6e23d3c164d6fc5b9283d468fe9c9a450329d2d778e3ad49962f42fbe0316299111cdd02d2abc92eb52dc46823c5cbaf1c195203fc4e78129e9b28528c5ef
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
4
|
|
5
|
+
## [0.1.4] - 2020-08-18
|
6
|
+
### Updated
|
7
|
+
- Better display of available ETL classes
|
8
|
+
- Updated documentation
|
9
|
+
|
5
10
|
## [0.1.3] - 2020-08-13
|
6
11
|
### Added
|
7
12
|
- Ability to list all available ETL classes
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
[](https://badge.fury.io/rb/chronicle-etl)
|
4
4
|
|
5
|
-
Chronicle ETL is a utility tool for
|
5
|
+
Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
|
6
|
+
|
7
|
+
This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
|
6
8
|
|
7
9
|
## Installation
|
8
10
|
|
@@ -10,7 +12,7 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
|
|
10
12
|
$ gem install chronicle-etl
|
11
13
|
```
|
12
14
|
|
13
|
-
##
|
15
|
+
## Usage
|
14
16
|
|
15
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
16
18
|
|
@@ -19,12 +21,43 @@ chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
|
|
19
21
|
cat test.csv | chronicle-etl --extractor csv --loader table
|
20
22
|
```
|
21
23
|
|
22
|
-
##
|
24
|
+
## Available importers
|
25
|
+
|
26
|
+
### Extractors
|
27
|
+
- `stdin` - (default) Load records from line-separated stdin
|
28
|
+
- `csv`
|
29
|
+
- `file` - load from a single file or directory (with a glob pattern)
|
30
|
+
|
31
|
+
### Transformers
|
32
|
+
- `null` - (default) Don't do anything
|
33
|
+
|
34
|
+
### Loaders
|
35
|
+
- `stdout` - (default) output transformed records to stdout
|
36
|
+
- `csv` - Load records to a csv file
|
37
|
+
- `table` - Output an ascii table of records. Useful for debugging.
|
38
|
+
|
39
|
+
### Provider-specific importers
|
40
|
+
|
41
|
+
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
42
|
+
|
43
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
|
44
|
+
- [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
|
45
|
+
|
46
|
+
To install any of these, run `gem install chronicle-PROVIDER`.
|
47
|
+
|
48
|
+
If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
|
49
|
+
|
50
|
+
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
51
|
+
|
52
|
+
### Full commands
|
23
53
|
|
24
54
|
```
|
55
|
+
$ chronicle-etl help
|
56
|
+
|
25
57
|
Commands:
|
26
58
|
chronicle-etl help [COMMAND] # Describe available commands or one specific command
|
27
59
|
chronicle-etl job # Runs an ETL job
|
60
|
+
chronicle-etl list # List all ETL classes
|
28
61
|
```
|
29
62
|
|
30
63
|
### Job options
|
@@ -47,7 +47,7 @@ module Chronicle
|
|
47
47
|
def provider
|
48
48
|
# TODO: needs better convention for a gem reporting its provider name
|
49
49
|
provider = to_s.split('::')[1].downcase
|
50
|
-
|
50
|
+
provider == 'etl' ? 'chronicle' : provider
|
51
51
|
end
|
52
52
|
|
53
53
|
def built_in?
|
data/lib/chronicle/etl/cli.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'thor'
|
2
2
|
require 'chronicle/etl'
|
3
|
+
require 'colorize'
|
3
4
|
|
4
5
|
module Chronicle
|
5
6
|
module Etl
|
@@ -39,9 +40,16 @@ module Chronicle
|
|
39
40
|
desc 'list', 'List all ETL classes'
|
40
41
|
def list
|
41
42
|
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
+
klasses = klasses.sort_by do |a|
|
44
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
+
end
|
42
46
|
|
43
|
-
|
44
|
-
|
47
|
+
headers = klasses.first.keys.map do |key|
|
48
|
+
key.to_s.capitalize.light_white
|
49
|
+
end
|
50
|
+
|
51
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
+
puts table.render(padding: [0, 2])
|
45
53
|
end
|
46
54
|
end
|
47
55
|
end
|
@@ -2,15 +2,13 @@ module Chronicle
|
|
2
2
|
module Etl
|
3
3
|
class Loader
|
4
4
|
extend Chronicle::Etl::Catalog
|
5
|
-
|
5
|
+
|
6
6
|
def initialize(options = {})
|
7
7
|
@options = options
|
8
8
|
end
|
9
9
|
|
10
10
|
def start; end
|
11
11
|
|
12
|
-
def first_load result; end
|
13
|
-
|
14
12
|
def load
|
15
13
|
raise NotImplementedError
|
16
14
|
end
|
@@ -22,4 +20,4 @@ end
|
|
22
20
|
|
23
21
|
require_relative 'csv_loader'
|
24
22
|
require_relative 'stdout_loader'
|
25
|
-
require_relative 'table_loader'
|
23
|
+
require_relative 'table_loader'
|
@@ -7,18 +7,14 @@ module Chronicle
|
|
7
7
|
super(options)
|
8
8
|
end
|
9
9
|
|
10
|
-
# defer creating table until we get first result and can determine headers
|
11
|
-
def first_load(result)
|
12
|
-
headers = result.keys
|
13
|
-
@table = TTY::Table.new(header: headers)
|
14
|
-
end
|
15
|
-
|
16
10
|
def load(result)
|
17
|
-
@table
|
11
|
+
@table ||= TTY::Table.new(header: result.keys)
|
12
|
+
values = result.values.map{|x| x.to_s[0..30]}
|
13
|
+
@table << values
|
18
14
|
end
|
19
15
|
|
20
16
|
def finish
|
21
|
-
puts @table.render(:ascii)
|
17
|
+
puts @table.render(:ascii, padding: [0, 1])
|
22
18
|
end
|
23
19
|
end
|
24
20
|
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -12,23 +12,18 @@ class Chronicle::Etl::Runner
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def run!
|
15
|
-
|
15
|
+
total = @extractor.results_count
|
16
|
+
progress_bar = Chronicle::Etl::Utils::ProgressBar.new(title: 'Running job', total: total)
|
16
17
|
count = 0
|
17
18
|
|
18
19
|
@loader.start
|
19
20
|
|
20
21
|
@extractor.extract do |data, metadata|
|
21
22
|
transformed_data = @transformer.transform(data)
|
22
|
-
|
23
|
-
@loader.first_load(transformed_data) if count == 0
|
24
23
|
@loader.load(transformed_data)
|
25
24
|
|
26
25
|
progress_bar.increment
|
27
26
|
count += 1
|
28
|
-
# rescue StandardError => e
|
29
|
-
# require 'pry'
|
30
|
-
# binding.pry
|
31
|
-
# progress_bar.log "Error processing; #{e.inspect}"
|
32
27
|
end
|
33
28
|
|
34
29
|
progress_bar.finish
|
@@ -43,13 +38,13 @@ class Chronicle::Etl::Runner
|
|
43
38
|
@loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
|
44
39
|
end
|
45
40
|
|
46
|
-
def load_etl_class(phase,
|
47
|
-
if BUILTIN[phase].include?
|
48
|
-
klass_name = "Chronicle::Etl::#{
|
41
|
+
def load_etl_class(phase, x)
|
42
|
+
if BUILTIN[phase].include? x
|
43
|
+
klass_name = "Chronicle::Etl::#{x.capitalize}#{phase.to_s.capitalize}"
|
49
44
|
else
|
50
45
|
# TODO: come up with syntax for specifying a particular extractor in a provider library
|
51
|
-
|
52
|
-
provider =
|
46
|
+
provider, name = x.split(":")
|
47
|
+
provider = x unless provider
|
53
48
|
begin
|
54
49
|
require "chronicle/#{provider}"
|
55
50
|
rescue LoadError => e
|
@@ -57,7 +52,7 @@ class Chronicle::Etl::Runner
|
|
57
52
|
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
|
58
53
|
exit(false)
|
59
54
|
end
|
60
|
-
klass_name = "Chronicle::#{name.capitalize}
|
55
|
+
klass_name = "Chronicle::#{provider.capitalize}::#{name&.capitalize}#{phase.capitalize}"
|
61
56
|
end
|
62
57
|
Object.const_get(klass_name)
|
63
58
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|