chronicle-etl 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/README.md +36 -3
- data/lib/chronicle/etl/catalog.rb +1 -1
- data/lib/chronicle/etl/cli.rb +10 -2
- data/lib/chronicle/etl/loaders/loader.rb +2 -4
- data/lib/chronicle/etl/loaders/table_loader.rb +4 -8
- data/lib/chronicle/etl/runner.rb +8 -13
- data/lib/chronicle/etl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3ab7fe84d09764034f061236d75fdf7403af9e1835d568831ae4896b90fc39b5
|
4
|
+
data.tar.gz: c4c7a1ff47ecaf7d364e35d3439eef1345f90fee5a6610735124592109e2c02c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5a4b0e3769cd6063cb78843828ab15766df9d1ac1cd5d65fb81145d1415e95e46583c578312418fca91bfe02b05a8db3277a41a8a6b420f12263135deb49022
|
7
|
+
data.tar.gz: e6d6e23d3c164d6fc5b9283d468fe9c9a450329d2d778e3ad49962f42fbe0316299111cdd02d2abc92eb52dc46823c5cbaf1c195203fc4e78129e9b28528c5ef
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
4
4
|
|
5
|
+
## [0.1.4] - 2020-08-18
|
6
|
+
### Updated
|
7
|
+
- Better display of available ETL classes
|
8
|
+
- Updated documentation
|
9
|
+
|
5
10
|
## [0.1.3] - 2020-08-13
|
6
11
|
### Added
|
7
12
|
- Ability to list all available ETL classes
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/chronicle-etl.svg)](https://badge.fury.io/rb/chronicle-etl)
|
4
4
|
|
5
|
-
Chronicle ETL is a utility tool for
|
5
|
+
Chronicle ETL is a utility tool for archiving and processing personal data. You can extract it from a variety of sources, transform it, and load it to different APIs or file formats.
|
6
|
+
|
7
|
+
This project is an adaptation of Andrew Louis's experimental [Memex project](https://hyfen.net/memex).
|
6
8
|
|
7
9
|
## Installation
|
8
10
|
|
@@ -10,7 +12,7 @@ Chronicle ETL is a utility tool for manipulating personal data. You can extract
|
|
10
12
|
$ gem install chronicle-etl
|
11
13
|
```
|
12
14
|
|
13
|
-
##
|
15
|
+
## Usage
|
14
16
|
|
15
17
|
After installing the gem, `chronicle-etl` is available to run in your shell.
|
16
18
|
|
@@ -19,12 +21,43 @@ chronicle-etl --extractor csv --extractor-opts filename:test.csv --loader table
|
|
19
21
|
cat test.csv | chronicle-etl --extractor csv --loader table
|
20
22
|
```
|
21
23
|
|
22
|
-
##
|
24
|
+
## Available importers
|
25
|
+
|
26
|
+
### Extractors
|
27
|
+
- `stdin` - (default) Load records from line-separated stdin
|
28
|
+
- `csv`
|
29
|
+
- `file` - load from a single file or directory (with a glob pattern)
|
30
|
+
|
31
|
+
### Transformers
|
32
|
+
- `null` - (default) Don't do anything
|
33
|
+
|
34
|
+
### Loaders
|
35
|
+
- `stdout` - (default) output transformed records to stdout
|
36
|
+
- `csv` - Load records to a csv file
|
37
|
+
- `table` - Output an ascii table of records. Useful for debugging.
|
38
|
+
|
39
|
+
### Provider-specific importers
|
40
|
+
|
41
|
+
In addition to the built-in importers, importers for third-party platforms are available. They are packaged as individual Ruby gems.
|
42
|
+
|
43
|
+
- [email](https://github.com/chronicle-app/chronicle-email). Extractors for `mbox` files. Transformers for chronicle schema
|
44
|
+
- [bash](https://github.com/chronicle-app/chronicle-bash). Extract bash history from `~/.bash_history`. Transform it for chronicle schema
|
45
|
+
|
46
|
+
To install any of these, run `gem install chronicle-PROVIDER`.
|
47
|
+
|
48
|
+
If you don't want to use the available rubygem importers, `chronicle-etl` can use `stdin` as an Extractor source (newline separated records). You can also use `stdout` as a loader — transformed records will be outputted separated by newlines.
|
49
|
+
|
50
|
+
I'll be open-sourcing more importers. Please [contact me](mailto:andrew@hyfen.net) to chat about what will be available!
|
51
|
+
|
52
|
+
### Full commands
|
23
53
|
|
24
54
|
```
|
55
|
+
$ chronicle-etl help
|
56
|
+
|
25
57
|
Commands:
|
26
58
|
chronicle-etl help [COMMAND] # Describe available commands or one specific command
|
27
59
|
chronicle-etl job # Runs an ETL job
|
60
|
+
chronicle-etl list # List all ETL classes
|
28
61
|
```
|
29
62
|
|
30
63
|
### Job options
|
@@ -47,7 +47,7 @@ module Chronicle
|
|
47
47
|
def provider
|
48
48
|
# TODO: needs better convention for a gem reporting its provider name
|
49
49
|
provider = to_s.split('::')[1].downcase
|
50
|
-
|
50
|
+
provider == 'etl' ? 'chronicle' : provider
|
51
51
|
end
|
52
52
|
|
53
53
|
def built_in?
|
data/lib/chronicle/etl/cli.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'thor'
|
2
2
|
require 'chronicle/etl'
|
3
|
+
require 'colorize'
|
3
4
|
|
4
5
|
module Chronicle
|
5
6
|
module Etl
|
@@ -39,9 +40,16 @@ module Chronicle
|
|
39
40
|
desc 'list', 'List all ETL classes'
|
40
41
|
def list
|
41
42
|
klasses = Chronicle::Etl::Catalog.available_classes
|
43
|
+
klasses = klasses.sort_by do |a|
|
44
|
+
[a[:built_in].to_s, a[:provider], a[:phase]]
|
45
|
+
end
|
42
46
|
|
43
|
-
|
44
|
-
|
47
|
+
headers = klasses.first.keys.map do |key|
|
48
|
+
key.to_s.capitalize.light_white
|
49
|
+
end
|
50
|
+
|
51
|
+
table = TTY::Table.new(headers, klasses.map(&:values))
|
52
|
+
puts table.render(padding: [0, 2])
|
45
53
|
end
|
46
54
|
end
|
47
55
|
end
|
@@ -2,15 +2,13 @@ module Chronicle
|
|
2
2
|
module Etl
|
3
3
|
class Loader
|
4
4
|
extend Chronicle::Etl::Catalog
|
5
|
-
|
5
|
+
|
6
6
|
def initialize(options = {})
|
7
7
|
@options = options
|
8
8
|
end
|
9
9
|
|
10
10
|
def start; end
|
11
11
|
|
12
|
-
def first_load result; end
|
13
|
-
|
14
12
|
def load
|
15
13
|
raise NotImplementedError
|
16
14
|
end
|
@@ -22,4 +20,4 @@ end
|
|
22
20
|
|
23
21
|
require_relative 'csv_loader'
|
24
22
|
require_relative 'stdout_loader'
|
25
|
-
require_relative 'table_loader'
|
23
|
+
require_relative 'table_loader'
|
@@ -7,18 +7,14 @@ module Chronicle
|
|
7
7
|
super(options)
|
8
8
|
end
|
9
9
|
|
10
|
-
# defer creating table until we get first result and can determine headers
|
11
|
-
def first_load(result)
|
12
|
-
headers = result.keys
|
13
|
-
@table = TTY::Table.new(header: headers)
|
14
|
-
end
|
15
|
-
|
16
10
|
def load(result)
|
17
|
-
@table
|
11
|
+
@table ||= TTY::Table.new(header: result.keys)
|
12
|
+
values = result.values.map{|x| x.to_s[0..30]}
|
13
|
+
@table << values
|
18
14
|
end
|
19
15
|
|
20
16
|
def finish
|
21
|
-
puts @table.render(:ascii)
|
17
|
+
puts @table.render(:ascii, padding: [0, 1])
|
22
18
|
end
|
23
19
|
end
|
24
20
|
end
|
data/lib/chronicle/etl/runner.rb
CHANGED
@@ -12,23 +12,18 @@ class Chronicle::Etl::Runner
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def run!
|
15
|
-
|
15
|
+
total = @extractor.results_count
|
16
|
+
progress_bar = Chronicle::Etl::Utils::ProgressBar.new(title: 'Running job', total: total)
|
16
17
|
count = 0
|
17
18
|
|
18
19
|
@loader.start
|
19
20
|
|
20
21
|
@extractor.extract do |data, metadata|
|
21
22
|
transformed_data = @transformer.transform(data)
|
22
|
-
|
23
|
-
@loader.first_load(transformed_data) if count == 0
|
24
23
|
@loader.load(transformed_data)
|
25
24
|
|
26
25
|
progress_bar.increment
|
27
26
|
count += 1
|
28
|
-
# rescue StandardError => e
|
29
|
-
# require 'pry'
|
30
|
-
# binding.pry
|
31
|
-
# progress_bar.log "Error processing; #{e.inspect}"
|
32
27
|
end
|
33
28
|
|
34
29
|
progress_bar.finish
|
@@ -43,13 +38,13 @@ class Chronicle::Etl::Runner
|
|
43
38
|
@loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
|
44
39
|
end
|
45
40
|
|
46
|
-
def load_etl_class(phase,
|
47
|
-
if BUILTIN[phase].include?
|
48
|
-
klass_name = "Chronicle::Etl::#{
|
41
|
+
def load_etl_class(phase, x)
|
42
|
+
if BUILTIN[phase].include? x
|
43
|
+
klass_name = "Chronicle::Etl::#{x.capitalize}#{phase.to_s.capitalize}"
|
49
44
|
else
|
50
45
|
# TODO: come up with syntax for specifying a particular extractor in a provider library
|
51
|
-
|
52
|
-
provider =
|
46
|
+
provider, name = x.split(":")
|
47
|
+
provider = x unless provider
|
53
48
|
begin
|
54
49
|
require "chronicle/#{provider}"
|
55
50
|
rescue LoadError => e
|
@@ -57,7 +52,7 @@ class Chronicle::Etl::Runner
|
|
57
52
|
warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
|
58
53
|
exit(false)
|
59
54
|
end
|
60
|
-
klass_name = "Chronicle::#{name.capitalize}
|
55
|
+
klass_name = "Chronicle::#{provider.capitalize}::#{name&.capitalize}#{phase.capitalize}"
|
61
56
|
end
|
62
57
|
Object.const_get(klass_name)
|
63
58
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chronicle-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Louis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|