dbx 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +62 -9
- data/dbx_sample.yml +3 -0
- data/exe/dbx +11 -6
- data/lib/dbx.rb +10 -2
- data/lib/dbx/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b769b3b6e3be916660bd21df6ccbdaaedf31558c
|
4
|
+
data.tar.gz: e67cdb52aca07a0035884c3866697d32327dd956
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2b8a54503f2b6487553d7e7551268de810eb15c7b80632ec1d3b7c3b30d2b9b037384b4c506e6be5263dfe6c6ca871503470d64ee48c3dc42a00546b4fc19d1
|
7
|
+
data.tar.gz: 25024c1049ba266994d6113d21f4fb0fcddb6a40e108bf687a5ba37d72702d58fdb493db376c33beedc345bca2f7edc5d11382d07ffde9d6f8224845ecd9e575
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,22 +1,72 @@
|
|
1
1
|
# DBX
|
2
2
|
|
3
|
-
Database eXtras for working with CSV files in a database.
|
3
|
+
Database eXtras for working with CSV files in a Postgres database.
|
4
4
|
|
5
|
-
|
5
|
+
We currently support only Postgres databases, but others will be supported soon.
|
6
|
+
|
7
|
+
## Usage Examples
|
8
|
+
|
9
|
+
### Import CSV file into database
|
10
|
+
|
11
|
+
Column type detection is performed based on column contents. By default, the new table name is the file name minus its extension, and indexes are added to columns whose names match the auto-index pattern (default `^(\w+_id|id)$`).
|
12
|
+
|
13
|
+
```sh
|
14
|
+
dbx import path/to/data.csv --name data_v1 --db postgres://localhost/scratch --column-patterns _ref$:string
|
15
|
+
# --db : Used to define where to put the table.
|
16
|
+
# --column-patterns : [] Override the detected column type.
|
17
|
+
# --name : [data] Optional override to default table name of file's base name without extension.
|
18
|
+
# --sample : [100] Number of rows to sample during column detection.
|
19
|
+
# --force : [false] Drops the destination table if it exists.
|
20
|
+
# --auto-index-pattern : [^(\w+_id|id)$] Creates indexes for columns matching the pattern
|
21
|
+
```
|
22
|
+
|
23
|
+
### Create diff table of two tables in a database
|
24
|
+
|
25
|
+
+ The new table will be named `diff_data_v1_data_v2`.
|
26
|
+
+ It will contain `column_a`, `column_b`, and `column_diff` columns, where `column` is each column from `data_v1` and `column_diff` is a simple difference representation of columns `_a` and `_b`.
|
27
|
+
+ Columns `_a` and `_b` can be omitted with `--no-a-b`
|
28
|
+
|
29
|
+
```sh
|
30
|
+
dbx diff data_v1 data_v2 --db postgres://localhost/scratch --using id
|
31
|
+
# --db : Used to define where to put the table.
|
32
|
+
# --using : Space delimited list of join columns.
|
33
|
+
# --no-a-b: [false] Omit the `_a` and `_b` columns showing the source data.
|
34
|
+
# --force : [false] Drops the destination diff table if it exists.
|
35
|
+
```
|
36
|
+
|
37
|
+
### Import and diff two CSV files
|
38
|
+
|
39
|
+
Do the import and diff all at once!!!
|
40
|
+
|
41
|
+
```sh
|
42
|
+
dbx import_diff /path/to/data_v1.csv /path/to/data_v2.csv --db postgres://localhost/scratch --using id
|
43
|
+
# --db : Used to define where to put the table.
|
44
|
+
# --column-patterns : [] Override the detected column type.
|
45
|
+
# --sample : [100] Number of rows to sample during column detection.
|
46
|
+
# --auto-index-pattern : [^(\w+_id|id)$] Creates indexes for columns matching the pattern
|
47
|
+
# --using : Space delimited list of join columns.
|
48
|
+
# --no-a-b: [false] Omit the `_a` and `_b` columns showing the source data.
|
49
|
+
# --force : [false] Drops the destination diff table if it exists.
|
50
|
+
```
|
51
|
+
|
52
|
+
### List of Commands `dbx help`
|
6
53
|
|
7
54
|
```sh
|
8
55
|
Commands:
|
9
|
-
dbx create SRC #
|
10
|
-
dbx diff TABLE_A TABLE_B #
|
56
|
+
dbx create SRC # Create a table with types from SRC CSV file
|
57
|
+
dbx diff TABLE_A TABLE_B # Create diff table between TABLE_A and TABLE_B.
|
11
58
|
dbx help [COMMAND] # Describe available commands or one specific command
|
12
|
-
dbx import SRC #
|
13
|
-
dbx import_diff SRC_A SRC_B #
|
14
|
-
dbx types SRC #
|
59
|
+
dbx import SRC # Import SRC CSV into table
|
60
|
+
dbx import_diff SRC_A SRC_B # Import then diff between SRC_A CSV and SRC_B CSV files.
|
61
|
+
dbx types SRC # Detect column types give a SRC CSV file
|
15
62
|
|
16
63
|
Options:
|
17
64
|
[--db=Database URL: adapter://user:pass@host:port/db_name]
|
18
|
-
[--column-patterns=List of column patterns to override type info
|
19
|
-
[--sample=Number of rows to sample for type detection]
|
65
|
+
[--column-patterns=List of column patterns to override type info]
|
66
|
+
[--sample=Number of rows to sample for type detection]
|
67
|
+
# Default: 100
|
68
|
+
[--auto-index-pattern=Add index when column matches pattern]
|
69
|
+
# Default: ^(\w+_id|id)$
|
20
70
|
```
|
21
71
|
|
22
72
|
## Configuration
|
@@ -37,6 +87,9 @@ column_patterns:
|
|
37
87
|
|
38
88
|
# Number of rows to sample for type detection
|
39
89
|
sample: 100
|
90
|
+
|
91
|
+
# Add index if column matches this pattern.
|
92
|
+
auto_index_pattern: _id$
|
40
93
|
```
|
41
94
|
|
42
95
|
## Installation
|
data/dbx_sample.yml
CHANGED
data/exe/dbx
CHANGED
@@ -9,11 +9,12 @@ require 'pp'
|
|
9
9
|
# #rubocop:disable all
|
10
10
|
class CLI < Thor
|
11
11
|
class_option :db, type: :string, banner: 'Database URL: adapter://user:pass@host:port/db_name'
|
12
|
-
class_option :column_patterns, type: :array, banner: 'List of column patterns to override type info
|
12
|
+
class_option :column_patterns, type: :array, banner: 'List of column patterns to override type info'
|
13
13
|
class_option :sample, type: :numeric, banner: 'Number of rows to sample for type detection', default: 100
|
14
|
+
class_option :auto_index_pattern, type: :string, default: '^(\w+_id|id)$', banner: 'Add index when column matches pattern'
|
14
15
|
|
15
16
|
# contents of the Thor class
|
16
|
-
desc 'types SRC', '
|
17
|
+
desc 'types SRC', 'Detect column types give a SRC CSV file'
|
17
18
|
def types(src)
|
18
19
|
handle_global_options
|
19
20
|
DBX.column_types(src, sample_rows: options[:sample]).each do |col, type|
|
@@ -21,7 +22,7 @@ class CLI < Thor
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
24
|
-
desc 'create SRC', '
|
25
|
+
desc 'create SRC', 'Create a table with types from SRC CSV file'
|
25
26
|
option :name
|
26
27
|
option :force, type: :boolean
|
27
28
|
def create(src)
|
@@ -33,7 +34,7 @@ class CLI < Thor
|
|
33
34
|
)
|
34
35
|
end
|
35
36
|
|
36
|
-
desc 'import SRC', '
|
37
|
+
desc 'import SRC', 'Import SRC CSV into table'
|
37
38
|
option :name
|
38
39
|
option :force, type: :boolean
|
39
40
|
def import(src)
|
@@ -45,7 +46,7 @@ class CLI < Thor
|
|
45
46
|
)
|
46
47
|
end
|
47
48
|
|
48
|
-
desc 'diff TABLE_A TABLE_B', '
|
49
|
+
desc 'diff TABLE_A TABLE_B', 'Create diff table between TABLE_A and TABLE_B.'
|
49
50
|
option :force, type: :boolean, banner: 'remove diff_ table if it exists'
|
50
51
|
option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
|
51
52
|
option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
|
@@ -62,7 +63,7 @@ class CLI < Thor
|
|
62
63
|
)
|
63
64
|
end
|
64
65
|
|
65
|
-
desc 'import_diff SRC_A SRC_B', '
|
66
|
+
desc 'import_diff SRC_A SRC_B', 'Import then diff between SRC_A CSV and SRC_B CSV files.'
|
66
67
|
option :force, type: :boolean, banner: 'remove diff_ table if it exists'
|
67
68
|
option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
|
68
69
|
option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
|
@@ -97,6 +98,10 @@ class CLI < Thor
|
|
97
98
|
if options[:db]
|
98
99
|
DBX.config['db'] = options[:db]
|
99
100
|
end
|
101
|
+
|
102
|
+
if options[:auto_index_pattern]
|
103
|
+
DBX.config['auto_index_pattern'] = options[:auto_index_pattern]
|
104
|
+
end
|
100
105
|
end
|
101
106
|
end
|
102
107
|
CLI.start(ARGV)
|
data/lib/dbx.rb
CHANGED
@@ -34,6 +34,10 @@ module DBX
|
|
34
34
|
config['sample_rows'] || 100
|
35
35
|
end
|
36
36
|
|
37
|
+
def config_auto_index_pattern
|
38
|
+
config['auto_index_pattern']
|
39
|
+
end
|
40
|
+
|
37
41
|
def config_db
|
38
42
|
ENV['DATABASE_URL'] || config['db'] || raise('`db` not set as command line option or `dbx.yml`')
|
39
43
|
end
|
@@ -94,14 +98,18 @@ module DBX
|
|
94
98
|
pg.put_copy_data(line)
|
95
99
|
end
|
96
100
|
end
|
97
|
-
|
101
|
+
|
102
|
+
unless config_auto_index_pattern.blank?
|
103
|
+
index_table(name, pattern: /#{config_auto_index_pattern}/)
|
104
|
+
end
|
98
105
|
end
|
99
106
|
name
|
100
107
|
end
|
101
108
|
|
102
|
-
def index_table(table_name)
|
109
|
+
def index_table(table_name, pattern: nil)
|
103
110
|
connection do |conn|
|
104
111
|
conn.columns(table_name).each_with_index do |column, i|
|
112
|
+
next unless column.name =~ pattern
|
105
113
|
conn.add_index(table_name, [column.name], name: "idx_#{table_name}_#{i.to_s.rjust(2,'0')}")
|
106
114
|
end
|
107
115
|
end
|
data/lib/dbx/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dbx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Pierce
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|