dbx 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +62 -9
- data/dbx_sample.yml +3 -0
- data/exe/dbx +11 -6
- data/lib/dbx.rb +10 -2
- data/lib/dbx/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b769b3b6e3be916660bd21df6ccbdaaedf31558c
|
4
|
+
data.tar.gz: e67cdb52aca07a0035884c3866697d32327dd956
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2b8a54503f2b6487553d7e7551268de810eb15c7b80632ec1d3b7c3b30d2b9b037384b4c506e6be5263dfe6c6ca871503470d64ee48c3dc42a00546b4fc19d1
|
7
|
+
data.tar.gz: 25024c1049ba266994d6113d21f4fb0fcddb6a40e108bf687a5ba37d72702d58fdb493db376c33beedc345bca2f7edc5d11382d07ffde9d6f8224845ecd9e575
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,22 +1,72 @@
|
|
1
1
|
# DBX
|
2
2
|
|
3
|
-
Database eXtras for working with CSV files in a database.
|
3
|
+
Database eXtras for working with CSV files in a Postgres database.
|
4
4
|
|
5
|
-
|
5
|
+
We currently only support Postgres databases, but others will be supported soon.
|
6
|
+
|
7
|
+
## Usage Examples
|
8
|
+
|
9
|
+
### Import CSV file into database
|
10
|
+
|
11
|
+
Column type detection is performed based on column contents. By default, the new table name is the file name minus its extension, and indexes are added to columns whose names match `--auto-index-pattern` (by default, `id` and names ending in `_id`).
|
12
|
+
|
13
|
+
```sh
|
14
|
+
dbx import path/to/data.csv --name data_v1 --db postgres://localhost/scratch --column-patterns _ref$:string
|
15
|
+
# --db : Used to define where to put the table.
|
16
|
+
# --column-patterns : [] Override the detected column type.
|
17
|
+
# --name : [data] Optional override to default table name of file's base name without extension.
|
18
|
+
# --sample : [100] Number of rows to sample during column detection.
|
19
|
+
# --force : [false] Drops the destination table if it exists.
|
20
|
+
# --auto-index-pattern : [^(\w+_id|id)$] Creates indexes for columns matching the pattern
|
21
|
+
```
|
22
|
+
|
23
|
+
### Create diff table of two tables in a database
|
24
|
+
|
25
|
+
+ The new table will be named `diff_data_v1_data_v2`.
|
26
|
+
+ It will contain `column_a`, `column_b`, and `column_diff` columns, where `column` is each column from `data_v1` and `column_diff` is a simple difference representation of columns `_a` and `_b`.
|
27
|
+
+ Columns `_a` and `_b` can be omitted with `--no-a-b`
|
28
|
+
|
29
|
+
```sh
|
30
|
+
dbx diff data_v1 data_v2 --db postgres://localhost/scratch --using id
|
31
|
+
# --db : Used to define where to put the table.
|
32
|
+
# --using : Space delimited list of join columns.
|
33
|
+
# --no-a-b: [false] Omit the `_a` and `_b` columns showing the source data.
|
34
|
+
# --force : [false] Drops the destination diff table if it exists.
|
35
|
+
```
|
36
|
+
|
37
|
+
### Import and diff two CSV files
|
38
|
+
|
39
|
+
Do the import and diff all at once!!!
|
40
|
+
|
41
|
+
```sh
|
42
|
+
dbx import_diff /path/to/data_v1.csv /path/to/data_v2.csv --db postgres://localhost/scratch --using id
|
43
|
+
# --db : Used to define where to put the table.
|
44
|
+
# --column-patterns : [] Override the detected column type.
|
45
|
+
# --sample : [100] Number of rows to sample during column detection.
|
46
|
+
# --auto-index-pattern : [^(\w+_id|id)$] Creates indexes for columns matching the pattern
|
47
|
+
# --using : Space delimited list of join columns.
|
48
|
+
# --no-a-b: [false] Omit the `_a` and `_b` columns showing the source data.
|
49
|
+
# --force : [false] Drops the destination diff table if it exists.
|
50
|
+
```
|
51
|
+
|
52
|
+
### List of Commands `dbx help`
|
6
53
|
|
7
54
|
```sh
|
8
55
|
Commands:
|
9
|
-
dbx create SRC #
|
10
|
-
dbx diff TABLE_A TABLE_B #
|
56
|
+
dbx create SRC # Create a table with types from SRC CSV file
|
57
|
+
dbx diff TABLE_A TABLE_B # Create diff table between TABLE_A and TABLE_B.
|
11
58
|
dbx help [COMMAND] # Describe available commands or one specific command
|
12
|
-
dbx import SRC #
|
13
|
-
dbx import_diff SRC_A SRC_B #
|
14
|
-
dbx types SRC #
|
59
|
+
dbx import SRC # Import SRC CSV into table
|
60
|
+
dbx import_diff SRC_A SRC_B # Import then diff between SRC_A CSV and SRC_B CSV files.
|
61
|
+
dbx types SRC # Detect column types give a SRC CSV file
|
15
62
|
|
16
63
|
Options:
|
17
64
|
[--db=Database URL: adapter://user:pass@host:port/db_name]
|
18
|
-
[--column-patterns=List of column patterns to override type info
|
19
|
-
[--sample=Number of rows to sample for type detection]
|
65
|
+
[--column-patterns=List of column patterns to override type info]
|
66
|
+
[--sample=Number of rows to sample for type detection]
|
67
|
+
# Default: 100
|
68
|
+
[--auto-index-pattern=Add index when column matches pattern]
|
69
|
+
# Default: ^(\w+_id|id)$
|
20
70
|
```
|
21
71
|
|
22
72
|
## Configuration
|
@@ -37,6 +87,9 @@ column_patterns:
|
|
37
87
|
|
38
88
|
# Number of rows to sample for type detection
|
39
89
|
sample: 100
|
90
|
+
|
91
|
+
# Add index if column matches this pattern.
|
92
|
+
auto_index_pattern: _id$
|
40
93
|
```
|
41
94
|
|
42
95
|
## Installation
|
data/dbx_sample.yml
CHANGED
data/exe/dbx
CHANGED
@@ -9,11 +9,12 @@ require 'pp'
|
|
9
9
|
# #rubocop:disable all
|
10
10
|
class CLI < Thor
|
11
11
|
class_option :db, type: :string, banner: 'Database URL: adapter://user:pass@host:port/db_name'
|
12
|
-
class_option :column_patterns, type: :array, banner: 'List of column patterns to override type info
|
12
|
+
class_option :column_patterns, type: :array, banner: 'List of column patterns to override type info'
|
13
13
|
class_option :sample, type: :numeric, banner: 'Number of rows to sample for type detection', default: 100
|
14
|
+
class_option :auto_index_pattern, type: :string, default: '^(\w+_id|id)$', banner: 'Add index when column matches pattern'
|
14
15
|
|
15
16
|
# contents of the Thor class
|
16
|
-
desc 'types SRC', '
|
17
|
+
desc 'types SRC', 'Detect column types give a SRC CSV file'
|
17
18
|
def types(src)
|
18
19
|
handle_global_options
|
19
20
|
DBX.column_types(src, sample_rows: options[:sample]).each do |col, type|
|
@@ -21,7 +22,7 @@ class CLI < Thor
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
24
|
-
desc 'create SRC', '
|
25
|
+
desc 'create SRC', 'Create a table with types from SRC CSV file'
|
25
26
|
option :name
|
26
27
|
option :force, type: :boolean
|
27
28
|
def create(src)
|
@@ -33,7 +34,7 @@ class CLI < Thor
|
|
33
34
|
)
|
34
35
|
end
|
35
36
|
|
36
|
-
desc 'import SRC', '
|
37
|
+
desc 'import SRC', 'Import SRC CSV into table'
|
37
38
|
option :name
|
38
39
|
option :force, type: :boolean
|
39
40
|
def import(src)
|
@@ -45,7 +46,7 @@ class CLI < Thor
|
|
45
46
|
)
|
46
47
|
end
|
47
48
|
|
48
|
-
desc 'diff TABLE_A TABLE_B', '
|
49
|
+
desc 'diff TABLE_A TABLE_B', 'Create diff table between TABLE_A and TABLE_B.'
|
49
50
|
option :force, type: :boolean, banner: 'remove diff_ table if it exists'
|
50
51
|
option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
|
51
52
|
option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
|
@@ -62,7 +63,7 @@ class CLI < Thor
|
|
62
63
|
)
|
63
64
|
end
|
64
65
|
|
65
|
-
desc 'import_diff SRC_A SRC_B', '
|
66
|
+
desc 'import_diff SRC_A SRC_B', 'Import then diff between SRC_A CSV and SRC_B CSV files.'
|
66
67
|
option :force, type: :boolean, banner: 'remove diff_ table if it exists'
|
67
68
|
option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
|
68
69
|
option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
|
@@ -97,6 +98,10 @@ class CLI < Thor
|
|
97
98
|
if options[:db]
|
98
99
|
DBX.config['db'] = options[:db]
|
99
100
|
end
|
101
|
+
|
102
|
+
if options[:auto_index_pattern]
|
103
|
+
DBX.config['auto_index_pattern'] = options[:auto_index_pattern]
|
104
|
+
end
|
100
105
|
end
|
101
106
|
end
|
102
107
|
CLI.start(ARGV)
|
data/lib/dbx.rb
CHANGED
@@ -34,6 +34,10 @@ module DBX
|
|
34
34
|
config['sample_rows'] || 100
|
35
35
|
end
|
36
36
|
|
37
|
+
def config_auto_index_pattern
|
38
|
+
config['auto_index_pattern']
|
39
|
+
end
|
40
|
+
|
37
41
|
def config_db
|
38
42
|
ENV['DATABASE_URL'] || config['db'] || raise('`db` not set as command line option or `dbx.yml`')
|
39
43
|
end
|
@@ -94,14 +98,18 @@ module DBX
|
|
94
98
|
pg.put_copy_data(line)
|
95
99
|
end
|
96
100
|
end
|
97
|
-
|
101
|
+
|
102
|
+
unless config_auto_index_pattern.blank?
|
103
|
+
index_table(name, pattern: /#{config_auto_index_pattern}/)
|
104
|
+
end
|
98
105
|
end
|
99
106
|
name
|
100
107
|
end
|
101
108
|
|
102
|
-
def index_table(table_name)
|
109
|
+
def index_table(table_name, pattern: nil)
|
103
110
|
connection do |conn|
|
104
111
|
conn.columns(table_name).each_with_index do |column, i|
|
112
|
+
next unless column.name =~ pattern
|
105
113
|
conn.add_index(table_name, [column.name], name: "idx_#{table_name}_#{i.to_s.rjust(2,'0')}")
|
106
114
|
end
|
107
115
|
end
|
data/lib/dbx/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dbx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Pierce
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|