dbx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +5 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +68 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/Rakefile +6 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/dbx.gemspec +29 -0
- data/dbx_sample.yml +3 -0
- data/exe/dbx +99 -0
- data/lib/dbx/differ.rb +100 -0
- data/lib/dbx/model_base.rb +5 -0
- data/lib/dbx/version.rb +3 -0
- data/lib/dbx.rb +178 -0
- metadata +145 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2ee3e7675d2bf79eee5b1c193719713b894677e6
|
4
|
+
data.tar.gz: a8c1b1a08b82a0ae18930b5d7e9614f5b0f2ee05
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f0bbf5e27d8bf10dc4864c30c2bfbeb568c7815ec5f51f02e1a0c2a5673ccb1e7c0fb9e5a9e7a4e3ee70594b3a2bf19369c4e091d0426b21b727d6b38f2ebb19
|
7
|
+
data.tar.gz: 6b632abac4bb5546651d6825e5e209933a602901abf6785c59266c1a01c51e6e5f3b8e5ea43d6e64946046abf61777ea78284dff3d11296a0453b060ffcfb484
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
dbx (0.1.0)
|
5
|
+
activerecord (~> 4.0)
|
6
|
+
activesupport (~> 4.0)
|
7
|
+
thor
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
activemodel (4.2.10)
|
13
|
+
activesupport (= 4.2.10)
|
14
|
+
builder (~> 3.1)
|
15
|
+
activerecord (4.2.10)
|
16
|
+
activemodel (= 4.2.10)
|
17
|
+
activesupport (= 4.2.10)
|
18
|
+
arel (~> 6.0)
|
19
|
+
activesupport (4.2.10)
|
20
|
+
i18n (~> 0.7)
|
21
|
+
minitest (~> 5.1)
|
22
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
23
|
+
tzinfo (~> 1.1)
|
24
|
+
arel (6.0.4)
|
25
|
+
builder (3.2.3)
|
26
|
+
coderay (1.1.2)
|
27
|
+
concurrent-ruby (1.0.5)
|
28
|
+
diff-lcs (1.3)
|
29
|
+
i18n (0.9.5)
|
30
|
+
concurrent-ruby (~> 1.0)
|
31
|
+
method_source (0.9.0)
|
32
|
+
minitest (5.11.3)
|
33
|
+
pg (0.19.0)
|
34
|
+
pry (0.11.3)
|
35
|
+
coderay (~> 1.1.0)
|
36
|
+
method_source (~> 0.9.0)
|
37
|
+
rake (10.5.0)
|
38
|
+
rspec (3.8.0)
|
39
|
+
rspec-core (~> 3.8.0)
|
40
|
+
rspec-expectations (~> 3.8.0)
|
41
|
+
rspec-mocks (~> 3.8.0)
|
42
|
+
rspec-core (3.8.0)
|
43
|
+
rspec-support (~> 3.8.0)
|
44
|
+
rspec-expectations (3.8.1)
|
45
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
46
|
+
rspec-support (~> 3.8.0)
|
47
|
+
rspec-mocks (3.8.0)
|
48
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
49
|
+
rspec-support (~> 3.8.0)
|
50
|
+
rspec-support (3.8.0)
|
51
|
+
thor (0.20.0)
|
52
|
+
thread_safe (0.3.6)
|
53
|
+
tzinfo (1.2.5)
|
54
|
+
thread_safe (~> 0.1)
|
55
|
+
|
56
|
+
PLATFORMS
|
57
|
+
ruby
|
58
|
+
|
59
|
+
DEPENDENCIES
|
60
|
+
bundler (~> 1.16)
|
61
|
+
dbx!
|
62
|
+
pg (< 1.0.0)
|
63
|
+
pry
|
64
|
+
rake (~> 10.0)
|
65
|
+
rspec (~> 3.0)
|
66
|
+
|
67
|
+
BUNDLED WITH
|
68
|
+
1.16.1
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Scott Pierce
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# DBX
|
2
|
+
|
3
|
+
Database eXtras for working with CSV files in a database.
|
4
|
+
|
5
|
+
## Commands
|
6
|
+
|
7
|
+
```sh
|
8
|
+
Commands:
|
9
|
+
dbx create SRC # create a table with types from SRC CSV file
|
10
|
+
dbx diff TABLE_A TABLE_B # create diff table between TABLE_A and TABLE_B.
|
11
|
+
dbx help [COMMAND] # Describe available commands or one specific command
|
12
|
+
dbx import SRC # import SRC CSV into table
|
13
|
+
dbx import_diff SRC_A SRC_B # import then diff between SRC_A CSV and SRC_B CSV files.
|
14
|
+
dbx types SRC # detect column types give a SRC CSV file
|
15
|
+
|
16
|
+
Options:
|
17
|
+
[--db=Database URL: adapter://user:pass@host:port/db_name]
|
18
|
+
[--column-patterns=List of column patterns to override type info. Ex: phone:string external_ref:string ...]
|
19
|
+
[--sample=Number of rows to sample for type detection] # Default: 100
|
20
|
+
```
|
21
|
+
|
22
|
+
## Configuration
|
23
|
+
|
24
|
+
If the current path contains a `dbx.yml` file, it will be read first. Settings in the config file can still be overridden by command line arguments.
|
25
|
+
|
26
|
+
```yaml
|
27
|
+
# Column patterns override column type detection when a pattern matches the column name.
|
28
|
+
# This is useful for things like phone numbers and zip codes where they look like numbers, but should be strings.
|
29
|
+
column_patterns:
|
30
|
+
phone_number: :string
|
31
|
+
zipcode: :string
|
32
|
+
zip_code: :string
|
33
|
+
external_ref: :string
|
34
|
+
|
35
|
+
# Number of rows to sample for type detection
|
36
|
+
sample_rows: 100
|
37
|
+
```
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
Add this line to your application's Gemfile:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
gem 'dbx'
|
45
|
+
```
|
46
|
+
|
47
|
+
And then execute:
|
48
|
+
|
49
|
+
$ bundle
|
50
|
+
|
51
|
+
Or install it yourself as:
|
52
|
+
|
53
|
+
$ gem install dbx
|
54
|
+
|
55
|
+
## Development
|
56
|
+
|
57
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
58
|
+
|
59
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
60
|
+
|
61
|
+
## Contributing
|
62
|
+
|
63
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/ddrscott/dbx.
|
64
|
+
|
65
|
+
## License
|
66
|
+
|
67
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'dbx'
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
require 'pry'
|
11
|
+
Pry.start
|
data/bin/setup
ADDED
data/dbx.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Gem specification for dbx.
#
# Puts ./lib on the load path so the version constant can be read, then
# declares the packaged files, the `dbx` executable and the dependencies.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'dbx/version'

Gem::Specification.new do |spec|
  spec.name          = 'dbx'
  spec.version       = Dbx::VERSION
  spec.authors       = ['Scott Pierce']
  spec.email         = ['ddrscott@gmail.com']

  spec.summary       = 'Database utilities'
  spec.homepage      = 'https://github.com/ddrscott/dbx'
  spec.license       = 'MIT'

  # Package everything tracked by git except test-only directories.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'

  spec.add_dependency 'activerecord', '~> 4.0'
  spec.add_dependency 'activesupport', '~> 4.0'
  spec.add_dependency 'thor'
  # lib/dbx.rb does `require 'pg'` unconditionally, so pg must be a runtime
  # dependency. The constraint mirrors the one recorded in Gemfile.lock.
  spec.add_dependency 'pg', '< 1.0.0'
  # exe/dbx does `require 'pry'` unconditionally; without this declaration the
  # executable fails with LoadError on a plain `gem install dbx`.
  # NOTE(review): dropping that require from exe/dbx would be the leaner fix.
  spec.add_dependency 'pry'
end
|
data/dbx_sample.yml
ADDED
data/exe/dbx
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'thor'
|
5
|
+
require 'dbx'
|
6
|
+
require 'pp'
|
7
|
+
require 'pry'
|
8
|
+
|
9
|
+
# nodoc
|
10
|
+
# #rubocop:disable all
|
11
|
+
# Thor-based command line interface for DBX.
#
# Every command first folds the global options (--db, --column-patterns,
# --sample) into DBX.config and then delegates to DBX or DBX::Differ.
class CLI < Thor
  class_option :db, type: :string, banner: 'Database URL: adapter://user:pass@host:port/db_name'
  class_option :column_patterns, type: :array, banner: 'List of column patterns to override type info. Ex: phone:string external_ref:string ...'
  # No Thor-level default on purpose: a default would always be present in
  # `options` and silently override `sample_rows` from dbx.yml. When the flag
  # is omitted DBX.config_sample_rows supplies the value (config file or 100).
  class_option :sample, type: :numeric, banner: 'Number of rows to sample for type detection (default: 100)'

  desc 'types SRC', 'detect column types give a SRC CSV file'
  # Print one `column,type` line per column detected in the SRC CSV file.
  def types(src)
    handle_global_options
    # The sample size comes from DBX.config, which handle_global_options
    # has just updated with --sample when it was given.
    DBX.column_types(src).each do |col, type|
      puts "#{col},#{type}"
    end
  end

  desc 'create SRC', 'create a table with types from SRC CSV file'
  option :name
  option :force, type: :boolean
  # Create an empty table whose columns mirror the SRC CSV file.
  def create(src)
    handle_global_options
    DBX.create_table(
      src,
      name: options[:name],
      force: options[:force]
    )
  end

  desc 'import SRC', 'import SRC CSV into table'
  option :name
  option :force, type: :boolean
  # Create the table for SRC and load the CSV rows into it.
  def import(src)
    handle_global_options
    DBX.import_table(
      src,
      name: options[:name],
      force: options[:force]
    )
  end

  desc 'diff TABLE_A TABLE_B', 'create diff table between TABLE_A and TABLE_B.'
  option :force, type: :boolean, banner: 'remove diff_ table if it exists'
  option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
  option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
  # Build a diff table from two tables that already exist in the database.
  def diff(table_a, table_b)
    handle_global_options
    DBX::Differ.diff(
      table_a: table_a,
      table_b: table_b,
      # An explicit nil would bypass the keyword default in Differ.diff.
      using: options[:using] || ['id'],
      exclude_columns: options[:exclude_columns],
      force: options[:force]
    )
  end

  desc 'import_diff SRC_A SRC_B', 'import then diff between SRC_A CSV and SRC_B CSV files.'
  option :force, type: :boolean, banner: 'remove diff_ table if it exists'
  option :using, type: :array, banner: 'JOIN USING the columns list here. Ex: id'
  option :exclude_columns, type: :array, banner: 'Exclude columns from comparison and selection'
  # Import both CSV files and then diff the two resulting tables.
  def import_diff(src_a, src_b)
    handle_global_options
    DBX::Differ.import_and_diff(
      src_a: src_a,
      src_b: src_b,
      # An explicit nil would bypass the keyword default in Differ.import_and_diff.
      using: options[:using] || ['id'],
      exclude_columns: options[:exclude_columns],
      force: options[:force]
    )
  end

  private

  # Copy the global command line options into DBX.config so that they take
  # precedence over values read from dbx.yml.
  #
  # @raise [Thor::Error] when a --column-patterns entry is not PATTERN:TYPE
  def handle_global_options
    Array(options[:column_patterns]).each do |pair|
      pattern, type = pair.split(':').map(&:strip)
      if pattern.to_s.empty? || type.to_s.empty?
        # Previously this surfaced as NoMethodError on nil.to_sym.
        raise Thor::Error, "Invalid column pattern #{pair.inspect}, expected PATTERN:TYPE"
      end
      patterns = (DBX.config['column_patterns'] ||= {})
      patterns[pattern] = type.to_sym
    end

    DBX.config['sample_rows'] = options[:sample] if options[:sample]
    DBX.config['db'] = options[:db] if options[:db]
  end
end

CLI.start(ARGV)
|
data/lib/dbx/differ.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# #rubocop:disable all
|
2
|
+
module DBX
  # Builds and runs the SQL that materializes a `diff_<a>_<b>` table holding
  # the rows that differ between two tables.
  #
  # NOTE(review): table and column names are interpolated into SQL verbatim;
  # only pass identifiers you trust.
  module Differ
    module_function

    # Compare `table_a` with `table_b` and store the result in a new table
    # named `diff_<table_a>_<table_b>`.
    # Numeric types will be diffed by subtracting the values.
    # Dates and datetimes are diffed by subtracting the values and casting
    # the result to text.
    # Everything else can only return a boolean true/false that it is different.
    #
    # @param [String] table_a Should be the initial state table.
    # @param [String] table_b Should be newer than table_a, but doesn't have to be.
    # @param [Boolean] force drop an existing diff table first.
    # @param [Array<String>] using is the join criteria between the 2 tables.
    # @param [Array<String>] exclude_columns are excluded from the diff comparison.
    def diff(table_a:, table_b:, force: false, using: ['id'], exclude_columns: nil)
      # Callers such as the CLI pass an explicit nil when the option is
      # absent, which bypasses the keyword default above.
      using ||= ['id']
      exclude_columns ||= []
      table_diff = "diff_#{table_a}_#{table_b}"
      skipped = using + exclude_columns

      DBX.info("Creating diff table #{table_diff}")
      DBX.connection do |conn|
        conn.execute("DROP TABLE IF EXISTS #{table_diff}") if force
        # Column lists are derived from table_a only.
        # NOTE(review): presumably table_b has the same columns - confirm.
        conn.execute(<<-SQL)
          CREATE TABLE #{table_diff} AS
          SELECT
            #{using.join(', ')},
            #{select_columns(table_a, exclude_columns: skipped)}
          FROM #{table_a} AS a
          FULL OUTER JOIN #{table_b} b USING (#{using.join(',')})
          WHERE
            #{where_columns(table_a, exclude_columns: skipped)}
        SQL
        DBX.info(conn.exec_query(<<-SQL).as_json)
          SELECT
            (SELECT COUNT(*) FROM #{table_a}) count_table_a,
            (SELECT COUNT(*) FROM #{table_b}) count_table_b,
            (SELECT COUNT(*) FROM #{table_diff}) diffs
        SQL
      end
      DBX.info("Diff complete. Results details in: #{table_diff}")
    end

    # Import both CSV sources into tables, then diff the two tables.
    #
    # @param [String] src_a path of the CSV holding the initial state.
    # @param [String] src_b path of the CSV holding the newer state.
    # @param [Boolean] force passed to both the imports and the diff.
    # @param [Array<String>] using is the join criteria between the 2 tables.
    # @param [Array<String>] exclude_columns are excluded from the diff comparison.
    def import_and_diff(src_a:, src_b:, force: false, using: ['id'], exclude_columns: nil)
      DBX.info("Importing #{src_a}")
      table_a = DBX.import_table(src_a, force: force)

      DBX.info("Importing #{src_b}")
      table_b = DBX.import_table(src_b, force: force)

      diff(table_a: table_a, table_b: table_b, force: force, using: using, exclude_columns: exclude_columns)
    end

    # Build the SELECT list: for every column of `table` that is not
    # excluded, emit `<col>_a`, `<col>_b` and `<col>_diff` expressions chosen
    # by the column type.
    #
    # @param [String] table table whose columns are inspected.
    # @param [Array<String>] exclude_columns column names to leave out.
    # @return [String] comma separated SQL fragment
    def select_columns(table, exclude_columns: nil)
      exclude_columns ||= []
      DBX.connection do |conn|
        kept = conn.columns(table).reject { |column| exclude_columns.include?(column.name) }
        kept.map do |column|
          case column.type
          when :decimal, :integer
            select_difference(column.name)
          when :date, :datetime
            select_difference_as_text(column.name)
          else
            select_boolean(column.name)
          end
        end.join(',')
      end
    end

    # Build the WHERE clause that keeps a row when any compared column
    # differs between the two sides.
    #
    # `IS DISTINCT FROM` is used instead of `<>` because `<>` yields NULL when
    # either side is NULL, which dropped rows present in only one table and
    # values that changed to or from NULL.
    #
    # @param [String] table table whose columns are inspected.
    # @param [Array<String>] exclude_columns column names to leave out.
    # @return [String] OR-joined SQL predicate
    def where_columns(table, exclude_columns: nil)
      exclude_columns ||= []
      DBX.connection do |conn|
        names = conn.columns(table).map(&:name) - exclude_columns
        names.map { |name| "(a.#{name} IS DISTINCT FROM b.#{name})" }.join(' OR ')
      end
    end

    # SQL for a numeric column: both values plus `b - a`, NULL when equal.
    # When one side is NULL the other side's value is reported as the diff.
    #
    # @param [String] column column name
    # @return [String] SQL fragment
    def select_difference(column)
      a = "a.#{column}"
      b = "b.#{column}"
      %(#{a} AS #{column}_a, #{b} AS #{column}_b, (CASE WHEN #{a} IS NULL THEN #{b} WHEN #{b} IS NULL THEN #{a} ELSE NULLIF(#{b} - #{a}, 0) END) AS #{column}_diff)
    end

    # SQL for a date/datetime column: both values plus `b - a` cast to text,
    # NULL when equal.
    #
    # Equality is tested explicitly: comparing the text against '0' only
    # worked for dates, because equal timestamps subtract to an interval
    # whose text is '00:00:00'.
    #
    # @param [String] column column name
    # @return [String] SQL fragment
    def select_difference_as_text(column)
      a = "a.#{column}"
      b = "b.#{column}"
      %(#{a} AS #{column}_a, #{b} AS #{column}_b, (CASE WHEN #{a} IS NULL THEN #{b}::text WHEN #{b} IS NULL THEN #{a}::text WHEN #{a} = #{b} THEN NULL ELSE (#{b} - #{a})::text END) AS #{column}_diff)
    end

    # SQL for any other column: both values plus TRUE when they differ,
    # NULL when they are the same. NULL-safe, so a value present on one side
    # only counts as a difference.
    #
    # @param [String] column column name
    # @return [String] SQL fragment
    def select_boolean(column)
      a = "a.#{column}"
      b = "b.#{column}"
      %(#{a} AS #{column}_a, #{b} AS #{column}_b, NULLIF(#{a} IS DISTINCT FROM #{b}, FALSE) AS #{column}_diff)
    end
  end
end
|
data/lib/dbx/version.rb
ADDED
data/lib/dbx.rb
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
require 'dbx/version'
|
2
|
+
require 'yaml'
|
3
|
+
require 'csv'
|
4
|
+
require 'pg'
|
5
|
+
require 'active_support/all'
|
6
|
+
require 'active_record'
|
7
|
+
require 'pp'
|
8
|
+
|
9
|
+
require 'dbx/model_base'
|
10
|
+
require 'dbx/differ'
|
11
|
+
|
12
|
+
# Collection of database utility methods
|
13
|
+
#
|
14
|
+
# #rubocop:disable all
|
15
|
+
module DBX
  module_function

  # Optional configuration file, looked up in the current working directory.
  CONFIG_PATH = 'dbx.yml'

  # Largest value that fits a 4-byte PostgreSQL `integer` column.
  MAX_INTEGER = 2_147_483_647

  # Memoized configuration hash. Callers (for example the CLI) mutate it to
  # apply command line overrides.
  #
  # @return [Hash]
  def config
    @config ||= config_from_yaml
  end

  # Read CONFIG_PATH when it exists.
  #
  # @return [Hash] parsed settings, or an empty hash when the file is
  #   missing or empty
  def config_from_yaml
    return {} unless File.file?(CONFIG_PATH)

    # NOTE(review): YAML.load can instantiate arbitrary objects. It is kept
    # because column pattern values are written as symbols (`:string`);
    # only run against a dbx.yml you trust.
    # An empty file parses to false/nil, hence the fallback.
    YAML.load(IO.read(CONFIG_PATH)) || {}
  end

  # @return [Integer] number of CSV rows sampled for type detection
  def config_sample_rows
    config['sample_rows'] || 100
  end

  # @return [String] database URL; ENV['DATABASE_URL'] wins over config['db']
  # @raise [RuntimeError] when neither is set
  def config_db
    ENV['DATABASE_URL'] || config['db'] || raise('`db` not set as command line option or `dbx.yml`')
  end

  # Stream used for progress messages and SQL logging so that stdout stays
  # clean for data. Falls back to $stderr when there is no controlling
  # terminal (cron, CI, Windows).
  #
  # @return [IO]
  def tty
    @tty ||= begin
      File.open('/dev/tty', 'a')
    rescue SystemCallError
      $stderr
    end
  end

  # Write a highlighted (yellow) message to the tty stream.
  def info(msg)
    tty.puts("\e[33m#{msg}\e[0m")
  end

  # Yield a database connection checked out from ModelBase's pool.
  # The connection and the logger are set up on the first call only; a
  # different `db_url` on later calls has no effect.
  #
  # @param [String] db_url database URL used for the first connection
  # @return whatever the block returns
  def connection(db_url: config_db, &block)
    # @pool only serves as an "already initialized" marker.
    @pool ||= begin
      ModelBase.establish_connection(db_url)
      ModelBase.logger = Logger.new(tty)
    end
    ModelBase.connection_pool.with_connection(&block)
  end

  # Derive a table name from a file path: the base name without its
  # extension.
  #
  # @param [String] src file path
  # @return [String]
  def parse_table_name(src)
    # Strip the extension as a suffix; substituting the first occurrence of
    # the extension text mangled names such as `x.csvfile.csv`.
    File.basename(src, File.extname(src))
  end

  # Create a table whose columns match the detected types of the CSV `src`.
  #
  # @param [String] src CSV file path
  # @param [String] name table name, defaults to the file's base name
  # @param [Boolean] force drop an existing table first
  # @param [Integer] sample_rows rows sampled for type detection
  # @param [Hash] csv_options options passed to the CSV parser
  def create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
    name ||= parse_table_name(src)
    types = column_types(src, sample_rows: sample_rows, csv_options: csv_options)
    connection do |conn|
      conn.create_table name, force: force, id: false do |t|
        types.each do |column, type|
          # `null:` is the ActiveRecord option name; the former `nulls:` key
          # is not a column option.
          t.public_send(type, column, null: true)
        end
      end
    end
  end

  # Create the table for `src`, bulk load the CSV with COPY and index it.
  #
  # TODO parse CSV options into Postgres
  #
  # @param [String] src CSV file path
  # @param [String] name table name, defaults to the file's base name
  # @param [Boolean] force drop an existing table first
  # @param [Integer] sample_rows rows sampled for type detection
  # @param [Hash] csv_options options passed to the CSV parser
  # @return [String] name of the imported table
  def import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
    name ||= parse_table_name(src)
    connection do |conn|
      # Pass the resolved name through; without it a custom `name:` created
      # one table and copied into another.
      create_table(src, name: name, force: force, sample_rows: sample_rows, csv_options: csv_options)
      # TODO only postgres is support at the moment
      pg = conn.raw_connection
      columns = column_types(src, sample_rows: sample_rows, csv_options: csv_options).keys.map { |c| %("#{c}") }

      pg_stmt = "COPY #{name}(#{columns.join(',')}) FROM STDIN CSV"
      conn.logger.debug(pg_stmt)
      pg.copy_data(pg_stmt) do
        IO.foreach(src).with_index do |line, line_no|
          next if line_no.zero? # the header line is not data

          pg.put_copy_data(line)
        end
      end
      index_table(name)
    end
    name
  end

  # Add a single-column index named `idx_<table>_<NN>` for every column.
  #
  # @param [String] table_name table to index
  def index_table(table_name)
    connection do |conn|
      conn.columns(table_name).each_with_index do |column, i|
        conn.add_index(table_name, [column.name], name: "idx_#{table_name}_#{i.to_s.rjust(2, '0')}")
      end
    end
  end

  # Read source as CSV and detect types based on `sample_rows`.
  # Types returned match ActiveRecord column types.
  # Types are memory cached by `src`; later calls with the same `src` return
  # the cached result whatever the other arguments are.
  #
  # A column whose header matches a `column_patterns` entry from the config
  # always gets the configured type. Columns with no non-blank sample value
  # become :string.
  #
  # @param [String] src CSV file path
  # @param [Integer] sample_rows number of data rows to inspect
  # @param [Hash] csv_options options passed to the CSV parser (not mutated)
  # @return [Hash<String, Symbol>] column name to type symbols
  def column_types(src, sample_rows: config_sample_rows, csv_options: {})
    @types ||= {}
    cached = @types[src]
    return cached if cached

    # The key is absent when there is no dbx.yml or no --column-patterns.
    patterns = config['column_patterns'] || {}
    headers = nil
    pinned = []
    types = []
    sampled = 0

    # The header row is consumed manually, so header parsing stays off.
    CSV.foreach(src, **csv_options.merge(headers: false)) do |row|
      if headers.nil?
        headers = row
        # Pattern overrides depend on the header only; resolve them once.
        pinned = headers.map do |header|
          match = patterns.detect { |pat, _| header.to_s =~ /#{pat}/ }
          match && match.last.to_sym
        end
        next
      end

      headers.each_index do |i|
        # :string is the most general type, nothing can widen it.
        next if pinned[i] || types[i] == :string

        type = detect_type(row[i])
        next if type.nil?

        if types[i] == :decimal && type == :integer
          # keep decimal
        elsif types[i] == :datetime && type == :date
          # keep datetime
        else
          types[i] = type
        end
      end
      # stop once `sample_rows` data rows have been inspected
      break if (sampled += 1) >= sample_rows
    end

    # An empty file has no header row and therefore no columns.
    return @types[src] = {} if headers.nil?

    # Size by headers: `types` is shorter when trailing columns were blank.
    resolved = headers.each_index.map { |i| pinned[i] || types[i] || :string }
    @types[src] = Hash[headers.zip(resolved)]
  end

  # Detect the column type given a value.
  # May return nil if the value is blank.
  #
  # @param [String, nil] value raw CSV cell
  # @return [Symbol, nil] :decimal, :integer, :uuid, :date, :datetime,
  #   :string or nil
  def detect_type(value)
    # \A and \z anchor the whole value; ^ and $ matched any single line of a
    # multi-line cell.
    if value.blank?
      nil
    elsif value =~ /\A\d+\.\d+\z/
      :decimal
    elsif value =~ /\A\d{1,10}\z/ && value.to_i <= MAX_INTEGER
      # Larger values would overflow an integer column and fall through,
      # like numbers with more than 10 digits.
      :integer
    elsif value =~ /\A\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/
      :uuid
    elsif value =~ /\A\d{4}(\D)\d{2}\1\d{2}\z/ && (Date.parse(value) rescue false)
      :date
    elsif (Time.zone.parse(value) rescue false)
      # NOTE(review): Time.zone is not set anywhere in this file; if it is
      # nil the call raises and is rescued, so :datetime is never returned.
      # Confirm whether that is intended.
      :datetime
    else
      :string
    end
  end
end
|
178
|
+
# #rubocop:enable all
|
metadata
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dbx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Scott Pierce
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-08-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activerecord
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '4.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '4.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: activesupport
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '4.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '4.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description:
|
98
|
+
email:
|
99
|
+
- ddrscott@gmail.com
|
100
|
+
executables:
|
101
|
+
- dbx
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".travis.yml"
|
107
|
+
- Gemfile
|
108
|
+
- Gemfile.lock
|
109
|
+
- LICENSE.txt
|
110
|
+
- README.md
|
111
|
+
- Rakefile
|
112
|
+
- bin/console
|
113
|
+
- bin/setup
|
114
|
+
- dbx.gemspec
|
115
|
+
- dbx_sample.yml
|
116
|
+
- exe/dbx
|
117
|
+
- lib/dbx.rb
|
118
|
+
- lib/dbx/differ.rb
|
119
|
+
- lib/dbx/model_base.rb
|
120
|
+
- lib/dbx/version.rb
|
121
|
+
homepage: https://github.com/ddrscott/dbx
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata: {}
|
125
|
+
post_install_message:
|
126
|
+
rdoc_options: []
|
127
|
+
require_paths:
|
128
|
+
- lib
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
requirements: []
|
140
|
+
rubyforge_project:
|
141
|
+
rubygems_version: 2.6.14
|
142
|
+
signing_key:
|
143
|
+
specification_version: 4
|
144
|
+
summary: Database utilities
|
145
|
+
test_files: []
|