db_subsetter 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -0
- data/lib/db_subsetter/dialect/generic.rb +14 -0
- data/lib/db_subsetter/dialect/ms_sql.rb +24 -0
- data/lib/db_subsetter/dialect/my_sql.rb +16 -0
- data/lib/db_subsetter/exporter.rb +9 -7
- data/lib/db_subsetter/filter.rb +1 -1
- data/lib/db_subsetter/importer.rb +25 -9
- data/lib/db_subsetter/version.rb +1 -1
- data/lib/db_subsetter.rb +4 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0513f9d6c747b0a4b59915680c0f753f585c7176
|
4
|
+
data.tar.gz: dd987be888e834ffa1e2f832eed209206eb791e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c1bf06867d598238213377d5c8e8c1f47aa9736b8136038e4e49a8f34fa6f113fae065df8b798914824c0329b21baf4763f8e1b09437dafd8e7889af84dea75
|
7
|
+
data.tar.gz: faa183feff6d80f767310170a9dce02612baaa9816cb07225869da55585c9ab05d75d2af17047c9b7ba8e298ac80d4df5c56be7497ff9718b498a450027ebdee
|
data/README.md
CHANGED
@@ -35,6 +35,17 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
35
35
|
|
36
36
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
37
37
|
|
38
|
+
## TODO
|
39
|
+
|
40
|
+
* Improve the dialect handling
|
41
|
+
* Better docs with usage and filtering examples
|
42
|
+
* Implement a scrubber API to allow sanitizing or correcting data at export time. This allows us to keep sensitive/personal data out of the export and also allows correction of broken data that won't re-insert.
|
43
|
+
* Add an executable and/or rake task to perform export and import rather than requiring the API to be used directly. Will need a config file to specify custom plugins.
|
44
|
+
* Add pre-flight check on import to make sure all tables smell like they will load the data (right columns, at minimum)
|
45
|
+
* Finish building and testing the checks that make sure foreign keys are valid after import
|
46
|
+
* Have verify_exportability return all failures together rather than one at a time
|
47
|
+
* Add a verbose mode to display more detailed stats while running an export or import (what table we're on, records exported, time taken)
|
48
|
+
|
38
49
|
## Contributing
|
39
50
|
|
40
51
|
Bug reports and pull requests are welcome on GitHub at https://github.com/lostapathy/db_subsetter.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DbSubsetter
|
2
|
+
module Dialect
|
3
|
+
class MSSQL < Generic
|
4
|
+
def self.import
|
5
|
+
ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? NOCHECK CONSTRAINT all"')
|
6
|
+
ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? DISABLE TRIGGER all"')
|
7
|
+
ActiveRecord::Base.connection.execute("select 'ALTER INDEX ' + I.name + ' ON ' + T.name + ' DISABLE'
|
8
|
+
from sys.indexes I
|
9
|
+
inner join sys.tables T on I.object_id = T.object_id
|
10
|
+
where I.type_desc = 'NONCLUSTERED'
|
11
|
+
and I.name is not null")
|
12
|
+
|
13
|
+
yield
|
14
|
+
ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? ENABLE TRIGGER all"')
|
15
|
+
ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? WITH CHECK CHECK CONSTRAINT all"')
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.integrity_problems
|
19
|
+
ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "DBCC CHECKCONSTRAINTS WITH ALL_CONSTRAINTS"')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module DbSubsetter
|
2
|
+
module Dialect
|
3
|
+
class MySQL < Generic
|
4
|
+
def self.import
|
5
|
+
ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=0;")
|
6
|
+
yield
|
7
|
+
ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=1;")
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.integrity_problems
|
11
|
+
raise NotImplementedError.new("integrity_problems not implemented for MySQL")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -36,7 +36,7 @@ module DbSubsetter
|
|
36
36
|
verify_exportability
|
37
37
|
|
38
38
|
@output = SQLite3::Database.new(filename)
|
39
|
-
@output.execute("CREATE TABLE tables (name TEXT, columns TEXT)")
|
39
|
+
@output.execute("CREATE TABLE tables (name TEXT, records_exported INTEGER, columns TEXT)")
|
40
40
|
tables.each do |table|
|
41
41
|
export_table(table)
|
42
42
|
end
|
@@ -64,8 +64,8 @@ module DbSubsetter
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def filtered_row_count(table)
|
67
|
-
query = Arel::Table.new(table, ActiveRecord::Base)
|
68
|
-
query = filter.filter(table, query)
|
67
|
+
query = Arel::Table.new(table, ActiveRecord::Base)
|
68
|
+
query = filter.filter(table, query).project( Arel.sql("count(1)") )
|
69
69
|
ActiveRecord::Base.connection.select_one(query.to_sql).values.first
|
70
70
|
end
|
71
71
|
|
@@ -96,22 +96,24 @@ module DbSubsetter
|
|
96
96
|
|
97
97
|
def export_table(table)
|
98
98
|
columns = ActiveRecord::Base.connection.columns(table).map{ |table| table.name }
|
99
|
-
|
99
|
+
rows_exported = 0
|
100
100
|
@output.execute("CREATE TABLE #{table.underscore} ( data TEXT )")
|
101
101
|
for i in 0..pages(table)
|
102
|
-
query = Arel::Table.new(table, ActiveRecord::Base)
|
102
|
+
arel_table = query = Arel::Table.new(table, ActiveRecord::Base)
|
103
|
+
query = filter.filter(table, query)
|
103
104
|
# Need to extend this to take more than the first batch_size records
|
104
|
-
query = query.order(
|
105
|
+
query = query.order(arel_table[order_by(table)]) if order_by(table)
|
105
106
|
|
106
107
|
sql = query.skip(i * select_batch_size).take(select_batch_size).project( Arel.sql('*') ).to_sql
|
107
108
|
|
108
109
|
records = ActiveRecord::Base.connection.select_rows( sql )
|
109
110
|
records.each_slice(insert_batch_size) do |rows|
|
110
111
|
@output.execute("INSERT INTO #{table.underscore} (data) VALUES #{ Array.new(rows.size){"(?)"}.join(",")}", rows.map{|x| cleanup_types(x)}.map(&:to_json) )
|
112
|
+
rows_exported += rows.size
|
111
113
|
end
|
112
114
|
end
|
115
|
+
@output.execute("INSERT INTO tables VALUES (?, ?, ?)", [table, rows_exported, columns.to_json])
|
113
116
|
end
|
114
|
-
|
115
117
|
end
|
116
118
|
end
|
117
119
|
|
data/lib/db_subsetter/filter.rb
CHANGED
@@ -3,10 +3,11 @@ require 'sqlite3'
|
|
3
3
|
module DbSubsetter
|
4
4
|
class Importer
|
5
5
|
|
6
|
-
def initialize(filename)
|
6
|
+
def initialize(filename, dialect = DbSubsetter::Dialect::Generic)
|
7
7
|
raise ArgumentError.new("invalid input file") unless File.exists?(filename)
|
8
8
|
|
9
9
|
@data = SQLite3::Database.new(filename)
|
10
|
+
@dialect = dialect
|
10
11
|
end
|
11
12
|
|
12
13
|
def tables
|
@@ -18,25 +19,40 @@ module DbSubsetter
|
|
18
19
|
end
|
19
20
|
|
20
21
|
def import
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
@dialect.import do
|
23
|
+
tables.each do |table|
|
24
|
+
import_table(table)
|
25
|
+
end
|
24
26
|
end
|
25
|
-
|
27
|
+
end
|
28
|
+
|
29
|
+
def insert_batch_size
|
30
|
+
100 # more like 500 for mysql
|
26
31
|
end
|
27
32
|
|
28
33
|
private
|
29
34
|
def import_table(table)
|
30
|
-
|
31
|
-
|
32
|
-
|
35
|
+
begin
|
36
|
+
ActiveRecord::Base.connection.truncate(table)
|
37
|
+
rescue NotImplementedError
|
38
|
+
ActiveRecord::Base.connection.execute("DELETE FROM #{quoted_table_name(table)}")
|
39
|
+
end
|
40
|
+
|
41
|
+
ActiveRecord::Base.connection.begin_db_transaction
|
42
|
+
|
43
|
+
all_rows = @data.execute("SELECT data FROM #{table.underscore}")
|
44
|
+
all_rows.each_slice(insert_batch_size) do |rows|
|
45
|
+
quoted_rows = rows.map{ |row| "(" + quoted_values(row).join(",") + ")" }.join(",")
|
46
|
+
insert_sql = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES #{quoted_rows}"
|
33
47
|
ActiveRecord::Base.connection.execute(insert_sql)
|
34
48
|
end
|
49
|
+
|
50
|
+
ActiveRecord::Base.connection.commit_db_transaction
|
35
51
|
end
|
36
52
|
|
37
53
|
def quoted_values(row)
|
38
54
|
out = JSON.parse(row[0])
|
39
|
-
out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) }
|
55
|
+
out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) }
|
40
56
|
out = out.map{|x| ActiveRecord::Base.connection.quote(x) }
|
41
57
|
out
|
42
58
|
end
|
data/lib/db_subsetter/version.rb
CHANGED
data/lib/db_subsetter.rb
CHANGED
@@ -2,6 +2,10 @@ require "db_subsetter/version"
|
|
2
2
|
require "db_subsetter/filter"
|
3
3
|
require "db_subsetter/exporter"
|
4
4
|
require "db_subsetter/importer"
|
5
|
+
require "db_subsetter/dialect/generic"
|
6
|
+
require "db_subsetter/dialect/my_sql"
|
7
|
+
require "db_subsetter/dialect/ms_sql"
|
8
|
+
|
5
9
|
|
6
10
|
module DbSubsetter
|
7
11
|
# Your code goes here...
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: db_subsetter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joe Francis
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -97,6 +97,9 @@ files:
|
|
97
97
|
- bin/setup
|
98
98
|
- db_subsetter.gemspec
|
99
99
|
- lib/db_subsetter.rb
|
100
|
+
- lib/db_subsetter/dialect/generic.rb
|
101
|
+
- lib/db_subsetter/dialect/ms_sql.rb
|
102
|
+
- lib/db_subsetter/dialect/my_sql.rb
|
100
103
|
- lib/db_subsetter/exporter.rb
|
101
104
|
- lib/db_subsetter/filter.rb
|
102
105
|
- lib/db_subsetter/importer.rb
|