db_subsetter 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b69d7c3714c570b4a1f22b2ccff5e6e74c8b670
4
- data.tar.gz: dafe34226a0da60c16d2823cc6d76e12b2f9837a
3
+ metadata.gz: 0513f9d6c747b0a4b59915680c0f753f585c7176
4
+ data.tar.gz: dd987be888e834ffa1e2f832eed209206eb791e0
5
5
  SHA512:
6
- metadata.gz: 6a7deca37ab7b6267f9b9f0fbabd7dd221278e93366623df347a46d24aa9a6980c71ec0f9f0c3a3d41b4aaf3b44032ee185092671f1c6517fd9c70e449265c25
7
- data.tar.gz: d50010b3ee53ce2b2a13c95dcae3f00b08681a0f802251d85c6e6764375b4b499c9db817087a2b62c011bed578a161464b9c36abfeaf119ad772e7a2f12cda88
6
+ metadata.gz: 4c1bf06867d598238213377d5c8e8c1f47aa9736b8136038e4e49a8f34fa6f113fae065df8b798914824c0329b21baf4763f8e1b09437dafd8e7889af84dea75
7
+ data.tar.gz: faa183feff6d80f767310170a9dce02612baaa9816cb07225869da55585c9ab05d75d2af17047c9b7ba8e298ac80d4df5c56be7497ff9718b498a450027ebdee
data/README.md CHANGED
@@ -35,6 +35,17 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
35
35
 
36
36
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
37
37
 
38
+ ## TODO
39
+
40
+ * Improve the dialect handling
41
+ * Better example docs on usage and filtering examples
42
+ * Implement a scrubber API to allow sanitizing or correcting data at export time. This allows us to keep sensitive/personal data out of the export and also allows correction of broken data that won't re-insert.
43
+ * Add an executable and/or rake task to perform export and import rather than requiring the API to be used directly. Will need a config file to specify custom plugins
44
+ * Add pre-flight check on import to make sure all tables smell like they will load the data (right columns, at minimum)
45
+ * Finish building and test checks to make sure foreign keys are valid after import
46
+ * Have verify_exportability return all failures together rather than one at a time
47
+ * Add a verbose mode to display more detailed stats while running an export or import (what table we're on, records exported, time taken)
48
+
38
49
  ## Contributing
39
50
 
40
51
  Bug reports and pull requests are welcome on GitHub at https://github.com/lostapathy/db_subsetter.
@@ -0,0 +1,14 @@
module DbSubsetter
  module Dialect
    # Fallback dialect that performs no database-specific work.
    #
    # It defines the two class-level hooks a dialect exposes, +import+ and
    # +integrity_problems+, in their simplest possible form.
    class Generic
      class << self
        # Runs the caller's block with no setup or teardown around it.
        #
        # @yield the work to perform
        # @return whatever the block returns
        def import
          yield
        end

        # This dialect performs no integrity checking.
        #
        # @return [Array] always a new, empty array
        def integrity_problems
          []
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
module DbSubsetter
  module Dialect
    # SQL Server dialect: switches constraint checking and triggers off around
    # a bulk import and back on afterwards.
    class MSSQL < Generic
      # Disables constraint checking and triggers on every table, runs the
      # caller's block, then re-enables triggers and constraint checking.
      #
      # The re-enable statements live in an +ensure+ clause so that an
      # exception raised by the block (or by one of the disable statements)
      # does not leave the database with triggers and constraints switched off.
      # Because +ensure+ does not alter the method's result, the method now
      # returns the block's value instead of the result of the last +execute+.
      #
      # NOTE(review): if the block fails part-way, "WITH CHECK CHECK CONSTRAINT"
      # may itself raise on the partially loaded data; that error would then
      # replace the original exception.
      #
      # @yield the import work to run while checks are disabled
      # @return whatever the block returns
      def self.import
        ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? NOCHECK CONSTRAINT all"')
        ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? DISABLE TRIGGER all"')
        # NOTE(review): this query only *generates* "ALTER INDEX ... DISABLE"
        # statements as result rows; the rows are discarded here, so no index
        # is actually disabled. Confirm whether executing them (and rebuilding
        # the indexes afterwards) was intended.
        ActiveRecord::Base.connection.execute("select 'ALTER INDEX ' + I.name + ' ON ' + T.name + ' DISABLE'
          from sys.indexes I
          inner join sys.tables T on I.object_id = T.object_id
          where I.type_desc = 'NONCLUSTERED'
          and I.name is not null")

        yield
      ensure
        ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? ENABLE TRIGGER all"')
        ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? WITH CHECK CHECK CONSTRAINT all"')
      end

      # Runs DBCC CHECKCONSTRAINTS and returns whatever the connection's
      # +execute+ returns for it.
      #
      # NOTE(review): the command contains no "?" table placeholder, so
      # sp_msforeachtable presumably repeats the same database-wide check once
      # per table — confirm this is intended.
      def self.integrity_problems
        ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "DBCC CHECKCONSTRAINTS WITH ALL_CONSTRAINTS"')
      end
    end
  end
end
@@ -0,0 +1,16 @@
module DbSubsetter
  module Dialect
    # MySQL dialect: turns foreign key checking off for the duration of an
    # import so tables can be loaded in any order.
    class MySQL < Generic
      # Disables foreign key checks, runs the caller's block, then turns the
      # checks back on.
      #
      # The checks are restored from an +ensure+ clause: if the block raises,
      # the connection would otherwise keep running with FOREIGN_KEY_CHECKS=0,
      # silently disabling referential integrity for any later work done on
      # that connection. Because +ensure+ does not alter the method's result,
      # the method now returns the block's value instead of the result of the
      # final +execute+.
      #
      # @yield the import work to run while foreign key checks are disabled
      # @return whatever the block returns
      def self.import
        ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=0;")
        yield
      ensure
        ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=1;")
      end

      # Integrity checking has not been written for MySQL yet.
      #
      # @raise [NotImplementedError] always. This is a ScriptError, not a
      #   StandardError, so a bare +rescue+ will not catch it.
      def self.integrity_problems
        raise NotImplementedError, "integrity_problems not implemented for MySQL"
      end
    end
  end
end
@@ -36,7 +36,7 @@ module DbSubsetter
36
36
  verify_exportability
37
37
 
38
38
  @output = SQLite3::Database.new(filename)
39
- @output.execute("CREATE TABLE tables (name TEXT, columns TEXT)")
39
+ @output.execute("CREATE TABLE tables (name TEXT, records_exported INTEGER, columns TEXT)")
40
40
  tables.each do |table|
41
41
  export_table(table)
42
42
  end
@@ -64,8 +64,8 @@ module DbSubsetter
64
64
  end
65
65
 
66
66
  def filtered_row_count(table)
67
- query = Arel::Table.new(table, ActiveRecord::Base).project( Arel.sql("count(1)") )
68
- query = filter.filter(table, query)
67
+ query = Arel::Table.new(table, ActiveRecord::Base)
68
+ query = filter.filter(table, query).project( Arel.sql("count(1)") )
69
69
  ActiveRecord::Base.connection.select_one(query.to_sql).values.first
70
70
  end
71
71
 
@@ -96,22 +96,24 @@ module DbSubsetter
96
96
 
97
97
  def export_table(table)
98
98
  columns = ActiveRecord::Base.connection.columns(table).map{ |table| table.name }
99
- @output.execute("INSERT INTO tables VALUES (?, ?)", [table, columns.to_json])
99
+ rows_exported = 0
100
100
  @output.execute("CREATE TABLE #{table.underscore} ( data TEXT )")
101
101
  for i in 0..pages(table)
102
- query = Arel::Table.new(table, ActiveRecord::Base)
102
+ arel_table = query = Arel::Table.new(table, ActiveRecord::Base)
103
+ query = filter.filter(table, query)
103
104
  # Need to extend this to take more than the first batch_size records
104
- query = query.order(query[order_by(table)]) if order_by(table)
105
+ query = query.order(arel_table[order_by(table)]) if order_by(table)
105
106
 
106
107
  sql = query.skip(i * select_batch_size).take(select_batch_size).project( Arel.sql('*') ).to_sql
107
108
 
108
109
  records = ActiveRecord::Base.connection.select_rows( sql )
109
110
  records.each_slice(insert_batch_size) do |rows|
110
111
  @output.execute("INSERT INTO #{table.underscore} (data) VALUES #{ Array.new(rows.size){"(?)"}.join(",")}", rows.map{|x| cleanup_types(x)}.map(&:to_json) )
112
+ rows_exported += rows.size
111
113
  end
112
114
  end
115
+ @output.execute("INSERT INTO tables VALUES (?, ?, ?)", [table, rows_exported, columns.to_json])
113
116
  end
114
-
115
117
  end
116
118
  end
117
119
 
@@ -11,7 +11,7 @@ module DbSubsetter
11
11
  end
12
12
 
13
13
  def filter(table, query)
14
- filter_method = "filter_#{table}"
14
+ filter_method = "filter_#{table.downcase}"
15
15
  if self.respond_to? filter_method
16
16
  self.send(filter_method, query)
17
17
  else
@@ -3,10 +3,11 @@ require 'sqlite3'
3
3
  module DbSubsetter
4
4
  class Importer
5
5
 
6
- def initialize(filename)
6
+ def initialize(filename, dialect = DbSubsetter::Dialect::Generic)
7
7
  raise ArgumentError.new("invalid input file") unless File.exists?(filename)
8
8
 
9
9
  @data = SQLite3::Database.new(filename)
10
+ @dialect = dialect
10
11
  end
11
12
 
12
13
  def tables
@@ -18,25 +19,40 @@ module DbSubsetter
18
19
  end
19
20
 
20
21
  def import
21
- ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=0;")
22
- tables.each do |table|
23
- import_table(table)
22
+ @dialect.import do
23
+ tables.each do |table|
24
+ import_table(table)
25
+ end
24
26
  end
25
- ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=1;")
27
+ end
28
+
29
+ def insert_batch_size
30
+ 100 # more like 500 for mysql
26
31
  end
27
32
 
28
33
  private
29
34
  def import_table(table)
30
- ActiveRecord::Base.connection.truncate(table)
31
- @data.execute("SELECT data FROM #{table.underscore}") do |row|
32
- insert_sql = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES (#{quoted_values(row).join(",")})"
35
+ begin
36
+ ActiveRecord::Base.connection.truncate(table)
37
+ rescue NotImplementedError
38
+ ActiveRecord::Base.connection.execute("DELETE FROM #{quoted_table_name(table)}")
39
+ end
40
+
41
+ ActiveRecord::Base.connection.begin_db_transaction
42
+
43
+ all_rows = @data.execute("SELECT data FROM #{table.underscore}")
44
+ all_rows.each_slice(insert_batch_size) do |rows|
45
+ quoted_rows = rows.map{ |row| "(" + quoted_values(row).join(",") + ")" }.join(",")
46
+ insert_sql = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES #{quoted_rows}"
33
47
  ActiveRecord::Base.connection.execute(insert_sql)
34
48
  end
49
+
50
+ ActiveRecord::Base.connection.commit_db_transaction
35
51
  end
36
52
 
37
53
  def quoted_values(row)
38
54
  out = JSON.parse(row[0])
39
- out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) } #.first, x.last) }
55
+ out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) }
40
56
  out = out.map{|x| ActiveRecord::Base.connection.quote(x) }
41
57
  out
42
58
  end
@@ -1,3 +1,3 @@
1
1
  module DbSubsetter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/db_subsetter.rb CHANGED
@@ -2,6 +2,10 @@ require "db_subsetter/version"
2
2
  require "db_subsetter/filter"
3
3
  require "db_subsetter/exporter"
4
4
  require "db_subsetter/importer"
5
+ require "db_subsetter/dialect/generic"
6
+ require "db_subsetter/dialect/my_sql"
7
+ require "db_subsetter/dialect/ms_sql"
8
+
5
9
 
6
10
  module DbSubsetter
7
11
  # Your code goes here...
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_subsetter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Francis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-02 00:00:00.000000000 Z
11
+ date: 2016-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -97,6 +97,9 @@ files:
97
97
  - bin/setup
98
98
  - db_subsetter.gemspec
99
99
  - lib/db_subsetter.rb
100
+ - lib/db_subsetter/dialect/generic.rb
101
+ - lib/db_subsetter/dialect/ms_sql.rb
102
+ - lib/db_subsetter/dialect/my_sql.rb
100
103
  - lib/db_subsetter/exporter.rb
101
104
  - lib/db_subsetter/filter.rb
102
105
  - lib/db_subsetter/importer.rb