db_subsetter 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b69d7c3714c570b4a1f22b2ccff5e6e74c8b670
4
- data.tar.gz: dafe34226a0da60c16d2823cc6d76e12b2f9837a
3
+ metadata.gz: 0513f9d6c747b0a4b59915680c0f753f585c7176
4
+ data.tar.gz: dd987be888e834ffa1e2f832eed209206eb791e0
5
5
  SHA512:
6
- metadata.gz: 6a7deca37ab7b6267f9b9f0fbabd7dd221278e93366623df347a46d24aa9a6980c71ec0f9f0c3a3d41b4aaf3b44032ee185092671f1c6517fd9c70e449265c25
7
- data.tar.gz: d50010b3ee53ce2b2a13c95dcae3f00b08681a0f802251d85c6e6764375b4b499c9db817087a2b62c011bed578a161464b9c36abfeaf119ad772e7a2f12cda88
6
+ metadata.gz: 4c1bf06867d598238213377d5c8e8c1f47aa9736b8136038e4e49a8f34fa6f113fae065df8b798914824c0329b21baf4763f8e1b09437dafd8e7889af84dea75
7
+ data.tar.gz: faa183feff6d80f767310170a9dce02612baaa9816cb07225869da55585c9ab05d75d2af17047c9b7ba8e298ac80d4df5c56be7497ff9718b498a450027ebdee
data/README.md CHANGED
@@ -35,6 +35,17 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
35
35
 
36
36
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
37
37
 
38
+ ## TODO
39
+
40
+ * Improve the dialect handling
41
+ * Better example docs on usage and filtering examples
42
+ * Implement a scrubber API to allow sanitizing or correcting data at export time. This allows us to keep sensitive/personal data out of the export and also allows correction of broken data that won't re-insert.
43
+ * Add an executable and/or rake task to perform export and import rather than requiring the API to be used directly. Will need a config file to specify custom plugins
44
+ * Add pre-flight check on import to make sure all tables smell like they will load the data (right columns, at minimum)
45
+ * Finish building and testing the checks that make sure foreign keys are valid after import
46
+ * Have verify_exportability return all failures together rather than one at a time
47
+ * Add a verbose mode to display more detailed stats while running an export or import (what table we're on, records exported, time taken)
48
+
38
49
  ## Contributing
39
50
 
40
51
  Bug reports and pull requests are welcome on GitHub at https://github.com/lostapathy/db_subsetter.
@@ -0,0 +1,14 @@
1
+ module DbSubsetter
2
+ module Dialect
3
+ class Generic
4
+ def self.import
5
+ yield
6
+ end
7
+
8
+ def self.integrity_problems
9
+ []
10
+ end
11
+ end
12
+ end
13
+ end
14
+
@@ -0,0 +1,24 @@
1
+ module DbSubsetter
2
+ module Dialect
3
+ class MSSQL < Generic
4
+ def self.import
5
+ ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? NOCHECK CONSTRAINT all"')
6
+ ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? DISABLE TRIGGER all"')
7
+ ActiveRecord::Base.connection.execute("select 'ALTER INDEX ' + I.name + ' ON ' + T.name + ' DISABLE'
8
+ from sys.indexes I
9
+ inner join sys.tables T on I.object_id = T.object_id
10
+ where I.type_desc = 'NONCLUSTERED'
11
+ and I.name is not null")
12
+
13
+ yield
14
+ ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? ENABLE TRIGGER all"')
15
+ ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "ALTER TABLE ? WITH CHECK CHECK CONSTRAINT all"')
16
+ end
17
+
18
+ def self.integrity_problems
19
+ ActiveRecord::Base.connection.execute('EXEC sp_msforeachtable "DBCC CHECKCONSTRAINTS WITH ALL_CONSTRAINTS"')
20
+ end
21
+ end
22
+ end
23
+ end
24
+
@@ -0,0 +1,16 @@
1
+ module DbSubsetter
2
+ module Dialect
3
+ class MySQL < Generic
4
+ def self.import
5
+ ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=0;")
6
+ yield
7
+ ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=1;")
8
+ end
9
+
10
+ def self.integrity_problems
11
+ raise NotImplementedError.new("integrity_problems not implemented for MySQL")
12
+ end
13
+ end
14
+ end
15
+ end
16
+
@@ -36,7 +36,7 @@ module DbSubsetter
36
36
  verify_exportability
37
37
 
38
38
  @output = SQLite3::Database.new(filename)
39
- @output.execute("CREATE TABLE tables (name TEXT, columns TEXT)")
39
+ @output.execute("CREATE TABLE tables (name TEXT, records_exported INTEGER, columns TEXT)")
40
40
  tables.each do |table|
41
41
  export_table(table)
42
42
  end
@@ -64,8 +64,8 @@ module DbSubsetter
64
64
  end
65
65
 
66
66
  def filtered_row_count(table)
67
- query = Arel::Table.new(table, ActiveRecord::Base).project( Arel.sql("count(1)") )
68
- query = filter.filter(table, query)
67
+ query = Arel::Table.new(table, ActiveRecord::Base)
68
+ query = filter.filter(table, query).project( Arel.sql("count(1)") )
69
69
  ActiveRecord::Base.connection.select_one(query.to_sql).values.first
70
70
  end
71
71
 
@@ -96,22 +96,24 @@ module DbSubsetter
96
96
 
97
97
  def export_table(table)
98
98
  columns = ActiveRecord::Base.connection.columns(table).map{ |table| table.name }
99
- @output.execute("INSERT INTO tables VALUES (?, ?)", [table, columns.to_json])
99
+ rows_exported = 0
100
100
  @output.execute("CREATE TABLE #{table.underscore} ( data TEXT )")
101
101
  for i in 0..pages(table)
102
- query = Arel::Table.new(table, ActiveRecord::Base)
102
+ arel_table = query = Arel::Table.new(table, ActiveRecord::Base)
103
+ query = filter.filter(table, query)
103
104
  # Need to extend this to take more than the first batch_size records
104
- query = query.order(query[order_by(table)]) if order_by(table)
105
+ query = query.order(arel_table[order_by(table)]) if order_by(table)
105
106
 
106
107
  sql = query.skip(i * select_batch_size).take(select_batch_size).project( Arel.sql('*') ).to_sql
107
108
 
108
109
  records = ActiveRecord::Base.connection.select_rows( sql )
109
110
  records.each_slice(insert_batch_size) do |rows|
110
111
  @output.execute("INSERT INTO #{table.underscore} (data) VALUES #{ Array.new(rows.size){"(?)"}.join(",")}", rows.map{|x| cleanup_types(x)}.map(&:to_json) )
112
+ rows_exported += rows.size
111
113
  end
112
114
  end
115
+ @output.execute("INSERT INTO tables VALUES (?, ?, ?)", [table, rows_exported, columns.to_json])
113
116
  end
114
-
115
117
  end
116
118
  end
117
119
 
@@ -11,7 +11,7 @@ module DbSubsetter
11
11
  end
12
12
 
13
13
  def filter(table, query)
14
- filter_method = "filter_#{table}"
14
+ filter_method = "filter_#{table.downcase}"
15
15
  if self.respond_to? filter_method
16
16
  self.send(filter_method, query)
17
17
  else
@@ -3,10 +3,11 @@ require 'sqlite3'
3
3
  module DbSubsetter
4
4
  class Importer
5
5
 
6
- def initialize(filename)
6
+ def initialize(filename, dialect = DbSubsetter::Dialect::Generic)
7
7
  raise ArgumentError.new("invalid input file") unless File.exists?(filename)
8
8
 
9
9
  @data = SQLite3::Database.new(filename)
10
+ @dialect = dialect
10
11
  end
11
12
 
12
13
  def tables
@@ -18,25 +19,40 @@ module DbSubsetter
18
19
  end
19
20
 
20
21
  def import
21
- ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=0;")
22
- tables.each do |table|
23
- import_table(table)
22
+ @dialect.import do
23
+ tables.each do |table|
24
+ import_table(table)
25
+ end
24
26
  end
25
- ActiveRecord::Base.connection.execute("SET FOREIGN_KEY_CHECKS=1;")
27
+ end
28
+
29
+ def insert_batch_size
30
+ 100 # more like 500 for mysql
26
31
  end
27
32
 
28
33
  private
29
34
  def import_table(table)
30
- ActiveRecord::Base.connection.truncate(table)
31
- @data.execute("SELECT data FROM #{table.underscore}") do |row|
32
- insert_sql = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES (#{quoted_values(row).join(",")})"
35
+ begin
36
+ ActiveRecord::Base.connection.truncate(table)
37
+ rescue NotImplementedError
38
+ ActiveRecord::Base.connection.execute("DELETE FROM #{quoted_table_name(table)}")
39
+ end
40
+
41
+ ActiveRecord::Base.connection.begin_db_transaction
42
+
43
+ all_rows = @data.execute("SELECT data FROM #{table.underscore}")
44
+ all_rows.each_slice(insert_batch_size) do |rows|
45
+ quoted_rows = rows.map{ |row| "(" + quoted_values(row).join(",") + ")" }.join(",")
46
+ insert_sql = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES #{quoted_rows}"
33
47
  ActiveRecord::Base.connection.execute(insert_sql)
34
48
  end
49
+
50
+ ActiveRecord::Base.connection.commit_db_transaction
35
51
  end
36
52
 
37
53
  def quoted_values(row)
38
54
  out = JSON.parse(row[0])
39
- out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) } #.first, x.last) }
55
+ out = out.map{|x| ActiveRecord::Base.connection.type_cast(x, nil) }
40
56
  out = out.map{|x| ActiveRecord::Base.connection.quote(x) }
41
57
  out
42
58
  end
@@ -1,3 +1,3 @@
1
1
  module DbSubsetter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/db_subsetter.rb CHANGED
@@ -2,6 +2,10 @@ require "db_subsetter/version"
2
2
  require "db_subsetter/filter"
3
3
  require "db_subsetter/exporter"
4
4
  require "db_subsetter/importer"
5
+ require "db_subsetter/dialect/generic"
6
+ require "db_subsetter/dialect/my_sql"
7
+ require "db_subsetter/dialect/ms_sql"
8
+
5
9
 
6
10
  module DbSubsetter
7
11
  # Your code goes here...
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_subsetter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Francis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-02 00:00:00.000000000 Z
11
+ date: 2016-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -97,6 +97,9 @@ files:
97
97
  - bin/setup
98
98
  - db_subsetter.gemspec
99
99
  - lib/db_subsetter.rb
100
+ - lib/db_subsetter/dialect/generic.rb
101
+ - lib/db_subsetter/dialect/ms_sql.rb
102
+ - lib/db_subsetter/dialect/my_sql.rb
100
103
  - lib/db_subsetter/exporter.rb
101
104
  - lib/db_subsetter/filter.rb
102
105
  - lib/db_subsetter/importer.rb