db_subsetter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +48 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/db_subsetter.gemspec +31 -0
- data/lib/db_subsetter/exporter.rb +117 -0
- data/lib/db_subsetter/filter.rb +24 -0
- data/lib/db_subsetter/importer.rb +59 -0
- data/lib/db_subsetter/version.rb +3 -0
- data/lib/db_subsetter.rb +8 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8b69d7c3714c570b4a1f22b2ccff5e6e74c8b670
|
4
|
+
data.tar.gz: dafe34226a0da60c16d2823cc6d76e12b2f9837a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6a7deca37ab7b6267f9b9f0fbabd7dd221278e93366623df347a46d24aa9a6980c71ec0f9f0c3a3d41b4aaf3b44032ee185092671f1c6517fd9c70e449265c25
|
7
|
+
data.tar.gz: d50010b3ee53ce2b2a13c95dcae3f00b08681a0f802251d85c6e6764375b4b499c9db817087a2b62c011bed578a161464b9c36abfeaf119ad772e7a2f12cda88
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Joe Francis
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# db_subsetter
|
2
|
+
|
3
|
+
Extract a subset of a relational database for use in development or testing. Provides a simple API to filter rows and preserve referential integrity. The extracted data is packed into a [SQLite](https://www.sqlite.org/) database to allow easy copying.
|
4
|
+
|
5
|
+
Developing against a realistic dataset extracted from production provides a lot of advantages over starting with an empty database. This tool was inspired by [rdbms-subsetter](https://github.com/18F/rdbms-subsetter) and [yaml_db](https://github.com/yamldb/yaml_db/) and combines some of the best attributes of both.
|
6
|
+
|
7
|
+
When working against a legacy database, automatic relationship management does not always work out. It can also be desirable to extract similar subsets of data over time to simplify testing. We provide an API to allow you to quickly define how you want to filter each table for your subset. We also provide tools to help calibrate your filters to extract a subset of a reasonable size.
|
8
|
+
|
9
|
+
ActiveRecord is used for database access; however, you *do not* need to have ActiveRecord models for all tables you wish to subset. Any database supported by ActiveRecord should work. In theory, you should be able to subset from one database and import into another (e.g., MySQL -> Postgres); in practice, this may or may not work well depending on exactly what data types are used.
|
10
|
+
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'db_subsetter'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install db_subsetter
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
TODO: Write usage instructions here
|
31
|
+
|
32
|
+
## Development
|
33
|
+
|
34
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
35
|
+
|
36
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/lostapathy/db_subsetter.
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
## License
|
45
|
+
|
46
|
+
|
47
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
48
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "db_subsetter"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'db_subsetter/version'

Gem::Specification.new do |spec|
  # --- Identity -------------------------------------------------------------
  spec.name    = "db_subsetter"
  spec.version = DbSubsetter::VERSION
  spec.authors = ["Joe Francis"]
  spec.email   = ["joe@lostapathy.com"]

  # --- Description ----------------------------------------------------------
  spec.summary = %q{Extract a subset of a relational database for use in development or
testing. Provides a simple API to filter rows and preserve referential
integrity.}
  #spec.description = %q{TODO: Write a longer description or delete this line.}
  spec.homepage = "https://github.com/lostapathy/db_subsetter"
  spec.license  = "MIT"

  # --- Packaged files -------------------------------------------------------
  # Ship everything tracked by git except the test/spec/features trees.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # --- Dependencies ---------------------------------------------------------
  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"

  spec.add_dependency "activerecord", "~> 4.2"
  spec.add_dependency "sqlite3", "~> 1.3"
end
|
31
|
+
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'sqlite3'
|
2
|
+
|
3
|
+
module DbSubsetter
  # Exports the tables selected by a Filter from the database behind
  # ActiveRecord::Base.connection into a SQLite file.
  #
  # Layout of the file written by #export:
  # * a +tables+ table with one row per exported table: its name and a JSON
  #   array of its column names;
  # * one table per exported table (named with String#underscore) holding a
  #   single +data+ TEXT column; each row is the JSON array of one source row.
  class Exporter
    attr_writer :filter

    # @return [Array<String>] every table name reported by the connection
    def all_tables
      ActiveRecord::Base.connection.tables
    end

    # @return [Array<String>] the tables the filter selects for export
    def tables
      filter.tables
    end

    # Unfiltered row count of every exported table.
    #
    # @return [Array<Hash>] one single-entry hash (table name => count) per table
    def total_row_counts
      tables.map do |table|
        query = Arel::Table.new(table, ActiveRecord::Base).project(Arel.sql("count(1) AS num_rows"))
        { table => ActiveRecord::Base.connection.select_one(query.to_sql)["num_rows"] }
      end
    end

    # Row count of every exported table after the filter has been applied.
    #
    # @return [Array<Hash>] one single-entry hash (table name => count) per table
    def filtered_row_counts
      tables.map { |table| { table => filtered_row_count(table) } }
    end

    # Checks every exported table; see #verify_table_exportability.
    #
    # @raise [RuntimeError] if any table cannot be exported
    def verify_exportability
      tables.each { |table| verify_table_exportability(table) }
    end

    # Verifies exportability, then writes the subset to a SQLite database.
    #
    # @param filename [String] path of the SQLite file to write
    # @raise [RuntimeError] if any table fails verification
    def export(filename)
      verify_exportability

      @output = SQLite3::Database.new(filename)
      begin
        @output.execute("CREATE TABLE tables (name TEXT, columns TEXT)")
        tables.each { |table| export_table(table) }
      ensure
        # Release the SQLite handle even when a table fails part-way through.
        @output.close
      end
    end

    private

    # Upper bound on the filtered row count of a single table.
    def max_rows
      10_000_000
    end

    # Number of rows written per INSERT statement into the output file.
    def insert_batch_size
      250
    end

    # Number of rows read from the source database per page.
    def select_batch_size
      insert_batch_size * 20
    end

    # The filter in use (a default Filter when none was assigned), always
    # pointed back at this exporter so it can ask for #all_tables.
    def filter
      @filter ||= Filter.new
      @filter.exporter = self
      @filter
    end

    # Number of rows of +table+ that remain after filtering.
    def filtered_row_count(table)
      query = Arel::Table.new(table, ActiveRecord::Base).project(Arel.sql("count(1)"))
      query = filter.filter(table, query)
      ActiveRecord::Base.connection.select_one(query.to_sql).values.first
    end

    # Number of select pages needed to read every filtered row of +table+.
    def pages(table)
      (filtered_row_count(table) / select_batch_size.to_f).ceil
    end

    # Column used to give pages a stable order: the primary key, or false
    # when the connection reports none.
    def order_by(table)
      # TODO: should probably allow the user to override this and manually set a sort order?
      ActiveRecord::Base.connection.primary_key(table) || false
    end

    # @raise [RuntimeError] when the table needs more than one page but has no
    #   primary key to order by, or when it exceeds #max_rows
    def verify_table_exportability(table)
      # Count once; each call to filtered_row_count is a query against the source.
      row_count = filtered_row_count(table)
      multiple_pages = row_count > select_batch_size

      raise "ERROR: Multiple pages but no primary key on: #{table}" if multiple_pages && order_by(table).blank?
      raise "ERROR: Too many rows in: #{table} (#{row_count})" if row_count > max_rows
    end

    # Converts Date/Time values to their :db string form; other values pass through.
    def cleanup_types(row)
      row.map do |field|
        case field
        when Date, Time then field.to_s(:db)
        else field
        end
      end
    end

    # Records the table's column list, creates its data table and copies the
    # filtered rows across page by page.
    def export_table(table)
      columns = ActiveRecord::Base.connection.columns(table).map(&:name)
      @output.execute("INSERT INTO tables VALUES (?, ?)", [table, columns.to_json])
      @output.execute("CREATE TABLE #{table.underscore} ( data TEXT )")

      # Exactly pages(table) iterations: page indexes 0...pages, no trailing empty query.
      pages(table).times do |page|
        records = ActiveRecord::Base.connection.select_rows(page_sql(table, page))
        records.each_slice(insert_batch_size) do |rows|
          placeholders = Array.new(rows.size) { "(?)" }.join(",")
          @output.execute("INSERT INTO #{table.underscore} (data) VALUES #{placeholders}",
                          rows.map { |row| cleanup_types(row).to_json })
        end
      end
    end

    # SQL selecting one page of the filtered rows of +table+.
    # The filter is applied here so the exported rows match the counts that
    # #pages and #verify_table_exportability are based on.
    def page_sql(table, page)
      arel_table = Arel::Table.new(table, ActiveRecord::Base)
      query = filter.filter(table, arel_table.project(Arel.sql('*')))

      sort_key = order_by(table)
      query = query.order(arel_table[sort_key]) if sort_key

      query.skip(page * select_batch_size).take(select_batch_size).to_sql
    end
  end
end
|
117
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DbSubsetter
  # Decides which tables are exported and how each table's query is narrowed.
  #
  # Subclass it and
  # * override #ignore_tables to skip tables entirely, and/or
  # * define public methods named <tt>filter_<table></tt> that take the query
  #   for that table and return the narrowed query.
  class Filter
    # The exporter whose #all_tables is the starting table list.
    attr_writer :exporter

    # @return [Array<String>] table names to leave out of the export
    def ignore_tables
      []
    end

    # @return [Array<String>] the exporter's tables minus
    #   ActiveRecord::SchemaDumper.ignore_tables and #ignore_tables
    def tables
      @exporter.all_tables - ActiveRecord::SchemaDumper.ignore_tables - ignore_tables
    end

    # Applies the table-specific filter method, if one is defined.
    #
    # @param table [String] table name; selects the <tt>filter_<table></tt> method
    # @param query the query to narrow
    # @return the result of the filter method, or +query+ untouched when this
    #   object has no public <tt>filter_<table></tt> method
    def filter(table, query)
      filter_method = "filter_#{table}"
      return query unless respond_to?(filter_method)

      # respond_to? only reports public methods, so dispatch publicly as well.
      public_send(filter_method, query)
    end
  end
end
|
24
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'sqlite3'
|
2
|
+
|
3
|
+
module DbSubsetter
  # Loads a SQLite file with a +tables+ metadata table (name, JSON column list)
  # and one +data+ table per exported table into the database behind
  # ActiveRecord::Base.connection.
  class Importer
    # @param filename [String] path of an existing SQLite export file
    # @raise [ArgumentError] if no file exists at +filename+
    def initialize(filename)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      raise ArgumentError, "invalid input file" unless File.exist?(filename)

      @data = SQLite3::Database.new(filename)
    end

    # @return [Array<String>] names of the tables recorded in the export file
    def tables
      @data.execute("SELECT name FROM tables").map { |row| row[0] }
    end

    # Truncates and reloads every table recorded in the export file.
    #
    # Foreign key checks are switched off for the duration via
    # <tt>SET FOREIGN_KEY_CHECKS</tt> and switched back on afterwards, even if
    # a table fails to import. The return value is not meaningful.
    # NOTE(review): SET FOREIGN_KEY_CHECKS looks MySQL-specific -- confirm
    # before using against other adapters.
    def import
      connection = ActiveRecord::Base.connection
      connection.execute("SET FOREIGN_KEY_CHECKS=0;")
      begin
        tables.each { |table| import_table(table) }
      ensure
        connection.execute("SET FOREIGN_KEY_CHECKS=1;")
      end
    end

    private

    # Empties +table+ in the target database and inserts every stored row.
    def import_table(table)
      connection = ActiveRecord::Base.connection
      connection.truncate(table)

      # Table and column names are identical for every row: build them once
      # instead of re-querying the metadata table per inserted row.
      insert_prefix = "INSERT INTO #{quoted_table_name(table)} (#{quoted_column_names(table).join(",")}) VALUES "

      @data.execute("SELECT data FROM #{table.underscore}") do |row|
        connection.execute("#{insert_prefix}(#{quoted_values(row).join(",")})")
      end
    end

    # Decodes one stored row (a JSON array in the first column) and quotes
    # each value for use in an INSERT statement on the target connection.
    def quoted_values(row)
      connection = ActiveRecord::Base.connection
      JSON.parse(row[0])
          .map { |value| connection.type_cast(value, nil) }
          .map { |value| connection.quote(value) }
    end

    # @return [Array<String>] column names recorded for +table+ at export time
    def columns(table)
      raw = @data.execute("SELECT columns FROM tables WHERE name = ?", [table]).first[0]
      JSON.parse(raw)
    end

    def quoted_table_name(table)
      ActiveRecord::Base.connection.quote_table_name(table)
    end

    def quoted_column_names(table)
      columns(table).map { |column| ActiveRecord::Base.connection.quote_column_name(column) }
    end
  end
end
|
59
|
+
|
data/lib/db_subsetter.rb
ADDED
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: db_subsetter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Joe Francis
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-07-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activerecord
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '4.2'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '4.2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sqlite3
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.3'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.3'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- joe@lostapathy.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".gitignore"
|
91
|
+
- ".travis.yml"
|
92
|
+
- Gemfile
|
93
|
+
- LICENSE
|
94
|
+
- README.md
|
95
|
+
- Rakefile
|
96
|
+
- bin/console
|
97
|
+
- bin/setup
|
98
|
+
- db_subsetter.gemspec
|
99
|
+
- lib/db_subsetter.rb
|
100
|
+
- lib/db_subsetter/exporter.rb
|
101
|
+
- lib/db_subsetter/filter.rb
|
102
|
+
- lib/db_subsetter/importer.rb
|
103
|
+
- lib/db_subsetter/version.rb
|
104
|
+
homepage: https://github.com/lostapathy/db_subsetter
|
105
|
+
licenses:
|
106
|
+
- MIT
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubyforge_project:
|
124
|
+
rubygems_version: 2.5.1
|
125
|
+
signing_key:
|
126
|
+
specification_version: 4
|
127
|
+
summary: Extract a subset of a relational database for use in development or testing. Provides
|
128
|
+
a simple API to filter rows and preserve referential integrity.
|
129
|
+
test_files: []
|
130
|
+
has_rdoc:
|