fake_pipe 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.rubocop.yml +26 -0
- data/.travis.yml +4 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +124 -0
- data/Rakefile +14 -0
- data/bin/console +7 -0
- data/bin/setup +8 -0
- data/exe/fake_pipe +6 -0
- data/fake_pipe.gemspec +26 -0
- data/lib/fake_pipe/any_block.rb +22 -0
- data/lib/fake_pipe/commenter.rb +65 -0
- data/lib/fake_pipe/mutator.rb +184 -0
- data/lib/fake_pipe/piper.rb +93 -0
- data/lib/fake_pipe/postgres/comment_block.rb +35 -0
- data/lib/fake_pipe/postgres/copy_block.rb +40 -0
- data/lib/fake_pipe/postgres.rb +11 -0
- data/lib/fake_pipe/text_block.rb +38 -0
- data/lib/fake_pipe/version.rb +3 -0
- data/lib/fake_pipe.rb +21 -0
- metadata +137 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 12b4fbe781cae0694881681c54759347c3aa6f7f
|
4
|
+
data.tar.gz: 31b8a828b0ceb197988aecb655bbd9baa9211741
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9f9b1f9ead0f631a875b6bcfba1573d6dedb77a83ebb1e0b05362bb6a07bf1063d52835e37559a361db97474feec1bf879c77c978b60ce5fb9b7765c8324a332
|
7
|
+
data.tar.gz: b9ea9be17454c4ac8ec51c59207c9c510e0a6d7da7029e126f9579e5e2f65fa667b9dd3d5caa4d38f33788f8142f22c4a1ea4db04b111edda165d011ea5595b3
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.2
|
3
|
+
Rails:
|
4
|
+
Enabled: true
|
5
|
+
Encoding:
|
6
|
+
Enabled: false
|
7
|
+
Metrics/ClassLength:
|
8
|
+
Enabled: false
|
9
|
+
Metrics/MethodLength:
|
10
|
+
Enabled: false
|
11
|
+
Metrics/AbcSize:
|
12
|
+
Enabled: false
|
13
|
+
Metrics/CyclomaticComplexity:
|
14
|
+
Enabled: false
|
15
|
+
Style/AndOr:
|
16
|
+
EnforcedStyle: conditionals
|
17
|
+
Style/CaseEquality:
|
18
|
+
Enabled: false
|
19
|
+
Style/ConditionalAssignment:
|
20
|
+
Enabled: false
|
21
|
+
Style/GuardClause:
|
22
|
+
Enabled: false
|
23
|
+
Style/SingleLineBlockParams:
|
24
|
+
Enabled: false
|
25
|
+
Style/StringLiterals:
|
26
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Scott Pierce
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
We need a simple way to [anonymize data](https://en.wikipedia.org/wiki/Data_anonymization)
|
4
|
+
for exporting to various departments for analytics and troubleshooting. This tool
|
5
|
+
allows us to annotate a DB schema with special comments that can trigger
|
6
|
+
different data mutations.
|
7
|
+
|
8
|
+
FakePipe
|
9
|
+
|
10
|
+
## Workflow with FakePipe
|
11
|
+
|
12
|
+
Here's how FakePipe could work in a project's lifecycle. These steps assume a
|
13
|
+
Postgres database:
|
14
|
+
|
15
|
+
1. Add comment to table column: `COMMENT ON COLUMN user.phone IS 'anon: phone_number';`.
|
16
|
+
2. Pipe DB dump to fake_pipe: `pg_dump my_db | fake_pipe > anon-db-dump.sql`.
|
17
|
+
3. Send `anon-db-dump.sql` to needy people.
|
18
|
+
|
19
|
+
|
20
|
+
## Basic Class Diagram
|
21
|
+
|
22
|
+
TODO This is subject to change!
|
23
|
+
|
24
|
+
+---------------+ +----------------------------------+
|
25
|
+
| Piper | | DatabaseAdapter/PostgresAdapter |
|
26
|
+
| * stdin | | * match comments |
|
27
|
+
| * stdout +-------> * match dml |
|
28
|
+
| * main loop | | * delegate.on_cell(name, cell) |
|
29
|
+
| * def on_cell | | |
|
30
|
+
| | +----------------------------------+
|
31
|
+
| |
|
32
|
+
| | +-----------------+
|
33
|
+
| | | Mutator |
|
34
|
+
| +-------> * phone_number |
|
35
|
+
+---------------+ | * email |
|
36
|
+
| |
|
37
|
+
+-----------------+
|
38
|
+
|
39
|
+
# Comment Dialect
|
40
|
+
|
41
|
+
Schema columns comments are in [YAML format](http://www.yaml.org/start.html).
|
42
|
+
Using some of its optional quoting, it can look very much like JSON. The reason
|
43
|
+
it was chosen over JSON is the optional quotes. That means the following
|
44
|
+
syntax will resolve to the same Ruby definition:
|
45
|
+
|
46
|
+
```
|
47
|
+
---
|
48
|
+
color: red
|
49
|
+
width: 100
|
50
|
+
```
|
51
|
+
|
52
|
+
```
|
53
|
+
{color: "red", width: 100}
|
54
|
+
```
|
55
|
+
|
56
|
+
For single options, the quotes can be omitted: `color: red`.
|
57
|
+
|
58
|
+
Any keys unknown to FakePipe will be ignored, so annotations from other systems
|
59
|
+
shouldn't interfere. We do hope the abbreviated YAML syntax is simple to parse
|
60
|
+
by all systems.
|
61
|
+
|
62
|
+
## Currently Supported FakePipe Methods
|
63
|
+
|
64
|
+
To get the current list, run `rake methods` from a terminal.
|
65
|
+
|
66
|
+
```sh
|
67
|
+
$ rake methods
|
68
|
+
anon: email # Faker email
|
69
|
+
anon: md5 # MD5 hash of cell contents
|
70
|
+
anon: phone_number # Faker::PhoneNumber with digits only
|
71
|
+
```
|
72
|
+
|
73
|
+
TODO clean up README. The following is default stock from `bundle gem ...`
|
74
|
+
|
75
|
+
|
76
|
+
# Decisions
|
77
|
+
- 2016-06-08
|
78
|
+
- parsing SQL file is okay for now. Reconsider using a temp DB when
|
79
|
+
foreign keys need to be scrambled
|
80
|
+
- MD5sum foreign keys
|
81
|
+
|
82
|
+
|
83
|
+
## Installation
|
84
|
+
|
85
|
+
Add this line to your application's Gemfile:
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
gem 'fake_pipe'
|
89
|
+
```
|
90
|
+
|
91
|
+
And then execute:
|
92
|
+
|
93
|
+
$ bundle
|
94
|
+
|
95
|
+
Or install it yourself as:
|
96
|
+
|
97
|
+
$ gem install fake_pipe
|
98
|
+
|
99
|
+
## Usage
|
100
|
+
|
101
|
+
|
102
|
+
## Development
|
103
|
+
|
104
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
105
|
+
`rake spec` to run the tests. You can also run `bin/console` for an interactive
|
106
|
+
prompt that will allow you to experiment.
|
107
|
+
|
108
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
109
|
+
release a new version, update the version number in `version.rb`, and then run
|
110
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
111
|
+
git commits and tags, and push the `.gem` file to
|
112
|
+
[rubygems.org](https://rubygems.org).
|
113
|
+
|
114
|
+
## Contributing
|
115
|
+
|
116
|
+
Bug reports and pull requests are welcome on GitHub at
|
117
|
+
https://github.com/centro/fake_pipe.
|
118
|
+
|
119
|
+
|
120
|
+
## License
|
121
|
+
|
122
|
+
The gem is available as open source under the terms of the [MIT
|
123
|
+
License](http://opensource.org/licenses/MIT).
|
124
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
|
6
|
+
task default: :spec
|
7
|
+
|
8
|
+
desc 'List supported FakePipe methods'
task :methods do
  require 'fake_pipe'

  # Each entry is a [mutator_name, comment] pair.
  entries = FakePipe::Mutator.list_with_comments

  # Pad every name to the width of the longest one so the comments line up.
  widest = entries.map(&:first).max_by(&:size)
  rows = entries.map do |name, comment|
    "anon: #{name.ljust(widest.size)} #{comment}"
  end

  puts rows
end
|
data/bin/console
ADDED
data/bin/setup
ADDED
data/exe/fake_pipe
ADDED
data/fake_pipe.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fake_pipe/version'
|
5
|
+
|
6
|
+
# Packaging metadata for the fake_pipe gem.
Gem::Specification.new do |spec|
  spec.name = 'fake_pipe'
  spec.version = FakePipe::VERSION
  spec.authors = ['Scott Pierce']
  spec.email = ['ddrscott@gmail.com']

  spec.summary = 'db_dump.sql | fake_pipe > fake_dump.sql'
  spec.homepage = 'https://github.com/centro/fake_pipe'
  spec.license = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  # Executables live in exe/; each tracked file there is exposed by its basename.
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Runtime dependencies are declared without version constraints.
  spec.add_dependency 'activesupport'
  spec.add_dependency 'faker'
  spec.add_development_dependency 'bundler', '~> 1.11'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module FakePipe
  # Fallback text block that accepts every line.
  #
  # It serves as the generic state while no more specific text block applies:
  # every line both starts and ends the block, and `parse` hands the line back
  # untouched.
  class AnyBlock < TextBlock
    # Pass-through: returns the line exactly as it was received.
    def parse(line)
      line
    end

    # Every line is treated as the end of this block.
    def end_text?(line)
      true
    end

    # Always reports that the block has started.
    def start_text?
      true
    end

    # Every line counts as a start line. Returns `true`, not a MatchData.
    def match_start_text(line)
      true
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module FakePipe
  # Provides a helper migration method to set a valid fake_pipe comment.
  #
  # The including object must provide `connection`, `execute` and `reversible`
  # (presumably an ActiveRecord migration -- confirm against callers).
  #
  # The dialect is the downcased adapter name reported by the connection.
  # Currently only the following dialect is supported:
  #   postgresql
  #
  # To support more, implement a private `execute_comment_<dialect>` method.
  module Commenter
    # Generates a fake_pipe comment for the given table and column.
    # This uses `reversible`, so it should be safe to use within a `change`
    # style migration.
    #
    # @param table [String] table to apply the comment
    # @param column [String] column to apply the comment
    # @param mutator [String, Symbol] mutator strategy to apply
    # @raise [RuntimeError] when the mutator is not in `Mutator.list`
    # @raise [NotImplementedError] when no handler exists for the dialect
    def anonymize_comment(table, column, mutator)
      validate_mutator!(mutator)
      comment_updater.call(table: table, column: column, mutator: mutator)
    end

    private

    # Raises unless the mutator names one of the strategies in `Mutator.list`.
    def validate_mutator!(mutator)
      # Mutator.list holds strings, so compare on the string form. This lets
      # callers pass a symbol as well as a string.
      Mutator.list.include?(mutator.to_s) or
        raise "Mutator #{mutator} is not valid. Try one of: #{Mutator.list}"
    end

    # Looks up the `execute_comment_<dialect>` handler for the current
    # connection and returns it as a Method object.
    def comment_updater
      meth = "execute_comment_#{dialect}"
      # Second argument `true` makes respond_to? consider private methods.
      if respond_to?(meth, true)
        method(meth)
      else
        raise NotImplementedError,
              "DB dialect `#{dialect}` not supported. Try one of: #{supported_dialects}"
      end
    end

    # Sets the column comment when migrating up and clears it when migrating
    # down.
    def execute_comment_postgresql(table:, column:, mutator:)
      reversible do |dir|
        dir.up do
          execute "COMMENT ON COLUMN #{table}.#{column} IS #{formatted_comment(mutator)};"
        end
        dir.down do
          execute "COMMENT ON COLUMN #{table}.#{column} IS NULL;"
        end
      end
    end

    # Builds the quoted `anon: <mutator>` comment literal.
    def formatted_comment(mutator)
      escape_string("anon: #{mutator}")
    end

    # Delegates quoting to the connection.
    def escape_string(text)
      connection.quote(text)
    end

    # Dialect names for which an `execute_comment_<dialect>` handler exists.
    def supported_dialects
      # The handlers are private and `methods` omits private methods, so the
      # private list has to be searched too; otherwise this is always empty.
      (methods + private_methods)
        .map { |m| m[/^execute_comment_(.*)$/, 1] }
        .compact
        .uniq
        .sort
    end

    # TODO: There's got to be a better way
    def dialect
      connection.adapter_name.downcase.to_s
    end
  end
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
module FakePipe
  # This module handles mapping between a configured mutation such as
  # 'phone_number' and the logic to change the data.
  #
  # To create a new mutable named configuration create a method prefixed
  # with `mutate_`. The method will receive the original cell value and is
  # expected to return the mutated value. Please add a comment to the mutate
  # method. The comment is used by `rake methods` to get a listing of all
  # possible mutations.
  #
  # NOTE: `list_with_comments` reads the comment above each `mutate_*` method
  # back from the source, so those comments are effectively program output.
  # Editing one changes what `rake methods` prints.
  module Mutator
    module_function

    # Applies the mutator called `name` to `cell`.
    #
    # @param name [String] short mutator name, i.e. without the `mutate_` prefix
    # @param cell [Object] original cell value, handed to the mutator as-is
    # @return [Object] whatever the selected `mutate_<name>` method returns
    # @raise [RuntimeError] when no public `mutate_<name>` method exists
    def mutate(name, cell)
      mutator_method = "mutate_#{name}"
      if respond_to? mutator_method
        public_send(mutator_method, cell)
      else
        raise "Mutator named `#{name}` not found. Try one of these: #{list.join(', ')}"
      end
    end

    # Sorted short names of every public `mutate_*` method.
    # The result is memoized in `@list` on first use.
    def list
      @list ||= public_methods
                .map { |m| m.to_s[/^mutate_(\w+)$/, 1] }
                .select(&:present?)
                .sort
    end

    # Utility method for outputting available mutators.
    # Only require method source here.
    # Currently used by a `rake methods`.
    def list_with_comments
      require 'method_source'
      list.map { |short| [short, public_method("mutate_#{short}").comment.strip] }
    end

    # Faker::PhoneNumber with punctuation and extensions
    def mutate_phone_number(_)
      Faker::PhoneNumber.phone_number
    end

    # Faker::PhoneNumber 10-digits only
    def mutate_clean_phone_number(_)
      # Drop every non-digit and a "1" at the very start, then keep at most
      # the first ten remaining characters.
      Faker::PhoneNumber.phone_number.gsub(/\D|(^1)/, '')[0, 10]
    end

    # Faker email
    def mutate_email(_)
      Faker::Internet.email
    end

    # Faker::Internet.user_name
    def mutate_user_name(_)
      Faker::Internet.user_name
    end

    # Faker::Internet.url
    def mutate_url(_)
      Faker::Internet.url
    end

    # MD5 hash of cell contents
    def mutate_md5(cell)
      # The digest is Base64-encoded; a nil/false cell is returned unchanged.
      cell ? Digest::MD5.base64digest(cell) : cell
    end

    # Faker::Address.street_address
    def mutate_address_line_1(_)
      Faker::Address.street_address
    end

    # Faker::Address.secondary_address
    def mutate_address_line_2(_)
      Faker::Address.secondary_address
    end

    # Faker::Address.country
    def mutate_address_country(_)
      Faker::Address.country
    end

    # Faker::Address.city
    def mutate_address_city(_)
      Faker::Address.city
    end

    # Faker::Address.state
    def mutate_address_state(_)
      Faker::Address.state
    end

    # Faker::Address.postcode
    def mutate_address_postcode(_)
      Faker::Address.postcode
    end

    # Faker::Company.name
    def mutate_company_name(_)
      Faker::Company.name
    end

    # Faker::Company.catch_phrase
    def mutate_company_catch_phrase(_)
      Faker::Company.catch_phrase
    end

    # an empty curly brace '{}' - good for json object and array fields
    def mutate_empty_curly(_)
      '{}'
    end

    # an empty bracket '[]' - good for json::array objects
    def mutate_empty_bracket(_)
      '[]'
    end

    # an empty String
    def mutate_empty_string(_)
      ''
    end

    # Faker::Lorem.paragraph
    def mutate_lorem_paragraph(_)
      Faker::Lorem.paragraph
    end

    # Faker::Lorem.word
    def mutate_lorem_word(_)
      Faker::Lorem.word
    end

    # Faker::Lorem.sentence
    def mutate_lorem_sentence(_)
      Faker::Lorem.sentence
    end

    # Faker::Name.first_name
    def mutate_first_name(_)
      Faker::Name.first_name
    end

    # Faker::Name.last_name
    def mutate_last_name(_)
      Faker::Name.last_name
    end

    # Faker::Name.full_name
    def mutate_full_name(_)
      # NOTE(review): the call made here is Faker::Name.name, not `full_name`
      # as the listing comment above says.
      Faker::Name.name
    end

    # Faker::PhoneNumber.extension
    def mutate_phone_ext(_)
      Faker::PhoneNumber.extension
    end

    # bcrypt password as 'password'
    def mutate_bcrypt_password(_)
      # Fixed value: every mutated row receives this same string.
      '400$8$2d$f6ed5a490c441958$67f59aa61bc617849a3280b5e80f78607e53b5aa5807a44ddbc53e804e2e2a99'
    end

    # bcrypt salt used to generate password
    def mutate_bcrypt_salt(_)
      # Fixed value: every mutated row receives this same string.
      'au6lOASvp17AGsqkmE7'
    end

    ALPHABET = ('A'..'Z').to_a
    DIGITS = ('0'..'9').to_a
    # Six random uppercase letters followed by four random numbers - ex. 'ABCDEF1234'
    def mutate_ugcid(_)
      # Array#sample(n) picks distinct elements, so no letter or digit repeats
      # within one generated id.
      (ALPHABET.sample(6) + DIGITS.sample(4)).join
    end

    # UUID
    def mutate_uuid(_)
      SecureRandom.uuid
    end

    # Reopen class to define aliases on module_function
    class << self
      # `guid` is accepted as another name for the `uuid` mutator.
      alias mutate_guid mutate_uuid
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module FakePipe
  # Coordinates all the text blocks.
  # An instance is built from an input io, an output io, and an adapter name.
  #
  # ## Adapter
  # An adapter is a module directly under fake_pipe. It must respond to
  # `text_blocks`, returning the `TextBlock` classes that will call back into
  # `on_config` and `on_cell`.
  #
  # ## General IO Flow
  # `run` streams `each_line` of the input `io` and writes out either the same
  # line or the result of the current block's `parse`. Extracting the relevant
  # table, column and cell information is the TextBlock's job; this class keeps
  # track of when a cell should be mutated.
  #
  # Most lines from `io` are passed straight through to the `outputter`.
  class Piper
    attr_accessor :io, :configs, :outputter, :text_blocks

    # @param io [#each_line] input to read lines from
    # @param outputter [#puts] destination for the (possibly rewritten) lines
    # @param [String] adapter should be a module file directly under the 'fake_pipe' path
    def initialize(io:, outputter:, adapter:)
      self.io = io
      self.outputter = outputter
      self.configs = {}
      register_adapter(adapter)
    end

    # Requires `fake_pipe/<adapter>`, instantiates each of its text block
    # classes with this Piper as delegate, and appends the catch-all block.
    def register_adapter(adapter)
      adapter_path = "fake_pipe/#{adapter}"
      require adapter_path

      adapter_const = adapter_path.camelize.constantize
      blocks = adapter_const.text_blocks.map { |klass| klass.new(delegate: self) }
      self.text_blocks = blocks

      # AnyBlock is a catch all and needs to come last.
      text_blocks << AnyBlock.new(delegate: self)
    end

    # Main loop: feeds every input line through the currently active block.
    def run
      # Start in the last registered block, which is the catch-all.
      active = text_blocks.last

      io.each_line do |line|
        if active.end_text?(line)
          # The closing line is echoed untouched and then offered to every
          # block as a possible start line.
          output line
          active = detect_and_start_text_block(line)
          next
        end

        # optimization: only parse when the block's table has a configuration,
        # otherwise emit the original line
        rewritten = configs[active.table] ? active.parse(line) : line
        output rewritten
      end
    end

    # Finds the first text block for which `line` is a start line and fires
    # its `on_start_text` callback so the block can initialize itself.
    #
    # @return [TextBlock, nil] the block that accepted the line
    def detect_and_start_text_block(line)
      text_blocks.find do |candidate|
        found = candidate.match_start_text(line)
        next false unless found && candidate.start_text?

        candidate.on_start_text(found, line)
        true
      end
    end

    # Delegate callback used by the text blocks to record the configuration
    # of a table's column.
    def on_config(table:, column:, config:)
      (configs[table] ||= {})[column] = config
    end

    # @return [String] The mutated cell or the original if there's no config for
    #   the table/column.
    def on_cell(table:, column:, cell:)
      table_config = configs[table]
      strategy = table_config && table_config[column]
      return cell unless strategy

      Mutator.mutate(strategy, cell)
    end

    private

    # Simple wrapper to print to the configured #outputter
    def output(text)
      outputter.puts(text)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'fake_pipe/text_block'
|
2
|
+
|
3
|
+
module FakePipe
  module Postgres
    # Finds Postgres `COMMENT ON COLUMN` statements and reports the `anon`
    # setting found in the comment text to the delegate.
    class CommentBlock < TextBlock
      self.start_pattern = /^COMMENT ON COLUMN (?<table>[^\.]+)\.(?<column>\S+) IS '(?<comment>.*)';/
      self.end_pattern = /^$/

      # Called when a line matched `start_pattern`. Records the table name and
      # extracts the configuration from the same line.
      #
      # @param match [MatchData] result of `start_pattern` for the line
      # @param line [String] the matching line (unused here)
      def on_start_text(match, line)
        self.table = match[:table]
        parse_config(match)
      end

      # Parses the comment text as YAML and passes its `anon` value to the
      # delegate. Comments that do not parse to a mapping are skipped.
      #
      # @param match [MatchData] must expose `:table`, `:column` and `:comment`
      def parse_config(match)
        # consolidate escaped single quotes
        comment = match[:comment].gsub("''", "'")

        # NOTE(review): this runs YAML.load on text taken from the dump.
        # Consider YAML.safe_load if dumps can come from untrusted sources.
        data = YAML.load(comment)

        # A plain-text or empty column comment loads as a String, number,
        # false or nil. None of those can carry an `anon` key, and calling
        # `with_indifferent_access` on them would raise, so skip them.
        return unless data.is_a?(Hash)

        # give the config back to the delegate
        delegate.on_config(
          table: match[:table],
          column: match[:column],
          config: data.with_indifferent_access[:anon]
        )
      end

      # Always raises: the configuration is read entirely from the start line.
      def parse(*)
        raise '`parse` should not be called to extract config from comments. ' \
          ' Try inspecting the PG dump format for changes. Comments are normally all in a single line.'
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'fake_pipe/text_block'
|
2
|
+
|
3
|
+
module FakePipe
  module Postgres
    # Finds COPY... text blocks inside of `pg_dumps`
    class CopyBlock < TextBlock
      # Cell separator within a data line.
      DELIMITER = "\t"

      # Separator between names in the COPY statement's column list.
      COLUMN_SPLITTER = /,\s*/

      self.start_pattern = /^COPY (?<table>\S+) \((?<columns>[^)]*)\) FROM stdin;/
      self.end_pattern = /^\\\.$/

      # Records the table and the column list of the COPY statement.
      #
      # @param match [MatchData] result of `start_pattern` for the line
      # @param line [String] the matching line (unused here)
      # @return [Hash<Integer,String>] Index for column ordinal and column name: { 0 => column_name }
      def on_start_text(match, line)
        @table = match[:table]
        @columns = match[:columns].split(COLUMN_SPLITTER)
        @column_idx = Hash[@columns.map.with_index { |name, i| [i, name] }]
      end

      # Postgres COPY format is NOT CSV.
      # > https://www.postgresql.org/docs/9.1/static/sql-copy.html
      #
      # Offers every non-null, non-blank cell of a data line to
      # `delegate.on_cell`.
      #
      # @param line [String] one data line, with or without its line terminator
      # @return [String] the line, maybe mutated by `delegate.on_cell`, ending
      #   with the same terminator as the input
      def parse(line)
        # Separate the terminator first. Otherwise the last cell carries the
        # newline, so a `\N` in the last column is not recognised as null and
        # mutators see "value\n" instead of "value".
        content = line.chomp
        terminator = line[content.length..-1]

        # The negative limit keeps trailing empty cells, which `split` would
        # otherwise drop now that the newline no longer follows them.
        row = content.split(DELIMITER, -1)
        faked = row.map.with_index do |cell, i|
          if cell.blank? || cell == '\N'
            # Don't acknowledge null cells
            cell
          else
            delegate.on_cell(table: @table, column: @column_idx[i], cell: cell)
          end
        end
        faked.join(DELIMITER) + terminator
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module FakePipe
  # Base class for a region of input delimited by a start and an end line.
  # A block runs from the line matching `start_pattern` to the line matching
  # `end_pattern`; the lines in between are handed to #parse.
  #
  # The most recent start match is kept in `start_match` in case #parse finds
  # something interesting in it.
  class TextBlock
    class_attribute :start_pattern
    class_attribute :end_pattern

    attr_accessor :delegate, :start_match, :table

    # @param delegate [Object] receiver of the callbacks made by subclasses
    def initialize(delegate:)
      self.delegate = delegate
    end

    # Matches `line` against `start_pattern` and remembers the result.
    # When no start pattern is set, nothing is recorded and the (falsy)
    # pattern itself is returned.
    def match_start_text(line)
      pattern = start_pattern
      return pattern unless pattern

      self.start_match = pattern.match(line)
    end

    # True once a start match has been recorded.
    def start_text?
      !start_match.nil?
    end

    # Whether `line` matches `end_pattern`. When no end pattern is set, the
    # (falsy) pattern itself is returned.
    def end_text?(line)
      pattern = end_pattern
      return pattern unless pattern

      pattern.match(line) ? true : false
    end

    # Override to do something interesting with the initial match or line
    def on_start_text(_match, _line)
    end

    # Subclasses must override this to handle the lines inside the block.
    def parse(_line)
      raise NotImplementedError, "#{self} doesn't implement `parse`."
    end
  end
end
|
data/lib/fake_pipe.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# external gems
|
2
|
+
require 'faker'
|
3
|
+
require 'active_support/all'
|
4
|
+
require 'csv'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
# internal
|
8
|
+
require 'fake_pipe/version'
|
9
|
+
require 'fake_pipe/text_block'
|
10
|
+
require 'fake_pipe/any_block'
|
11
|
+
require 'fake_pipe/piper'
|
12
|
+
require 'fake_pipe/mutator'
|
13
|
+
|
14
|
+
module FakePipe
  module_function

  # Convenience entry point: builds a Piper for the given streams and adapter
  # and runs it.
  #
  # @param io [#each_line] input to read from
  # @param outputter [#puts] destination for the output, `$stdout` by default
  # @param adapter [String] adapter name, 'postgres' by default
  # @return whatever `Piper#run` returns
  def pipe(io:, outputter: $stdout, adapter: 'postgres')
    FakePipe::Piper.new(io: io, outputter: outputter, adapter: adapter).run
  end
end
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fake_pipe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Scott Pierce
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-10-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faker
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.11'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.11'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.0'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- ddrscott@gmail.com
|
86
|
+
executables:
|
87
|
+
- fake_pipe
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
93
|
+
- ".rubocop.yml"
|
94
|
+
- ".travis.yml"
|
95
|
+
- Gemfile
|
96
|
+
- LICENSE.txt
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/setup
|
101
|
+
- exe/fake_pipe
|
102
|
+
- fake_pipe.gemspec
|
103
|
+
- lib/fake_pipe.rb
|
104
|
+
- lib/fake_pipe/any_block.rb
|
105
|
+
- lib/fake_pipe/commenter.rb
|
106
|
+
- lib/fake_pipe/mutator.rb
|
107
|
+
- lib/fake_pipe/piper.rb
|
108
|
+
- lib/fake_pipe/postgres.rb
|
109
|
+
- lib/fake_pipe/postgres/comment_block.rb
|
110
|
+
- lib/fake_pipe/postgres/copy_block.rb
|
111
|
+
- lib/fake_pipe/text_block.rb
|
112
|
+
- lib/fake_pipe/version.rb
|
113
|
+
homepage: https://github.com/centro/fake_pipe
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.6.13
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: db_dump.sql | fake_pipe > fake_dump.sql
|
137
|
+
test_files: []
|