fake_pipe 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.rubocop.yml +26 -0
- data/.travis.yml +4 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +124 -0
- data/Rakefile +14 -0
- data/bin/console +7 -0
- data/bin/setup +8 -0
- data/exe/fake_pipe +6 -0
- data/fake_pipe.gemspec +26 -0
- data/lib/fake_pipe/any_block.rb +22 -0
- data/lib/fake_pipe/commenter.rb +65 -0
- data/lib/fake_pipe/mutator.rb +184 -0
- data/lib/fake_pipe/piper.rb +93 -0
- data/lib/fake_pipe/postgres/comment_block.rb +35 -0
- data/lib/fake_pipe/postgres/copy_block.rb +40 -0
- data/lib/fake_pipe/postgres.rb +11 -0
- data/lib/fake_pipe/text_block.rb +38 -0
- data/lib/fake_pipe/version.rb +3 -0
- data/lib/fake_pipe.rb +21 -0
- metadata +137 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 12b4fbe781cae0694881681c54759347c3aa6f7f
|
4
|
+
data.tar.gz: 31b8a828b0ceb197988aecb655bbd9baa9211741
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9f9b1f9ead0f631a875b6bcfba1573d6dedb77a83ebb1e0b05362bb6a07bf1063d52835e37559a361db97474feec1bf879c77c978b60ce5fb9b7765c8324a332
|
7
|
+
data.tar.gz: b9ea9be17454c4ac8ec51c59207c9c510e0a6d7da7029e126f9579e5e2f65fa667b9dd3d5caa4d38f33788f8142f22c4a1ea4db04b111edda165d011ea5595b3
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.2
|
3
|
+
Rails:
|
4
|
+
Enabled: true
|
5
|
+
Encoding:
|
6
|
+
Enabled: false
|
7
|
+
Metrics/ClassLength:
|
8
|
+
Enabled: false
|
9
|
+
Metrics/MethodLength:
|
10
|
+
Enabled: false
|
11
|
+
Metrics/AbcSize:
|
12
|
+
Enabled: false
|
13
|
+
Metrics/CyclomaticComplexity:
|
14
|
+
Enabled: false
|
15
|
+
Style/AndOr:
|
16
|
+
EnforcedStyle: conditionals
|
17
|
+
Style/CaseEquality:
|
18
|
+
Enabled: false
|
19
|
+
Style/ConditionalAssignment:
|
20
|
+
Enabled: false
|
21
|
+
Style/GuardClause:
|
22
|
+
Enabled: false
|
23
|
+
Style/SingleLineBlockParams:
|
24
|
+
Enabled: false
|
25
|
+
Style/StringLiterals:
|
26
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Scott Pierce
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
We need a simple way to [anonymize data](https://en.wikipedia.org/wiki/Data_anonymization)
|
4
|
+
for exporting to various departments for analytics and troubleshooting. This tool
|
5
|
+
allows us to annotate a DB schema with special comments that can trigger
|
6
|
+
different data mutations
|
7
|
+
|
8
|
+
FakePipe
|
9
|
+
|
10
|
+
## Workflow with FakePipe
|
11
|
+
|
12
|
+
Here's how FakePipe could work in a project's lifecycle. These steps assume a
|
13
|
+
Postgres database:
|
14
|
+
|
15
|
+
1. Add comment to table column: `COMMENT ON COLUMN user.phone IS 'anon: phone_number';`.
|
16
|
+
2. Pipe DB dump to fake_pipe: `pg_dump my_db | fake_pipe > anon-db-dump.sql`.
|
17
|
+
3. Send `anon-db-dump.sql` to needy people.
|
18
|
+
|
19
|
+
|
20
|
+
## Basic Class Diagram
|
21
|
+
|
22
|
+
TODO This is subject to change!
|
23
|
+
|
24
|
+
+---------------+ +----------------------------------+
|
25
|
+
| Piper | | DatabaseAdapter/PostgresAdapter |
|
26
|
+
| * stdin | | * match comments |
|
27
|
+
| * stdout +-------> * match dml |
|
28
|
+
| * main loop | | * delegate.on_cell(name, cell) |
|
29
|
+
| * def on_cell | | |
|
30
|
+
| | +----------------------------------+
|
31
|
+
| |
|
32
|
+
| | +-----------------+
|
33
|
+
| | | Mutator |
|
34
|
+
| +-------> * phone_number |
|
35
|
+
+---------------+ | * email |
|
36
|
+
| |
|
37
|
+
+-----------------+
|
38
|
+
|
39
|
+
# Comment Dialect
|
40
|
+
|
41
|
+
Schema columns comments are in [YAML format](http://www.yaml.org/start.html).
|
42
|
+
Using some of its optional quoting, it can look very much like JSON. The reason
|
43
|
+
it was chosen over JSON is the optional quotes. That means the following
|
44
|
+
syntax will resolve to the same Ruby definition:
|
45
|
+
|
46
|
+
```
|
47
|
+
---
|
48
|
+
color: red
|
49
|
+
width: 100
|
50
|
+
```
|
51
|
+
|
52
|
+
```
|
53
|
+
{color: "red", width: 100}
|
54
|
+
```
|
55
|
+
|
56
|
+
For single options, the quotes can be omitted: `color: red`.
|
57
|
+
|
58
|
+
Any keys unknown by FakePipe will be ignored. So annotations from other systems
|
59
|
+
shouldn't interfere. We do hope the abbreviated YAML syntax is simple to parse
|
60
|
+
by all systems.
|
61
|
+
|
62
|
+
## Currently Supported FakePipe Methods
|
63
|
+
|
64
|
+
To get a current list try running `rake methods` from terminal.
|
65
|
+
|
66
|
+
```sh
|
67
|
+
$ rake methods
|
68
|
+
anon: email # Faker email
|
69
|
+
anon: md5 # MD5 hash of cell contents
|
70
|
+
anon: phone_number # Faker::PhoneNumber with digits only
|
71
|
+
```
|
72
|
+
|
73
|
+
TODO clean up README. The following is default stock from `bundle gem ...`
|
74
|
+
|
75
|
+
|
76
|
+
# Decisions
|
77
|
+
- 2016-06-08
|
78
|
+
- parsing SQL file is okay for now. Reconsider using a temp DB when
|
79
|
+
foreign keys need to be scrambled
|
80
|
+
- MD5sum foreign keys
|
81
|
+
|
82
|
+
|
83
|
+
## Installation
|
84
|
+
|
85
|
+
Add this line to your application's Gemfile:
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
gem 'fake_pipe'
|
89
|
+
```
|
90
|
+
|
91
|
+
And then execute:
|
92
|
+
|
93
|
+
$ bundle
|
94
|
+
|
95
|
+
Or install it yourself as:
|
96
|
+
|
97
|
+
$ gem install fake_pipe
|
98
|
+
|
99
|
+
## Usage
|
100
|
+
|
101
|
+
|
102
|
+
## Development
|
103
|
+
|
104
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
105
|
+
`rake spec` to run the tests. You can also run `bin/console` for an interactive
|
106
|
+
prompt that will allow you to experiment.
|
107
|
+
|
108
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
109
|
+
release a new version, update the version number in `version.rb`, and then run
|
110
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
111
|
+
git commits and tags, and push the `.gem` file to
|
112
|
+
[rubygems.org](https://rubygems.org).
|
113
|
+
|
114
|
+
## Contributing
|
115
|
+
|
116
|
+
Bug reports and pull requests are welcome on GitHub at
|
117
|
+
https://github.com/centro/fake_pipe.
|
118
|
+
|
119
|
+
|
120
|
+
## License
|
121
|
+
|
122
|
+
The gem is available as open source under the terms of the [MIT
|
123
|
+
License](http://opensource.org/licenses/MIT).
|
124
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
|
6
|
+
task default: :spec
|
7
|
+
|
8
|
+
desc 'List supported FakePipe methods'
|
9
|
+
task :methods do
|
10
|
+
require 'fake_pipe'
|
11
|
+
methods = FakePipe::Mutator.list_with_comments
|
12
|
+
longest_name = methods.map(&:first).max_by(&:size)
|
13
|
+
puts methods.map { |m, c| "anon: #{m.ljust(longest_name.size)} #{c}" }
|
14
|
+
end
|
data/bin/console
ADDED
data/bin/setup
ADDED
data/exe/fake_pipe
ADDED
data/fake_pipe.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fake_pipe/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'fake_pipe'
|
8
|
+
spec.version = FakePipe::VERSION
|
9
|
+
spec.authors = ['Scott Pierce']
|
10
|
+
spec.email = ['ddrscott@gmail.com']
|
11
|
+
|
12
|
+
spec.summary = 'db_dump.sql | fake_pipe > fake_dump.sql'
|
13
|
+
spec.homepage = 'https://github.com/centro/fake_pipe'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
+
spec.bindir = 'exe'
|
18
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
|
21
|
+
spec.add_dependency 'activesupport'
|
22
|
+
spec.add_dependency 'faker'
|
23
|
+
spec.add_development_dependency 'bundler', '~> 1.11'
|
24
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
25
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
26
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module FakePipe
|
2
|
+
# Catch all text block.
|
3
|
+
# Generic base state while a more interesting text block is not present.
|
4
|
+
class AnyBlock < TextBlock
|
5
|
+
|
6
|
+
def match_start_text(line)
|
7
|
+
true
|
8
|
+
end
|
9
|
+
|
10
|
+
def start_text?
|
11
|
+
true
|
12
|
+
end
|
13
|
+
|
14
|
+
def end_text?(line)
|
15
|
+
true
|
16
|
+
end
|
17
|
+
|
18
|
+
def parse(line)
|
19
|
+
line
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module FakePipe
  # Provides a helper migration method to set a valid fake_pipe comment.
  # Currently this only supports the following DB adapter types:
  #   postgres (adapter name `postgresql`)
  #
  # To support more, implement a private `execute_comment_<dialect>` method,
  # where `<dialect>` is the downcased `connection.adapter_name`.
  #
  # NOTE(review): relies on `connection`, `execute` and `reversible` being
  # supplied by the including class - presumably an ActiveRecord migration;
  # confirm against callers.
  module Commenter
    # Generates a fake_pipe comment for the given table and column.
    # This uses `reversible`, so it should be safe to use within a `change`
    # style migration.
    #
    # @param [String] table to apply the comment
    # @param [String] column to apply the comment
    # @param [String] mutator strategy to apply
    # @raise [RuntimeError] when `mutator` is not in `Mutator.list`
    # @raise [NotImplementedError] when the connection's dialect has no
    #   `execute_comment_<dialect>` handler
    def anonymize_comment(table, column, mutator)
      validate_mutator!(mutator)
      comment_updater.call(table: table, column: column, mutator: mutator)
    end

    private

    # Fails fast so an unknown mutator never reaches the database comment.
    def validate_mutator!(mutator)
      Mutator.list.include?(mutator) or
        raise "Mutator #{mutator} is not valid. Try one of: #{Mutator.list}"
    end

    # @return [Method] the handler bound to the current connection's dialect
    def comment_updater
      meth = "execute_comment_#{dialect}"
      # `true` makes respond_to? see the private handlers defined below.
      if respond_to?(meth, true)
        method(meth)
      else
        raise NotImplementedError,
              "DB dialect `#{dialect}` not supported. Try one of: #{supported_dialects}"
      end
    end

    # Sets the comment on the way up and clears it (IS NULL) on the way down.
    def execute_comment_postgresql(table:, column:, mutator:)
      reversible do |dir|
        dir.up do
          execute "COMMENT ON COLUMN #{table}.#{column} IS #{formatted_comment(mutator)};"
        end
        dir.down do
          execute "COMMENT ON COLUMN #{table}.#{column} IS NULL;"
        end
      end
    end

    # @return [String] the quoted SQL literal `anon: <mutator>`
    def formatted_comment(mutator)
      escape_string("anon: #{mutator}")
    end

    def escape_string(text)
      connection.quote(text)
    end

    # Lists every dialect that has an `execute_comment_<dialect>` handler.
    #
    # The handlers are private, and `methods` alone only reports public and
    # protected methods, so `private_methods` has to be scanned as well;
    # otherwise the "Try one of" hint is always empty.
    #
    # @return [Array<String>] sorted dialect names, e.g. ["postgresql"]
    def supported_dialects
      (methods + private_methods)
        .map { |m| m[/^execute_comment_(.*)$/, 1] }
        .compact
        .uniq
        .sort
    end

    # TODO: There's got to be a better way
    def dialect
      connection.adapter_name.downcase.to_s
    end
  end
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
module FakePipe
|
2
|
+
# This class handles mapping between a configured mutation such as
|
3
|
+
# 'phone_number' and the logic to change the data.
|
4
|
+
#
|
5
|
+
# To create a new mutable named configuration create a method prefixed
|
6
|
+
# with `mutate_`. The method will receive the original cell value and is
|
7
|
+
# expected to return the mutated value. Please add comment to the mutate
|
8
|
+
# method. The comment is used by `rake methods` to get a listing of all
|
9
|
+
# possible mutations.
|
10
|
+
module Mutator
|
11
|
+
module_function
|
12
|
+
|
13
|
+
def mutate(name, cell)
|
14
|
+
mutator_method = "mutate_#{name}"
|
15
|
+
if respond_to? mutator_method
|
16
|
+
public_send(mutator_method, cell)
|
17
|
+
else
|
18
|
+
raise "Mutator named `#{name}` not found. Try one of these: #{list.join(', ')}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def list
|
23
|
+
@list ||= public_methods
|
24
|
+
.map { |m| m.to_s[/^mutate_(\w+)$/, 1] }
|
25
|
+
.select(&:present?)
|
26
|
+
.sort
|
27
|
+
end
|
28
|
+
|
29
|
+
# Utility method for outputting available mutators.
|
30
|
+
# Only require method source here.
|
31
|
+
# Currently used by a `rake methods`.
|
32
|
+
def list_with_comments
|
33
|
+
require 'method_source'
|
34
|
+
list.map { |short| [short, public_method("mutate_#{short}").comment.strip] }
|
35
|
+
end
|
36
|
+
|
37
|
+
# Faker::PhoneNumber with punctuation and extensions
|
38
|
+
def mutate_phone_number(_)
|
39
|
+
Faker::PhoneNumber.phone_number
|
40
|
+
end
|
41
|
+
|
42
|
+
# Faker::PhoneNumber 10-digits only
|
43
|
+
def mutate_clean_phone_number(_)
|
44
|
+
Faker::PhoneNumber.phone_number.gsub(/\D|(^1)/, '')[0, 10]
|
45
|
+
end
|
46
|
+
|
47
|
+
# Faker email
|
48
|
+
def mutate_email(_)
|
49
|
+
Faker::Internet.email
|
50
|
+
end
|
51
|
+
|
52
|
+
# Faker::Internet.user_name
|
53
|
+
def mutate_user_name(_)
|
54
|
+
Faker::Internet.user_name
|
55
|
+
end
|
56
|
+
|
57
|
+
# Faker::Internet.url
|
58
|
+
def mutate_url(_)
|
59
|
+
Faker::Internet.url
|
60
|
+
end
|
61
|
+
|
62
|
+
# MD5 hash of cell contents
|
63
|
+
def mutate_md5(cell)
|
64
|
+
cell ? Digest::MD5.base64digest(cell) : cell
|
65
|
+
end
|
66
|
+
|
67
|
+
# Faker::Address.street_address
|
68
|
+
def mutate_address_line_1(_)
|
69
|
+
Faker::Address.street_address
|
70
|
+
end
|
71
|
+
|
72
|
+
# Faker::Address.secondary_address
|
73
|
+
def mutate_address_line_2(_)
|
74
|
+
Faker::Address.secondary_address
|
75
|
+
end
|
76
|
+
|
77
|
+
# Faker::Address.country
|
78
|
+
def mutate_address_country(_)
|
79
|
+
Faker::Address.country
|
80
|
+
end
|
81
|
+
|
82
|
+
# Faker::Address.city
|
83
|
+
def mutate_address_city(_)
|
84
|
+
Faker::Address.city
|
85
|
+
end
|
86
|
+
|
87
|
+
# Faker::Address.state
|
88
|
+
def mutate_address_state(_)
|
89
|
+
Faker::Address.state
|
90
|
+
end
|
91
|
+
|
92
|
+
# Faker::Address.postcode
|
93
|
+
def mutate_address_postcode(_)
|
94
|
+
Faker::Address.postcode
|
95
|
+
end
|
96
|
+
|
97
|
+
# Faker::Company.name
|
98
|
+
def mutate_company_name(_)
|
99
|
+
Faker::Company.name
|
100
|
+
end
|
101
|
+
|
102
|
+
# Faker::Company.catch_phrase
|
103
|
+
def mutate_company_catch_phrase(_)
|
104
|
+
Faker::Company.catch_phrase
|
105
|
+
end
|
106
|
+
|
107
|
+
# an empty curly brace '{}' - good for json object and array fields
|
108
|
+
def mutate_empty_curly(_)
|
109
|
+
'{}'
|
110
|
+
end
|
111
|
+
|
112
|
+
# an empty bracket '[]' - good for json::array objects
|
113
|
+
def mutate_empty_bracket(_)
|
114
|
+
'[]'
|
115
|
+
end
|
116
|
+
|
117
|
+
# an empty String
|
118
|
+
def mutate_empty_string(_)
|
119
|
+
''
|
120
|
+
end
|
121
|
+
|
122
|
+
# Faker::Lorem.paragraph
|
123
|
+
def mutate_lorem_paragraph(_)
|
124
|
+
Faker::Lorem.paragraph
|
125
|
+
end
|
126
|
+
|
127
|
+
# Faker::Lorem.word
|
128
|
+
def mutate_lorem_word(_)
|
129
|
+
Faker::Lorem.word
|
130
|
+
end
|
131
|
+
|
132
|
+
# Faker::Lorem.sentence
|
133
|
+
def mutate_lorem_sentence(_)
|
134
|
+
Faker::Lorem.sentence
|
135
|
+
end
|
136
|
+
|
137
|
+
# Faker::Name.first_name
|
138
|
+
def mutate_first_name(_)
|
139
|
+
Faker::Name.first_name
|
140
|
+
end
|
141
|
+
|
142
|
+
# Faker::Name.last_name
|
143
|
+
def mutate_last_name(_)
|
144
|
+
Faker::Name.last_name
|
145
|
+
end
|
146
|
+
|
147
|
+
# Faker::Name.full_name
|
148
|
+
def mutate_full_name(_)
|
149
|
+
Faker::Name.name
|
150
|
+
end
|
151
|
+
|
152
|
+
# Faker::PhoneNumber.extension
|
153
|
+
def mutate_phone_ext(_)
|
154
|
+
Faker::PhoneNumber.extension
|
155
|
+
end
|
156
|
+
|
157
|
+
# bcrypt password as 'password'
|
158
|
+
def mutate_bcrypt_password(_)
|
159
|
+
'400$8$2d$f6ed5a490c441958$67f59aa61bc617849a3280b5e80f78607e53b5aa5807a44ddbc53e804e2e2a99'
|
160
|
+
end
|
161
|
+
|
162
|
+
# bcrypt salt used to generate password
|
163
|
+
def mutate_bcrypt_salt(_)
|
164
|
+
'au6lOASvp17AGsqkmE7'
|
165
|
+
end
|
166
|
+
|
167
|
+
ALPHABET = ('A'..'Z').to_a
|
168
|
+
DIGITS = ('0'..'9').to_a
|
169
|
+
# Six random uppercase letters followed by four random numbers - ex. 'ABCDEF1234'
|
170
|
+
def mutate_ugcid(_)
|
171
|
+
(ALPHABET.sample(6) + DIGITS.sample(4)).join
|
172
|
+
end
|
173
|
+
|
174
|
+
# UUID
|
175
|
+
def mutate_uuid(_)
|
176
|
+
SecureRandom.uuid
|
177
|
+
end
|
178
|
+
|
179
|
+
# Reopen class to define aliases on module_function
|
180
|
+
class << self
|
181
|
+
alias mutate_guid mutate_uuid
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module FakePipe
|
2
|
+
# This class coordinates between all the text blocks.
|
3
|
+
# The class is initialized with some input io, an output io, and an adapter.
|
4
|
+
#
|
5
|
+
# ## Adapter
|
6
|
+
# An adapter is created by creating a module directly under fake_pipe. The
|
7
|
+
# module must respond to `text_blocks` which will return all the `TextBlock`
|
8
|
+
# classes needed to call `on_config` and `on_cell`.
|
9
|
+
#
|
10
|
+
# ## General IO Flow
|
11
|
+
# The `run` method is probably the most interesting. It streams in `each_line`
|
12
|
+
# of the input `io` and will output either the same line or the parsed line
|
13
|
+
# from the `TextBlock#parse`. It's the responsibility of the TextBlock to
|
14
|
+
# extract relevant table, column, cell information. This class will
|
15
|
+
# keep track of when to mutate cell.
|
16
|
+
#
|
17
|
+
# Most lines from `io` should be passed directly to the `outputter`
|
18
|
+
class Piper
|
19
|
+
attr_accessor :io, :configs, :outputter, :text_blocks
|
20
|
+
|
21
|
+
# @param [String] adapter should be a module file directly under the 'fake_pipe' path
|
22
|
+
def initialize(io:, outputter:, adapter:)
|
23
|
+
self.configs = {}
|
24
|
+
self.io = io
|
25
|
+
self.outputter = outputter
|
26
|
+
register_adapter(adapter)
|
27
|
+
end
|
28
|
+
|
29
|
+
def register_adapter(adapter)
|
30
|
+
adapter_module = "fake_pipe/#{adapter}"
|
31
|
+
require adapter_module
|
32
|
+
adapter_class = adapter_module.camelize.constantize
|
33
|
+
self.text_blocks = adapter_class.text_blocks.map do |block_class|
|
34
|
+
block_class.new(delegate: self)
|
35
|
+
end
|
36
|
+
|
37
|
+
# AnyBlock is a catch all and needs to come last.
|
38
|
+
text_blocks << AnyBlock.new(delegate: self)
|
39
|
+
end
|
40
|
+
|
41
|
+
def run
|
42
|
+
# used to track which text_block is currently in use
|
43
|
+
current_block = text_blocks.last
|
44
|
+
io.each_line do |line|
|
45
|
+
if current_block.end_text?(line)
|
46
|
+
output line
|
47
|
+
current_block = detect_and_start_text_block(line)
|
48
|
+
elsif configs[current_block.table] # optimization: only parse of the text block has a table configuration
|
49
|
+
output current_block.parse(line)
|
50
|
+
else # otherwise output the original line
|
51
|
+
output line
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Check if a line is the beginning of a new text block.
|
57
|
+
# When it is, trigger the callbacks so the text block
|
58
|
+
# can initialize itself.
|
59
|
+
def detect_and_start_text_block(line)
|
60
|
+
text_blocks.detect do |block|
|
61
|
+
matcher = block.match_start_text(line)
|
62
|
+
if matcher && block.start_text?
|
63
|
+
block.on_start_text(matcher, line)
|
64
|
+
true # result for detect
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Delegate method to be called by the #text_blocks to get config information
|
70
|
+
# from a table's column
|
71
|
+
def on_config(table:, column:, config:)
|
72
|
+
table = (configs[table] ||= {})
|
73
|
+
table[column] = config
|
74
|
+
end
|
75
|
+
|
76
|
+
# @return [String] The mutated cell or the original if there's no config for
|
77
|
+
# the table/column.
|
78
|
+
def on_cell(table:, column:, cell:)
|
79
|
+
if config = configs[table].try(:[], column)
|
80
|
+
Mutator.mutate(config, cell)
|
81
|
+
else
|
82
|
+
cell
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
# Simple wrapper to print to the configured #outputter
|
89
|
+
def output(text)
|
90
|
+
outputter.puts text
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'fake_pipe/text_block'
|
2
|
+
|
3
|
+
module FakePipe
  module Postgres
    # Finds Postgres `COMMENT ON COLUMN` statements and hands the `anon`
    # setting found in the comment text to the delegate via `on_config`.
    class CommentBlock < TextBlock
      # The table capture runs up to the LAST dot before the column, so a
      # schema-qualified statement (`public.users.phone`) yields table
      # `public.users` and column `phone`. That is the same table string the
      # COPY pattern (`COPY (?<table>\S+) ...`) captures, which is what the
      # delegate uses as the lookup key. Unqualified names match as before.
      # Limitation: a quoted column name that itself contains a dot is not
      # supported.
      self.start_pattern = /^COMMENT ON COLUMN (?<table>\S+)\.(?<column>[^.\s]+) IS '(?<comment>.*)';/
      self.end_pattern = /^$/

      # Remembers the table of the matched statement and registers its config.
      def on_start_text(match, _line)
        self.table = match[:table]
        parse_config(match)
      end

      # Decodes the comment as YAML and reports its `anon` value.
      # Comments that are not a YAML mapping (plain prose, unparsable text,
      # disallowed tags) are ignored so that unrelated column comments do not
      # abort the whole pipe.
      def parse_config(match)
        # consolidate escaped single quotes
        comment = match[:comment].gsub("''", "'")
        data = load_comment(comment)
        return unless data

        # give the config back to the delegate
        delegate.on_config(
          table: match[:table],
          column: match[:column],
          config: data[:anon]
        )
      end

      def parse(*)
        raise '`parse` should not be called to extract config from comments. ' \
          ' Try inspecting the PG dump format for changes. Comments are normally all in a single line.'
      end

      private

      # The comment text comes straight from the dump, so it is loaded with
      # `safe_load` (no arbitrary object tags).
      #
      # @return [ActiveSupport::HashWithIndifferentAccess, nil] nil when the
      #   comment is not a mapping or cannot be parsed safely
      def load_comment(comment)
        data = YAML.safe_load(comment)
        data.with_indifferent_access if data.is_a?(Hash)
      rescue Psych::Exception
        nil
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'fake_pipe/text_block'
|
2
|
+
|
3
|
+
module FakePipe
  module Postgres
    # Finds COPY... text blocks inside of `pg_dump` output and passes each
    # non-null cell to the delegate for possible mutation.
    class CopyBlock < TextBlock
      # Cell separator used when splitting and re-joining data rows.
      DELIMITER = "\t"

      # Separator of the column list inside `COPY table (a, b, c)`.
      COLUMN_SPLITTER = /,\s*/

      self.start_pattern = /^COPY (?<table>\S+) \((?<columns>[^)]*)\) FROM stdin;/
      self.end_pattern = /^\\\.$/

      # Records the table and its column order from the COPY header.
      #
      # @return [Hash<Integer,String>] zero-based column position to column
      #   name: { 0 => column_name }
      def on_start_text(match, _line)
        @table = match[:table]
        @columns = match[:columns].split(COLUMN_SPLITTER)
        @column_idx = Hash[@columns.map.with_index { |name, i| [i, name] }]
      end

      # Postgres COPY format is NOT CSV.
      # > https://www.postgresql.org/docs/9.1/static/sql-copy.html
      #
      # The line terminator is removed before splitting and re-appended
      # afterwards. Otherwise the last cell carries the "\n": a `\N` null
      # marker in the last column would not be recognised, and mutators that
      # derive from the value (e.g. md5) would see "value\n" instead of
      # "value".
      #
      # @param [String] line one data row of the COPY block
      # @return [String] the row, maybe mutated by `delegate.on_cell`, with
      #   its original line terminator
      def parse(line)
        body = line.chomp
        terminator = line[body.length..-1]

        # limit -1 keeps trailing empty cells that chomp has now exposed
        faked = body.split(DELIMITER, -1).map.with_index do |cell, i|
          if cell.blank? || cell == '\N'
            # Don't acknowledge null cells
            cell
          else
            delegate.on_cell(table: @table, column: @column_idx[i], cell: cell)
          end
        end
        faked.join(DELIMITER) + terminator
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module FakePipe
|
2
|
+
# Blocks are considered between the #start_text? and #end_text?
|
3
|
+
# Any lines in between the start and end are passed to #parse
|
4
|
+
#
|
5
|
+
# @start_match is available in case there's information in there #parse could
|
6
|
+
# find interesting.
|
7
|
+
class TextBlock
|
8
|
+
|
9
|
+
class_attribute :start_pattern
|
10
|
+
class_attribute :end_pattern
|
11
|
+
|
12
|
+
attr_accessor :delegate, :start_match, :table
|
13
|
+
|
14
|
+
def initialize(delegate:)
|
15
|
+
self.delegate = delegate
|
16
|
+
end
|
17
|
+
|
18
|
+
def match_start_text(line)
|
19
|
+
start_pattern && (self.start_match = start_pattern.match(line))
|
20
|
+
end
|
21
|
+
|
22
|
+
def start_text?
|
23
|
+
!start_match.nil?
|
24
|
+
end
|
25
|
+
|
26
|
+
def end_text?(line)
|
27
|
+
end_pattern && !!end_pattern.match(line)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Override to do something interesting with the initial match or line
|
31
|
+
def on_start_text(_match, _line)
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse(_line)
|
35
|
+
raise NotImplementedError, "#{self} doesn't implement `parse`."
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/fake_pipe.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# external gems
|
2
|
+
require 'faker'
|
3
|
+
require 'active_support/all'
|
4
|
+
require 'csv'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
# internal
|
8
|
+
require 'fake_pipe/version'
|
9
|
+
require 'fake_pipe/text_block'
|
10
|
+
require 'fake_pipe/any_block'
|
11
|
+
require 'fake_pipe/piper'
|
12
|
+
require 'fake_pipe/mutator'
|
13
|
+
|
14
|
+
module FakePipe
|
15
|
+
module_function
|
16
|
+
|
17
|
+
def pipe(io:, outputter: $stdout, adapter: 'postgres')
|
18
|
+
piper = FakePipe::Piper.new(io: io, outputter: outputter, adapter: adapter)
|
19
|
+
piper.run
|
20
|
+
end
|
21
|
+
end
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fake_pipe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Scott Pierce
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-10-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faker
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.11'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.11'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.0'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- ddrscott@gmail.com
|
86
|
+
executables:
|
87
|
+
- fake_pipe
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
93
|
+
- ".rubocop.yml"
|
94
|
+
- ".travis.yml"
|
95
|
+
- Gemfile
|
96
|
+
- LICENSE.txt
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/setup
|
101
|
+
- exe/fake_pipe
|
102
|
+
- fake_pipe.gemspec
|
103
|
+
- lib/fake_pipe.rb
|
104
|
+
- lib/fake_pipe/any_block.rb
|
105
|
+
- lib/fake_pipe/commenter.rb
|
106
|
+
- lib/fake_pipe/mutator.rb
|
107
|
+
- lib/fake_pipe/piper.rb
|
108
|
+
- lib/fake_pipe/postgres.rb
|
109
|
+
- lib/fake_pipe/postgres/comment_block.rb
|
110
|
+
- lib/fake_pipe/postgres/copy_block.rb
|
111
|
+
- lib/fake_pipe/text_block.rb
|
112
|
+
- lib/fake_pipe/version.rb
|
113
|
+
homepage: https://github.com/centro/fake_pipe
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.6.13
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: db_dump.sql | fake_pipe > fake_dump.sql
|
137
|
+
test_files: []
|