django-db-anonymiser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_db_anonymiser/database_sanitizer/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/__main__.py +68 -0
- django_db_anonymiser/database_sanitizer/config.py +373 -0
- django_db_anonymiser/database_sanitizer/dump/__init__.py +47 -0
- django_db_anonymiser/database_sanitizer/dump/mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/dump/postgres.py +170 -0
- django_db_anonymiser/database_sanitizer/sanitizers/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/sanitizers/constant.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/derived.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/string.py +31 -0
- django_db_anonymiser/database_sanitizer/sanitizers/times.py +11 -0
- django_db_anonymiser/database_sanitizer/sanitizers/user.py +145 -0
- django_db_anonymiser/database_sanitizer/session.py +146 -0
- django_db_anonymiser/database_sanitizer/tests/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/tests/test_config.py +256 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump.py +123 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py +177 -0
- django_db_anonymiser/database_sanitizer/tests/test_main.py +91 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py +29 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py +19 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py +44 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py +18 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py +67 -0
- django_db_anonymiser/database_sanitizer/tests/test_session.py +36 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py +112 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py +86 -0
- django_db_anonymiser/database_sanitizer/utils/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/utils/mysql.py +161 -0
- django_db_anonymiser/database_sanitizer/utils/postgres.py +145 -0
- django_db_anonymiser/db_anonymiser/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/faker.py +91 -0
- django_db_anonymiser/db_anonymiser/management/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +105 -0
- django_db_anonymiser/db_anonymiser/tests/test_command.py +90 -0
- django_db_anonymiser/db_anonymiser/tests/test_faker.py +116 -0
- django_db_anonymiser-0.1.0.dist-info/METADATA +98 -0
- django_db_anonymiser-0.1.0.dist-info/RECORD +40 -0
- django_db_anonymiser-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from collections import namedtuple
|
|
4
|
+
|
|
5
|
+
import mock
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from .. import config
|
|
9
|
+
from ..config import Configuration, ConfigurationError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@mock.patch.object(config, 'open')
@mock.patch('yaml.safe_load')
def test_from_file(mocked_yaml_load, mocked_open):
    """Check that ``from_file`` opens the path as ``'rb'`` and passes the
    opened file object to ``yaml.safe_load``.
    """
    mocked_yaml_load.return_value = {}

    Configuration.from_file('filename.yml')

    assert mocked_open.call_args == mock.call('filename.yml', 'rb')
    # The object handed to YAML is whatever the ``with open(...)`` yields.
    file_handle = mocked_open.return_value.__enter__.return_value
    assert mocked_yaml_load.call_args == mock.call(file_handle)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_load_config_data_must_be_dict():
    """An empty dict loads fine; a string as ``config_data`` is rejected."""
    configuration = Configuration()

    # A dict (even an empty one) is accepted without raising.
    configuration.load({})

    with pytest.raises(ConfigurationError):
        configuration.load(config_data="test")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_load_dump_extra_parameters():
    """Exercise ``load_dump_extra_parameters`` with missing, malformed and
    valid ``config.extra_parameters`` sections.
    """
    configuration = Configuration()

    def assert_params(mysqldump, pg_dump):
        assert configuration.mysqldump_params == mysqldump
        assert configuration.pg_dump_params == pg_dump

    # No "config" section at all: the defaults stay in place.
    configuration.load_dump_extra_parameters({})
    assert_params(["--single-transaction"], [])

    # "config" must not be a plain string.
    with pytest.raises(ConfigurationError):
        configuration.load_dump_extra_parameters({"config": "test"})

    # An empty "config" section also keeps the defaults.
    configuration.load_dump_extra_parameters({"config": {}})
    assert_params(["--single-transaction"], [])

    # Each of these "extra_parameters" values must be rejected.
    invalid_extra_parameters = (
        "test",
        [True],
        {"mysqldump": "hernekeitto"},
        {"pg_dump": "viina"},
    )
    for extra_parameters in invalid_extra_parameters:
        with pytest.raises(ConfigurationError):
            configuration.load_dump_extra_parameters(
                {"config": {"extra_parameters": extra_parameters}})

    # Well-formed lists replace the parameters of both dump tools.
    configuration.load_dump_extra_parameters({
        "config": {
            "extra_parameters": {
                "mysqldump": ["--double-transaction"],
                "pg_dump": ["--exclude-table=something"],
            },
        },
    })
    assert_params(["--double-transaction"], ["--exclude-table=something"])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_load_addon_packages():
    """Exercise ``load_addon_packages`` with missing, malformed and valid
    ``config.addons`` sections.
    """
    configuration = Configuration()

    # Without a "config" section no addon packages are registered.
    configuration.load_addon_packages({})
    assert configuration.addon_packages == []

    # "config" must not be a plain string.
    with pytest.raises(ConfigurationError):
        configuration.load_addon_packages({"config": "test"})

    # An empty "config" section still yields no addon packages.
    configuration.load_addon_packages({"config": {}})
    assert configuration.addon_packages == []

    # Both a string and a list holding a non-string are rejected.
    for invalid_addons in ("test", [True]):
        with pytest.raises(ConfigurationError):
            configuration.load_addon_packages(
                {"config": {"addons": invalid_addons}})

    addon_names = ["test1", "test2", "test3"]
    configuration.load_addon_packages({"config": {"addons": [
        "test1",
        "test2",
        "test3",
    ]}})
    assert configuration.addon_packages == addon_names
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_load_sanitizers():
    """Exercise ``load_sanitizers`` with malformed and valid strategies.

    ``find_sanitizer`` is replaced by a stub returning an identity callable
    so that only the strategy parsing is under test.
    """
    config = Configuration()

    # The strategy must not be a plain string ...
    with pytest.raises(ConfigurationError):
        config.load_sanitizers({"strategy": "test"})

    # ... and this table entry given as an arbitrary string is rejected too.
    with pytest.raises(ConfigurationError):
        config.load_sanitizers({"strategy": {"test": "test"}})

    def mock_find_sanitizer(*args):
        return lambda value: value

    # Patch the class object this module imported instead of a dotted
    # string path, so the stub is installed on the very class under test
    # no matter which package name it was imported through.
    with mock.patch.object(Configuration, "find_sanitizer",
                           side_effect=mock_find_sanitizer):
        # A column value of ``True`` is rejected.
        with pytest.raises(ConfigurationError):
            config.load_sanitizers({"strategy": {"table1": {"column1": True}}})

        config.load_sanitizers({"strategy": {
            "table1": {
                "column1": None,
                "column2": "test.test",
            },
            "table2": {
                "column1": "test.test",
            },
            "table3": None,
        }})

    # Columns mapped to ``None`` get no sanitizer; named ones do.
    assert "table1.column1" not in config.sanitizers
    assert "table1.column2" in config.sanitizers
    assert "table2.column1" in config.sanitizers
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_table_skip_rows_configuration():
    """A table mapped to ``"skip_rows"`` is recorded in
    ``skip_rows_for_tables`` while other tables still get sanitizers.
    """
    config = Configuration()

    with pytest.raises(ConfigurationError):
        config.load_sanitizers({"strategy": "test"})

    def mock_find_sanitizer(*args):
        return lambda value: value

    # Patch the class object this module imported instead of a dotted
    # string path, so the stub is installed on the very class under test
    # no matter which package name it was imported through.
    with mock.patch.object(Configuration, "find_sanitizer",
                           side_effect=mock_find_sanitizer):

        config.load_sanitizers({"strategy": {
            "table1": "skip_rows",
            "table2": {
                "column1": "test",
            }
        }})

    assert "table2.column1" in config.sanitizers
    assert "table1" in config.skip_rows_for_tables
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_find_sanitizer():
    """Exercise ``find_sanitizer`` against stubbed module lookups.

    ``find_sanitizer_from_module`` is patched with stubs that assert which
    module and function names are requested and decide which lookup
    succeeds.  The stubs are installed with ``mock.patch.object`` on the
    imported ``Configuration`` class rather than through a dotted string
    path, so they always land on the class under test regardless of the
    package name it was imported through.
    """
    config = Configuration()

    # "test" alone is not an acceptable sanitizer name.
    with pytest.raises(ConfigurationError):
        config.find_sanitizer("test")

    def mock_find_sanitizer_from_module1(module_name, function_name):
        assert module_name == "sanitizers.test"
        assert function_name == "sanitize_test"
        return lambda value: value

    # Case 1: the first requested module already provides the function.
    with mock.patch.object(Configuration, "find_sanitizer_from_module",
                           side_effect=mock_find_sanitizer_from_module1):
        assert config.find_sanitizer("test.test") is not None

    def mock_find_sanitizer_from_module2(module_name, function_name):
        assert module_name in ("sanitizers.test", "addon.test")
        assert function_name == "sanitize_test"
        if module_name.startswith("addon."):
            return lambda value: value
        else:
            return None

    # Case 2: only the configured addon package provides the function.
    with mock.patch.object(Configuration, "find_sanitizer_from_module",
                           side_effect=mock_find_sanitizer_from_module2):
        config.addon_packages = ("addon",)
        assert config.find_sanitizer("test.test") is not None

    def mock_find_sanitizer_from_module3(module_name, function_name):
        assert module_name in (
            "sanitizers.test",
            "addon.test",
            "database_sanitizer.sanitizers.test",
        )
        assert function_name == "sanitize_test"
        if module_name.startswith("database_sanitizer."):
            return lambda value: value
        else:
            return None

    # Case 3: only the "database_sanitizer." module provides the function.
    with mock.patch.object(Configuration, "find_sanitizer_from_module",
                           side_effect=mock_find_sanitizer_from_module3):
        assert config.find_sanitizer("test.test") is not None

    def mock_find_sanitizer_from_module4(module_name, function_name):
        return None

    # Case 4: no module provides the function, which is an error.
    with mock.patch.object(Configuration, "find_sanitizer_from_module",
                           side_effect=mock_find_sanitizer_from_module4):
        with pytest.raises(ConfigurationError):
            config.find_sanitizer("test.test")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def test_find_sanitizer_from_module():
    """Exercise ``find_sanitizer_from_module`` with ``importlib.import_module``
    replaced by stubs: a failing import, a module whose attribute is
    ``None``, a callable attribute and a non-callable attribute.
    """
    fake_module = namedtuple("mock_module", ("test",))

    def failing_import(module_name):
        assert module_name == "test"
        raise ImportError("Should be catched")

    def importer_for(attribute):
        # Build an import stub returning a module-like object whose
        # ``test`` attribute is *attribute*.
        def fake_import(module_name):
            assert module_name == "test"
            return fake_module(test=attribute)
        return fake_import

    def lookup(importer):
        with mock.patch("importlib.import_module", side_effect=importer):
            return Configuration.find_sanitizer_from_module("test", "test")

    # An ImportError results in ``None`` instead of propagating.
    assert lookup(failing_import) is None

    # An attribute that is ``None`` also results in ``None``.
    assert lookup(importer_for(None)) is None

    # A callable attribute gives a non-``None`` result.
    assert lookup(importer_for(lambda value: value)) is not None

    # A non-callable attribute (a string here) is a configuration error.
    with pytest.raises(ConfigurationError):
        lookup(importer_for("test"))
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_sanitize():
    """``sanitize`` applies the sanitizer registered under ``table.column``
    and returns the value unchanged when none is registered.
    """
    configuration = Configuration()

    def shout(value):
        return value.upper()

    def backwards(value):
        return value[::-1]

    configuration.sanitizers["a.a"] = shout
    configuration.sanitizers["a.b"] = backwards

    # Column "c" has no sanitizer, so the value passes through untouched.
    expectations = (("a", "TEST"), ("b", "tset"), ("c", "test"))
    for column, expected in expectations:
        assert configuration.sanitize("a", column, "test") == expected
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from io import BytesIO, StringIO
|
|
3
|
+
|
|
4
|
+
import mock
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from database_sanitizer import dump
|
|
8
|
+
from database_sanitizer.config import Configuration
|
|
9
|
+
|
|
10
|
+
# Expected ``subprocess.Popen`` call parameters, keyed by database URL.
EXPECTED_POPEN_KWARGS = {
    'mysql://User:Pass@HostName/Db': {
        'args': [
            'mysqldump',
            '--complete-insert',
            '--extended-insert',
            '--net_buffer_length=10240',
            '-h', 'hostname',
            '-u', 'User',
            'Db',
            '--single-transaction',
        ],
        'env': {'MYSQL_PWD': 'Pass'},
        'stdout': subprocess.PIPE,
    },
    'postgres:///Db': {
        'args': (
            'pg_dump',
            '--encoding=utf-8',
            '--quote-all-identifiers',
            '--dbname', 'postgres:///Db',
        ),
        'stdout': subprocess.PIPE,
    },
}

# The other PostgreSQL-flavoured URLs expect the same call as the
# ``postgres`` entry, with "postgres" in the arguments spelled "postgresql".
for url in ['postgresql:///Db', 'postgis:///Db']:
    alias_kwargs = dict(EXPECTED_POPEN_KWARGS['postgres:///Db'])
    alias_kwargs['args'] = tuple(
        arg.replace('postgres', 'postgresql')
        for arg in alias_kwargs['args'])
    EXPECTED_POPEN_KWARGS[url] = alias_kwargs
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@pytest.mark.parametrize('url', list(EXPECTED_POPEN_KWARGS))
@mock.patch('subprocess.Popen')
def test_run(mocked_popen, url):
    """``dump.run`` starts the dump tool with the call parameters listed
    for *url* in ``EXPECTED_POPEN_KWARGS``.
    """
    mocked_popen.return_value.stdout = BytesIO(b'INPUT DUMP')
    output = StringIO()
    config = None
    dump.run(url, output, config)

    # Work on a copy: popping 'args' from the shared module-level fixture
    # would make any later run of this test in the same process fail.
    expected_popen_kwargs = dict(EXPECTED_POPEN_KWARGS[url])
    (popen_args, popen_kwargs) = mocked_popen.call_args
    # The command may be passed positionally or as the ``args`` keyword;
    # accept either by moving it to the matching side of the comparison.
    expected_popen_args = (
        (expected_popen_kwargs.pop('args'),) if popen_args else ())
    assert popen_args == expected_popen_args
    assert popen_kwargs == expected_popen_kwargs
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@mock.patch('subprocess.Popen')
def test_run_with_mysql_extra_params(mocked_popen):
    """Configured ``mysqldump`` extra parameters end up on the command line
    in place of the default ``--single-transaction``.
    """
    mocked_popen.return_value.stdout = BytesIO(b'INPUT DUMP')

    configuration = Configuration()
    configuration.load({
        "config": {
            "extra_parameters": {"mysqldump": ["--double-transaction"]},
        },
    })

    dump.run("mysql://User:Pass@HostName/Db", StringIO(), configuration)

    expected_command = [
        'mysqldump',
        '--complete-insert',
        '--extended-insert',
        '--net_buffer_length=10240',
        '-h', 'hostname',
        '-u', 'User',
        'Db',
        '--double-transaction',
    ]
    expected_kwargs = {
        'env': {'MYSQL_PWD': 'Pass'},
        'stdout': subprocess.PIPE,
    }

    popen_args, popen_kwargs = mocked_popen.call_args
    # The command may arrive positionally or as the ``args`` keyword.
    if popen_args:
        assert popen_args == (expected_command,)
    else:
        expected_kwargs['args'] = expected_command
    assert popen_kwargs == expected_kwargs
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@mock.patch('subprocess.Popen')
def test_run_with_pg_dump_extra_params(mocked_popen):
    """Configured ``pg_dump`` extra parameters are appended to the
    ``pg_dump`` command line.
    """
    mocked_popen.return_value.stdout = BytesIO(b'INPUT DUMP')

    configuration = Configuration()
    configuration.load({
        "config": {
            "extra_parameters": {"pg_dump": ["--exclude-table=something"]},
        },
    })

    dump.run("postgres:///Db", StringIO(), configuration)

    expected_command = (
        'pg_dump',
        '--encoding=utf-8',
        '--quote-all-identifiers',
        '--dbname', 'postgres:///Db',
        '--exclude-table=something',
    )
    expected_kwargs = {'stdout': subprocess.PIPE}

    popen_args, popen_kwargs = mocked_popen.call_args
    # The command may arrive positionally or as the ``args`` keyword.
    if popen_args:
        assert popen_args == (expected_command,)
    else:
        expected_kwargs['args'] = expected_command
    assert popen_kwargs == expected_kwargs
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@mock.patch('subprocess.Popen')
def test_run_unknown_scheme(mocked_popen):
    """An unsupported URL scheme raises ``ValueError`` and no subprocess is
    started.
    """
    with pytest.raises(ValueError) as raised:
        dump.run('unknown:///db', None, None)

    message = str(raised.value)
    assert message == "Unsupported database scheme: 'unknown'"
    assert not mocked_popen.called
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from six.moves.urllib import parse as urlparse
|
|
9
|
+
|
|
10
|
+
from ..config import Configuration
|
|
11
|
+
from ..dump.mysql import (
|
|
12
|
+
parse_column_names,
|
|
13
|
+
parse_values,
|
|
14
|
+
sanitize,
|
|
15
|
+
sanitize_from_stream,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Well-formed fake dump: table `test` with columns `id`, `created_at` and
# `notes`, filled with three rows.  The backslash-newline pairs are line
# continuations inside the literal, so the whole INSERT statement is a
# single line of the dump.
MOCK_MYSQLDUMP_OUTPUT = b"""
--- Fake MySQL database dump

DROP TABLE IF EXISTS `test`;

CREATE TABLE `test` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`created_at` date NOT NULL,
`notes` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES \
(1,'2018-01-01','Test data 1'),\
(2,'2018-01-02','Test data 2'),\
(3,'2018-01-03','Test data 3');

--- Final line after `INSERT INTO` statement.
"""

# Same dump, but the first row's `notes` value contains the bytes
# \xe2\x80\xa8, the UTF-8 encoding of U+2028 (LINE SEPARATOR).
MOCK_MYSQLDUMP_OUTPUT_WITH_U2028 = b"""
--- Fake MySQL database dump

DROP TABLE IF EXISTS `test`;

CREATE TABLE `test` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`created_at` date NOT NULL,
`notes` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES \
(1,'2018-01-01','Test \xe2\x80\xa8 data 1'),\
(2,'2018-01-02','Test data 2'),\
(3,'2018-01-03','Test data 3');

--- Final line after `INSERT INTO` statement.
"""


# Malformed dump: the INSERT names three columns but every row carries only
# one value.
INVALID_MOCK_MYSQLDUMP_OUTPUT = b"""
--- Fake MySQL database dump

DROP TABLE IF EXISTS `test`;

INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES (1),(2),(3);

--- Final line after `INSERT INTO` statement.
"""
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_sanitize_wrong_scheme():
    """``sanitize`` raises ``ValueError`` for an ``http`` URL."""
    http_url = urlparse.urlparse("http://localhost/test")

    with pytest.raises(ValueError):
        # Consume the result in case ``sanitize`` is lazy.
        for _ in sanitize(http_url, None):
            pass
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_sanitize_from_stream():
    """Lines outside the INSERT pass through; every `notes` value in the
    INSERT statement is replaced by the registered sanitizer's output.
    """
    configuration = Configuration()
    configuration.sanitizers["test.notes"] = lambda value: "Sanitized"

    sanitized_lines = list(
        sanitize_from_stream(io.BytesIO(MOCK_MYSQLDUMP_OUTPUT), configuration))

    # The whole INSERT statement is expected as one output line.
    expected_insert = (
        "INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES "
        "(1,'2018-01-01','Sanitized'),"
        "(2,'2018-01-02','Sanitized'),"
        "(3,'2018-01-03','Sanitized');"
    )

    for expected_line in (
        "--- Fake MySQL database dump",
        "--- Final line after `INSERT INTO` statement.",
        expected_insert,
    ):
        assert expected_line in sanitized_lines
|
|
89
|
+
|
|
90
|
+
def test_sanitize_with_u2028_from_stream():
    """Same expectations as for the plain dump, but the input contains a
    U+2028 character inside one `notes` value.
    """
    configuration = Configuration()
    configuration.sanitizers["test.notes"] = lambda value: "Sanitized"

    sanitized_lines = list(sanitize_from_stream(
        io.BytesIO(MOCK_MYSQLDUMP_OUTPUT_WITH_U2028), configuration))

    # The whole INSERT statement is expected as one output line.
    expected_insert = (
        "INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES "
        "(1,'2018-01-01','Sanitized'),"
        "(2,'2018-01-02','Sanitized'),"
        "(3,'2018-01-03','Sanitized');"
    )

    for expected_line in (
        "--- Fake MySQL database dump",
        "--- Final line after `INSERT INTO` statement.",
        expected_insert,
    ):
        assert expected_line in sanitized_lines
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_skip_table_rows():
    """With `test` listed in ``skip_rows_for_tables`` the INSERT statement
    is dropped from the output while every other line is kept.
    """
    configuration = Configuration()
    configuration.skip_rows_for_tables.append('test')

    sanitized_lines = list(
        sanitize_from_stream(io.BytesIO(MOCK_MYSQLDUMP_OUTPUT), configuration))

    expected_lines = [
        '',
        '--- Fake MySQL database dump',
        '',
        'DROP TABLE IF EXISTS `test`;',
        '',
        'CREATE TABLE `test` (',
        '`id` int(11) NOT NULL AUTO_INCREMENT,',
        '`created_at` date NOT NULL,',
        '`notes` varchar(255) NOT NULL,',
        'PRIMARY KEY (`id`)',
        ') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;',
        '',
        # The INSERT line that sat between these two blanks is gone.
        '',
        '--- Final line after `INSERT INTO` statement.',
    ]
    assert sanitized_lines == expected_lines
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_sanitizer_invalid_input():
    """Sanitizing the malformed dump with a sanitizer registered for
    ``test.notes`` raises ``ValueError``.
    """
    configuration = Configuration()
    configuration.sanitizers["test.notes"] = lambda value: "Sanitized"
    broken_dump = io.BytesIO(INVALID_MOCK_MYSQLDUMP_OUTPUT)

    with pytest.raises(ValueError):
        # Consume the result in case ``sanitize_from_stream`` is lazy.
        for _ in sanitize_from_stream(broken_dump, configuration):
            pass
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.parametrize(
    ("text", "expected_column_names"),
    [
        ("`test`", ("test",)),
        ("`test`, `test`", ("test", "test")),
        ("`test`,`test`", ("test", "test")),
    ],
)
def test_parse_column_names(text, expected_column_names):
    """Backtick-quoted names parse to a tuple, with or without a space
    after the comma.
    """
    parsed_names = parse_column_names(text)
    assert parsed_names == expected_column_names
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@pytest.mark.parametrize(
    ("text", "expected_values"),
    [
        ("('test'),('test')", (("test",), ("test",))),
        ("(1,2),(3,4),", ((1, 2), (3, 4))),
        ("(TRUE),(FALSE),(NULL)", ((True,), (False,), (None,))),
        ("(x')", ()),  # Invalid data
    ],
)
def test_parse_values(text, expected_values):
    """``parse_values`` yields one tuple per parenthesised row and nothing
    for the invalid sample.
    """
    parsed_rows = tuple(parse_values(text))
    assert parsed_rows == expected_values
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@pytest.mark.parametrize('config_type', [
    'no-config', 'empty-config', 'single-column-config'])
@pytest.mark.parametrize('data_label', ['ok', 'invalid'])
def test_optimizations(config_type, data_label):
    """Check pass-through behaviour for every config/data combination.

    Without a config, or with a config that has no sanitizers, the dump
    must come out line for line unchanged, even when it is malformed.
    With an identity sanitizer on ``test.notes`` the well-formed dump is
    still unchanged, while the malformed one raises ``ValueError``.
    """
    if config_type == 'no-config':
        config = None
    else:
        config = Configuration()
        if config_type != 'empty-config':
            # Guard against a typo in the parametrize list above.
            assert config_type == 'single-column-config'
            config.sanitizers["test.notes"] = (lambda x: x)

    data = {
        'ok': MOCK_MYSQLDUMP_OUTPUT,
        'invalid': INVALID_MOCK_MYSQLDUMP_OUTPUT,
    }[data_label]

    # Only the combination that registers a sanitizer and feeds it the
    # malformed dump is expected to fail.
    should_raise = (
        config_type == 'single-column-config'
        and data_label == 'invalid')

    dump_stream = io.BytesIO(data)
    if should_raise:
        with pytest.raises(ValueError):
            list(sanitize_from_stream(dump_stream, config))
    else:
        expected_output = data.decode('utf-8').splitlines()
        result = list(sanitize_from_stream(dump_stream, config))
        assert result == expected_output
|