django-db-anonymiser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_db_anonymiser/database_sanitizer/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/__main__.py +68 -0
- django_db_anonymiser/database_sanitizer/config.py +373 -0
- django_db_anonymiser/database_sanitizer/dump/__init__.py +47 -0
- django_db_anonymiser/database_sanitizer/dump/mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/dump/postgres.py +170 -0
- django_db_anonymiser/database_sanitizer/sanitizers/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/sanitizers/constant.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/derived.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/string.py +31 -0
- django_db_anonymiser/database_sanitizer/sanitizers/times.py +11 -0
- django_db_anonymiser/database_sanitizer/sanitizers/user.py +145 -0
- django_db_anonymiser/database_sanitizer/session.py +146 -0
- django_db_anonymiser/database_sanitizer/tests/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/tests/test_config.py +256 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump.py +123 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py +177 -0
- django_db_anonymiser/database_sanitizer/tests/test_main.py +91 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py +29 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py +19 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py +44 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py +18 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py +67 -0
- django_db_anonymiser/database_sanitizer/tests/test_session.py +36 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py +112 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py +86 -0
- django_db_anonymiser/database_sanitizer/utils/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/utils/mysql.py +161 -0
- django_db_anonymiser/database_sanitizer/utils/postgres.py +145 -0
- django_db_anonymiser/db_anonymiser/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/faker.py +91 -0
- django_db_anonymiser/db_anonymiser/management/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +105 -0
- django_db_anonymiser/db_anonymiser/tests/test_command.py +90 -0
- django_db_anonymiser/db_anonymiser/tests/test_faker.py +116 -0
- django_db_anonymiser-0.1.0.dist-info/METADATA +98 -0
- django_db_anonymiser-0.1.0.dist-info/RECORD +40 -0
- django_db_anonymiser-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
from collections import namedtuple
|
|
7
|
+
|
|
8
|
+
import mock
|
|
9
|
+
import pytest
|
|
10
|
+
from six.moves.urllib import parse as urlparse
|
|
11
|
+
|
|
12
|
+
from ..config import Configuration
|
|
13
|
+
from ..dump import postgres as dump_postgres
|
|
14
|
+
from ..dump.postgres import parse_column_names, parse_values, sanitize
|
|
15
|
+
from ..utils.postgres import decode_copy_value
|
|
16
|
+
|
|
17
|
+
MOCK_PG_DUMP_OUTPUT = b"""
|
|
18
|
+
--- Fake PostgreSQL database dump
|
|
19
|
+
|
|
20
|
+
COMMENT ON SCHEMA "public" IS 'standard public schema';
|
|
21
|
+
|
|
22
|
+
CREATE TABLE "public"."test" (
|
|
23
|
+
"id" integer NOT NULL,
|
|
24
|
+
"created_at" timestamp with time zone NOT NULL,
|
|
25
|
+
"notes" character varying(255) NOT NULL
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
COPY "public"."test" ("id", "created_at", "notes") FROM stdin;
|
|
29
|
+
1\t2018-01-01 00:00:00\tTest data 1
|
|
30
|
+
2\t2018-01-02 00:00:00\tTest data 2
|
|
31
|
+
3\t2018-01-03 00:00:00\tTest data 3
|
|
32
|
+
\\.
|
|
33
|
+
|
|
34
|
+
--- Final line after `COPY` statement
|
|
35
|
+
""".strip()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
INVALID_MOCK_PG_DUMP_OUTPUT = b"""
|
|
39
|
+
--- Fake PostgreSQL database dump
|
|
40
|
+
|
|
41
|
+
COMMENT ON SCHEMA "public" IS 'standard public schema';
|
|
42
|
+
|
|
43
|
+
COPY "public"."test" ("id", "created_at", "notes") FROM stdin;
|
|
44
|
+
1\t2018-01-01 00:00:00 Test data 1
|
|
45
|
+
2\t2018-01-02 00:00:00 Test data 2
|
|
46
|
+
3\t2018-01-03 00:00:00 Test data 3
|
|
47
|
+
\\.
|
|
48
|
+
|
|
49
|
+
--- Final line after `COPY` statement
|
|
50
|
+
""".strip()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def create_mock_popen(mock_pg_dump_output):
    """
    Build a stand-in for ``subprocess.Popen`` that serves canned dump bytes.

    :param mock_pg_dump_output: bytes exposed through the fake ``stdout``
    :return: callable taking ``(cmd_args, stdout)`` and returning an object
        whose ``stdout`` attribute is a fresh ``io.BytesIO`` over those bytes
    """
    def mock_popen(cmd_args, stdout):
        # Both arguments exist only to match the call signature; the canned
        # output is served whatever command was requested.
        pipe_cls = namedtuple("mock_pipe", ("stdout",))
        return pipe_cls(stdout=io.BytesIO(mock_pg_dump_output))

    return mock_popen
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_sanitize():
    """The mock dump keeps its comment lines and gets ``test.notes`` replaced."""
    def replace_notes(value):
        return "Sanitized"

    url = urlparse.urlparse("postgres://localhost/test")
    config = Configuration()
    config.sanitizers["test.notes"] = replace_notes
    fake_popen = create_mock_popen(MOCK_PG_DUMP_OUTPUT)

    with mock.patch("subprocess.Popen", side_effect=fake_popen):
        dump_output_lines = list(sanitize(url, config))

    for expected_line in (
        "--- Fake PostgreSQL database dump",
        "--- Final line after `COPY` statement",
        "2\t2018-01-02 00:00:00\tSanitized",
    ):
        assert expected_line in dump_output_lines
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_skip_table_rows():
    """Rows of a table listed in ``skip_rows_for_tables`` are left out."""
    # Everything from the mock dump except the COPY statement, its data rows
    # and the terminator line.
    expected_lines = [
        '--- Fake PostgreSQL database dump',
        '',
        'COMMENT ON SCHEMA "public" IS \'standard public schema\';',
        '',
        'CREATE TABLE "public"."test" (',
        '"id" integer NOT NULL,',
        '"created_at" timestamp with time zone NOT NULL,',
        '"notes" character varying(255) NOT NULL',
        ');',
        '',
        '',
        '--- Final line after `COPY` statement'
    ]

    url = urlparse.urlparse("postgres://localhost/test")
    config = Configuration()
    config.skip_rows_for_tables.append('test')
    fake_popen = create_mock_popen(MOCK_PG_DUMP_OUTPUT)

    with mock.patch("subprocess.Popen", side_effect=fake_popen):
        output = list(sanitize(url, config))

    assert output == expected_lines
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_sanitizer_invalid_input():
    """Sanitizing the invalid mock dump raises ``ValueError``."""
    def replace_notes(value):
        return "Sanitized"

    url = urlparse.urlparse("postgres://localhost/test")

    config = Configuration()
    config.sanitizers["test.notes"] = replace_notes

    fake_popen = create_mock_popen(INVALID_MOCK_PG_DUMP_OUTPUT)
    with mock.patch("subprocess.Popen", side_effect=fake_popen), \
            pytest.raises(ValueError):
        # sanitize() yields lazily, so it has to be consumed with list()
        # for the error to surface.
        list(sanitize(url, config))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def test_sanitizer_invalid_scheme():
    """A URL with the ``http`` scheme is rejected with ``ValueError``."""
    http_url = urlparse.urlparse("http://localhost/test")
    with pytest.raises(ValueError):
        # Consume the result so a lazily raised error is triggered.
        for _ in sanitize(http_url, None):
            pass
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@pytest.mark.parametrize(
    ["text", "expected_column_names"],
    [
        ("\"test\"", ("test",)),
        ("\"test\",\"test\"", ("test", "test")),
        ("\"test\", \"test\"", ("test", "test")),
    ],
)
def test_parse_column_names(text, expected_column_names):
    """Quoted, comma separated column names parse into a tuple of names."""
    parsed = parse_column_names(text)
    assert parsed == expected_column_names
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@pytest.mark.parametrize(
    ["text", "expected_values"],
    [
        ("Test", ("Test",)),
        ("Test\tTest", ("Test", "Test")),
        ("Test\tTest\t", ("Test", "Test", "")),
        ("\\N", (None,)),
    ],
)
def test_parse_values(text, expected_values):
    """Tab separated COPY rows parse into tuples; ``\\N`` becomes ``None``."""
    parsed = parse_values(text)
    assert parsed == expected_values
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@pytest.mark.parametrize(
    'config_type', ['no-config', 'empty-config', 'single-column-config'])
@pytest.mark.parametrize('data_label', ['ok', 'invalid'])
def test_optimizations(config_type, data_label):
    """
    Check how often ``decode_copy_value`` is called for each configuration.

    With no configuration or an empty one the dump is expected to pass
    through unchanged without any decoding; with a sanitizer on
    ``test.notes`` the decoder is expected to run once per table row, and
    the invalid dump is expected to raise ``ValueError``.
    """
    # Expected decoder calls: zero unless a sanitizer is configured.
    decoder_call_count = 0
    if config_type == 'no-config':
        config = None
    else:
        config = Configuration()
        if config_type != 'empty-config':
            assert config_type == 'single-column-config'
            config.sanitizers["test.notes"] = (lambda x: x)
            decoder_call_count = 3  # Number of rows in test table

    dumps_by_label = {
        'ok': MOCK_PG_DUMP_OUTPUT,
        'invalid': INVALID_MOCK_PG_DUMP_OUTPUT,
    }
    data = dumps_by_label[data_label]

    should_raise = (
        data_label == 'invalid'
        and config_type == 'single-column-config')

    url = urlparse.urlparse("postgres://localhost/test")
    with mock.patch("subprocess.Popen", side_effect=create_mock_popen(data)), \
            mock.patch.object(dump_postgres, 'decode_copy_value') as decoder:
        # Keep the real decoding behaviour while counting the calls.
        decoder.side_effect = decode_copy_value
        if should_raise:
            with pytest.raises(ValueError):
                list(sanitize(url, config))
        else:
            expected_output = data.decode('utf-8').splitlines()
            assert list(sanitize(url, config)) == expected_output
            assert decoder.call_count == decoder_call_count
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import unicode_literals
|
|
2
|
+
|
|
3
|
+
import mock
|
|
4
|
+
import pytest
|
|
5
|
+
import six
|
|
6
|
+
|
|
7
|
+
from database_sanitizer import __main__
|
|
8
|
+
|
|
9
|
+
main = __main__.main
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@mock.patch.object(__main__, 'run')
def test_main_without_args(mocked_run, capsys):
    """Without a URL, ``main`` exits with code 2 and prints usage to stderr."""
    with pytest.raises(SystemExit) as excinfo:
        main(['SANI'])
    assert excinfo.value.code == 2

    # The wording of the missing-argument error differs between Python 2
    # and Python 3.
    if six.PY3:
        error_line = 'SANI: error: the following arguments are required: url'
    else:
        error_line = 'SANI: error: too few arguments'

    out, err = capsys.readouterr()
    assert out == ''
    assert err.splitlines() == [
        'usage: SANI [-h] [--config CONFIG] [--output OUTPUT] url',
        error_line,
    ]
    assert not mocked_run.called
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@mock.patch.object(__main__, 'run')
def test_main_with_url(mocked_run, capsys):
    """With only a URL, ``run`` gets that URL and no configuration."""
    main(['SANI', 'some://url'])

    # Nothing may be printed to either stream.
    out, err = capsys.readouterr()
    assert out == ''
    assert err == ''

    # run() must have been called with keyword arguments only.
    positional, keywords = mocked_run.call_args
    assert positional == ()
    assert set(keywords) == {'config', 'output', 'url'}
    assert keywords['config'] is None
    assert keywords['url'] == 'some://url'
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.mark.parametrize('optname', ['-c', '--config'])
@mock.patch.object(__main__, 'run')
@mock.patch.object(__main__, 'Configuration')
def test_main_with_config(mocked_conf, mocked_run, capsys, optname):
    """The config option loads the file and passes the result to ``run``."""
    main(['SANI', optname, 'config_file.yml', 'some://url'])

    # Nothing may be printed to either stream.
    out, err = capsys.readouterr()
    assert out == ''
    assert err == ''

    # The configuration must come from Configuration.from_file.
    from_file = mocked_conf.from_file
    fromfile_args, fromfile_kwargs = from_file.call_args
    assert fromfile_args == ('config_file.yml',)
    assert fromfile_kwargs == {}

    # run() must receive the loaded configuration and the URL.
    positional, keywords = mocked_run.call_args
    assert positional == ()
    assert set(keywords) == {'config', 'output', 'url'}
    assert keywords['config'] == from_file.return_value
    assert keywords['url'] == 'some://url'
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pytest.mark.parametrize('optname', ['-o', '--output'])
@mock.patch.object(__main__, 'run')
@mock.patch.object(__main__, 'open')
def test_main_with_output(mocked_open, mocked_run, capsys, optname):
    """The output option opens the file for writing and hands it to ``run``."""
    main(['SANI', optname, 'output_file.sql', 'some://url'])

    # Nothing may be printed to either stream.
    out, err = capsys.readouterr()
    assert out == ''
    assert err == ''

    # The output file must have been opened in write mode.
    open_args, open_kwargs = mocked_open.call_args
    assert open_args == ('output_file.sql', 'w')
    assert open_kwargs == {}

    # run() must receive the opened file and the URL, but no configuration.
    positional, keywords = mocked_run.call_args
    assert positional == ()
    assert set(keywords) == {'config', 'output', 'url'}
    assert keywords['config'] is None
    assert keywords['output'] == mocked_open.return_value
    assert keywords['url'] == 'some://url'
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from database_sanitizer.sanitizers import constant
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_sanitize_null():
    """``sanitize_null`` returns ``None`` for every sample input."""
    for value in (None, '', 'whatever', 'test'):
        assert constant.sanitize_null(value) is None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_sanitize_invalid_django_password():
    """``sanitize_invalid_django_password`` returns ``'!'`` for every sample."""
    for value in (None, '', 'whatever', 'test'):
        assert constant.sanitize_invalid_django_password(value) == '!'
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_sanitize_empty_json_dict():
    """``sanitize_empty_json_dict`` returns ``'{}'`` for every sample input."""
    for value in (None, '', 'whatever', 'test'):
        assert constant.sanitize_empty_json_dict(value) == '{}'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_sanitize_empty_json_list():
    """``sanitize_empty_json_list`` returns ``'[]'`` for every sample input."""
    for value in (None, '', 'whatever', 'test'):
        assert constant.sanitize_empty_json_list(value) == '[]'
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from database_sanitizer import session
|
|
2
|
+
from database_sanitizer.sanitizers import derived
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def setup_module():
    """Reset the session with a fixed key before this module's tests run."""
    fixed_key = b'not-so-secret-key'
    session.reset(fixed_key)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_sanitize_uuid4():
    """``sanitize_uuid4`` keeps None/empty and maps the samples as pinned."""
    assert derived.sanitize_uuid4(None) is None

    # Expected values are pinned for the fixed session key.
    cases = (
        ('', ''),
        ('0', 'e3a5862f-cffb-4d89-ab3e-5563b27e287a'),
        ('00000000000000000000000000000000',
         '00000000-0000-0000-0000-000000000000'),
        ('00000000-0000-0000-0000-000000000000',
         '00000000-0000-0000-0000-000000000000'),
        ('e3a5862f-cffb-4d89-ab3e-5563b27e287a',
         '88b0225e-6090-459a-999d-9b3a3ab28c53'),
    )
    for original, sanitized in cases:
        assert derived.sanitize_uuid4(original) == sanitized
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import mock
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from ..sanitizers.string import sanitize_empty, sanitize_random, sanitize_zfill
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.parametrize(
    ["input_value", "expected_output"],
    [
        ("foo", ""),
        ("bar", ""),
        ("", ""),
        (" ", ""),
        (None, None),
    ],
)
def test_sanitize_empty(input_value, expected_output):
    """``sanitize_empty`` blanks every string and leaves ``None`` alone."""
    sanitized = sanitize_empty(input_value)
    assert sanitized == expected_output
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.parametrize(
    "input_value,expected_output",
    (
        ("foo", "000"),
        ("test test", "000000000"),
        ("", ""),
        (None, None),
    ),
)
def test_sanitize_zfill(input_value, expected_output):
    """``sanitize_zfill`` output must equal the expected value for each case."""
    # The comparison used to be returned rather than asserted, so this test
    # passed whatever sanitize_zfill produced.
    assert sanitize_zfill(input_value) == expected_output
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@mock.patch('random.choice', return_value='x')
def test_sanitize_random(mocked_random_choice):
    """With ``random.choice`` pinned to ``'x'`` the output is all ``x``."""
    assert sanitize_random(None) is None

    cases = (
        ('', ''),
        ('a', 'x'),
        ('hello', 'xxxxx'),
        ('hello world', 'xxxxxxxxxxx'),
    )
    for original, sanitized in cases:
        assert sanitize_random(original) == sanitized
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
|
|
3
|
+
import mock
|
|
4
|
+
|
|
5
|
+
from database_sanitizer.sanitizers import times
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _FakeDateTime(datetime.datetime):
    """``datetime.datetime`` subclass whose ``now()`` is a fixed instant."""

    @staticmethod
    def now():
        # Always report noon on 2018-01-01.
        frozen = datetime.datetime(
            year=2018, month=1, day=1, hour=12, minute=0, second=0)
        return frozen
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@mock.patch('random.randint', return_value=42005)
@mock.patch.object(datetime, 'datetime', _FakeDateTime)
def test_sanitize_random_past_timestamp(randint_mock):
    """With the clock and ``random.randint`` pinned, the result is fixed."""
    expected = '2018-01-01T11:59:17.995000'
    sanitized = times.sanitize_random_past_timestamp('old')
    assert sanitized == expected
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from database_sanitizer import session
|
|
2
|
+
from database_sanitizer.sanitizers import user
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def setup_module():
    """Reset the session with a fixed key before this module's tests run."""
    fixed_key = b'not-so-secret-key'
    session.reset(fixed_key)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_sanitize_email():
    """``sanitize_email`` keeps None/empty and maps the samples as pinned."""
    assert user.sanitize_email(None) is None

    # Expected values are pinned for the fixed session key. The repeated
    # address checks that the same input gives the same output again.
    cases = (
        ('', ''),
        ('test@example.com', 'zoe.burke@xce13103b.sanitized.net'),
        ('test2@example.com', 'Melanie.Pratt@x4feb7f40.sanitized.net'),
        ('test@example.com', 'zoe.burke@xce13103b.sanitized.net'),
        ('test3@example.com', 'irene.archer@x3d2e92ec.sanitized.net'),
        (' test3@example.com ', 'irene.archer@x3d2e92ec.sanitized.net'),
    )
    for original, sanitized in cases:
        assert user.sanitize_email(original) == sanitized
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_sanitize_username():
    """``sanitize_username`` keeps None/empty and maps the samples as pinned."""
    assert user.sanitize_username(None) is None

    # Expected values are pinned for the fixed session key.
    cases = (
        ('', ''),
        ('John.Doe', 'billyda979417'),
        ('JaneSmith', 'helena34a7a0b'),
        ('john-smith', 'arthurc5a84ec'),
        ('john-smith ', 'douglas8d3b8d5e'),
        ('john smith ', 'katyfdab90cc'),
    )
    for original, sanitized in cases:
        assert user.sanitize_username(original) == sanitized
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_sanitize_full_name_en_gb():
    """``sanitize_full_name_en_gb`` maps the sample names as pinned."""
    assert user.sanitize_full_name_en_gb(None) is None

    # The last two inputs differ only in case and trailing space and are
    # expected to give the same name.
    cases = (
        ('', ''),
        ('John Doe', 'Francis Walker'),
        ('Jane Smith', 'Declan Burke'),
        ('John Smith', 'Lawrence Norton'),
        ('john smith ', 'Lawrence Norton'),
    )
    for original, sanitized in cases:
        assert user.sanitize_full_name_en_gb(original) == sanitized
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_sanitize_given_name_en_gb():
    """``sanitize_given_name_en_gb`` maps the sample names as pinned."""
    assert user.sanitize_given_name_en_gb(None) is None

    # The last two inputs differ only in case and surrounding spaces and
    # are expected to give the same name.
    cases = (
        ('', ''),
        ('John', 'Cheryl'),
        ('Jane', 'Andrea'),
        ('Foo bar', 'Elliott'),
        (' Foo BAR ', 'Elliott'),
    )
    for original, sanitized in cases:
        assert user.sanitize_given_name_en_gb(original) == sanitized
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_sanitize_surname_en_gb():
    """``sanitize_surname_en_gb`` maps the sample surnames as pinned."""
    assert user.sanitize_surname_en_gb(None) is None

    # The last two inputs differ only in case and trailing space and are
    # expected to give the same surname.
    cases = (
        ('', ''),
        ('Doe', 'Bibi'),
        ('Smith', 'Duffy'),
        ('Anderson', 'Hodgson'),
        ('andersOn ', 'Hodgson'),
    )
    for original, sanitized in cases:
        assert user.sanitize_surname_en_gb(original) == sanitized
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_sanitize_email_resets_on_session_reset():
    """
    After ``session.reset()`` without a key the sanitized e-mail changes.

    The fixed key is restored in a ``finally`` clause so that a failing
    assertion does not leave the session with a different key.
    """
    with_fixed_key = 'zoe.burke@xce13103b.sanitized.net'
    assert user.sanitize_email('test@example.com') == with_fixed_key

    session.reset()
    try:
        assert user.sanitize_email('test@example.com') != with_fixed_key
    finally:
        # Put the fixed key back whether or not the check above passed.
        session.reset(b'not-so-secret-key')
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from database_sanitizer import session
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def setup_module():
    """Reset the session with a fixed key before this module's tests run."""
    fixed_key = b'not-so-secret-key'
    session.reset(fixed_key)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_hash_text_to_int():
    """``hash_text_to_int('hello')`` gives the pinned integer."""
    expected = 4100462238
    hashed = session.hash_text_to_int('hello')
    assert hashed == expected
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_hash_text_to_ints():
    """``hash_text_to_ints`` gives the pinned tuple for ``[4, 8, 16]``."""
    expected = (15, 70, 33129)
    hashed = session.hash_text_to_ints('hello', [4, 8, 16])
    assert hashed == expected
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_hash_text():
    """``hash_text('hello')`` gives the pinned hex digest."""
    expected = (
        'f468169e17f4dd5d7318bd6099a4e657ceb0a978cddb4f3382be0da7121659bb')
    hashed = session.hash_text('hello')
    assert hashed == expected
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_hash_bytes():
    """``hash_bytes(b'hello')`` gives the pinned hex digest."""
    expected = (
        'f468169e17f4dd5d7318bd6099a4e657ceb0a978cddb4f3382be0da7121659bb')
    hashed = session.hash_bytes(b'hello')
    assert hashed == expected
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_get_secret():
    """``get_secret`` returns the key the session was reset with."""
    secret = session.get_secret()
    assert secret == b'not-so-secret-key'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_reset():
    """
    ``reset()`` without a key changes the secret; with a key it sets it.

    The fixed key is restored in a ``finally`` clause so that a failing
    assertion does not leave the session with a different key.
    """
    old_key = session.get_secret()
    try:
        session.reset()
        new_key = session.get_secret()
        assert new_key != old_key
    finally:
        # Put the fixed key back whether or not the check above passed.
        session.reset(b'not-so-secret-key')
    assert session.get_secret() == b'not-so-secret-key'
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from six.moves.urllib import parse as urlparse
|
|
7
|
+
|
|
8
|
+
from ..utils.mysql import (
|
|
9
|
+
decode_mysql_literal,
|
|
10
|
+
decode_mysql_string_literal,
|
|
11
|
+
get_mysqldump_args_and_env_from_url,
|
|
12
|
+
unescape_single_character,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.mark.parametrize(
    "url",
    [
        "mysql://test:test@localhost/test",
        "mysql://localhost:1234/test",
        "mysql://localhost",
    ],
)
def test_get_mysqldump_args_and_env_from_url(url):
    """
    Check the mysqldump arguments and environment built from a URL.

    A URL without a path must raise ``ValueError``; otherwise the fixed
    options, the database name, and any user name and password are checked.
    """
    parsed_url = urlparse.urlparse(url)

    if not parsed_url.path:
        with pytest.raises(ValueError):
            get_mysqldump_args_and_env_from_url(url=parsed_url)
        return

    args, env = get_mysqldump_args_and_env_from_url(url=parsed_url)

    assert isinstance(args, list)
    assert isinstance(env, dict)

    assert len(args) > 0
    for required_option in (
        "--complete-insert",
        "--extended-insert",
        "--net_buffer_length=10240",
    ):
        assert required_option in args
    # The last argument is the URL path without its leading slash.
    assert args[-1] == parsed_url.path[1:]

    if parsed_url.username:
        user_flag_position = args.index("-u")
        assert args[user_flag_position + 1] == parsed_url.username

    if parsed_url.password:
        assert env["MYSQL_PWD"] == parsed_url.password
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.mark.parametrize(
    ["text", "expected_value"],
    [
        ("NULL", None),
        ("TRUE", True),
        ("FALSE", False),
        ("12", 12),
        ("12.5", 12.5),
        ("'test'", "test"),
    ],
)
def test_decode_mysql_literal(text, expected_value):
    """Each sample MySQL literal decodes to the expected Python value."""
    decoded = decode_mysql_literal(text)
    assert decoded == expected_value
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_decode_mysql_literal_invalid_input():
    """Decoding the text ``ERROR`` raises ``ValueError``."""
    invalid_literal = "ERROR"
    with pytest.raises(ValueError):
        decode_mysql_literal(invalid_literal)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@pytest.mark.parametrize(
    ["text", "expected_output"],
    [
        ("'test'", "test"),
        ("'test\\ntest'", "test\ntest"),
        ("'\\0'", "\000"),
        ("'foo", None),
        ("foo'", None),
        ("foo", None),
    ],
)
def test_decode_mysql_string_literal(text, expected_output):
    """
    Quoted string literals decode; the other samples raise.

    An expected output of ``None`` marks an input that must raise
    ``AssertionError``.
    """
    if expected_output is not None:
        assert decode_mysql_string_literal(text) == expected_output
        return

    with pytest.raises(AssertionError):
        decode_mysql_string_literal(text)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.parametrize(
    ["text", "expected_output"],
    [
        ("\\\\", "\\"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\0", "\000"),
        ("\\Z", "\032"),
        ("\\'", "'"),
        ('\\"', '"'),
    ],
)
def test_unescape_single_character(text, expected_output):
    """Each escape sequence is unescaped to its single character."""
    class MockRegexpMatch(object):
        """Minimal match stand-in exposing only ``group(0)``."""

        def __init__(self, text):
            self.text = text

        def group(self, index):
            # Only the whole match may be requested.
            assert index == 0
            return self.text

    fake_match = MockRegexpMatch(text)
    assert unescape_single_character(fake_match) == expected_output
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from ..utils.postgres import (
|
|
8
|
+
DECODE_MAP,
|
|
9
|
+
POSTGRES_COPY_NULL_VALUE,
|
|
10
|
+
decode_copy_value,
|
|
11
|
+
encode_copy_value,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.mark.parametrize(
    ["input_value", "expected_value"],
    [
        ("", ""),
        (POSTGRES_COPY_NULL_VALUE, None),
        ("Test", "Test"),
        ("\\\\", "\\"),
        ("\\b", "\b"),
        ("\\f", "\f"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\t", "\t"),
        ("\\v", "\v"),
        ("\\xff", "\xff"),
        ("\\123", "\123"),
        ("Test\\r\\nTest", "Test\r\nTest"),
    ],
)
def test_decode_copy_value(input_value, expected_value):
    """Each COPY-encoded sample decodes to the expected value."""
    decoded = decode_copy_value(input_value)
    assert decoded == expected_value
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.parametrize(
    ["input_value", "expected_value"],
    [
        ("", ""),
        (None, POSTGRES_COPY_NULL_VALUE),
        ("Test", "Test"),
        ("\\", "\\\\"),
        ("\b", "\\b"),
        ("\f", "\\f"),
        ("\n", "\\n"),
        ("\r", "\\r"),
        ("\t", "\\t"),
        ("\v", "\\v"),
        ("\xff", "\xff"),
        ("\123", "\123"),
        ("Test\r\nTest", "Test\\r\\nTest"),
    ],
)
def test_encode_copy_value(input_value, expected_value):
    """Each sample value encodes to the expected COPY text."""
    encoded = encode_copy_value(input_value)
    assert encoded == expected_value
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_invalid_escape_sequence():
    """A lone backslash and ``\\X`` both raise ``ValueError`` when decoded."""
    for bad_value in ("\\", "\\X"):
        with pytest.raises(ValueError):
            decode_copy_value(bad_value)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_decode_map_contents():
    """Spot-check ``DECODE_MAP`` entries, its exclusions and its size."""
    expected_entries = (
        ('\\b', '\b'),
        ('\\n', '\n'),
        ('\\t', '\t'),
        ('\\\\', '\\'),
        ('\\0', '\0'),
        ('\\74', '\74'),
        ('\\x0', '\0'),
        ('\\xa', '\x0a'),
        ('\\xA', '\x0a'),
        ('\\x00', '\0'),
        ('\\xa3', '\xa3'),
        ('\\xA3', '\xa3'),
        ('\\xAb', '\xab'),
        ('\\xaB', '\xab'),
        ('\\xff', '\xff'),
    )
    for escape_sequence, character in expected_entries:
        assert DECODE_MAP[escape_sequence] == character

    assert '\\' not in DECODE_MAP, "Unterminated escape is not mapped"
    assert '\\z' not in DECODE_MAP, "Invalid escape sequences are not mapped"

    assert len(DECODE_MAP) == 1097
|
|
File without changes
|