django-db-anonymiser 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +27 -7
- django_db_anonymiser/db_anonymiser/tests/test_command.py +47 -7
- {django_db_anonymiser-0.2.4.dist-info → django_db_anonymiser-0.3.0.dist-info}/METADATA +7 -5
- {django_db_anonymiser-0.2.4.dist-info → django_db_anonymiser-0.3.0.dist-info}/RECORD +5 -5
- {django_db_anonymiser-0.2.4.dist-info → django_db_anonymiser-0.3.0.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import logging
|
|
3
|
+
from datetime import datetime
|
|
3
4
|
|
|
4
5
|
import boto3
|
|
5
6
|
from django.conf import settings
|
|
@@ -28,16 +29,30 @@ class Command(BaseCommand):
|
|
|
28
29
|
action="store_true",
|
|
29
30
|
help="Generates and logs a presigned URL for the uploaded file.",
|
|
30
31
|
)
|
|
32
|
+
parser.add_argument(
|
|
33
|
+
"--add-timestamp",
|
|
34
|
+
action="store_true",
|
|
35
|
+
help="Add current timestamp to dump file name.",
|
|
36
|
+
)
|
|
31
37
|
|
|
32
38
|
def configure(self):
|
|
33
39
|
self.keep_local_dumpfile = False
|
|
34
40
|
self.skip_s3_upload = False
|
|
35
41
|
self.presign = False
|
|
36
|
-
|
|
42
|
+
base_dump_file_name = settings.DB_ANONYMISER_DUMP_FILE_NAME
|
|
43
|
+
if self.add_timestamp:
|
|
44
|
+
now = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
|
|
45
|
+
base_dump_file_name = f"{now}-{base_dump_file_name}"
|
|
46
|
+
dump_file_name = base_dump_file_name
|
|
47
|
+
if getattr(settings, "DB_ANONYMISER_AWS_STORAGE_KEY", None):
|
|
48
|
+
dump_file_name = (
|
|
49
|
+
f"{settings.DB_ANONYMISER_AWS_STORAGE_KEY}/{dump_file_name}"
|
|
50
|
+
)
|
|
51
|
+
self.dump_file_name = dump_file_name
|
|
37
52
|
self.temporary_dump_location = getattr(
|
|
38
53
|
settings,
|
|
39
54
|
"DB_ANONYMISER_TEMPORARY_DUMP_LOCATION",
|
|
40
|
-
f"/tmp/{
|
|
55
|
+
f"/tmp/{base_dump_file_name}",
|
|
41
56
|
)
|
|
42
57
|
try:
|
|
43
58
|
self.config_location = settings.DB_ANONYMISER_CONFIG_LOCATION
|
|
@@ -62,6 +77,11 @@ class Command(BaseCommand):
|
|
|
62
77
|
|
|
63
78
|
def handle(self, *args, **options):
|
|
64
79
|
logger.info("Starting DB dump and anonymiser")
|
|
80
|
+
|
|
81
|
+
self.add_timestamp = False
|
|
82
|
+
if options["add_timestamp"]:
|
|
83
|
+
self.add_timestamp = True
|
|
84
|
+
|
|
65
85
|
self.configure()
|
|
66
86
|
|
|
67
87
|
if options["keep_local_dumpfile"]:
|
|
@@ -69,7 +89,7 @@ class Command(BaseCommand):
|
|
|
69
89
|
|
|
70
90
|
if options["skip_s3_upload"]:
|
|
71
91
|
self.skip_s3_upload = True
|
|
72
|
-
|
|
92
|
+
|
|
73
93
|
if options["presign"]:
|
|
74
94
|
self.presign = True
|
|
75
95
|
|
|
@@ -104,12 +124,12 @@ class Command(BaseCommand):
|
|
|
104
124
|
self.temporary_dump_location, self.s3_bucket_name, self.dump_file_name
|
|
105
125
|
)
|
|
106
126
|
logger.info("Writing file to S3 complete")
|
|
107
|
-
|
|
127
|
+
|
|
108
128
|
def generate_presigned_url(self):
|
|
109
129
|
presigned = self.s3_client.generate_presigned_url(
|
|
110
|
-
|
|
111
|
-
Params={
|
|
112
|
-
ExpiresIn=600
|
|
130
|
+
"get_object",
|
|
131
|
+
Params={"Bucket": self.s3_bucket_name, "Key": self.dump_file_name},
|
|
132
|
+
ExpiresIn=600,
|
|
113
133
|
)
|
|
114
134
|
logger.info("Presigned URL: %s", presigned)
|
|
115
135
|
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from datetime import datetime
|
|
2
3
|
from unittest.mock import ANY
|
|
3
4
|
from unittest.mock import patch
|
|
4
5
|
|
|
5
6
|
from django.conf import settings
|
|
6
7
|
from django.contrib.auth.models import User
|
|
7
8
|
from django.core.management import call_command
|
|
8
|
-
from django.test import TransactionTestCase
|
|
9
|
+
from django.test import override_settings, TransactionTestCase
|
|
9
10
|
|
|
10
11
|
import boto3
|
|
11
12
|
import pytest
|
|
@@ -13,7 +14,10 @@ import requests
|
|
|
13
14
|
from moto import mock_aws
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
@pytest.mark.skipif(
|
|
17
|
+
@pytest.mark.skipif(
|
|
18
|
+
os.getenv("CIRCLECI") != "true",
|
|
19
|
+
reason="Skipped because test requires real postgres db.",
|
|
20
|
+
)
|
|
17
21
|
@mock_aws
|
|
18
22
|
class TestDumpAndAnonmyiseCommand(TransactionTestCase):
|
|
19
23
|
def setUp(self):
|
|
@@ -91,12 +95,18 @@ class TestDumpAndAnonmyiseCommand(TransactionTestCase):
|
|
|
91
95
|
call_command("dump_and_anonymise", keep_local_dumpfile=True)
|
|
92
96
|
assert not mocked_os_remove.called
|
|
93
97
|
|
|
94
|
-
@patch(
|
|
95
|
-
|
|
98
|
+
@patch(
|
|
99
|
+
"django_db_anonymiser.db_anonymiser.management.commands.dump_and_anonymise.logger"
|
|
100
|
+
)
|
|
101
|
+
@patch(
|
|
102
|
+
"django_db_anonymiser.db_anonymiser.management.commands.dump_and_anonymise.Command.generate_presigned_url"
|
|
103
|
+
)
|
|
96
104
|
@patch(
|
|
97
105
|
"django_db_anonymiser.db_anonymiser.management.commands.dump_and_anonymise.os.remove"
|
|
98
106
|
)
|
|
99
|
-
def test_dump_and_anonymise_no_arguments(
|
|
107
|
+
def test_dump_and_anonymise_no_arguments(
|
|
108
|
+
self, mocked_os_remove, mocked_presign, mocked_logger
|
|
109
|
+
):
|
|
100
110
|
call_command("dump_and_anonymise")
|
|
101
111
|
bucket_contents = self.aws.list_objects(
|
|
102
112
|
Bucket=settings.DB_ANONYMISER_AWS_STORAGE_BUCKET_NAME
|
|
@@ -107,10 +117,15 @@ class TestDumpAndAnonmyiseCommand(TransactionTestCase):
|
|
|
107
117
|
)
|
|
108
118
|
mocked_presign.assert_not_called()
|
|
109
119
|
mocked_logger.info.assert_any_call("DB dump and anonymiser was successful!")
|
|
110
|
-
mocked_logger.info.assert_any_call(
|
|
120
|
+
mocked_logger.info.assert_any_call(
|
|
121
|
+
"Writing anonymised dumpfile to temporary location %s",
|
|
122
|
+
settings.DB_ANONYMISER_DUMP_FILE_NAME,
|
|
123
|
+
)
|
|
111
124
|
mocked_logger.info.assert_any_call("Writing file to S3 complete")
|
|
112
125
|
|
|
113
|
-
@patch(
|
|
126
|
+
@patch(
|
|
127
|
+
"django_db_anonymiser.db_anonymiser.management.commands.dump_and_anonymise.logger"
|
|
128
|
+
)
|
|
114
129
|
def test_dump_and_anonymise_with_presign(self, mocked_logger):
|
|
115
130
|
call_command("dump_and_anonymise", presign=True)
|
|
116
131
|
bucket_contents = self.aws.list_objects(
|
|
@@ -120,3 +135,28 @@ class TestDumpAndAnonmyiseCommand(TransactionTestCase):
|
|
|
120
135
|
mocked_logger.info.assert_any_call("Presigned URL: %s", ANY)
|
|
121
136
|
presigned_url = mocked_logger.info.call_args_list[6][0][1]
|
|
122
137
|
assert requests.get(presigned_url).status_code == 200
|
|
138
|
+
|
|
139
|
+
def test_dump_and_anonymise_with_timestamp(self):
|
|
140
|
+
now = datetime.now()
|
|
141
|
+
patch(
|
|
142
|
+
"django_db_anonymiser.db_anonymiser.management.commands.datetime.datetime.now",
|
|
143
|
+
return_value=now,
|
|
144
|
+
)
|
|
145
|
+
call_command("dump_and_anonymise", add_timestamp=True)
|
|
146
|
+
bucket_contents = self.aws.list_objects(
|
|
147
|
+
Bucket=settings.DB_ANONYMISER_AWS_STORAGE_BUCKET_NAME
|
|
148
|
+
).get("Contents", [])
|
|
149
|
+
assert (
|
|
150
|
+
bucket_contents[0]["Key"]
|
|
151
|
+
== f"{now.strftime('%Y-%m-%d-%H:%M:%S')}-{settings.DB_ANONYMISER_DUMP_FILE_NAME}"
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
@override_settings(DB_ANONYMISER_AWS_STORAGE_KEY="test")
|
|
155
|
+
def test_dump_and_anonymise_with_s3_key(self):
|
|
156
|
+
call_command("dump_and_anonymise")
|
|
157
|
+
bucket_contents = self.aws.list_objects(
|
|
158
|
+
Bucket=settings.DB_ANONYMISER_AWS_STORAGE_BUCKET_NAME
|
|
159
|
+
).get("Contents", [])
|
|
160
|
+
assert (
|
|
161
|
+
bucket_contents[0]["Key"] == f"test/{settings.DB_ANONYMISER_DUMP_FILE_NAME}"
|
|
162
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: django-db-anonymiser
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Django app to create configurable anonymised DB dumps.
|
|
5
5
|
Author: Brendan Smith
|
|
6
6
|
Author-email: brendan.smith@digital.trade.gov.uk
|
|
@@ -22,13 +22,13 @@ Description-Content-Type: text/markdown
|
|
|
22
22
|
Django app to create configurable anonymised DB dumps.
|
|
23
23
|
|
|
24
24
|
django-db-anonymiser provides a django app with a management command `dump_and_anonymise`.
|
|
25
|
-
This command runs a `pg_dump` against a postgresql DB, applies anonymisation functions to
|
|
25
|
+
This command runs a `pg_dump` against a postgresql DB, applies anonymisation functions to
|
|
26
26
|
data dumped from the DB and then writes the anonymised dump to S3.
|
|
27
27
|
See here for lite-api's example anonymisation configuration; https://github.com/uktrade/lite-api/blob/dev/api/conf/anonymise_model_config.yaml
|
|
28
28
|
|
|
29
29
|
This pattern is designed as a replacement for Lite's old DB anonymisation process (although it is general purpose and can be used for any django project which uses postgresql).
|
|
30
30
|
The previous process was baked in to an airflow installation and involved making
|
|
31
|
-
a `pg_dump` from production, anonymising that dump with python and pushing the
|
|
31
|
+
a `pg_dump` from production, anonymising that dump with python and pushing the
|
|
32
32
|
file to S3. See; https://github.com/uktrade/lite-airflow-dags/blob/master/dags/export_lite_db.py
|
|
33
33
|
|
|
34
34
|
django-db-anonymiser follows the same overall pattern, but aims to achieve it
|
|
@@ -36,7 +36,7 @@ through a django management command instead of running on top of airflow. In ad
|
|
|
36
36
|
the configuration for how DB columns are anonymised can be configured in simple YAML.
|
|
37
37
|
|
|
38
38
|
**Note:** This repository depends upon code forked from https://github.com/andersinno/python-database-sanitizer
|
|
39
|
-
This is housed under the `database_sanitizer` directory and has been forked from the above repository
|
|
39
|
+
This is housed under the `database_sanitizer` directory and has been forked from the above repository
|
|
40
40
|
because it is unmaintained.
|
|
41
41
|
|
|
42
42
|
## Getting started
|
|
@@ -51,6 +51,8 @@ because it is unmaintained.
|
|
|
51
51
|
- `DB_ANONYMISER_AWS_SECRET_ACCESS_KEY` - AWS secret key for the S3 bucket to upload dumps to
|
|
52
52
|
- `DB_ANONYMISER_AWS_REGION` - AWS region for the S3 bucket to upload dumps to
|
|
53
53
|
- `DB_ANONYMISER_AWS_STORAGE_BUCKET_NAME` - AWS bucket name for the S3 bucket to upload dumps to
|
|
54
|
+
- `DB_ANONYMISER_DUMP_FILE_NAME` - Name for dumped DB file
|
|
55
|
+
- `DB_ANONYMISER_AWS_STORAGE_KEY` - optional, key under which file will be stored in AWS S3 bucket
|
|
54
56
|
|
|
55
57
|
## Running tests
|
|
56
58
|
|
|
@@ -65,7 +67,7 @@ the `db_anonymiser` command directly
|
|
|
65
67
|
|
|
66
68
|
Publishing to PyPI is currently a manual process:
|
|
67
69
|
|
|
68
|
-
1. Acquire API token from [Passman](https://passman.ci.uktrade.digital/secret/0f3d699a-1c7a-4e92-a235-6c756f678dd5/).
|
|
70
|
+
1. Acquire API token from [Passman](https://passman.ci.uktrade.digital/secret/0f3d699a-1c7a-4e92-a235-6c756f678dd5/). <!-- /PS-IGNORE -->
|
|
69
71
|
- Request access from the SRE team.
|
|
70
72
|
- _Note: You will need access to the `platform` group in Passman._
|
|
71
73
|
2. Run `poetry config pypi-token.pypi <token>` to add the token to your Poetry configuration.
|
|
@@ -32,9 +32,9 @@ django_db_anonymiser/db_anonymiser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
|
32
32
|
django_db_anonymiser/db_anonymiser/faker.py,sha256=5X8tH_MAMtrsg248waXMSXTkoacMosqQlkCSFjy_0pA,1729
|
|
33
33
|
django_db_anonymiser/db_anonymiser/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
django_db_anonymiser/db_anonymiser/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py,sha256=
|
|
36
|
-
django_db_anonymiser/db_anonymiser/tests/test_command.py,sha256=
|
|
35
|
+
django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py,sha256=Zoe-VuBRKaTV37-60zim8z4FofAM-OiQ0wToh-zQE9w,5074
|
|
36
|
+
django_db_anonymiser/db_anonymiser/tests/test_command.py,sha256=mO35e7MqfebXYa0NzXFVmBvAPgzOpEwkWjdwWuxvmhE,6647
|
|
37
37
|
django_db_anonymiser/db_anonymiser/tests/test_faker.py,sha256=gvrOpSlWDjr4ajLyN5BEhxtkxO7_zls-CP04EolFFV4,3197
|
|
38
|
-
django_db_anonymiser-0.
|
|
39
|
-
django_db_anonymiser-0.2.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
40
|
-
django_db_anonymiser-0.2.4.dist-info/RECORD,,
|
|
38
|
+
django_db_anonymiser-0.3.0.dist-info/METADATA,sha256=gkxgkQ8oqUS_0ugDmbUBmVaHsADMdo-Max_in544Qao,4800
|
|
39
|
+
django_db_anonymiser-0.3.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
40
|
+
django_db_anonymiser-0.3.0.dist-info/RECORD,,
|
|
File without changes
|