django-db-anonymiser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. django_db_anonymiser/database_sanitizer/__init__.py +0 -0
  2. django_db_anonymiser/database_sanitizer/__main__.py +68 -0
  3. django_db_anonymiser/database_sanitizer/config.py +373 -0
  4. django_db_anonymiser/database_sanitizer/dump/__init__.py +47 -0
  5. django_db_anonymiser/database_sanitizer/dump/mysql.py +196 -0
  6. django_db_anonymiser/database_sanitizer/dump/postgres.py +170 -0
  7. django_db_anonymiser/database_sanitizer/sanitizers/__init__.py +0 -0
  8. django_db_anonymiser/database_sanitizer/sanitizers/constant.py +14 -0
  9. django_db_anonymiser/database_sanitizer/sanitizers/derived.py +14 -0
  10. django_db_anonymiser/database_sanitizer/sanitizers/string.py +31 -0
  11. django_db_anonymiser/database_sanitizer/sanitizers/times.py +11 -0
  12. django_db_anonymiser/database_sanitizer/sanitizers/user.py +145 -0
  13. django_db_anonymiser/database_sanitizer/session.py +146 -0
  14. django_db_anonymiser/database_sanitizer/tests/__init__.py +0 -0
  15. django_db_anonymiser/database_sanitizer/tests/test_config.py +256 -0
  16. django_db_anonymiser/database_sanitizer/tests/test_dump.py +123 -0
  17. django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py +196 -0
  18. django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py +177 -0
  19. django_db_anonymiser/database_sanitizer/tests/test_main.py +91 -0
  20. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py +29 -0
  21. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py +19 -0
  22. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py +44 -0
  23. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py +18 -0
  24. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py +67 -0
  25. django_db_anonymiser/database_sanitizer/tests/test_session.py +36 -0
  26. django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py +112 -0
  27. django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py +86 -0
  28. django_db_anonymiser/database_sanitizer/utils/__init__.py +0 -0
  29. django_db_anonymiser/database_sanitizer/utils/mysql.py +161 -0
  30. django_db_anonymiser/database_sanitizer/utils/postgres.py +145 -0
  31. django_db_anonymiser/db_anonymiser/__init__.py +0 -0
  32. django_db_anonymiser/db_anonymiser/faker.py +91 -0
  33. django_db_anonymiser/db_anonymiser/management/__init__.py +0 -0
  34. django_db_anonymiser/db_anonymiser/management/commands/__init__.py +0 -0
  35. django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +105 -0
  36. django_db_anonymiser/db_anonymiser/tests/test_command.py +90 -0
  37. django_db_anonymiser/db_anonymiser/tests/test_faker.py +116 -0
  38. django_db_anonymiser-0.1.0.dist-info/METADATA +98 -0
  39. django_db_anonymiser-0.1.0.dist-info/RECORD +40 -0
  40. django_db_anonymiser-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,116 @@
1
+ import pytest
2
+
3
+ from faker import Faker
4
+
5
+ from db_anonymiser import faker as db_anonymiser_faker
6
+
7
+
8
+ # Force faker to use a seed when producing output so that we can assume a
9
+ # deterministic set of results
10
+ @pytest.fixture(autouse=True)
11
+ def seed_faker():
12
+ Faker.seed(0)
13
+
14
+
15
+ def test_sanitize_name():
16
+ assert db_anonymiser_faker.sanitize_name("Bob Benson") == "Dr Rhys Thomas"
17
+
18
+
19
+ def test_sanitize_first_name():
20
+ assert db_anonymiser_faker.sanitize_first_name("Bob") == "Emma"
21
+
22
+
23
+ def test_sanitize_last_name():
24
+ assert db_anonymiser_faker.sanitize_last_name("Benson") == "Watts"
25
+
26
+
27
+ def test_sanitize_email():
28
+ assert (
29
+ db_anonymiser_faker.sanitize_email("bob.benson@example.net")
30
+ == "areed@example.org"
31
+ )
32
+
33
+
34
+ def test_sanitize_company_name():
35
+ assert db_anonymiser_faker.sanitize_company_name("Sterling Cooper") == "Reed-Burton"
36
+
37
+
38
+ def test_sanitize_phone_number():
39
+ assert db_anonymiser_faker.sanitize_phone_number("011111111111") == "+448764759382"
40
+
41
+
42
+ def test_sanitize_address():
43
+ assert (
44
+ db_anonymiser_faker.sanitize_address("Madison Ave")
45
+ == "Flat 4, Gibbons tunnel, Lesleystad, L8C 2EZ"
46
+ )
47
+
48
+
49
+ def test_sanitize_website():
50
+ assert (
51
+ db_anonymiser_faker.sanitize_website("sterling.cooper")
52
+ == "reed-burton.arnold-jones.com"
53
+ )
54
+
55
+
56
+ def test_sanitize_text():
57
+ assert (
58
+ db_anonymiser_faker.sanitize_text("Not great, Bob.")
59
+ == "Aliquam vitae laborum ullam rerum voluptas. Nesciunt tenetur magnam eligendi quidem nulla. Voluptates minus provident nobis corporis. Quas tempore placeat iusto. Explicabo et odit dignissimos."
60
+ )
61
+ assert db_anonymiser_faker.sanitize_text(None) == None
62
+
63
+
64
+ def test_sanitize_street_address():
65
+ assert (
66
+ db_anonymiser_faker.sanitize_street_address("Madison Ave")
67
+ == "Studio 0\nArnold oval"
68
+ )
69
+
70
+
71
+ def test_sanitize_city():
72
+ assert db_anonymiser_faker.sanitize_city("New York") == "Reedchester"
73
+
74
+
75
+ def test_sanitize_postcode():
76
+ assert db_anonymiser_faker.sanitize_postcode("DT11 7DY") == "TA60 8UR"
77
+
78
+
79
+ def test_sanitize_eori_number():
80
+ assert (
81
+ db_anonymiser_faker.sanitize_eori_number("GB111111111111") == "GB424533559245"
82
+ )
83
+
84
+
85
+ def test_sanitize_ni_eori_number():
86
+ assert (
87
+ db_anonymiser_faker.sanitize_ni_eori_number("XI111111111111") == "XI424533559245"
88
+ )
89
+
90
+ def test_sanitize_eu_eori_number():
91
+ assert (
92
+ db_anonymiser_faker.sanitize_eu_eori_number("FR11111") == "FR99346"
93
+ )
94
+
95
+ def test_sanitize_sic_number():
96
+ assert db_anonymiser_faker.sanitize_sic_number("11111") == "50494"
97
+
98
+
99
+ def test_sanitize_vat_number():
100
+ assert db_anonymiser_faker.sanitize_vat_number("GB111111111") == "GB906691059"
101
+
102
+
103
+ def test_sanitize_registration_number():
104
+ assert db_anonymiser_faker.sanitize_registration_number("11111111") == "51706749"
105
+
106
+
107
+ def test_sanitize_filename():
108
+ assert db_anonymiser_faker.sanitize_filename("somefile.txt") == "molestiae.xlsx"
109
+
110
+
111
+ def test_sanitize_short_text():
112
+ assert (
113
+ db_anonymiser_faker.sanitize_short_text("some small text")
114
+ == "Tempore placeat iusto aut. Et odit dignissimos mollitia ipsam maxime."
115
+ )
116
+ assert db_anonymiser_faker.sanitize_short_text(None) == None
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.3
2
+ Name: django-db-anonymiser
3
+ Version: 0.1.0
4
+ Summary: Django app to create configurable anonymised DB dumps.
5
+ Author: Brendan Smith
6
+ Author-email: brendan.smith@digital.trade.gov.uk
7
+ Requires-Python: >3.9.1,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: boto3 (>=1.40.33,<2.0.0)
14
+ Requires-Dist: django (>=4.2.10,<5.0.0)
15
+ Requires-Dist: django-environ (>=0.12.0,<0.13.0)
16
+ Requires-Dist: faker (>=4.18.0)
17
+ Requires-Dist: psycopg2-binary (>=2.9.10,<3.0.0)
18
+ Requires-Dist: pymysql (>=1.1.2,<2.0.0)
19
+ Description-Content-Type: text/markdown
20
+
21
+ # django-db-anonymiser
22
+ Django app to create configurable anonymised DB dumps.
23
+
24
+ django-db-anonymiser provides a django app with a management command `dump_and_anonymise`.
25
+ This command runs a `pg_dump` against a postgresql DB, applies anonymisation functions to
26
+ data dumped from the DB and then writes the anonymised dump to S3.
27
+ See lite-api's example anonymisation configuration: https://github.com/uktrade/lite-api/blob/dev/api/conf/anonymise_model_config.yaml
28
+
29
+ This pattern is designed as a replacement for Lite's old DB anonymisation process (although it is general purpose and can be used for any django project which uses postgresql).
30
+ The previous process was baked into an Airflow installation and involved making
31
+ a `pg_dump` from production, anonymising that dump with Python and pushing the
32
+ file to S3. See: https://github.com/uktrade/lite-airflow-dags/blob/master/dags/export_lite_db.py
33
+
34
+ django-db-anonymiser follows the same overall pattern, but aims to achieve it
35
+ through a django management command instead of running on top of airflow. In addition,
36
+ how DB columns are anonymised can be configured in simple YAML.
37
+
38
+ **Note:** This repository depends upon code forked from https://github.com/andersinno/python-database-sanitizer.
39
+ The forked code is housed under the `database_sanitizer` directory; it was forked
40
+ because the upstream repository is unmaintained.
41
+
42
+ ## Getting started
43
+
44
+ - Add `faker>=4.18.0` and `boto3>=1.26.17` to your Python requirements; it is assumed that Python, psycopg and related packages are already installed.
45
+ - Either add this GitHub repository as a submodule to your Django application named `django_db_anonymiser` or install the Python package [django-db-anonymiser](https://pypi.org/project/django-db-anonymiser/) from PyPI.
46
+ - Add `django_db_anonymiser.db_anonymiser` to `INSTALLED_APPS`
47
+ - Set the following django settings;
48
+ - `DB_ANONYMISER_CONFIG_LOCATION` - the location of your anonymisation yaml file
49
+ - `DB_ANONYMISER_AWS_ENDPOINT_URL` - optional, custom URL for AWS (e.g. if using minio)
50
+ - `DB_ANONYMISER_AWS_ACCESS_KEY_ID` - AWS access key ID for the S3 bucket to upload dumps to
51
+ - `DB_ANONYMISER_AWS_SECRET_ACCESS_KEY` - AWS secret key for the S3 bucket to upload dumps to
52
+ - `DB_ANONYMISER_AWS_REGION` - AWS region for the S3 bucket to upload dumps to
53
+ - `DB_ANONYMISER_AWS_STORAGE_BUCKET_NAME` - AWS bucket name for the S3 bucket to upload dumps to
54
+
55
+ ## Running tests
56
+
57
+ For local unit testing from the root of the repository run:
58
+
59
+ $ poetry run pytest
60
+
61
+ **Note:** Currently, for full test coverage, it is necessary to run the tests in CircleCI, where we spin up a postgres DB and test
62
+ the `dump_and_anonymise` command directly.
63
+
64
+ ## Publishing
65
+
66
+ Publishing to PyPI is currently a manual process:
67
+
68
+ 1. Acquire API token from [Passman](https://passman.ci.uktrade.digital/secret/0f3d699a-1c7a-4e92-a235-6c756f678dd5/).
69
+ - Request access from the SRE team.
70
+ - _Note: You will need access to the `platform` group in Passman._
71
+ 2. Run `poetry config pypi-token.pypi <token>` to add the token to your Poetry configuration.
72
+
73
+ Update the version, as the same version cannot be published to PyPI twice.
74
+
75
+ ```
76
+ poetry version patch
77
+ ```
78
+
79
+ More options for the `version` command can be found in the [Poetry documentation](https://python-poetry.org/docs/cli/#version). For example, for a minor version bump: `poetry version minor`.
80
+
81
+ Build the Python package.
82
+
83
+ ```
84
+ poetry build
85
+ ```
86
+
87
+ Publish the Python package.
88
+
89
+ _Note: Make sure your Pull Request (PR) is approved and contains the version upgrade in `pyproject.toml` before publishing the package._
90
+
91
+ ```
92
+ poetry publish
93
+ ```
94
+
95
+ Check the [PyPI Release history](https://pypi.org/project/django-db-anonymiser/#history) to make sure the package has been updated.
96
+
97
+ For an optional manual check, install the package locally and test everything works as expected.
98
+
@@ -0,0 +1,40 @@
1
+ django_db_anonymiser/database_sanitizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ django_db_anonymiser/database_sanitizer/__main__.py,sha256=MD5YHVO17s3v1A836StbLnjnbHe8RUwccC4emP5DmiQ,1551
3
+ django_db_anonymiser/database_sanitizer/config.py,sha256=RBqzK8AU4Z9b5_BUIPQMsDCAEpZ9VKUxUHExu0GdtQc,13501
4
+ django_db_anonymiser/database_sanitizer/dump/__init__.py,sha256=IEMKBXC0t6seCIWGw9hXVTS389u8_lUnFiGDl5jVIho,1575
5
+ django_db_anonymiser/database_sanitizer/dump/mysql.py,sha256=b5DJKoY-5j_17beSmFFI3r31LgYyuqcwlHNY23E_YHI,6431
6
+ django_db_anonymiser/database_sanitizer/dump/postgres.py,sha256=AiJWydA-ilX793waQBtpaBlgn2EuE3_nU-2V9fg1vh0,4988
7
+ django_db_anonymiser/database_sanitizer/sanitizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ django_db_anonymiser/database_sanitizer/sanitizers/constant.py,sha256=0PHvKAF2NEPi2qZ-agnD_dOD3rKYccoMVV1i5BQXOO8,214
9
+ django_db_anonymiser/database_sanitizer/sanitizers/derived.py,sha256=6UN3703pR5g7mcrwF-lxrfRmmtdhPTZfpkdqCpl_PuE,375
10
+ django_db_anonymiser/database_sanitizer/sanitizers/string.py,sha256=h0VMsFgKwYBxqGgLMbdu2iP7rarFy1yqXKX_O8KLTmo,708
11
+ django_db_anonymiser/database_sanitizer/sanitizers/times.py,sha256=ORa5MlwcgHNhwP2VIZNKkyTV3dU1_DYfmkG_COToqN0,299
12
+ django_db_anonymiser/database_sanitizer/sanitizers/user.py,sha256=vj-CzSb5ckToTwGQ0eRf9u9cxk0dDXRafXfOVG1HHzI,7547
13
+ django_db_anonymiser/database_sanitizer/session.py,sha256=gO5RV0zd4Qk4mayVKu4W1nqoAkX-pRDGNS-DWM6OcHU,4597
14
+ django_db_anonymiser/database_sanitizer/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ django_db_anonymiser/database_sanitizer/tests/test_config.py,sha256=1PC-sv9ZdN-kVnO6oFxxOg3LEJbTWSsaEQ0hGinatY4,8367
16
+ django_db_anonymiser/database_sanitizer/tests/test_dump.py,sha256=0Gv1NvzApKE-5YgzIagvvHUkBC7ClnURuiAn_qjLj1o,3692
17
+ django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py,sha256=Vg970aNuOuOizHvxPz5501EPaFvYnGAjFQFl0gOnmKw,5635
18
+ django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py,sha256=qaX8RbaigwRINonATD7gYFMajuKgA32iTZ0x2cHsjnk,5483
19
+ django_db_anonymiser/database_sanitizer/tests/test_main.py,sha256=hZr1iuiFhWhypctmp_RKFIW87aFDxleMjjFyrHkPta4,3161
20
+ django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py,sha256=c0kdaTnEAY-uqlTOGmEpK8cINyuTTuLuVSIR1Yi74wI,1156
21
+ django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py,sha256=Mh29lsKmM6skxYcM9hSkOF12EQg2mKpML8yZBdZJZdM,741
22
+ django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py,sha256=yafmFXob0n__LFlk52nuj3zqOU3ZWHbGd8N65c_SIUA,1077
23
+ django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py,sha256=m08KiGkms912PirRWJgkUQfPb_oxpbdIifOHAltIx4g,471
24
+ django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py,sha256=g-qBahyxs2_MYy9QVelt1mdKYYIsCP6bYwIO1mAqgm8,2792
25
+ django_db_anonymiser/database_sanitizer/tests/test_session.py,sha256=l4jTJu5wJV6HemoabKZFO0_ZFICngTC7L_Z0JUCoGV8,906
26
+ django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py,sha256=jSrYZdInPdKNQD9f3yphDaGHWMmnjSskLYV2wZ1ooWI,2737
27
+ django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py,sha256=6duk1llpfmU5pf0S3lxDLCiMHgucUB4mpK567Udm27Q,2266
28
+ django_db_anonymiser/database_sanitizer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
+ django_db_anonymiser/database_sanitizer/utils/mysql.py,sha256=Pg6Qv2W6v7FgEaxcLGUZVDoJDwAYv7hxCQPcfOFZzL8,4657
30
+ django_db_anonymiser/database_sanitizer/utils/postgres.py,sha256=v2rm3dX_9Z7NdkY8irJ-rMEh6i31o8ymEqpxHSdtaqU,4245
31
+ django_db_anonymiser/db_anonymiser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ django_db_anonymiser/db_anonymiser/faker.py,sha256=5X8tH_MAMtrsg248waXMSXTkoacMosqQlkCSFjy_0pA,1729
33
+ django_db_anonymiser/db_anonymiser/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ django_db_anonymiser/db_anonymiser/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py,sha256=XNyDLwGgDWD1vXUs9ChYPdzI-gbLOJrcfcCceRR1tCU,3692
36
+ django_db_anonymiser/db_anonymiser/tests/test_command.py,sha256=lngzFc-_K2fC-LgIaLNSkJa3iBrDAPWxh56heIa5whg,3637
37
+ django_db_anonymiser/db_anonymiser/tests/test_faker.py,sha256=gvrOpSlWDjr4ajLyN5BEhxtkxO7_zls-CP04EolFFV4,3197
38
+ django_db_anonymiser-0.1.0.dist-info/METADATA,sha256=G1Wk_yRHC2lIfUXUsrk1pBf2frmBM0dCz29eGujFFCc,4567
39
+ django_db_anonymiser-0.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
40
+ django_db_anonymiser-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.1.3
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any