django-db-anonymiser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_db_anonymiser/database_sanitizer/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/__main__.py +68 -0
- django_db_anonymiser/database_sanitizer/config.py +373 -0
- django_db_anonymiser/database_sanitizer/dump/__init__.py +47 -0
- django_db_anonymiser/database_sanitizer/dump/mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/dump/postgres.py +170 -0
- django_db_anonymiser/database_sanitizer/sanitizers/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/sanitizers/constant.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/derived.py +14 -0
- django_db_anonymiser/database_sanitizer/sanitizers/string.py +31 -0
- django_db_anonymiser/database_sanitizer/sanitizers/times.py +11 -0
- django_db_anonymiser/database_sanitizer/sanitizers/user.py +145 -0
- django_db_anonymiser/database_sanitizer/session.py +146 -0
- django_db_anonymiser/database_sanitizer/tests/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/tests/test_config.py +256 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump.py +123 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py +196 -0
- django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py +177 -0
- django_db_anonymiser/database_sanitizer/tests/test_main.py +91 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py +29 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py +19 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py +44 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py +18 -0
- django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py +67 -0
- django_db_anonymiser/database_sanitizer/tests/test_session.py +36 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py +112 -0
- django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py +86 -0
- django_db_anonymiser/database_sanitizer/utils/__init__.py +0 -0
- django_db_anonymiser/database_sanitizer/utils/mysql.py +161 -0
- django_db_anonymiser/database_sanitizer/utils/postgres.py +145 -0
- django_db_anonymiser/db_anonymiser/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/faker.py +91 -0
- django_db_anonymiser/db_anonymiser/management/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/__init__.py +0 -0
- django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +105 -0
- django_db_anonymiser/db_anonymiser/tests/test_command.py +90 -0
- django_db_anonymiser/db_anonymiser/tests/test_faker.py +116 -0
- django_db_anonymiser-0.1.0.dist-info/METADATA +98 -0
- django_db_anonymiser-0.1.0.dist-info/RECORD +40 -0
- django_db_anonymiser-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import unicode_literals
|
|
4
|
+
|
|
5
|
+
import codecs
|
|
6
|
+
import io
|
|
7
|
+
import re
|
|
8
|
+
import subprocess
|
|
9
|
+
|
|
10
|
+
from ..utils.postgres import decode_copy_value, encode_copy_value
|
|
11
|
+
from ..config import PG_DUMP_DEFAULT_PARAMETERS
|
|
12
|
+
|
|
13
|
+
# Matches the header line of a `COPY ... FROM stdin;` statement whose schema
# and table names are double quoted, capturing the schema name, the table name
# and the raw (still quoted, comma separated) column list.
#
# The separator between schema and table is an escaped, literal dot; an
# unescaped `.` would accept any character in that position.
COPY_LINE_PATTERN = re.compile(
    r"^COPY \"(?P<schema>[^\"]*)\"\.\"(?P<table>[^\"]*)\" "
    r"\((?P<columns>.*)\) "
    r"FROM stdin;$"
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def sanitize(url, config):
    """
    Obtains a dump of a Postgres database by executing the `pg_dump` command
    and sanitizes its output.

    This is a generator: `pg_dump` is started when the first line is
    requested, and its output pipe is closed and the child process reaped
    when the generator is exhausted, closed, or fails with an error.

    :param url: URL to the database which is going to be sanitized, parsed by
                Python's URL parser.
    :type url: six.moves.urllib.parse.ParseResult

    :param config: Optional sanitizer configuration to be used for sanitation
                   of the values stored in the database.
    :type config: database_sanitizer.config.Configuration|None

    :return: Generator yielding the lines of the sanitized dump, without
             their trailing newline.

    :raises ValueError: If the URL scheme is not one of the supported
                        Postgres schemes.
    """
    if url.scheme not in ("postgres", "postgresql", "postgis"):
        raise ValueError("Unsupported database type: '%s'" % (url.scheme,))

    extra_params = PG_DUMP_DEFAULT_PARAMETERS
    if config:
        extra_params = config.pg_dump_params

    process = subprocess.Popen(
        (
            "pg_dump",
            # Force output to be UTF-8 encoded.
            "--encoding=utf-8",
            # Quote all table and column names, just in case.
            "--quote-all-identifiers",
            # Luckily `pg_dump` supports DB URLs, so we can just pass it the
            # URL as argument to the command.
            "--dbname",
            url.geturl().replace('postgis://', 'postgresql://'),
        ) + tuple(extra_params),
        stdout=subprocess.PIPE,
    )
    dump_output = io.TextIOWrapper(process.stdout, encoding="utf-8")

    sanitize_value_line = None
    current_table = None
    current_table_columns = None
    skip_table = False

    try:
        for line in dump_output:
            # Eat the trailing new line.
            line = line.rstrip("\n")

            # Are we currently in middle of `COPY` statement?
            if current_table:
                # Backslash following a dot marks end of an `COPY` statement.
                if line == "\\.":
                    current_table = None
                    current_table_columns = None
                    # The terminator of a skipped table is dropped together
                    # with its header and rows.
                    if not skip_table:
                        yield "\\."
                    skip_table = False
                    continue

                if skip_table:
                    continue

                # No sanitizer configured for this table: pass rows through.
                if not sanitize_value_line:
                    yield line
                    continue

                yield sanitize_value_line(line)
                continue

            # Is the line beginning of `COPY` statement?
            copy_line_match = COPY_LINE_PATTERN.match(line)
            if not copy_line_match:
                yield line
                continue

            current_table = copy_line_match.group("table")
            current_table_columns = parse_column_names(
                copy_line_match.group("columns"))

            # Skip `COPY` statement if table rows are configured
            # to be skipped.
            if config and current_table in config.skip_rows_for_tables:
                skip_table = True
                continue

            sanitize_value_line = get_value_line_sanitizer(
                config, current_table, current_table_columns)

            yield line
    finally:
        # Always release the pipe and reap the child, even when the consumer
        # stops iterating early or a sanitizer raises; otherwise the file
        # descriptor leaks and `pg_dump` lingers as a zombie process.
        dump_output.close()
        process.wait()
        # NOTE(review): the exit status of `pg_dump` is still not inspected,
        # so a failed dump ends the output early without an error.
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def get_value_line_sanitizer(config, table, columns):
    """
    Builds a function which sanitizes one data row of a `COPY` block.

    For every column the sanitizer returned by
    ``config.get_sanitizer_for(table, column)`` is looked up. Columns with a
    sanitizer have their value decoded from the `COPY` format, sanitized and
    encoded back; columns without one are passed through untouched.

    :param config: Sanitizer configuration, or a falsy value for none.
    :param table: Name of the table the rows belong to.
    :param columns: Column names, in the order the values appear in a row.

    :return: Function taking a tab separated row and returning the sanitized
             row, or None if there is no configuration or no column of the
             table has a sanitizer.
    """
    if not config:
        return None

    def build_column_sanitizer(column):
        column_sanitizer = config.get_sanitizer_for(table, column)

        if column_sanitizer:
            def decode_sanitize_encode(value):
                decoded = decode_copy_value(value)
                return encode_copy_value(column_sanitizer(decoded))

            return decode_sanitize_encode

        return _identity

    column_sanitizers = [build_column_sanitizer(name) for name in columns]

    # Nothing to do for this table when every column is passed through as is.
    if not any(fn is not _identity for fn in column_sanitizers):
        return None

    def sanitize_line(line):
        fields = line.split('\t')
        if len(fields) != len(columns):
            raise ValueError("Mismatch between column names and values.")

        sanitized_fields = []
        for (fn, field) in zip(column_sanitizers, fields):
            sanitized_fields.append(fn(field))
        return '\t'.join(sanitized_fields)

    return sanitize_line
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _identity(x):
|
|
138
|
+
return x
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def parse_column_names(text):
    """
    Extracts column names from a string containing quoted and comma separated
    column names.

    Only commas outside of double quotes separate names, so a quoted name
    which itself contains a comma is kept in one piece. The surrounding
    double quotes of each name are removed; doubled quotes inside a name are
    left as they are.

    :param text: Line extracted from `COPY` statement containing quoted and
                 comma separated column names.
    :type text: str

    :return: Tuple containing just the column names.
    :rtype: tuple[str]
    """
    raw_names = []
    current = []
    in_quotes = False

    for char in text:
        if char == '"':
            # A doubled quote toggles twice, leaving the quoted state intact.
            in_quotes = not in_quotes
        if char == "," and not in_quotes:
            raw_names.append("".join(current))
            current = []
        else:
            current.append(char)
    raw_names.append("".join(current))

    return tuple(
        re.sub(r"^\"(.*)\"$", r"\1", raw_name.strip())
        for raw_name in raw_names
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def parse_values(text):
    """
    Splits one data row of a `COPY` block into its decoded column values.

    The row is split on tab characters and every piece is passed through
    `decode_copy_value`.

    :param text: Line following `COPY` statement containing values.
    :type text: str

    :return: Column values extracted from the given line.
    :rtype: tuple[str|None]
    """
    raw_values = text.split("\t")
    return tuple(map(decode_copy_value, raw_values))
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
|
|
3
|
+
from database_sanitizer.session import hash_text
|
|
4
|
+
|
|
5
|
+
# Canonical text form of the nil UUID and the same value without dashes; the
# latter is compared against dash-stripped input.
NIL_UUID = '00000000-0000-0000-0000-000000000000'
NIL_UUID_WITHOUT_DASHES = NIL_UUID.replace('-', '')


def sanitize_uuid4(value):
    """
    Replaces a UUID with a version 4 UUID derived from the hash of the value.

    Falsy values are returned unchanged, and the nil UUID (written with or
    without dashes) is mapped to the canonical nil UUID instead of being
    hashed.

    :param value: Text form of the UUID to sanitize.
    :return: Text form of the sanitized UUID, or the falsy input as is.
    """
    if not value:
        return value

    if value.replace('-', '') == NIL_UUID_WITHOUT_DASHES:
        return NIL_UUID

    # The first 32 hex digits of the hash supply the 128 bits of the UUID.
    digest = hash_text(value)
    sanitized = uuid.UUID(digest[:32], version=4)
    return str(sanitized)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import absolute_import, unicode_literals
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import string
|
|
7
|
+
|
|
8
|
+
# Alphabet from which `sanitize_random` draws its replacement characters:
# the ASCII letters followed by the decimal digits.
CHARACTERS = string.ascii_letters + string.digits
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def sanitize_empty(value):
    """
    Built-in sanitizer which blanks out the original value.

    ``None`` is passed through as ``None``; any other value is replaced with
    an empty string.
    """
    if value is None:
        return None
    return ""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sanitize_zfill(value):
    """
    Built-in sanitizer which overwrites the original value with zeros.

    ``None`` is passed through as ``None``; any other value is replaced with
    a string of ``"0"`` characters of the same length.
    """
    if value is None:
        return None
    return "0" * len(value)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def sanitize_random(value):
    """
    Built-in sanitizer which replaces the value with a random string of the
    same length.

    Falsy values (``None``, empty string) are returned unchanged. Each
    character is drawn independently from `CHARACTERS`.
    """
    if not value:
        return value

    picked = [random.choice(CHARACTERS) for _ in range(len(value))]
    return ''.join(picked)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import random
|
|
3
|
+
|
|
4
|
+
# Width of the sampling window in seconds: ten years of 365 days each.
TEN_YEARS_AS_SECONDS = 10 * 365 * 24 * 3600


def sanitize_random_past_timestamp(value):
    """
    Replaces the value with a random timestamp from the past ten years.

    The original value is ignored. A random offset with millisecond
    resolution, between zero and `TEN_YEARS_AS_SECONDS` seconds, is
    subtracted from the current local time.

    :param value: Original value; not used.
    :return: The generated timestamp in ISO 8601 format.
    """
    offset_ms = random.randint(0, TEN_YEARS_AS_SECONDS * 1000)
    offset = datetime.timedelta(seconds=(offset_ms / 1000.0))
    return (datetime.datetime.now() - offset).isoformat()
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from __future__ import unicode_literals
|
|
2
|
+
|
|
3
|
+
from six import text_type
|
|
4
|
+
|
|
5
|
+
from database_sanitizer.session import hash_text_to_int, hash_text_to_ints
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def sanitize_email(value):
    """
    Replaces an e-mail address with a generated one derived from its hash.

    Falsy values are returned unchanged. The stripped value is hashed to
    three numbers: the first two pick a given name and a surname, the third
    becomes the hexadecimal part of the ``sanitized.net`` host name.

    :param value: E-mail address to sanitize.
    :return: Generated address of the form
             ``first.last@x<hex>.sanitized.net``, or the falsy input as is.
    """
    if not value:
        return value

    (first_hash, last_hash, host_hash) = hash_text_to_ints(
        value.strip(), [16, 16, 32])
    first = given_names[first_hash % given_names_count]
    last = surnames[last_hash % surnames_count]

    # The names are lower-cased unless the third number is a multiple of
    # eight, in which case their original capitalization is kept.
    if host_hash % 8 > 0:
        first = text_type.lower(first)
        last = text_type.lower(last)

    return '{first}.{last}@x{num:x}.sanitized.net'.format(
        first=first,
        # Apostrophes (as in the O'... surnames) are dropped from the address.
        last=last.replace("'", ''),
        num=host_hash)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def sanitize_username(value):
    """
    Replaces a username with a generated one derived from its hash.

    Falsy values are returned unchanged. The value is hashed to two numbers:
    the first picks a given name, which is lower-cased, and the second is
    appended to it in hexadecimal.

    :param value: Username to sanitize.
    :return: Generated username, or the falsy input as is.
    """
    if not value:
        return value

    (name_hash, suffix_hash) = hash_text_to_ints(value, [16, 32])
    base_name = given_names[name_hash % given_names_count].lower()
    return '{}{:x}'.format(base_name, suffix_hash)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def sanitize_full_name_en_gb(value):
    """
    Replaces a full name with a generated "Given Surname" pair.

    Falsy values are returned unchanged. The value is stripped and
    lower-cased before hashing, so inputs differing only in case or
    surrounding whitespace map to the same name.

    :param value: Full name to sanitize.
    :return: Generated full name, or the falsy input as is.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    (given_hash, surname_hash) = hash_text_to_ints(normalized, [16, 16])
    given_name = given_names[given_hash % given_names_count]
    surname = surnames[surname_hash % surnames_count]
    return '{} {}'.format(given_name, surname)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def sanitize_given_name_en_gb(value):
    """
    Replaces a given name with one picked from `given_names` by hash.

    Falsy values are returned unchanged. The value is stripped and
    lower-cased before hashing.

    :param value: Given name to sanitize.
    :return: Generated given name, or the falsy input as is.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    index = hash_text_to_int(normalized) % given_names_count
    return given_names[index]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def sanitize_surname_en_gb(value):
    """
    Replaces a surname with one picked from `surnames` by hash.

    Falsy values are returned unchanged. The value is stripped and
    lower-cased before hashing.

    :param value: Surname to sanitize.
    :return: Generated surname, or the falsy input as is.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    index = hash_text_to_int(normalized) % surnames_count
    return surnames[index]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Pool of given names used by the name, username and e-mail sanitizers.
# `split()` without arguments already discards the leading and trailing
# whitespace of the literal.
given_names = """
Aaron Abbie Abdul Abigail Adam Adrian Aimee Alan Albert Alex
Alexander Alexandra Alice Alison Allan Amanda Amber Amelia Amy Andrea
Andrew Angela Ann Anna Anne Annette Anthony Antony Arthur Ashleigh
Ashley Barbara Barry Ben Benjamin Bernard Beth Bethan Bethany Beverley
Billy Bradley Brandon Brenda Brett Brian Bruce Bryan Callum Cameron Carl
Carly Carol Carole Caroline Carolyn Catherine Charlene Charles Charlie
Charlotte Chelsea Cheryl Chloe Christian Christine Christopher Claire
Clare Clifford Clive Colin Connor Conor Craig Dale Damian Damien Daniel
Danielle Danny Darren David Dawn Dean Deborah Debra Declan Denis Denise
Dennis Derek Diana Diane Dominic Donald Donna Dorothy Douglas Duncan
Dylan Edward Eileen Elaine Eleanor Elizabeth Ellie Elliot Elliott Emily
Emma Eric Fiona Frances Francesca Francis Frank Frederick Gail Gareth
Garry Gary Gavin Gemma Geoffrey George Georgia Georgina Gerald Geraldine
Gerard Gillian Glen Glenn Gordon Grace Graeme Graham Gregory Guy Hannah
Harriet Harry Hayley Hazel Heather Helen Henry Hilary Hollie Holly
Howard Hugh Iain Ian Irene Jack Jacob Jacqueline Jade Jake James Jamie
Jane Janet Janice Jasmine Jason Jay Jayne Jean Jeffrey Jemma Jenna
Jennifer Jeremy Jessica Jill Joan Joanna Joanne Jodie Joe Joel John
Jonathan Jordan Joseph Josephine Josh Joshua Joyce Judith Julia Julian
Julie June Justin Karen Karl Kate Katherine Kathleen Kathryn Katie Katy
Kayleigh Keith Kelly Kenneth Kerry Kevin Kieran Kim Kimberley Kirsty
Kyle Laura Lauren Lawrence Leah Leanne Lee Leigh Leon Leonard Lesley
Leslie Lewis Liam Linda Lindsey Lisa Lorraine Louis Louise Lucy Luke
Lydia Lynda Lynn Lynne Malcolm Mandy Marc Marcus Margaret Maria Marian
Marie Marilyn Marion Mark Martin Martyn Mary Mathew Matthew Maureen
Maurice Max Megan Melanie Melissa Michael Michelle Mitchell Mohamed
Mohammad Mohammed Molly Naomi Natalie Natasha Nathan Neil Nicholas
Nicola Nicole Nigel Norman Oliver Olivia Owen Paige Pamela Patricia
Patrick Paul Paula Pauline Peter Philip Phillip Rachael Rachel Raymond
Rebecca Reece Rhys Richard Ricky Rita Robert Robin Roger Ronald Rosemary
Rosie Ross Roy Russell Ruth Ryan Sally Sam Samantha Samuel Sandra Sara
Sarah Scott Sean Shane Shannon Sharon Shaun Sheila Shirley Sian Simon
Sophie Stacey Stanley Stephanie Stephen Steven Stewart Stuart Susan
Suzanne Sylvia Terence Teresa Terry Thomas Timothy Tina Toby Tom Tony
Tracey Tracy Trevor Valerie Vanessa Victor Victoria Vincent Wayne Wendy
William Yvonne Zoe
""".split()


# Pool of surnames used by the name and e-mail sanitizers. Some entries
# contain an apostrophe, which `sanitize_email` removes.
surnames = """
Abbott Adams Ahmed Akhtar Alexander Ali Allan Allen Anderson Andrews
Archer Armstrong Arnold Ashton Atkins Atkinson Austin Bailey Baker
Baldwin Ball Banks Barber Barker Barlow Barnes Barnett Barrett Barry
Bartlett Barton Bates Baxter Begum Bell Bennett Benson Bentley Berry
Bevan Bibi Birch Bird Bishop Black Blackburn Bolton Bond Booth Bowen
Boyle Bradley Bradshaw Brady Bray Brennan Briggs Brookes Brooks Brown
Browne Bruce Bryan Bryant Bull Burgess Burke Burns Burrows Burton
Butcher Butler Byrne Cameron Campbell Carey Carpenter Carr Carroll
Carter Cartwright Chadwick Chambers Chan Chandler Chapman Charlton Clark
Clarke Clayton Clements Coates Cole Coleman Coles Collier Collins
Connolly Connor Conway Cook Cooke Cooper Cox Craig Crawford Cross
Cunningham Curtis Dale Daly Daniels Davey Davidson Davies Davis Davison
Dawson Day Dean Dennis Dickinson Dixon Dobson Dodd Doherty Donnelly
Douglas Doyle Duffy Duncan Dunn Dyer Edwards Elliott Ellis Evans Farmer
Farrell Faulkner Ferguson Field Finch Fisher Fitzgerald Fleming Fletcher
Flynn Ford Forster Foster Fowler Fox Francis Franklin Fraser Freeman
French Frost Fry Fuller Gallagher Gardiner Gardner Garner George Gibbons
Gibbs Gibson Gilbert Giles Gill Glover Goddard Godfrey Goodwin Gordon
Gough Gould Graham Grant Gray Green Greenwood Gregory Griffin Griffiths
Hale Hall Hamilton Hammond Hancock Hanson Harding Hardy Hargreaves
Harper Harris Harrison Hart Hartley Harvey Hawkins Hayes Haynes Hayward
Heath Henderson Henry Herbert Hewitt Hicks Higgins Hill Hilton Hodgson
Holden Holland Holloway Holmes Holt Hooper Hope Hopkins Horton Houghton
Howard Howarth Howe Howell Howells Hudson Hughes Humphreys Humphries
Hunt Hunter Hurst Hussain Hutchinson Hyde Ingram Iqbal Jackson James
Jarvis Jenkins Jennings John Johnson Johnston Jones Jordan Joyce Kaur
Kay Kelly Kemp Kennedy Kent Kerr Khan King Kirby Kirk Knight Knowles
Lamb Lambert Lane Law Lawrence Lawson Leach Lee Lees Leonard Lewis
Little Lloyd Long Lord Lowe Lucas Lynch Lyons Macdonald Mahmood Mann
Manning Marsden Marsh Marshall Martin Mason Matthews May McCarthy
McDonald McKenzie McLean Mellor Metcalfe Miah Middleton Miles Miller
Mills Mistry Mitchell Moore Moran Morgan Morley Morris Morrison Morton
Moss Murphy Murray Myers Nash Naylor Nelson Newman Newton Nicholls
Nicholson Nixon Noble Nolan Norman Norris North Norton O'Blake O'Buckley
O'Chamberlain O'Hobbs O'Thompson Oliver Osborne Owen Owens Page Palmer
Parker Parkes Parkin Parkinson Parry Parsons Patel Patterson Payne
Peacock Pearce Pearson Perkins Perry Peters Phillips Pickering Pollard
Poole Pope Porter Potter Potts Powell Power Pratt Preston Price
Pritchard Pugh Quinn Rahman Randall Read Reed Rees Reeves Reid Reynolds
Rhodes Rice Richards Richardson Riley Roberts Robertson Robinson Robson
Rogers Rose Ross Rowe Rowley Russell Ryan Sanders Sanderson Saunders
Savage Schofield Scott Shah Sharp Sharpe Shaw Shepherd Sheppard Short
Simmons Simpson Sims Sinclair Singh Skinner Slater Smart Smith Spencer
Stanley Steele Stephens Stephenson Stevens Stevenson Stewart Stokes
Stone Storey Sullivan Summers Sutton Swift Sykes Talbot Taylor Thomas
Thomson Thornton Thorpe Todd Tomlinson Townsend Tucker Turnbull Turner
Tyler Vaughan Vincent Wade Walker Wall Wallace Wallis Walsh Walters
Walton Ward Warner Warren Waters Watkins Watson Watts Webb Webster Welch
Wells West Weston Wheeler White Whitehead Whitehouse Whittaker Wilkins
Wilkinson Williams Williamson Willis Wilson Winter Wong Wood Woods
Woodward Wright Wyatt Yates Young
""".split()

# Pool sizes, computed once; the sanitizers reduce hash values modulo these.
given_names_count = len(given_names)
surnames_count = len(surnames)
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""
|
|
2
|
+
API to sanitation session.
|
|
3
|
+
|
|
4
|
+
Sanitation session allows having a state within a single sanitation
|
|
5
|
+
process.
|
|
6
|
+
|
|
7
|
+
One important thing stored to the session is a secret key which is
|
|
8
|
+
generated to a new random value for each sanitation session, but it
|
|
9
|
+
stays constant during the whole sanitation process. Its value is never
|
|
10
|
+
revealed, so that it is possible to generate such one way hashes with
|
|
11
|
+
it, that should not be redoable afterwards. I.e. during the sanitation
|
|
12
|
+
session it's possible to do ``hash(C) -> H`` for any clear text C, but
|
|
13
|
+
it is not possible to check if H is the hashed value of C after the
|
|
14
|
+
sanitation session has ended.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import hmac
|
|
19
|
+
import random
|
|
20
|
+
import sys
|
|
21
|
+
import threading
|
|
22
|
+
|
|
23
|
+
from six import int2byte
|
|
24
|
+
|
|
25
|
+
if sys.version_info >= (3, 6):
|
|
26
|
+
from typing import Callable, Optional, Sequence # noqa
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Size of the session secret key in bits; `_initialize_session` generates
# SECRET_KEY_BITS // 8 random bytes.
SECRET_KEY_BITS = 128
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Thread-local holder of the session state. The secret key is stored on it
# as the `secret_key` attribute, so every thread has its own key.
_thread_local_storage = threading.local()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def hash_text_to_int(value, bit_length=32):
    # type: (str, int) -> int
    """
    Hash a text value to an integer.

    The text is hashed with `hash_text` and the leading hexadecimal digits
    of the result are interpreted as a number. Every hex digit carries four
    bits, so ``bit_length // 4`` digits are used.

    :param value: Text value to hash
    :param bit_length: Number of bits to use from the hash value.
    :return: Integer value within ``0 <= result < 2**bit_length``
    """
    hex_digit_count = bit_length // 4
    leading_digits = hash_text(value)[0:hex_digit_count]
    return int(leading_digits, 16)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def hash_text_to_ints(value, bit_lengths=(16, 16, 16, 16)):
    # type: (str, Sequence[int]) -> Sequence[int]
    """
    Hash a text value to a sequence of integers.

    Works like `hash_text_to_int`, but carves several numbers out of a
    single hash: consecutive, non-overlapping runs of hexadecimal digits are
    taken from the start of the hash, one run per requested bit length.

    :param value: Text value to hash
    :param bit_lengths:
        Tuple of bit lengths for the resulting integers. Defines also the
        length of the result tuple.
    :return:
        Tuple of ``n`` integers ``(R_1, ... R_n)`` with the requested
        bit-lengths ``(L_1, ..., L_n)`` and values ranging within
        ``0 <= R_i < 2**L_i`` for each ``i``.
    """
    digest = hash_text(value)
    # Four bits per hexadecimal digit.
    widths = [bits // 4 for bits in bit_lengths]

    numbers = []
    offset = 0
    for width in widths:
        numbers.append(int(digest[offset:(offset + width)], 16))
        offset += width
    return tuple(numbers)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def hash_text(value, hasher=hashlib.sha256, encoding='utf-8'):
    # type: (str, Callable, str) -> str
    """
    Generate a hash for a text value.

    The text is encoded to bytes with the given encoding and the bytes are
    then hashed with `hash_bytes`, i.e. with HMAC keyed by the session
    secret and using the given hash function.

    :param value: Text value to hash
    :param hasher: Hash function to use, SHA256 by default
    :param encoding: Encoding to use, UTF-8 by default
    :return: Hexadecimal presentation of the hash as a string
    """
    encoded = value.encode(encoding)
    return hash_bytes(encoded, hasher)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def hash_bytes(value, hasher=hashlib.sha256):
    # type: (bytes, Callable) -> str
    """
    Generate a hash for a bytes value.

    The hash is an HMAC of the value, keyed with the session secret
    returned by `get_secret` and computed with the given hash function.

    :param value: Bytes value to hash
    :param hasher: Hash function to use.
    :return: Hexadecimal presentation of the hash as a string
    """
    mac = hmac.new(get_secret(), value, hasher)
    return mac.hexdigest()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_secret():
    # type: () -> bytes
    """
    Get session specific secret key.

    If the current thread has no secret key yet (or it is unset or empty), a
    new one is generated first with `_initialize_session`.

    :return: Session key as bytes
    """
    current_key = getattr(_thread_local_storage, 'secret_key', None)
    if not current_key:
        _initialize_session()
    return _thread_local_storage.secret_key  # type: ignore
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def reset(secret_key=None):
    # type: (Optional[bytes]) -> None
    """
    Reset the session.

    Called without arguments, the secret key of the current thread is
    cleared, so that the next call needing it generates a fresh one. This
    separates a new sanitation process from an earlier one run on the same
    thread.

    A predefined secret key may be given instead, in which case it is used
    as the session secret from now on.

    :param secret_key:
        Value to set as the new session secret key or None if a new one
        should be generated as soon as one is needed.
    """
    _thread_local_storage.secret_key = secret_key
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _initialize_session():
    # type: () -> None
    """
    Generate a new session key and store it to thread local storage.

    The key consists of ``SECRET_KEY_BITS // 8`` bytes, each drawn from
    `random.SystemRandom`.
    """
    rng = random.SystemRandom()
    key_length = SECRET_KEY_BITS // 8
    key_bytes = [int2byte(rng.randint(0, 255)) for _ in range(key_length)]
    _thread_local_storage.secret_key = b''.join(key_bytes)
|
|
File without changes
|