django-db-anonymiser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. django_db_anonymiser/database_sanitizer/__init__.py +0 -0
  2. django_db_anonymiser/database_sanitizer/__main__.py +68 -0
  3. django_db_anonymiser/database_sanitizer/config.py +373 -0
  4. django_db_anonymiser/database_sanitizer/dump/__init__.py +47 -0
  5. django_db_anonymiser/database_sanitizer/dump/mysql.py +196 -0
  6. django_db_anonymiser/database_sanitizer/dump/postgres.py +170 -0
  7. django_db_anonymiser/database_sanitizer/sanitizers/__init__.py +0 -0
  8. django_db_anonymiser/database_sanitizer/sanitizers/constant.py +14 -0
  9. django_db_anonymiser/database_sanitizer/sanitizers/derived.py +14 -0
  10. django_db_anonymiser/database_sanitizer/sanitizers/string.py +31 -0
  11. django_db_anonymiser/database_sanitizer/sanitizers/times.py +11 -0
  12. django_db_anonymiser/database_sanitizer/sanitizers/user.py +145 -0
  13. django_db_anonymiser/database_sanitizer/session.py +146 -0
  14. django_db_anonymiser/database_sanitizer/tests/__init__.py +0 -0
  15. django_db_anonymiser/database_sanitizer/tests/test_config.py +256 -0
  16. django_db_anonymiser/database_sanitizer/tests/test_dump.py +123 -0
  17. django_db_anonymiser/database_sanitizer/tests/test_dump_mysql.py +196 -0
  18. django_db_anonymiser/database_sanitizer/tests/test_dump_postgres.py +177 -0
  19. django_db_anonymiser/database_sanitizer/tests/test_main.py +91 -0
  20. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_constant.py +29 -0
  21. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_derived.py +19 -0
  22. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_string.py +44 -0
  23. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_times.py +18 -0
  24. django_db_anonymiser/database_sanitizer/tests/test_sanitizers_user.py +67 -0
  25. django_db_anonymiser/database_sanitizer/tests/test_session.py +36 -0
  26. django_db_anonymiser/database_sanitizer/tests/test_utils_mysql.py +112 -0
  27. django_db_anonymiser/database_sanitizer/tests/test_utils_postgres.py +86 -0
  28. django_db_anonymiser/database_sanitizer/utils/__init__.py +0 -0
  29. django_db_anonymiser/database_sanitizer/utils/mysql.py +161 -0
  30. django_db_anonymiser/database_sanitizer/utils/postgres.py +145 -0
  31. django_db_anonymiser/db_anonymiser/__init__.py +0 -0
  32. django_db_anonymiser/db_anonymiser/faker.py +91 -0
  33. django_db_anonymiser/db_anonymiser/management/__init__.py +0 -0
  34. django_db_anonymiser/db_anonymiser/management/commands/__init__.py +0 -0
  35. django_db_anonymiser/db_anonymiser/management/commands/dump_and_anonymise.py +105 -0
  36. django_db_anonymiser/db_anonymiser/tests/test_command.py +90 -0
  37. django_db_anonymiser/db_anonymiser/tests/test_faker.py +116 -0
  38. django_db_anonymiser-0.1.0.dist-info/METADATA +98 -0
  39. django_db_anonymiser-0.1.0.dist-info/RECORD +40 -0
  40. django_db_anonymiser-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,170 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import unicode_literals
4
+
5
+ import codecs
6
+ import io
7
+ import re
8
+ import subprocess
9
+
10
+ from ..utils.postgres import decode_copy_value, encode_copy_value
11
+ from ..config import PG_DUMP_DEFAULT_PARAMETERS
12
+
13
# Matches the header line of a `COPY "schema"."table" (columns) FROM stdin;`
# statement as emitted by `pg_dump --quote-all-identifiers`.  The dot that
# separates schema and table is escaped so that only a literal "." matches.
COPY_LINE_PATTERN = re.compile(
    r"^COPY \"(?P<schema>[^\"]*)\"\.\"(?P<table>[^\"]*)\" "
    r"\((?P<columns>.*)\) "
    r"FROM stdin;$"
)
18
+
19
+
20
def sanitize(url, config):
    """
    Obtains a dump of a Postgres database by executing the `pg_dump` command
    and yields its output line by line, with the rows of `COPY` statements
    sanitized according to the given configuration.

    :param url: URL to the database which is going to be sanitized, parsed by
                Python's URL parser.
    :type url: six.moves.urllib.parse.ParseResult

    :param config: Optional sanitizer configuration to be used for sanitation
                   of the values stored in the database.
    :type config: database_sanitizer.config.Configuration|None

    :return: Generator of dump lines without trailing new lines.

    :raises ValueError: If the URL scheme is not a supported Postgres scheme,
                        or if a row has a different number of values than
                        its `COPY` statement has columns.
    :raises subprocess.CalledProcessError: If `pg_dump` exits with a non-zero
                        status after its whole output has been consumed.
    """
    if url.scheme not in ("postgres", "postgresql", "postgis"):
        raise ValueError("Unsupported database type: '%s'" % (url.scheme,))

    extra_params = PG_DUMP_DEFAULT_PARAMETERS
    if config:
        extra_params = config.pg_dump_params

    command = (
        "pg_dump",
        # Force output to be UTF-8 encoded.
        "--encoding=utf-8",
        # Quote all table and column names, just in case.
        "--quote-all-identifiers",
        # Luckily `pg_dump` supports DB URLs, so we can just pass it the
        # URL as argument to the command.
        "--dbname",
        url.geturl().replace('postgis://', 'postgresql://'),
    ) + tuple(extra_params)

    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    reader = io.TextIOWrapper(process.stdout, encoding="utf-8")

    try:
        sanitize_value_line = None
        current_table = None
        skip_table = False

        for line in reader:
            # Eat the trailing new line.
            line = line.rstrip("\n")

            # Are we currently in middle of `COPY` statement?
            if current_table:
                # Backslash following a dot marks end of a `COPY` statement.
                if line == "\\.":
                    current_table = None
                    if not skip_table:
                        yield "\\."
                    skip_table = False
                    continue

                # Rows of skipped tables are dropped from the output.
                if skip_table:
                    continue

                # No sanitizer configured for this table: pass rows through.
                if not sanitize_value_line:
                    yield line
                    continue

                yield sanitize_value_line(line)
                continue

            # Is the line beginning of `COPY` statement?
            copy_line_match = COPY_LINE_PATTERN.match(line)
            if not copy_line_match:
                yield line
                continue

            current_table = copy_line_match.group("table")
            current_table_columns = parse_column_names(
                copy_line_match.group("columns"))

            # Skip `COPY` statement if table rows are configured
            # to be skipped.
            if config and current_table in config.skip_rows_for_tables:
                skip_table = True
                continue

            sanitize_value_line = get_value_line_sanitizer(
                config, current_table, current_table_columns)

            yield line
    finally:
        # Always release the pipe and reap the child process, also when the
        # consumer abandons the generator early or a row fails to sanitize.
        reader.close()
        process.wait()

    # Only reached when the whole output was consumed: a failing `pg_dump`
    # must not be mistaken for a complete dump.
    if process.returncode:
        raise subprocess.CalledProcessError(process.returncode, command)
104
+
105
+
106
def get_value_line_sanitizer(config, table, columns):
    """
    Builds a function which sanitizes a single row line of a `COPY`
    statement for the given table.

    :param config: Sanitizer configuration, or a falsy value when no
                   sanitation is configured.
    :param table: Name of the table the rows belong to.
    :param columns: Column names in the order the values appear on a row.

    :return: Function taking a tab separated row line and returning the
             sanitized line, or None when there is no configuration or no
             column of the table has a sanitizer.
    """
    if not config:
        return None

    def build_column_sanitizer(column_name):
        column_sanitizer = config.get_sanitizer_for(table, column_name)
        if not column_sanitizer:
            return _identity

        def decode_sanitize_encode(raw):
            # Values on the line are in COPY encoding; the sanitizer works
            # on decoded values.
            decoded = decode_copy_value(raw)
            return encode_copy_value(column_sanitizer(decoded))

        return decode_sanitize_encode

    column_sanitizers = [build_column_sanitizer(name) for name in columns]

    # Nothing to sanitize on this table, so let the caller pass rows through.
    if not any(fn is not _identity for fn in column_sanitizers):
        return None

    def sanitize_line(line):
        fields = line.split('\t')
        if len(fields) != len(columns):
            raise ValueError("Mismatch between column names and values.")
        return '\t'.join(
            fn(field) for (fn, field) in zip(column_sanitizers, fields))

    return sanitize_line
135
+
136
+
137
def _identity(x):
    """
    Returns the given value unchanged. Used as the sanitizer of columns
    which have no sanitizer configured.
    """
    return x
139
+
140
+
141
def parse_column_names(text):
    """
    Extracts column names from a string containing quoted and comma separated
    column names.

    Commas inside double quoted names do not split the name, and a doubled
    double quote inside a quoted name is unescaped to a single double quote.
    Names without quotes are returned as they are, stripped of surrounding
    whitespace.

    :param text: Line extracted from `COPY` statement containing quoted and
                 comma separated column names.
    :type text: str

    :return: Tuple containing just the column names.
    :rtype: tuple[str]
    """
    raw_names = []
    current = []
    inside_quotes = False
    for char in text:
        if char == '"':
            # A doubled quote toggles twice, so the state stays "inside".
            inside_quotes = not inside_quotes
        if char == "," and not inside_quotes:
            raw_names.append("".join(current))
            current = []
        else:
            current.append(char)
    raw_names.append("".join(current))
    return tuple(_unquote_identifier(name.strip()) for name in raw_names)


def _unquote_identifier(name):
    """
    Strips the surrounding double quotes of a quoted identifier and
    unescapes doubled double quotes inside it.
    """
    if len(name) >= 2 and name[0] == '"' and name[-1] == '"':
        return name[1:-1].replace('""', '"')
    return name
157
+
158
+
159
def parse_values(text):
    """
    Splits a row line following a `COPY` statement on tab characters and
    decodes each value from the Postgres `COPY` format.

    :param text: Line following `COPY` statement containing values.
    :type text: str

    :return: Column values extracted from the given line.
    :rtype: tuple[str|None]
    """
    raw_values = text.split("\t")
    return tuple(map(decode_copy_value, raw_values))
@@ -0,0 +1,14 @@
1
def sanitize_null(value):
    """
    Sanitizer which discards the original value.

    :param value: Original value; ignored.
    :return: Always None.
    """
    return None
3
+
4
+
5
def sanitize_empty_json_dict(value):
    """
    Sanitizer which replaces the original value with an empty JSON object.

    :param value: Original value; ignored.
    :return: Always the string ``{}``.
    """
    return '{}'
7
+
8
+
9
def sanitize_empty_json_list(value):
    """
    Sanitizer which replaces the original value with an empty JSON array.

    :param value: Original value; ignored.
    :return: Always the string ``[]``.
    """
    return '[]'
11
+
12
+
13
def sanitize_invalid_django_password(value):
    """
    Sanitizer which replaces the original value with the string ``!``.

    NOTE(review): presumably relies on Django treating a password value
    starting with ``!`` as unusable -- confirm against the Django version
    in use.

    :param value: Original value; ignored.
    :return: Always the string ``!``.
    """
    return '!'
@@ -0,0 +1,14 @@
1
+ import uuid
2
+
3
+ from database_sanitizer.session import hash_text
4
+
5
# The all-zero UUID, with and without dashes.  `sanitize_uuid4` compares
# against the dashless form and returns the dashed form.
NIL_UUID = '00000000-0000-0000-0000-000000000000'
NIL_UUID_WITHOUT_DASHES = ''.join(NIL_UUID.split('-'))
7
+
8
+
9
def sanitize_uuid4(value):
    """
    Sanitizer which replaces a UUID with a version 4 UUID derived from the
    hash of the original value.

    :param value: Original UUID as text, with or without dashes.
    :return: The value itself when it is falsy, the dashed all-zero UUID
             when the value is the all-zero UUID, otherwise a UUID string
             built from the first 32 hex digits of ``hash_text(value)``.
    """
    if not value:
        return value

    undashed = value.replace('-', '')
    if undashed == NIL_UUID_WITHOUT_DASHES:
        return NIL_UUID

    digest_prefix = hash_text(value)[:32]
    derived = uuid.UUID(hex=digest_prefix, version=4)
    return str(derived)
@@ -0,0 +1,31 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import absolute_import, unicode_literals
4
+
5
+ import random
6
+ import string
7
+
8
# Alphabet from which `sanitize_random` picks replacement characters.
CHARACTERS = string.ascii_letters + string.digits
9
+
10
+
11
def sanitize_empty(value):
    """
    Built-in sanitizer which replaces the original value with an empty
    string. A None value is kept as None.
    """
    if value is None:
        return None
    return ""
16
+
17
+
18
def sanitize_zfill(value):
    """
    Built-in sanitizer which replaces the original value with a string of
    zeros of the same length. A None value is kept as None.
    """
    if value is None:
        return None
    return "0" * len(value)
23
+
24
+
25
def sanitize_random(value):
    """
    Built-in sanitizer which replaces the original value with a random
    string of the same length, drawn from ``CHARACTERS``.

    Falsy values (None or the empty string) are returned unchanged.
    """
    if value:
        picked = [random.choice(CHARACTERS) for _ in range(len(value))]
        return ''.join(picked)
    return value
@@ -0,0 +1,11 @@
1
+ import datetime
2
+ import random
3
+
4
# Ten 365-day years expressed in seconds; the maximum age of a timestamp
# produced by `sanitize_random_past_timestamp`.
TEN_YEARS_AS_SECONDS = 10 * 365 * 24 * 3600
5
+
6
+
7
def sanitize_random_past_timestamp(value):
    """
    Sanitizer which replaces the original value with a random timestamp
    from the last ten years.

    The offset from the current local time is picked with millisecond
    resolution.

    :param value: Original value; ignored.
    :return: Naive local timestamp in ISO 8601 format.
    """
    offset_ms = random.randint(0, TEN_YEARS_AS_SECONDS * 1000)
    age = datetime.timedelta(seconds=(offset_ms / 1000.0))
    moment = datetime.datetime.now() - age
    return moment.isoformat()
@@ -0,0 +1,145 @@
1
+ from __future__ import unicode_literals
2
+
3
+ from six import text_type
4
+
5
+ from database_sanitizer.session import hash_text_to_int, hash_text_to_ints
6
+
7
+
8
def sanitize_email(value):
    """
    Sanitizer which replaces an e-mail address with a generated address of
    the form ``first.last@x<hex>.sanitized.net``.

    The names and the hexadecimal number are derived from the hash of the
    stripped original value. The names are lower-cased unless the number
    is divisible by eight, and apostrophes are removed from the surname.

    :param value: Original e-mail address.
    :return: Generated address, or the value itself when it is falsy.
    """
    if not value:
        return value

    (name_seed, surname_seed, host_seed) = hash_text_to_ints(
        value.strip(), [16, 16, 32])
    first = given_names[name_seed % given_names_count]
    last = surnames[surname_seed % surnames_count]

    # Seven out of eight addresses are lower-cased; the rest keep the
    # capitalisation of the name lists.
    if host_seed % 8 > 0:
        first = text_type.lower(first)
        last = text_type.lower(last)

    return '{first}.{last}@x{num:x}.sanitized.net'.format(
        first=first,
        last=last.replace("'", ''),
        num=host_seed)
19
+
20
+
21
def sanitize_username(value):
    """
    Sanitizer which replaces a username with a lower-cased given name
    followed by a hexadecimal number, both derived from the hash of the
    original value.

    :param value: Original username.
    :return: Generated username, or the value itself when it is falsy.
    """
    if not value:
        return value

    (name_seed, suffix) = hash_text_to_ints(value, [16, 32])
    base_name = given_names[name_seed % given_names_count].lower()
    return '{}{:x}'.format(base_name, suffix)
26
+
27
+
28
def sanitize_full_name_en_gb(value):
    """
    Sanitizer which replaces a full name with a given name and a surname
    picked from the name lists by the hash of the stripped, lower-cased
    original value.

    :param value: Original full name.
    :return: Generated "Given Surname" string, or the value itself when
             it is falsy.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    (name_seed, surname_seed) = hash_text_to_ints(normalized, [16, 16])
    first = given_names[name_seed % given_names_count]
    last = surnames[surname_seed % surnames_count]
    return '{} {}'.format(first, last)
34
+
35
+
36
def sanitize_given_name_en_gb(value):
    """
    Sanitizer which replaces a given name with one picked from the given
    name list by the hash of the stripped, lower-cased original value.

    :param value: Original given name.
    :return: Generated given name, or the value itself when it is falsy.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    index = hash_text_to_int(normalized) % given_names_count
    return given_names[index]
41
+
42
+
43
def sanitize_surname_en_gb(value):
    """
    Sanitizer which replaces a surname with one picked from the surname
    list by the hash of the stripped, lower-cased original value.

    :param value: Original surname.
    :return: Generated surname, or the value itself when it is falsy.
    """
    if not value:
        return value

    normalized = value.strip().lower()
    index = hash_text_to_int(normalized) % surnames_count
    return surnames[index]
48
+
49
+
50
# Given names used by the user sanitizers above.  The text is split on
# whitespace into a list; the sanitizers index it with a hash derived
# number modulo `given_names_count`, so changing the list changes which
# name an input maps to.
given_names = """
Aaron Abbie Abdul Abigail Adam Adrian Aimee Alan Albert Alex
Alexander Alexandra Alice Alison Allan Amanda Amber Amelia Amy Andrea
Andrew Angela Ann Anna Anne Annette Anthony Antony Arthur Ashleigh
Ashley Barbara Barry Ben Benjamin Bernard Beth Bethan Bethany Beverley
Billy Bradley Brandon Brenda Brett Brian Bruce Bryan Callum Cameron Carl
Carly Carol Carole Caroline Carolyn Catherine Charlene Charles Charlie
Charlotte Chelsea Cheryl Chloe Christian Christine Christopher Claire
Clare Clifford Clive Colin Connor Conor Craig Dale Damian Damien Daniel
Danielle Danny Darren David Dawn Dean Deborah Debra Declan Denis Denise
Dennis Derek Diana Diane Dominic Donald Donna Dorothy Douglas Duncan
Dylan Edward Eileen Elaine Eleanor Elizabeth Ellie Elliot Elliott Emily
Emma Eric Fiona Frances Francesca Francis Frank Frederick Gail Gareth
Garry Gary Gavin Gemma Geoffrey George Georgia Georgina Gerald Geraldine
Gerard Gillian Glen Glenn Gordon Grace Graeme Graham Gregory Guy Hannah
Harriet Harry Hayley Hazel Heather Helen Henry Hilary Hollie Holly
Howard Hugh Iain Ian Irene Jack Jacob Jacqueline Jade Jake James Jamie
Jane Janet Janice Jasmine Jason Jay Jayne Jean Jeffrey Jemma Jenna
Jennifer Jeremy Jessica Jill Joan Joanna Joanne Jodie Joe Joel John
Jonathan Jordan Joseph Josephine Josh Joshua Joyce Judith Julia Julian
Julie June Justin Karen Karl Kate Katherine Kathleen Kathryn Katie Katy
Kayleigh Keith Kelly Kenneth Kerry Kevin Kieran Kim Kimberley Kirsty
Kyle Laura Lauren Lawrence Leah Leanne Lee Leigh Leon Leonard Lesley
Leslie Lewis Liam Linda Lindsey Lisa Lorraine Louis Louise Lucy Luke
Lydia Lynda Lynn Lynne Malcolm Mandy Marc Marcus Margaret Maria Marian
Marie Marilyn Marion Mark Martin Martyn Mary Mathew Matthew Maureen
Maurice Max Megan Melanie Melissa Michael Michelle Mitchell Mohamed
Mohammad Mohammed Molly Naomi Natalie Natasha Nathan Neil Nicholas
Nicola Nicole Nigel Norman Oliver Olivia Owen Paige Pamela Patricia
Patrick Paul Paula Pauline Peter Philip Phillip Rachael Rachel Raymond
Rebecca Reece Rhys Richard Ricky Rita Robert Robin Roger Ronald Rosemary
Rosie Ross Roy Russell Ruth Ryan Sally Sam Samantha Samuel Sandra Sara
Sarah Scott Sean Shane Shannon Sharon Shaun Sheila Shirley Sian Simon
Sophie Stacey Stanley Stephanie Stephen Steven Stewart Stuart Susan
Suzanne Sylvia Terence Teresa Terry Thomas Timothy Tina Toby Tom Tony
Tracey Tracy Trevor Valerie Vanessa Victor Victoria Vincent Wayne Wendy
William Yvonne Zoe
""".strip().split()
88
+
89
+
90
# Surnames used by the user sanitizers above.  The text is split on
# whitespace into a list; the sanitizers index it with a hash derived
# number modulo `surnames_count`.  Some entries contain an apostrophe,
# which `sanitize_email` removes before building an address.
surnames = """
Abbott Adams Ahmed Akhtar Alexander Ali Allan Allen Anderson Andrews
Archer Armstrong Arnold Ashton Atkins Atkinson Austin Bailey Baker
Baldwin Ball Banks Barber Barker Barlow Barnes Barnett Barrett Barry
Bartlett Barton Bates Baxter Begum Bell Bennett Benson Bentley Berry
Bevan Bibi Birch Bird Bishop Black Blackburn Bolton Bond Booth Bowen
Boyle Bradley Bradshaw Brady Bray Brennan Briggs Brookes Brooks Brown
Browne Bruce Bryan Bryant Bull Burgess Burke Burns Burrows Burton
Butcher Butler Byrne Cameron Campbell Carey Carpenter Carr Carroll
Carter Cartwright Chadwick Chambers Chan Chandler Chapman Charlton Clark
Clarke Clayton Clements Coates Cole Coleman Coles Collier Collins
Connolly Connor Conway Cook Cooke Cooper Cox Craig Crawford Cross
Cunningham Curtis Dale Daly Daniels Davey Davidson Davies Davis Davison
Dawson Day Dean Dennis Dickinson Dixon Dobson Dodd Doherty Donnelly
Douglas Doyle Duffy Duncan Dunn Dyer Edwards Elliott Ellis Evans Farmer
Farrell Faulkner Ferguson Field Finch Fisher Fitzgerald Fleming Fletcher
Flynn Ford Forster Foster Fowler Fox Francis Franklin Fraser Freeman
French Frost Fry Fuller Gallagher Gardiner Gardner Garner George Gibbons
Gibbs Gibson Gilbert Giles Gill Glover Goddard Godfrey Goodwin Gordon
Gough Gould Graham Grant Gray Green Greenwood Gregory Griffin Griffiths
Hale Hall Hamilton Hammond Hancock Hanson Harding Hardy Hargreaves
Harper Harris Harrison Hart Hartley Harvey Hawkins Hayes Haynes Hayward
Heath Henderson Henry Herbert Hewitt Hicks Higgins Hill Hilton Hodgson
Holden Holland Holloway Holmes Holt Hooper Hope Hopkins Horton Houghton
Howard Howarth Howe Howell Howells Hudson Hughes Humphreys Humphries
Hunt Hunter Hurst Hussain Hutchinson Hyde Ingram Iqbal Jackson James
Jarvis Jenkins Jennings John Johnson Johnston Jones Jordan Joyce Kaur
Kay Kelly Kemp Kennedy Kent Kerr Khan King Kirby Kirk Knight Knowles
Lamb Lambert Lane Law Lawrence Lawson Leach Lee Lees Leonard Lewis
Little Lloyd Long Lord Lowe Lucas Lynch Lyons Macdonald Mahmood Mann
Manning Marsden Marsh Marshall Martin Mason Matthews May McCarthy
McDonald McKenzie McLean Mellor Metcalfe Miah Middleton Miles Miller
Mills Mistry Mitchell Moore Moran Morgan Morley Morris Morrison Morton
Moss Murphy Murray Myers Nash Naylor Nelson Newman Newton Nicholls
Nicholson Nixon Noble Nolan Norman Norris North Norton O'Blake O'Buckley
O'Chamberlain O'Hobbs O'Thompson Oliver Osborne Owen Owens Page Palmer
Parker Parkes Parkin Parkinson Parry Parsons Patel Patterson Payne
Peacock Pearce Pearson Perkins Perry Peters Phillips Pickering Pollard
Poole Pope Porter Potter Potts Powell Power Pratt Preston Price
Pritchard Pugh Quinn Rahman Randall Read Reed Rees Reeves Reid Reynolds
Rhodes Rice Richards Richardson Riley Roberts Robertson Robinson Robson
Rogers Rose Ross Rowe Rowley Russell Ryan Sanders Sanderson Saunders
Savage Schofield Scott Shah Sharp Sharpe Shaw Shepherd Sheppard Short
Simmons Simpson Sims Sinclair Singh Skinner Slater Smart Smith Spencer
Stanley Steele Stephens Stephenson Stevens Stevenson Stewart Stokes
Stone Storey Sullivan Summers Sutton Swift Sykes Talbot Taylor Thomas
Thomson Thornton Thorpe Todd Tomlinson Townsend Tucker Turnbull Turner
Tyler Vaughan Vincent Wade Walker Wall Wallace Wallis Walsh Walters
Walton Ward Warner Warren Waters Watkins Watson Watts Webb Webster Welch
Wells West Weston Wheeler White Whitehead Whitehouse Whittaker Wilkins
Wilkinson Williams Williamson Willis Wilson Winter Wong Wood Woods
Woodward Wright Wyatt Yates Young
""".strip().split()
143
+
144
# Sizes of the name lists, computed once at import time; the sanitizers
# use them as the modulus when turning a hash into a list index.
given_names_count = len(given_names)
surnames_count = len(surnames)
@@ -0,0 +1,146 @@
1
+ """
2
+ API to sanitation session.
3
+
4
+ Sanitation session allows having a state within a single sanitation
5
+ process.
6
+
7
+ One important thing stored to the session is a secret key which is
8
+ generated to a new random value for each sanitation session, but it
9
+ stays constant during the whole sanitation process. Its value is never
10
+ revealed, so that it is possible to generate such one way hashes with
11
+ it, that should not be redoable afterwards. I.e. during the sanitation
12
+ session it's possible to do ``hash(C) -> H`` for any clear text C, but
13
+ it is not possible to check if H is the hashed value of C after the
14
+ sanitation session has ended.
15
+ """
16
+
17
+ import hashlib
18
+ import hmac
19
+ import random
20
+ import sys
21
+ import threading
22
+
23
+ from six import int2byte
24
+
25
+ if sys.version_info >= (3, 6):
26
+ from typing import Callable, Optional, Sequence # noqa
27
+
28
+
29
# Length of a generated session secret key, in bits.
SECRET_KEY_BITS = 128


# Per-thread storage whose ``secret_key`` attribute holds the session key.
_thread_local_storage = threading.local()
33
+
34
+
35
def hash_text_to_int(value, bit_length=32):
    # type: (str, int) -> int
    """
    Hash a text value to an integer.

    Takes the leading ``bit_length // 4`` hexadecimal digits of the hash
    produced by `hash_text` and interprets them as a number.

    :param value: Text value to hash
    :param bit_length: Number of bits to use from the hash value.
    :return: Integer value within ``0 <= result < 2**bit_length``
    """
    hex_digit_count = bit_length // 4
    digest = hash_text(value)
    return int(digest[:hex_digit_count], 16)
48
+
49
+
50
def hash_text_to_ints(value, bit_lengths=(16, 16, 16, 16)):
    # type: (str, Sequence[int]) -> Sequence[int]
    """
    Hash a text value to a sequence of integers.

    The hash of the value is computed once with `hash_text` and cut into
    consecutive runs of hexadecimal digits, one run of ``L_i // 4`` digits
    for each requested bit-length; each run is interpreted as a number.

    :param value: Text value to hash
    :param bit_lengths:
        Tuple of bit lengths for the resulting integers. Defines also the
        length of the result tuple.
    :return:
        Tuple of ``n`` integers ``(R_1, ... R_n)`` with the requested
        bit-lengths ``(L_1, ..., L_n)`` and values ranging within
        ``0 <= R_i < 2**L_i`` for each ``i``.
    """
    digest = hash_text(value)
    widths = [bits // 4 for bits in bit_lengths]

    numbers = []
    start = 0
    for width in widths:
        # Each number uses the digits right after the previous one.
        end = start + width
        numbers.append(int(digest[start:end], 16))
        start = end
    return tuple(numbers)
73
+
74
+
75
def hash_text(value, hasher=hashlib.sha256, encoding='utf-8'):
    # type: (str, Callable, str) -> str
    """
    Generate a hash for a text value.

    The text is encoded to bytes with the given encoding and the bytes
    are hashed with `hash_bytes`, i.e. with HMAC keyed by the session
    secret.

    :param value: Text value to hash
    :param hasher: Hash function to use, SHA256 by default
    :param encoding: Encoding to use, UTF-8 by default
    :return: Hexadecimal presentation of the hash as a string
    """
    encoded = value.encode(encoding)
    return hash_bytes(encoded, hasher)
90
+
91
+
92
def hash_bytes(value, hasher=hashlib.sha256):
    # type: (bytes, Callable) -> str
    """
    Generate a hash for a bytes value.

    Computes an HMAC of the value, keyed with the session secret from
    `get_secret` and using the given hash function.

    :param value: Bytes value to hash
    :param hasher: Hash function to use.
    :return: Hexadecimal presentation of the hash as a string
    """
    mac = hmac.new(get_secret(), msg=value, digestmod=hasher)
    return mac.hexdigest()
105
+
106
+
107
def get_secret():
    # type: () -> bytes
    """
    Get session specific secret key.

    A new key is generated first when the current thread has no key yet,
    or when the stored key is falsy (e.g. after `reset`).

    :return: Session key as bytes
    """
    stored_key = getattr(_thread_local_storage, 'secret_key', None)
    if not stored_key:
        _initialize_session()
    return _thread_local_storage.secret_key  # type: ignore
117
+
118
+
119
def reset(secret_key=None):
    # type: (Optional[bytes]) -> None
    """
    Reset the session of the current thread.

    Called without arguments, this clears the stored secret key, so the
    next call needing the key generates a fresh one. Called with a key,
    this makes the session use that predefined key instead.

    :param secret_key:
        Value to set as the new session secret key or None if a new one
        should be generated as soon as one is needed.
    """
    _thread_local_storage.secret_key = secret_key
136
+
137
+
138
def _initialize_session():
    # type: () -> None
    """
    Generate a new session key of ``SECRET_KEY_BITS`` bits from the
    system random source and store it to thread local storage.
    """
    rng = random.SystemRandom()
    key_length = SECRET_KEY_BITS // 8
    key_bytes = [int2byte(rng.randint(0, 255)) for _ in range(key_length)]
    _thread_local_storage.secret_key = b''.join(key_bytes)