redacted-py 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- redacted-py-1.0.4/LICENSE +17 -0
- redacted-py-1.0.4/MANIFEST.in +3 -0
- redacted-py-1.0.4/PKG-INFO +123 -0
- redacted-py-1.0.4/README.md +90 -0
- redacted-py-1.0.4/pyproject.toml +35 -0
- redacted-py-1.0.4/setup.cfg +4 -0
- redacted-py-1.0.4/src/redacted/__init__.py +7 -0
- redacted-py-1.0.4/src/redacted/__main__.py +114 -0
- redacted-py-1.0.4/src/redacted/dictionary.py +83 -0
- redacted-py-1.0.4/src/redacted/redactor.py +116 -0
- redacted-py-1.0.4/src/redacted/tag.py +1 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/PKG-INFO +123 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/SOURCES.txt +17 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/dependency_links.txt +1 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/entry_points.txt +2 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/requires.txt +1 -0
- redacted-py-1.0.4/src/redacted_py.egg-info/top_level.txt +1 -0
- redacted-py-1.0.4/tests/test_dictionary.py +23 -0
- redacted-py-1.0.4/tests/test_redactor.py +71 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Copyright (c) 2024 Cyril Dever
|
|
2
|
+
|
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
4
|
+
|
|
5
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
6
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
7
|
+
|
|
8
|
+
Subject to the terms and conditions of this license, each copyright holder and contributor hereby grants to those receiving rights under this license a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except for failure to satisfy the conditions of this license) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer this software, where such license applies only to those patent claims, already acquired or hereafter acquired, licensable by such copyright holder or contributor that are necessarily infringed by:
|
|
9
|
+
|
|
10
|
+
(a) their Contribution(s) (the licensed copyrights of copyright holders and non-copyrightable additions of contributors, in source or binary form) alone; or
|
|
11
|
+
(b) combination of their Contribution(s) with the work of authorship to which such Contribution(s) was added by such copyright holder or contributor, if, at the time the Contribution is added, such addition causes such combination to be necessarily infringed. The patent license shall not apply to any other combinations which include the Contribution.
|
|
12
|
+
|
|
13
|
+
Except as expressly stated above, no rights or licenses from any copyright holder or contributor is granted under this license, whether expressly, by implication, estoppel or otherwise.
|
|
14
|
+
|
|
15
|
+
DISCLAIMER
|
|
16
|
+
|
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: redacted-py
|
|
3
|
+
Version: 1.0.4
|
|
4
|
+
Summary: Redacting classified documents
|
|
5
|
+
Author-email: Cyril Dever <cdever@pep-s.com>
|
|
6
|
+
License: Copyright (c) 2024 Cyril Dever
|
|
7
|
+
|
|
8
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
9
|
+
|
|
10
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
12
|
+
|
|
13
|
+
Subject to the terms and conditions of this license, each copyright holder and contributor hereby grants to those receiving rights under this license a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except for failure to satisfy the conditions of this license) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer this software, where such license applies only to those patent claims, already acquired or hereafter acquired, licensable by such copyright holder or contributor that are necessarily infringed by:
|
|
14
|
+
|
|
15
|
+
(a) their Contribution(s) (the licensed copyrights of copyright holders and non-copyrightable additions of contributors, in source or binary form) alone; or
|
|
16
|
+
(b) combination of their Contribution(s) with the work of authorship to which such Contribution(s) was added by such copyright holder or contributor, if, at the time the Contribution is added, such addition causes such combination to be necessarily infringed. The patent license shall not apply to any other combinations which include the Contribution.
|
|
17
|
+
|
|
18
|
+
Except as expressly stated above, no rights or licenses from any copyright holder or contributor is granted under this license, whether expressly, by implication, estoppel or otherwise.
|
|
19
|
+
|
|
20
|
+
DISCLAIMER
|
|
21
|
+
|
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
23
|
+
|
|
24
|
+
Project-URL: Homepage, https://github.com/cyrildever/redacted
|
|
25
|
+
Keywords: data,obfuscation,data masking,redacted,classified
|
|
26
|
+
Classifier: Programming Language :: Python :: 3
|
|
27
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
28
|
+
Classifier: Operating System :: OS Independent
|
|
29
|
+
Requires-Python: >=3.10.2
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Requires-Dist: feistel-py>=0.2.0
|
|
33
|
+
|
|
34
|
+
# redacted-py
|
|
35
|
+
_Redacting classified documents_
|
|
36
|
+
|
|
37
|
+

|
|
38
|
+

|
|
39
|
+

|
|
40
|
+

|
|
41
|
+

|
|
42
|
+
|
|
43
|
+
This repository holds the code base for my `redacted-py` library in Python. \
|
|
44
|
+
It is mainly based off my [Feistel cipher for Format-Preserving Encryption](https://github.com/cyrildever/feistel-py) to which I added a few tools to handle document, database and file manipulation to ease out the operation.
|
|
45
|
+
|
|
46
|
+
### Motivation
|
|
47
|
+
|
|
48
|
+
In some fields (like healthcare for instance), protecting the privacy of data whilst being able to conduct in-depth studies is both vital and mandatory. Redacting documents and databases is therefore the obligatory passage.
|
|
49
|
+
With `redacted-py`, I provide a simple yet secure tool to help redacting documents based on either a dictionary, a record layout or a tag to decide which parts should actually be redacted.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
### Usage
|
|
53
|
+
|
|
54
|
+
You can use either a dictionary or a tag (or both) to identify the words you want to redact in a document.
|
|
55
|
+
The tag should be placed before any word that should be redacted. The default tag is the tilde character (`~`).
|
|
56
|
+
|
|
57
|
+
For example, the following sentence will only see the word `tagged` redacted: `"This is a ~tagged sentence"`.
|
|
58
|
+
|
|
59
|
+
```console
|
|
60
|
+
$ pip install redacted-py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from redacted import DefaultRedactor, Dictionary
|
|
65
|
+
from feistel import FPECipher, SHA_256
|
|
66
|
+
|
|
67
|
+
source = "Some text ~tagged or using words in a dictionary"
|
|
68
|
+
|
|
69
|
+
cipher = FPECipher(SHA_256, key, 10)
|
|
70
|
+
redactor = DefaultRedactor(cipher)
|
|
71
|
+
redacted = redactor.redact(source)
|
|
72
|
+
|
|
73
|
+
expanded = redactor.expand(redacted)
|
|
74
|
+
assert expanded == source, "Original data should equal ciphered then deciphered data"
|
|
75
|
+
|
|
76
|
+
cleansed = redactor.clean(expanded)
|
|
77
|
+
assert cleansed == "Some text tagged or using words in a dictionary", "Cleaning should remove any tag mark"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
You may also use it in the console with the following command line instructions:
|
|
81
|
+
```
|
|
82
|
+
usage: python3 -m redacted [-h] [-b | --both | --no-both] [-d DICTIONARY] [-H HASH] [-i INPUT] [-k KEY] [-o OUTPUT] [-r ROUNDS] [-t TAG] [-x | --expand | --no-expand]
|
|
83
|
+
|
|
84
|
+
options:
|
|
85
|
+
-h, --help show this help message and exit
|
|
86
|
+
-b, --both, --no-both
|
|
87
|
+
Add to use both dictionary and tag
|
|
88
|
+
-d DICTIONARY, --dictionary DICTIONARY
|
|
89
|
+
The optional path to the dictionary of words to redact
|
|
90
|
+
-H HASH, --hash HASH The hash engine for the round function [default sha-256]
|
|
91
|
+
-i INPUT, --input INPUT
|
|
92
|
+
The path to the document to be redacted
|
|
93
|
+
-k KEY, --key KEY The optional key for the FPE scheme (leave it empty to use default)
|
|
94
|
+
-o OUTPUT, --output OUTPUT
|
|
95
|
+
The name of the output file
|
|
96
|
+
-r ROUNDS, --rounds ROUNDS
|
|
97
|
+
The number of rounds for the Feistel cipher [default 10]
|
|
98
|
+
-t TAG, --tag TAG The optional tag that prefixes words to redact [default ~]
|
|
99
|
+
-x, --expand, --no-expand
|
|
100
|
+
Add to expand a redacted document
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### Tests
|
|
105
|
+
|
|
106
|
+
```console
|
|
107
|
+
$ git clone https://github.com/cyrildever/redacted.git
|
|
108
|
+
$ cd redacted/py/
|
|
109
|
+
$ pip install -e .
|
|
110
|
+
$ python3 -m unittest discover
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
### License
|
|
115
|
+
|
|
116
|
+
The use of the `redacted` libraries and executables are subject to fees for commercial purpose and to the respect of the [BSD-2-Clause-Patent license](LICENSE). \
|
|
117
|
+
Please [contact me](mailto:cdever@pep-s.com) to get further information.
|
|
118
|
+
|
|
119
|
+
_NB: It is still under development so use in production at your own risk for now._
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
<hr />
|
|
123
|
+
© 2024 Cyril Dever. All rights reserved.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# redacted-py
|
|
2
|
+
_Redacting classified documents_
|
|
3
|
+
|
|
4
|
+

|
|
5
|
+

|
|
6
|
+

|
|
7
|
+

|
|
8
|
+

|
|
9
|
+
|
|
10
|
+
This repository holds the code base for my `redacted-py` library in Python. \
|
|
11
|
+
It is mainly based off my [Feistel cipher for Format-Preserving Encryption](https://github.com/cyrildever/feistel-py) to which I added a few tools to handle document, database and file manipulation to ease out the operation.
|
|
12
|
+
|
|
13
|
+
### Motivation
|
|
14
|
+
|
|
15
|
+
In some fields, such as healthcare, protecting the privacy of data whilst still being able to conduct in-depth studies is both vital and mandatory. Redacting documents and databases is therefore a necessary step.
|
|
16
|
+
With `redacted-py`, I provide a simple yet secure tool to help redact documents, using a dictionary, a record layout or a tag to decide which parts should actually be redacted.
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
### Usage
|
|
20
|
+
|
|
21
|
+
You can use either a dictionary or a tag (or both) to identify the words you want to redact in a document.
|
|
22
|
+
The tag should be placed before any word that should be redacted. The default tag is the tilde character (`~`).
|
|
23
|
+
|
|
24
|
+
For example, the following sentence will only see the word `tagged` redacted: `"This is a ~tagged sentence"`.
|
|
25
|
+
|
|
26
|
+
```console
|
|
27
|
+
$ pip install redacted-py
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from redacted import DefaultRedactor, Dictionary
|
|
32
|
+
from feistel import FPECipher, SHA_256
|
|
33
|
+
|
|
34
|
+
source = "Some text ~tagged or using words in a dictionary"
|
|
35
|
+
|
|
36
|
+
cipher = FPECipher(SHA_256, key, 10)
|
|
37
|
+
redactor = DefaultRedactor(cipher)
|
|
38
|
+
redacted = redactor.redact(source)
|
|
39
|
+
|
|
40
|
+
expanded = redactor.expand(redacted)
|
|
41
|
+
assert expanded == source, "Original data should equal ciphered then deciphered data"
|
|
42
|
+
|
|
43
|
+
cleansed = redactor.clean(expanded)
|
|
44
|
+
assert cleansed == "Some text tagged or using words in a dictionary", "Cleaning should remove any tag mark"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
You may also use it in the console with the following command line instructions:
|
|
48
|
+
```
|
|
49
|
+
usage: python3 -m redacted [-h] [-b | --both | --no-both] [-d DICTIONARY] [-H HASH] [-i INPUT] [-k KEY] [-o OUTPUT] [-r ROUNDS] [-t TAG] [-x | --expand | --no-expand]
|
|
50
|
+
|
|
51
|
+
options:
|
|
52
|
+
-h, --help show this help message and exit
|
|
53
|
+
-b, --both, --no-both
|
|
54
|
+
Add to use both dictionary and tag
|
|
55
|
+
-d DICTIONARY, --dictionary DICTIONARY
|
|
56
|
+
The optional path to the dictionary of words to redact
|
|
57
|
+
-H HASH, --hash HASH The hash engine for the round function [default sha-256]
|
|
58
|
+
-i INPUT, --input INPUT
|
|
59
|
+
The path to the document to be redacted
|
|
60
|
+
-k KEY, --key KEY The optional key for the FPE scheme (leave it empty to use default)
|
|
61
|
+
-o OUTPUT, --output OUTPUT
|
|
62
|
+
The name of the output file
|
|
63
|
+
-r ROUNDS, --rounds ROUNDS
|
|
64
|
+
The number of rounds for the Feistel cipher [default 10]
|
|
65
|
+
-t TAG, --tag TAG The optional tag that prefixes words to redact [default ~]
|
|
66
|
+
-x, --expand, --no-expand
|
|
67
|
+
Add to expand a redacted document
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
### Tests
|
|
72
|
+
|
|
73
|
+
```console
|
|
74
|
+
$ git clone https://github.com/cyrildever/redacted.git
|
|
75
|
+
$ cd redacted/py/
|
|
76
|
+
$ pip install -e .
|
|
77
|
+
$ python3 -m unittest discover
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
### License
|
|
82
|
+
|
|
83
|
+
Use of the `redacted` libraries and executables is subject to fees for commercial purposes and to compliance with the [BSD-2-Clause-Patent license](LICENSE). \
|
|
84
|
+
Please [contact me](mailto:cdever@pep-s.com) to get further information.
|
|
85
|
+
|
|
86
|
+
_NB: It is still under development so use in production at your own risk for now._
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
<hr />
|
|
90
|
+
© 2024 Cyril Dever. All rights reserved.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# pyproject.toml
|
|
2
|
+
|
|
3
|
+
[build-system]
|
|
4
|
+
requires = ["setuptools>=58.1.0", "wheel"]
|
|
5
|
+
build-backend = "setuptools.build_meta"
|
|
6
|
+
|
|
7
|
+
[project]
|
|
8
|
+
name = "redacted-py"
|
|
9
|
+
version = "1.0.4"
|
|
10
|
+
description = "Redacting classified documents"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
authors = [{ name = "Cyril Dever", email = "cdever@pep-s.com" }]
|
|
13
|
+
license = { file = "LICENSE" }
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
keywords = [
|
|
20
|
+
"data",
|
|
21
|
+
"obfuscation",
|
|
22
|
+
"data masking",
|
|
23
|
+
"redacted",
|
|
24
|
+
"classified",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"feistel-py >= 0.2.0",
|
|
28
|
+
]
|
|
29
|
+
requires-python = ">=3.10.2"
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/cyrildever/redacted"
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
redacted-py = "redacted.__main__:main"
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from feistel import FPECipher, Engine, is_available_engine, SHA_256
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
from redacted import (
|
|
6
|
+
DEFAULT_TAG,
|
|
7
|
+
file2Dictionary,
|
|
8
|
+
Redactor,
|
|
9
|
+
RedactorWithDictionary,
|
|
10
|
+
RedactorWithTag,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Fallback key handed to the FPE cipher when no `--key` option is supplied.
# NOTE(review): this value ships with the published package, so a document
# redacted with it can be expanded by anyone; pass your own key for real data.
DEFAULT_KEY = "d51e1d9a9b12cd88a1d232c1b8730a05c8a65d9706f30cdb8e08b9ed4c7b16a0"
|
|
15
|
+
# Number of Feistel rounds used when `--rounds` is missing or lower than 2.
DEFAULT_ROUNDS = 10
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _build_parser() -> argparse.ArgumentParser:
    """
    Builds the parser describing the command line options of the tool
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-b",
        "--both",
        action=argparse.BooleanOptionalAction,
        help="Add to use both dictionary and tag",
    )
    parser.add_argument(
        "-d",
        "--dictionary",
        help="The optional path to the dictionary of words to redact",
    )
    parser.add_argument(
        "-H", "--hash", help="The hash engine for the round function [default sha-256]"
    )
    parser.add_argument("-i", "--input", help="The path to the document to be redacted")
    parser.add_argument(
        "-k",
        "--key",
        help="The optional key for the FPE scheme (leave it empty to use default)",
    )
    parser.add_argument("-o", "--output", help="The name of the output file")
    parser.add_argument(
        "-r",
        "--rounds",
        help="The number of rounds for the Feistel cipher [default 10]",
    )
    parser.add_argument(
        "-t", "--tag", help="The optional tag that prefixes words to redact [default ~]"
    )
    parser.add_argument(
        "-x",
        "--expand",
        action=argparse.BooleanOptionalAction,
        help="Add to expand a redacted document",
    )
    return parser


def main(args=None):
    """
    Redacts (or expands, with `--expand`) the input document line by line
    and writes the result to the output file.

    `args` is the namespace of parsed command line options. It may be omitted,
    in which case the options are parsed from the command line: this is what
    happens when the function is called without argument by the `redacted-py`
    console script.

    Raises an `Exception` if the input or output path is missing, if neither
    a tag nor a dictionary is given, or if `--both` is used without a dictionary.
    """
    if args is None:
        args = _build_parser().parse_args()

    if not args.input or not args.output:
        raise Exception("Input and output file paths are mandatory")
    if not args.tag and not args.dictionary:
        raise Exception("Use to set either a tag or a dictionary")
    tag = args.tag
    if args.both:
        if not args.dictionary:
            raise Exception(
                "Tag and dictionary must be set if you want to use them both"
            )
        elif not args.tag:
            print("WARN - Tag not set: default ~ will be used!")
            tag = DEFAULT_TAG

    # Only build an engine from a value that was actually supplied, and fall
    # back to SHA-256 whenever that value is unknown or unavailable.
    hash_engine = SHA_256
    if args.hash:
        try:
            # presumably an enum-like lookup raising ValueError on unknown
            # names -- TODO confirm against the feistel package
            requested = Engine(args.hash)
        except ValueError:
            requested = None
        if requested is not None and is_available_engine(requested):
            hash_engine = requested
        else:
            print("WARN - Wrong hash engine: default SHA-256 will be used instead!")
    key = args.key
    if not key:
        key = DEFAULT_KEY
    rounds = int(args.rounds) if args.rounds else 0
    if rounds < 2:
        print("WARN - Not enough rounds: default 10 will be used instead!")
        rounds = DEFAULT_ROUNDS

    msg = "Start redacting..."
    if args.expand:
        msg = "Start expanding..."
    print(f"INFO - {msg}")

    # Prepare processing
    # `dic` stays None when no dictionary file was given (tag-only mode), so
    # that the selection below never reads an unbound variable.
    dic = file2Dictionary(args.dictionary) if args.dictionary else None

    cipher = FPECipher(hash_engine, key, rounds)
    if args.both:
        redactor = Redactor(dictionary=dic, tag=tag, cipher=cipher, both=True)
    elif not args.expand and dic is not None and not dic.is_empty():
        redactor = RedactorWithDictionary(dictionary=dic, cipher=cipher)
    else:
        redactor = RedactorWithTag(tag=tag, cipher=cipher)

    # Do process
    with open(args.input, "r") as inputfile, open(args.output, "w") as outputfile:
        for line in inputfile:
            if not args.expand:
                redacted_line = redactor.redact(line)
                outputfile.write(redacted_line + "\n")
            else:
                expanded_line = redactor.expand(line)
                outputfile.write(expanded_line + "\n")

    print("INFO - Process completed.")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
class Dictionary:
    """
    A Dictionary holds the list of words to be tagged
    """

    def __init__(self, words: list[str]):
        self.words = words

    def contains(self, word: str) -> bool:
        """
        Check whether the passed word is already in the Dictionary

        A word matches when it is listed as is, when it is a listed word
        followed by a possessive `'s`, or when it is a listed word once
        stripped of its surrounding whitespace and punctuation.
        """
        if word in self.words:
            return True
        # Possessive form: "Bob's" matches the entry "Bob"
        if word.endswith("'s") and word[:-2] in self.words:
            return True
        return _remove_punctuation(word.strip()) in self.words

    def is_empty(self) -> bool:
        """
        Returns `True` if there is no word in the Dictionary
        """
        return not self.words

    def length(self) -> int:
        """
        Gets the number of words
        """
        return len(self.words)

    def to_string(self) -> str:
        """
        Returns the dictionary as a space-separated list of words
        """
        return " ".join(self.words)

    def __eq__(self, other) -> bool:
        # Let Python fall back to its default comparison instead of raising
        # an AttributeError when compared with something else than a Dictionary
        if not isinstance(other, Dictionary):
            return NotImplemented
        return self.words == other.words

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.words!r})"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def file2Dictionary(path: str) -> Dictionary:
    """
    Load the content of a file into a Dictionary

    The words may be separated by any run of whitespace (spaces, tabs or
    line breaks), so that a trailing newline or a one-word-per-line file
    does not leave whitespace glued to the words nor create empty entries.
    """
    with open(path, "r") as f:
        data = f.read()

    return Dictionary(data.split())
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def string2Dictionary(string: str, *delimiters) -> Dictionary:
    """
    Builds a Dictionary from a string, cutting it successively on each of
    the passed delimiters (a single space when none is given)
    """
    separators = delimiters if delimiters else (" ",)

    pieces = [string]
    for separator in separators:
        # Re-split every piece obtained so far on the next separator
        pieces = [part for piece in pieces for part in piece.split(separator)]

    return Dictionary(pieces)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _remove_punctuation(string: str) -> str:
|
|
69
|
+
return (
|
|
70
|
+
string.strip(".")
|
|
71
|
+
.strip(",")
|
|
72
|
+
.strip(":")
|
|
73
|
+
.strip(";")
|
|
74
|
+
.strip("?")
|
|
75
|
+
.strip("!")
|
|
76
|
+
.strip("(")
|
|
77
|
+
.strip(")")
|
|
78
|
+
.strip("-")
|
|
79
|
+
.strip("_")
|
|
80
|
+
.strip("+")
|
|
81
|
+
.strip("/")
|
|
82
|
+
.strip("\\")
|
|
83
|
+
)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from feistel import FPECipher
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
from redacted import Dictionary, DEFAULT_TAG
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Redactor:
    """
    Parent class for redactors

    A redactor ciphers the words of a line that are either found in its
    dictionary or prefixed with its tag, and deciphers them back with the
    same cipher.
    """

    def __init__(self, dictionary: Dictionary, tag: str, cipher: FPECipher, both: bool):
        self.dictionary = dictionary
        self.tag = tag
        self.cipher = cipher
        self.both = both

    @staticmethod
    def _split_pattern(delimiters) -> str:
        """
        Builds the regular expression used to cut a line into words:
        any whitespace character or any of the passed delimiters
        """
        # Whitespace is always covered by `\s`; every other delimiter is
        # escaped so that it is matched literally, as an alternative.
        extra = [
            re.escape(delim)
            for delim in delimiters
            if delim and delim not in (" ", "\t", "\n", "\\s")
        ]
        return "|".join([r"\s", *extra])

    def redact(self, line: str, *delimiters) -> str:
        """
        Returns the ciphered version of the passed input data

        The line is cut on whitespace and on the optional delimiters, and the
        resulting words are joined back with single spaces. Words found in the
        dictionary are ciphered as a whole; tagged words keep their tag and
        have the rest of the word ciphered.
        """
        use_dictionary = self.both or not self.dictionary.is_empty()
        tokens: list[str] = []
        for word in re.split(self._split_pattern(delimiters), line):
            if use_dictionary and self.dictionary.contains(word):
                tokens.append(self.cipher.encrypt(word))
            elif self.tag and word.startswith(self.tag):
                tokens.append(self.tag + self.cipher.encrypt(word[len(self.tag) :]))
            else:
                tokens.append(word)

        return " ".join(tokens)

    def expand(self, line: str) -> str:
        """
        Returns the deciphered version of the passed input data

        Tagged words keep their tag and have the rest of the word deciphered.
        When a dictionary is in use, any other word is replaced by its
        deciphered form only if that form belongs to the dictionary.
        """
        use_dictionary = self.both or not self.dictionary.is_empty()
        words: list[str] = []
        for word in line.split(" "):
            if self.tag and word.startswith(self.tag):
                words.append(self.tag + self.cipher.decrypt(word[len(self.tag) :]))
            elif not use_dictionary:
                words.append(word)
            elif not word.strip():
                # Avoid using new lines
                continue
            else:
                deciphered = self.cipher.decrypt(word)
                if deciphered and self.dictionary.contains(deciphered):
                    words.append(deciphered)
                else:
                    words.append(word)

        return " ".join(words)

    def clean(self, string: str) -> str:
        """
        Removes every occurrence of the tag from the passed string
        """
        if self.tag:
            return string.replace(self.tag, "")
        else:
            return string
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class DefaultRedactor(Redactor):
    """
    Redactor using the default tag and an empty dictionary
    """

    def __init__(self, cipher: FPECipher):
        super().__init__(
            dictionary=Dictionary([]), tag=DEFAULT_TAG, cipher=cipher, both=False
        )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class RedactorWithDictionary(Redactor):
    """
    Redactor using the passed dictionary and no tag
    """

    def __init__(self, dictionary: Dictionary, cipher: FPECipher):
        super().__init__(dictionary=dictionary, tag="", cipher=cipher, both=False)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class RedactorWithTag(Redactor):
    """
    Redactor using the passed tag and an empty dictionary
    """

    def __init__(self, tag: str, cipher: FPECipher):
        super().__init__(dictionary=Dictionary([]), tag=tag, cipher=cipher, both=False)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Default marker placed in front of a word to flag it for redaction
DEFAULT_TAG = "~"
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: redacted-py
|
|
3
|
+
Version: 1.0.4
|
|
4
|
+
Summary: Redacting classified documents
|
|
5
|
+
Author-email: Cyril Dever <cdever@pep-s.com>
|
|
6
|
+
License: Copyright (c) 2024 Cyril Dever
|
|
7
|
+
|
|
8
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
9
|
+
|
|
10
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
|
12
|
+
|
|
13
|
+
Subject to the terms and conditions of this license, each copyright holder and contributor hereby grants to those receiving rights under this license a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except for failure to satisfy the conditions of this license) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer this software, where such license applies only to those patent claims, already acquired or hereafter acquired, licensable by such copyright holder or contributor that are necessarily infringed by:
|
|
14
|
+
|
|
15
|
+
(a) their Contribution(s) (the licensed copyrights of copyright holders and non-copyrightable additions of contributors, in source or binary form) alone; or
|
|
16
|
+
(b) combination of their Contribution(s) with the work of authorship to which such Contribution(s) was added by such copyright holder or contributor, if, at the time the Contribution is added, such addition causes such combination to be necessarily infringed. The patent license shall not apply to any other combinations which include the Contribution.
|
|
17
|
+
|
|
18
|
+
Except as expressly stated above, no rights or licenses from any copyright holder or contributor is granted under this license, whether expressly, by implication, estoppel or otherwise.
|
|
19
|
+
|
|
20
|
+
DISCLAIMER
|
|
21
|
+
|
|
22
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
23
|
+
|
|
24
|
+
Project-URL: Homepage, https://github.com/cyrildever/redacted
|
|
25
|
+
Keywords: data,obfuscation,data masking,redacted,classified
|
|
26
|
+
Classifier: Programming Language :: Python :: 3
|
|
27
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
28
|
+
Classifier: Operating System :: OS Independent
|
|
29
|
+
Requires-Python: >=3.10.2
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Requires-Dist: feistel-py>=0.2.0
|
|
33
|
+
|
|
34
|
+
# redacted-py
|
|
35
|
+
_Redacting classified documents_
|
|
36
|
+
|
|
37
|
+

|
|
38
|
+

|
|
39
|
+

|
|
40
|
+

|
|
41
|
+

|
|
42
|
+
|
|
43
|
+
This repository holds the code base for my `redacted-py` library in Python. \
|
|
44
|
+
It is mainly based off my [Feistel cipher for Format-Preserving Encryption](https://github.com/cyrildever/feistel-py) to which I added a few tools to handle document, database and file manipulation to ease out the operation.
|
|
45
|
+
|
|
46
|
+
### Motivation
|
|
47
|
+
|
|
48
|
+
In some fields (like healthcare for instance), protecting the privacy of data whilst being able to conduct in-depth studies is both vital and mandatory. Redacting documents and databases is therefore the obligatory passage.
|
|
49
|
+
With `redacted-py`, I provide a simple yet secure tool to help redacting documents based on either a dictionary, a record layout or a tag to decide which parts should actually be redacted.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
### Usage
|
|
53
|
+
|
|
54
|
+
You can use either a dictionary or a tag (or both) to identify the words you want to redact in a document.
|
|
55
|
+
The tag should be placed before any word that should be redacted. The default tag is the tilde character (`~`).
|
|
56
|
+
|
|
57
|
+
For example, the following sentence will only see the word `tagged` redacted: `"This is a ~tagged sentence"`.
|
|
58
|
+
|
|
59
|
+
```console
|
|
60
|
+
$ pip install redacted-py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from redacted import DefaultRedactor, Dictionary
|
|
65
|
+
from feistel import FPECipher, SHA_256
|
|
66
|
+
|
|
67
|
+
source = "Some text ~tagged or using words in a dictionary"
|
|
68
|
+
|
|
69
|
+
cipher = FPECipher(SHA_256, key, 10)  # `key` is your own secret key string
|
|
70
|
+
redactor = DefaultRedactor(cipher)
|
|
71
|
+
redacted = redactor.redact(source)
|
|
72
|
+
|
|
73
|
+
expanded = redactor.expand(redacted)
|
|
74
|
+
assert expanded == source, "Original data should equal ciphered then deciphered data"
|
|
75
|
+
|
|
76
|
+
cleansed = redactor.clean(expanded)
|
|
77
|
+
assert cleansed == "Some text tagged or using words in a dictionary", "Cleaning should remove any tag mark"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
You may also use it in the console with the following command line instructions:
|
|
81
|
+
```
|
|
82
|
+
usage: python3 -m redacted [-h] [-b | --both | --no-both] [-d DICTIONARY] [-H HASH] [-i INPUT] [-k KEY] [-o OUTPUT] [-r ROUNDS] [-t TAG] [-x | --expand | --no-expand]
|
|
83
|
+
|
|
84
|
+
options:
|
|
85
|
+
-h, --help show this help message and exit
|
|
86
|
+
-b, --both, --no-both
|
|
87
|
+
Add to use both dictionary and tag
|
|
88
|
+
-d DICTIONARY, --dictionary DICTIONARY
|
|
89
|
+
The optional path to the dictionary of words to redact
|
|
90
|
+
-H HASH, --hash HASH The hash engine for the round function [default sha-256]
|
|
91
|
+
-i INPUT, --input INPUT
|
|
92
|
+
The path to the document to be redacted
|
|
93
|
+
-k KEY, --key KEY The optional key for the FPE scheme (leave it empty to use default)
|
|
94
|
+
-o OUTPUT, --output OUTPUT
|
|
95
|
+
The name of the output file
|
|
96
|
+
-r ROUNDS, --rounds ROUNDS
|
|
97
|
+
The number of rounds for the Feistel cipher [default 10]
|
|
98
|
+
-t TAG, --tag TAG The optional tag that prefixes words to redact [default ~]
|
|
99
|
+
-x, --expand, --no-expand
|
|
100
|
+
Add to expand a redacted document
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### Tests
|
|
105
|
+
|
|
106
|
+
```console
|
|
107
|
+
$ git clone https://github.com/cyrildever/redacted.git
|
|
108
|
+
$ cd redacted/py/
|
|
109
|
+
$ pip install -e .
|
|
110
|
+
$ python3 -m unittest discover
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
### License
|
|
115
|
+
|
|
116
|
+
The use of the `redacted` libraries and executables is subject to fees for commercial purposes and to compliance with the [BSD-2-Clause-Patent license](LICENSE). \
|
|
117
|
+
Please [contact me](mailto:cdever@pep-s.com) to get further information.
|
|
118
|
+
|
|
119
|
+
_NB: It is still under development, so use it in production at your own risk for now._
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
<hr />
|
|
123
|
+
© 2024 Cyril Dever. All rights reserved.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
src/redacted/__init__.py
|
|
6
|
+
src/redacted/__main__.py
|
|
7
|
+
src/redacted/dictionary.py
|
|
8
|
+
src/redacted/redactor.py
|
|
9
|
+
src/redacted/tag.py
|
|
10
|
+
src/redacted_py.egg-info/PKG-INFO
|
|
11
|
+
src/redacted_py.egg-info/SOURCES.txt
|
|
12
|
+
src/redacted_py.egg-info/dependency_links.txt
|
|
13
|
+
src/redacted_py.egg-info/entry_points.txt
|
|
14
|
+
src/redacted_py.egg-info/requires.txt
|
|
15
|
+
src/redacted_py.egg-info/top_level.txt
|
|
16
|
+
tests/test_dictionary.py
|
|
17
|
+
tests/test_redactor.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
feistel-py>=0.2.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
redacted
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from unittest import TestCase
|
|
2
|
+
|
|
3
|
+
from redacted import Dictionary, file2Dictionary, string2Dictionary
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestDictionary(TestCase):
    """Tests for building `Dictionary` objects from strings, lists and files."""

    def test_string2Dictionary(self):
        """Check `string2Dictionary` with default and with explicit extra arguments."""
        string = "Cyril Antoine Laurent,Dever"

        # With the default arguments the assertions expect three entries and no
        # standalone "Dever" (i.e. "Laurent,Dever" is presumably kept as one word).
        dic = string2Dictionary(string)
        self.assertEqual(dic.length(), 3)
        self.assertFalse(dic.contains("Dever"))

        # Passing both " " and "," (presumably as delimiters -- confirm against
        # string2Dictionary's signature) is expected to yield four entries.
        dic = string2Dictionary(string, " ", ",")
        self.assertEqual(dic.length(), 4)
        self.assertTrue(dic.contains("Dever"))
        self.assertFalse(dic.is_empty())
        self.assertEqual(dic.to_string(), "Cyril Antoine Laurent Dever")

        # A dictionary built directly from the same words must compare equal.
        dic2 = Dictionary(["Cyril", "Antoine", "Laurent", "Dever"])
        self.assertEqual(dic, dic2)

    def test_file2Dictionary(self):
        """Check that `file2Dictionary` loads the bundled example file."""
        # Local import keeps this block self-contained within the test module.
        from pathlib import Path

        # Resolve the fixture next to this test file so the test does not depend
        # on the directory the test runner was started from.
        fixture = Path(__file__).resolve().parent / "dictionaryExample.txt"

        dic = file2Dictionary(str(fixture))
        self.assertEqual(dic.length(), 5)
        self.assertEqual(dic.to_string(), "M. Cyril Antoine Laurent Dever")
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from unittest import TestCase
|
|
2
|
+
from feistel import FPECipher, BLAKE2B, KECCAK, SHA_256
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
from redacted import Dictionary, DefaultRedactor, RedactorWithDictionary
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestRedactor(TestCase):
    """Reference-vector and round-trip tests for the dictionary and tag redactors."""

    # Key string shared by every cipher built in these tests.
    KEY = "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
    # Third FPECipher argument (number of rounds) shared by every cipher here.
    ROUNDS = 10

    def _cipher(self, engine):
        """Return an `FPECipher` for *engine* using the shared test key and rounds."""
        return FPECipher(engine, self.KEY, self.ROUNDS)

    def test_dictionary_redactor(self):
        """Redact dictionary words, expand them back, and compare hash engines."""
        dic = Dictionary(["M.", "Cyril", "Antoine", "Laurent", "Dever"])
        ref = "B6ds. is testing ¼= Du:,l26 library while ¾.=y£|v Izizb is listening to Âvhis*l<"

        txt = "Cyril is testing M. Dever's library while Antoine Dever is listening to Laurent."
        redactor = RedactorWithDictionary(dic, self._cipher(SHA_256))
        redacted = redactor.redact(txt)
        self.assertEqual(redacted, ref)
        expanded = redactor.expand(redacted)
        self.assertEqual(expanded, txt)

        # Other hash engines must match their own reference output and must
        # differ from the SHA-256 reference.
        blake2 = "¸lk€$ is testing F: B!@x7;1 library while Cs>v0'* ¹'90< is listening to Pz2;ws?o"
        keccak = "H1i,{ is testing ½5 ¿&bv8f8 library while ¸&7+r$u ¹|6'h is listening to Å€j;$\"4<"
        for engine, expected in ((BLAKE2B, blake2), (KECCAK, keccak)):
            with self.subTest(engine=engine):
                redacted = RedactorWithDictionary(dic, self._cipher(engine)).redact(txt)
                self.assertEqual(redacted, expected)
                self.assertNotEqual(redacted, ref)

    def test_tag_redactor(self):
        """Redact tagged words, expand them back, then strip the tag marks."""
        ref = "~B6ds. is testing ~¼= ~Du:,l26 library while ~¾.=y£|v ~Izizb is listening to ~Âvhis*l<"

        txt = "~Cyril is testing ~M. ~Dever's library while ~Antoine ~Dever is listening to ~Laurent."
        redactor = DefaultRedactor(self._cipher(SHA_256))
        redacted = redactor.redact(txt)
        self.assertEqual(redacted, ref)
        expanded = redactor.expand(redacted)
        self.assertEqual(expanded, txt)

        # Cleaning the expanded text is expected to leave the plain sentence
        # without any tag mark.
        cleansed = redactor.clean(expanded)
        self.assertEqual(
            cleansed,
            "Cyril is testing M. Dever's library while Antoine Dever is listening to Laurent.",
        )
|