devicer.py 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devicer_py-0.1.0/PKG-INFO +41 -0
- devicer_py-0.1.0/README.md +17 -0
- devicer_py-0.1.0/license.txt +26 -0
- devicer_py-0.1.0/pyproject.toml +43 -0
- devicer_py-0.1.0/setup.cfg +4 -0
- devicer_py-0.1.0/src/devicer/__init__.py +0 -0
- devicer_py-0.1.0/src/devicer/confidence.py +52 -0
- devicer_py-0.1.0/src/devicer/confidence_test.py +112 -0
- devicer_py-0.1.0/src/devicer/data_test.py +98 -0
- devicer_py-0.1.0/src/devicer/hashing.py +27 -0
- devicer_py-0.1.0/src/devicer/hashing_test.py +66 -0
- devicer_py-0.1.0/src/devicer.py.egg-info/PKG-INFO +41 -0
- devicer_py-0.1.0/src/devicer.py.egg-info/SOURCES.txt +14 -0
- devicer_py-0.1.0/src/devicer.py.egg-info/dependency_links.txt +1 -0
- devicer_py-0.1.0/src/devicer.py.egg-info/requires.txt +1 -0
- devicer_py-0.1.0/src/devicer.py.egg-info/top_level.txt +1 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: devicer.py
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Open-Source Python Middleware for Digital Fingerprinting
|
5
|
+
Author: One anonymous contributor
|
6
|
+
Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
|
7
|
+
Project-URL: Homepage, https://gatewaycorporate.org/
|
8
|
+
Project-URL: Repository, https://github.com/gatewaycorporate/fp-devicer-python.git
|
9
|
+
Project-URL: Issues, https://github.com/gatewaycorporate/fp-devicer-python/issues
|
10
|
+
Keywords: fingerprinting,identification,middleware,tracking
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
19
|
+
Requires-Python: >=3.8
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: license.txt
|
22
|
+
Requires-Dist: py-tlsh>=4.7.2
|
23
|
+
Dynamic: license-file
|
24
|
+
|
25
|
+
# FP-Devicer
|
26
|
+
## Developed by Gateway Corporate Solutions LLC
|
27
|
+
|
28
|
+
FP-Devicer is a digital fingerprinting middleware library designed for ease of use and near-universal compatibility with servers.
|
29
|
+
|
30
|
+
Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
|
31
|
+
```python
|
32
|
+
from devicer.confidence import calculate_confidence
|
33
|
+
|
34
|
+
user1, user2 = {
|
35
|
+
"""Collected data goes here"""
|
36
|
+
}
|
37
|
+
|
38
|
+
confidence = calculate_confidence(user1, user2)
|
39
|
+
```
|
40
|
+
|
41
|
+
The resulting confidence will range between 0 and 100, with 100 providing the highest confidence of the users being identical.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# FP-Devicer
|
2
|
+
## Developed by Gateway Corporate Solutions LLC
|
3
|
+
|
4
|
+
FP-Devicer is a digital fingerprinting middleware library designed for ease of use and near-universal compatibility with servers.
|
5
|
+
|
6
|
+
Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
|
7
|
+
```python
|
8
|
+
from devicer.confidence import calculate_confidence
|
9
|
+
|
10
|
+
user1, user2 = {
|
11
|
+
"""Collected data goes here"""
|
12
|
+
}
|
13
|
+
|
14
|
+
confidence = calculate_confidence(user1, user2)
|
15
|
+
```
|
16
|
+
|
17
|
+
The resulting confidence will range between 0 and 100, with 100 providing the highest confidence of the users being identical.
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# DON'T BE A DICK PUBLIC LICENSE
|
2
|
+
|
3
|
+
> Version 1.1, December 2016
|
4
|
+
|
5
|
+
> Copyright (C) 2025 Gateway Corporate Solutions LLC
|
6
|
+
|
7
|
+
Everyone is permitted to copy and distribute verbatim or modified
|
8
|
+
copies of this license document.
|
9
|
+
|
10
|
+
> DON'T BE A DICK PUBLIC LICENSE
|
11
|
+
> TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
12
|
+
|
13
|
+
1. Do whatever you like with the original work, just don't be a dick.
|
14
|
+
|
15
|
+
Being a dick includes - but is not limited to - the following instances:
|
16
|
+
|
17
|
+
1a. Outright copyright infringement - Don't just copy this and change the name.
|
18
|
+
1b. Selling the unmodified original with no work done what-so-ever, that's REALLY being a dick.
|
19
|
+
1c. Modifying the original work to contain hidden harmful content. That would make you a PROPER dick.
|
20
|
+
|
21
|
+
2. If you become rich through modifications, related works/services, or supporting the original work,
|
22
|
+
share the love. Only a dick would make loads off this work and not buy the original work's
|
23
|
+
creator(s) a pint.
|
24
|
+
|
25
|
+
3. Code is provided with no warranty. Using somebody else's code and bitching when it goes wrong makes
|
26
|
+
you a DONKEY dick. Fix the problem yourself. A non-dick would submit the fix back.
|
@@ -0,0 +1,43 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = [
|
3
|
+
"setuptools>=61.2.0",
|
4
|
+
"wheel",
|
5
|
+
]
|
6
|
+
build-backend = "setuptools.build_meta"
|
7
|
+
[project]
|
8
|
+
name = "devicer.py"
|
9
|
+
version = "0.1.0"
|
10
|
+
authors = [
|
11
|
+
{name = "Samuel Roux", email = "sam.roux.com@gmail.com"},
|
12
|
+
{name = "Stephen Perso", email = "stephenrperso@gmail.com"},
|
13
|
+
{name = "One anonymous contributor"}
|
14
|
+
]
|
15
|
+
description = "Open-Source Python Middleware for Digital Fingerprinting"
|
16
|
+
keywords = [
|
17
|
+
"fingerprinting",
|
18
|
+
"identification",
|
19
|
+
"middleware",
|
20
|
+
"tracking"
|
21
|
+
]
|
22
|
+
classifiers = [
|
23
|
+
"Development Status :: 4 - Beta",
|
24
|
+
"Intended Audience :: Developers",
|
25
|
+
"Programming Language :: Python :: 3",
|
26
|
+
"Programming Language :: Python :: 3.8",
|
27
|
+
"Programming Language :: Python :: 3.9",
|
28
|
+
"Programming Language :: Python :: 3.10",
|
29
|
+
"Programming Language :: Python :: 3.11",
|
30
|
+
"Topic :: Software Development :: Libraries"
|
31
|
+
]
|
32
|
+
dependencies = [
|
33
|
+
"py-tlsh>=4.7.2"
|
34
|
+
]
|
35
|
+
readme = "README.md"
|
36
|
+
requires-python = ">=3.8"
|
37
|
+
license-files = [
|
38
|
+
"license.txt"
|
39
|
+
]
|
40
|
+
[project.urls]
|
41
|
+
Homepage = "https://gatewaycorporate.org/"
|
42
|
+
Repository = "https://github.com/gatewaycorporate/fp-devicer-python.git"
|
43
|
+
Issues = "https://github.com/gatewaycorporate/fp-devicer-python/issues"
|
File without changes
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from hashing import get_tlsh_hash, get_hash_difference
|
2
|
+
import math
|
3
|
+
|
4
|
+
def compare_dictionaries(data1: dict, data2: dict) -> "tuple[int, int]":
    """
    Compare two dictionaries and return the count of matching fields and total fields.

    Only keys present in both dictionaries are compared. When the values
    stored under a shared key are both dictionaries, they are compared
    recursively and their counts are added to the running totals. Any other
    pair of values is compared with ``==``.

    Note that a shared key whose values are dictionaries is itself counted
    as one compared field (in addition to its nested fields) but never as a
    match, so ``matches`` can be lower than ``fields`` even for identical
    nested inputs.

    Args:
        data1 (dict): First dictionary containing data.
        data2 (dict): Second dictionary containing data.

    Returns:
        tuple: A tuple ``(matches, fields)`` containing the count of matching
        fields and the total number of fields compared.
    """
    # The return annotation is quoted so the module still imports on
    # Python 3.8, where ``tuple[int, int]`` cannot be subscripted at runtime.
    fields = 0
    matches = 0
    for key, value1 in data1.items():
        if key not in data2:
            continue
        value2 = data2[key]
        fields += 1
        # Recurse only when BOTH sides are dictionaries; recursing into a
        # non-dict value (int, str, list, ...) would raise a TypeError.
        if isinstance(value1, dict) and isinstance(value2, dict):
            sub_matches, sub_fields = compare_dictionaries(value1, value2)
            matches += sub_matches
            fields += sub_fields
        elif value1 == value2:
            matches += 1
    return matches, fields
|
27
|
+
|
28
|
+
def calculate_confidence(data1: dict, data2: dict) -> float:
    """
    Calculate a 0-100 confidence score that two data dictionaries match.

    Two signals are combined:

    * the share of compared fields that do NOT match, derived from
      ``compare_dictionaries``; and
    * the TLSH difference between the UTF-8 encoded ``str()`` forms of the
      two dictionaries.

    Their product is passed through a logistic curve so that a larger
    mismatch yields a lower score.

    Args:
        data1 (dict): First dictionary containing data.
        data2 (dict): Second dictionary containing data.

    Returns:
        float: ``0.0`` when the dictionaries share no comparable fields,
        ``100.0`` when every compared field matches or the hash difference
        is zero, otherwise a value between 0 and 100.
    """
    matches, fields = compare_dictionaries(data1, data2)

    # Nothing in common to compare (e.g. one side is empty).
    if fields == 0:
        return 0.0

    # NOTE(review): TLSH presumably needs a minimum amount of input to
    # produce a usable digest; confirm how the hashing helpers behave for
    # very small dictionaries.
    hash1 = get_tlsh_hash(str(data1).encode('utf-8'))
    hash2 = get_tlsh_hash(str(data2).encode('utf-8'))
    difference_score = get_hash_difference(hash1, hash2)

    inverse_match_score = 1 - (matches / fields)
    # Either signal reporting "no difference" short-circuits to a perfect
    # score; returned as a float to honour the declared return type.
    if inverse_match_score == 0 or difference_score == 0:
        return 100.0

    x = (difference_score / 1.5) * inverse_match_score
    # Logistic curve: close to 100 for small x, falling towards 0 as x grows.
    return 100 / (1 + math.exp(-4.5 + (0.25 * x)))
|
@@ -0,0 +1,112 @@
|
|
1
|
+
import unittest
|
2
|
+
from confidence import calculate_confidence
|
3
|
+
from hashing_test import random_string
|
4
|
+
|
5
|
+
# First sample fingerprint. Key order is kept as-is because the tests hash
# str() of the dictionary, and the three *Hash fields are random per run.
sampleData1 = {
    "fonts": ["Arial", "Verdana"],
    "hardware": {
        "cpu": "Intel Core i7",
        "gpu": "NVIDIA GTX 1080",
        "ram": 16384,
    },
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "screen": {
        "width": 1920,
        "height": 1080,
        "colorDepth": 24,
    },
    "timezone": "America/New_York",
    "ip": "157.185.170.244",
    "languages": ["en-US", "en"],
    "plugins": ["Chrome PDF Viewer", "Shockwave Flash"],
    "canvasHash": random_string().encode("utf-8"),
    "audioHash": random_string().encode("utf-8"),
    "webglHash": random_string().encode("utf-8"),
}

# Second sample fingerprint: same fonts, user agent, plugins and colour
# depth as the first, with different hardware, resolution, locale and IP.
sampleData2 = {
    "fonts": ["Arial", "Verdana"],
    "hardware": {
        "cpu": "Pentium 4",
        "gpu": "Intel HD Graphics",
        "ram": 4096,
    },
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "screen": {
        "width": 1280,
        "height": 720,
        "colorDepth": 24,
    },
    "timezone": "Europe/London",
    "ip": "178.238.11.6",
    "languages": ["en-GB", "en"],
    "plugins": ["Chrome PDF Viewer", "Shockwave Flash"],
    "canvasHash": random_string().encode("utf-8"),
    "audioHash": random_string().encode("utf-8"),
    "webglHash": random_string().encode("utf-8"),
}
|
48
|
+
|
49
|
+
|
50
|
+
class TestConfidenceCalculation(unittest.TestCase):
    """Checks for ``calculate_confidence`` using the module-level sample data."""

    def test_confidence_range(self):
        """
        Test that the confidence score is between 0 and 100.
        """
        confidence = calculate_confidence(sampleData1, sampleData2)
        self.assertGreaterEqual(confidence, 0, "Confidence score should be at least 0.")
        self.assertLessEqual(confidence, 100, "Confidence score should not exceed 100.")

    def test_confidence_identical_data(self):
        """
        Test that the confidence score is 100 when both data dictionaries are identical.
        """
        confidence = calculate_confidence(sampleData1, sampleData1)
        self.assertEqual(confidence, 100, "Confidence score should be 100 for identical data.")

    def test_confidence_different_data(self):
        """
        Test that the confidence score is less than 10 when data dictionaries are different.
        """
        confidence = calculate_confidence(sampleData1, sampleData2)
        self.assertLess(confidence, 10, "Confidence score should be less than 10 for different data.")

    def test_confidence_similar_data(self):
        """
        Test that the confidence score is greater than 80 when data dictionaries are similar.
        """
        import copy

        # A deep copy is required: dict.copy() is shallow, so writing to the
        # nested 'hardware' dict would also change sampleData2 and the two
        # inputs would end up identical.
        similar_data = copy.deepcopy(sampleData2)
        similar_data['hardware']['ram'] = 8192
        confidence = calculate_confidence(sampleData2, similar_data)
        self.assertGreater(confidence, 80, "Confidence score should be greater than 80 for similar data.")

    def test_confidence_partial_data(self):
        """
        Test that the confidence score is calculated correctly when some fields match.
        """
        import copy

        # Deep copy so the nested 'hardware' edits below stay local to this
        # test and do not leak into sampleData1 for the other tests.
        partial_data = copy.deepcopy(sampleData1)
        partial_data['hardware']['cpu'] = 'Pentium 4'
        partial_data['hardware']['gpu'] = 'Intel HD Graphics'
        partial_data['hardware']['ram'] = 4096
        partial_data['timezone'] = 'Europe/London'
        partial_data['ip'] = '178.238.11.6'
        partial_data['languages'] = ['en-GB', 'en']
        partial_data['userAgent'] = 'Mozilla/5.0 (compatible; Konqueror/2.2.2-3; Linux)'
        confidence = calculate_confidence(sampleData1, partial_data)
        self.assertGreater(confidence, 10, "Confidence score should be greater than 10 for partially matching data.")
        self.assertLess(confidence, 95, "Confidence score should be less than 95 for partially matching data.")

    def test_confidence_empty_data(self):
        """
        Test that the confidence score is 0 when one of the data dictionaries is empty.
        """
        confidence = calculate_confidence({}, sampleData2)
        self.assertEqual(confidence, 0, "Confidence score should be 0 for empty data.")

        confidence = calculate_confidence(sampleData1, {})
        self.assertEqual(confidence, 0, "Confidence score should be 0 for empty data.")

        confidence = calculate_confidence({}, {})
        self.assertEqual(confidence, 0, "Confidence score should be 0 for both data dictionaries being empty.")
|
110
|
+
|
111
|
+
if __name__ == '__main__':
|
112
|
+
unittest.main()
|
@@ -0,0 +1,98 @@
|
|
1
|
+
import unittest
|
2
|
+
import random
|
3
|
+
from hashing import get_tlsh_hash, get_hash_difference
|
4
|
+
from hashing_test import random_string
|
5
|
+
|
6
|
+
# First sample fingerprint. Key order is kept as-is because the tests hash
# str() of the dictionary, and the three *Hash fields are random per run.
sampleData1 = {
    "fonts": ["Arial", "Verdana"],
    "hardware": {
        "cpu": "Intel Core i7",
        "gpu": "NVIDIA GTX 1080",
        "ram": 16384,
    },
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "screen": {
        "width": 1920,
        "height": 1080,
        "colorDepth": 24,
    },
    "timezone": "America/New_York",
    "ip": "157.185.170.244",
    "languages": ["en-US", "en"],
    "plugins": ["Chrome PDF Viewer", "Shockwave Flash"],
    "canvasHash": random_string().encode("utf-8"),
    "audioHash": random_string().encode("utf-8"),
    "webglHash": random_string().encode("utf-8"),
}

# Second sample fingerprint: same fonts, user agent, plugins and colour
# depth as the first, with different hardware, resolution, locale and IP.
sampleData2 = {
    "fonts": ["Arial", "Verdana"],
    "hardware": {
        "cpu": "Pentium 4",
        "gpu": "Intel HD Graphics",
        "ram": 4096,
    },
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "screen": {
        "width": 1280,
        "height": 720,
        "colorDepth": 24,
    },
    "timezone": "Europe/London",
    "ip": "178.238.11.6",
    "languages": ["en-GB", "en"],
    "plugins": ["Chrome PDF Viewer", "Shockwave Flash"],
    "canvasHash": random_string().encode("utf-8"),
    "audioHash": random_string().encode("utf-8"),
    "webglHash": random_string().encode("utf-8"),
}
|
49
|
+
|
50
|
+
|
51
|
+
class TestHashingData(unittest.TestCase):
    """Hashing checks driven by the serialized sample data dictionaries."""

    def test_hash_nonempty(self):
        """
        A serialized, non-empty sample dictionary must hash to a truthy value.
        """
        payload = str(sampleData1).encode('utf-8')
        digest = get_tlsh_hash(payload)
        self.assertTrue(digest, "Hash value should not be empty for non-empty input.")

    def test_hash_identical_inputs(self):
        """
        Hashing the same bytes twice must give equal hashes whose difference
        is zero.
        """
        payload = str(sampleData1).encode('utf-8')
        first = get_tlsh_hash(payload)
        second = get_tlsh_hash(payload)
        distance = get_hash_difference(first, second)
        self.assertEqual(first, second, "Hash values should be identical for identical inputs.")
        self.assertEqual(distance, 0, "Hash difference should be zero for identical inputs.")

    def test_hash_distance_when_different(self):
        """
        The two sample dictionaries must hash to a non-zero difference, and
        that difference must exceed 80.
        """
        first_payload = str(sampleData1).encode('utf-8')
        second_payload = str(sampleData2).encode('utf-8')
        distance = get_hash_difference(
            get_tlsh_hash(first_payload),
            get_tlsh_hash(second_payload),
        )
        self.assertGreater(distance, 0, "Hash difference should be greater than zero for different inputs.")
        self.assertGreater(distance, 80, "Hash difference should be large for sufficiently different inputs.")

    def test_hash_distance_when_similar(self):
        """
        Overwriting four bytes at a random offset must keep the hash
        difference below 140.
        """
        original = str(sampleData1).encode('utf-8')
        # Pick the offset first, then draw the replacement bytes.
        start = random.randint(0, len(original) - 4)
        replacement = random_string(length=4).encode('utf-8')
        mutated = original[:start] + replacement + original[start + 4:]
        original_digest = get_tlsh_hash(original)
        mutated_digest = get_tlsh_hash(mutated)
        distance = get_hash_difference(original_digest, mutated_digest)
        self.assertLess(distance, 140, "Hash difference should be small for similar inputs.")
|
96
|
+
|
97
|
+
if __name__ == '__main__':
|
98
|
+
unittest.main()
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import tlsh;
|
2
|
+
|
3
|
+
def get_tlsh_hash(data: bytes) -> str:
    """
    Compute the TLSH digest of a byte string.

    This is a thin wrapper that forwards ``data`` to ``tlsh.hash`` and
    returns its result unchanged.

    Args:
        data (bytes): The input data to hash.

    Returns:
        str: The TLSH hash as returned by ``tlsh.hash``.
    """
    return tlsh.hash(data)
|
15
|
+
|
16
|
+
def get_hash_difference(hash1: str, hash2: str) -> int:
    """
    Measure how far apart two TLSH hashes are.

    This is a thin wrapper that forwards both hashes to ``tlsh.diff`` and
    returns its result unchanged.

    Args:
        hash1 (str): The first TLSH hash.
        hash2 (str): The second TLSH hash.

    Returns:
        int: The difference score between the two hashes.
    """
    distance = tlsh.diff(hash1, hash2)
    return distance
|
@@ -0,0 +1,66 @@
|
|
1
|
+
import unittest
|
2
|
+
import random
|
3
|
+
from hashing import get_tlsh_hash, get_hash_difference
|
4
|
+
|
5
|
+
def random_string(length=524):
    """
    Build a random string by drawing characters one at a time.

    Characters come from a fixed alphabet of ASCII letters, digits and
    punctuation. The alphabet lists ``;`` twice, so that character is drawn
    twice as often as any other.

    Args:
        length (int): The length of the string to generate. Default is 524.

    Returns:
        str: A random string of the specified length.
    """
    characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[];!@#$%^&*()-_=+|;:,.<>?"
    picked = []
    for _ in range(length):
        picked.append(random.choice(characters))
    return ''.join(picked)
|
17
|
+
|
18
|
+
|
19
|
+
class TestHashingMethods(unittest.TestCase):
    """Hashing checks driven by freshly generated random strings."""

    def test_hash_nonempty(self):
        """
        A non-empty random string must hash to a truthy value.
        """
        text = random_string()
        digest = get_tlsh_hash(text.encode('utf-8'))
        self.assertTrue(digest, "Hash value should not be empty for non-empty input.")

    def test_hash_identical_inputs(self):
        """
        Hashing the same string twice must give equal hashes whose
        difference is zero.
        """
        text = random_string()
        first = get_tlsh_hash(text.encode('utf-8'))
        second = get_tlsh_hash(text.encode('utf-8'))
        distance = get_hash_difference(first, second)
        self.assertEqual(first, second, "Hash values should be identical for identical inputs.")
        self.assertEqual(distance, 0, "Hash difference should be zero for identical inputs.")

    def test_hash_distance_when_different(self):
        """
        Two independent random strings must hash to a non-zero difference,
        and that difference must exceed 180.
        """
        first_text = random_string()
        second_text = random_string()
        distance = get_hash_difference(
            get_tlsh_hash(first_text.encode('utf-8')),
            get_tlsh_hash(second_text.encode('utf-8')),
        )
        self.assertGreater(distance, 0, "Hash difference should be greater than zero for different inputs.")
        self.assertGreater(distance, 180, "Hash difference should be large for sufficiently different inputs.")

    def test_hash_distance_when_similar(self):
        """
        Overwriting four characters at a random offset must keep the hash
        difference below 200.
        """
        original = random_string()
        # Pick the offset first, then draw the replacement characters.
        start = random.randint(0, len(original) - 4)
        replacement = random_string(length=4)
        mutated = original[:start] + replacement + original[start + 4:]
        original_digest = get_tlsh_hash(original.encode('utf-8'))
        mutated_digest = get_tlsh_hash(mutated.encode('utf-8'))
        distance = get_hash_difference(original_digest, mutated_digest)
        self.assertLess(distance, 200, "Hash difference should be small for similar inputs.")
|
64
|
+
|
65
|
+
if __name__ == '__main__':
|
66
|
+
unittest.main()
|
@@ -0,0 +1,41 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: devicer.py
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Open-Source Python Middleware for Digital Fingerprinting
|
5
|
+
Author: One anonymous contributor
|
6
|
+
Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
|
7
|
+
Project-URL: Homepage, https://gatewaycorporate.org/
|
8
|
+
Project-URL: Repository, https://github.com/gatewaycorporate/fp-devicer-python.git
|
9
|
+
Project-URL: Issues, https://github.com/gatewaycorporate/fp-devicer-python/issues
|
10
|
+
Keywords: fingerprinting,identification,middleware,tracking
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
19
|
+
Requires-Python: >=3.8
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: license.txt
|
22
|
+
Requires-Dist: py-tlsh>=4.7.2
|
23
|
+
Dynamic: license-file
|
24
|
+
|
25
|
+
# FP-Devicer
|
26
|
+
## Developed by Gateway Corporate Solutions LLC
|
27
|
+
|
28
|
+
FP-Devicer is a digital fingerprinting middleware library designed for ease of use and near-universal compatibility with servers.
|
29
|
+
|
30
|
+
Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
|
31
|
+
```python
|
32
|
+
from devicer.confidence import calculate_confidence
|
33
|
+
|
34
|
+
user1, user2 = {
|
35
|
+
"""Collected data goes here"""
|
36
|
+
}
|
37
|
+
|
38
|
+
confidence = calculate_confidence(user1, user2)
|
39
|
+
```
|
40
|
+
|
41
|
+
The resulting confidence will range between 0 and 100, with 100 providing the highest confidence of the users being identical.
|
@@ -0,0 +1,14 @@
|
|
1
|
+
README.md
|
2
|
+
license.txt
|
3
|
+
pyproject.toml
|
4
|
+
src/devicer/__init__.py
|
5
|
+
src/devicer/confidence.py
|
6
|
+
src/devicer/confidence_test.py
|
7
|
+
src/devicer/data_test.py
|
8
|
+
src/devicer/hashing.py
|
9
|
+
src/devicer/hashing_test.py
|
10
|
+
src/devicer.py.egg-info/PKG-INFO
|
11
|
+
src/devicer.py.egg-info/SOURCES.txt
|
12
|
+
src/devicer.py.egg-info/dependency_links.txt
|
13
|
+
src/devicer.py.egg-info/requires.txt
|
14
|
+
src/devicer.py.egg-info/top_level.txt
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
py-tlsh>=4.7.2
|
@@ -0,0 +1 @@
|
|
1
|
+
devicer
|