devicer.py 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {devicer_py-0.1.2 → devicer_py-0.1.4}/PKG-INFO +1 -1
- {devicer_py-0.1.2 → devicer_py-0.1.4}/pyproject.toml +1 -1
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer/confidence.py +4 -6
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer.py.egg-info/PKG-INFO +1 -1
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer.py.egg-info/SOURCES.txt +0 -2
- devicer_py-0.1.2/src/devicer/data_test.py +0 -98
- devicer_py-0.1.2/src/devicer/hashing_test.py +0 -66
- {devicer_py-0.1.2 → devicer_py-0.1.4}/README.md +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/license.txt +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/setup.cfg +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer/__init__.py +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer/hashing.py +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer.py.egg-info/dependency_links.txt +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer.py.egg-info/requires.txt +0 -0
- {devicer_py-0.1.2 → devicer_py-0.1.4}/src/devicer.py.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: devicer.py
|
3
|
-
Version: 0.1.2
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Open-Source Python Middleware for Digital Fingerprinting
|
5
5
|
Author: One anonymous contributor
|
6
6
|
Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
|
@@ -6,7 +6,7 @@ requires = [
|
|
6
6
|
build-backend = "setuptools.build_meta"
|
7
7
|
[project]
|
8
8
|
name = "devicer.py"
|
9
|
-
version = "0.1.2"
|
9
|
+
version = "0.1.4"
|
10
10
|
authors = [
|
11
11
|
{name = "Samuel Roux", email = "sam.roux.com@gmail.com"},
|
12
12
|
{name = "Stephen Perso", email = "stephenrperso@gmail.com"},
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from hashing import get_tlsh_hash, get_hash_difference
|
1
|
+
from .hashing import get_tlsh_hash, get_hash_difference
|
2
2
|
import math
|
3
3
|
|
4
4
|
def compare_dictionaries(data1: dict, data2: dict) -> tuple[int, int]:
|
@@ -17,7 +17,7 @@ def compare_dictionaries(data1: dict, data2: dict) -> tuple[int, int]:
|
|
17
17
|
for key in data1:
|
18
18
|
if key in data2:
|
19
19
|
fields += 1
|
20
|
-
if isinstance(data1[key], dict):
|
20
|
+
if isinstance(data1[key], dict) and isinstance(data2[key], dict):
|
21
21
|
sub_matches, sub_fields = compare_dictionaries(data1[key], data2[key])
|
22
22
|
matches += sub_matches
|
23
23
|
fields += sub_fields - 1 # Subtract 1 to avoid double counting the key
|
@@ -38,14 +38,12 @@ def calculate_confidence(data1: dict, data2: dict) -> float:
|
|
38
38
|
"""
|
39
39
|
matches, fields = compare_dictionaries(data1, data2)
|
40
40
|
|
41
|
-
if fields == 0:
|
42
|
-
return 0
|
41
|
+
if fields == 0 or matches == 0:
|
42
|
+
return 0
|
43
43
|
|
44
44
|
hash1 = get_tlsh_hash(str(data1).encode('utf-8'))
|
45
45
|
hash2 = get_tlsh_hash(str(data2).encode('utf-8'))
|
46
46
|
difference_score = get_hash_difference(hash1, hash2)
|
47
|
-
|
48
|
-
print(f"Matches: {matches}, Fields: {fields}, Difference Score: {difference_score}")
|
49
47
|
|
50
48
|
inverse_match_score = 1 - (matches / fields)
|
51
49
|
x = (difference_score / 1.5) * inverse_match_score
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: devicer.py
|
3
|
-
Version: 0.1.2
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Open-Source Python Middleware for Digital Fingerprinting
|
5
5
|
Author: One anonymous contributor
|
6
6
|
Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
|
@@ -3,9 +3,7 @@ license.txt
|
|
3
3
|
pyproject.toml
|
4
4
|
src/devicer/__init__.py
|
5
5
|
src/devicer/confidence.py
|
6
|
-
src/devicer/data_test.py
|
7
6
|
src/devicer/hashing.py
|
8
|
-
src/devicer/hashing_test.py
|
9
7
|
src/devicer.py.egg-info/PKG-INFO
|
10
8
|
src/devicer.py.egg-info/SOURCES.txt
|
11
9
|
src/devicer.py.egg-info/dependency_links.txt
|
@@ -1,98 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
import random
|
3
|
-
from hashing import get_tlsh_hash, get_hash_difference
|
4
|
-
from hashing_test import random_string
|
5
|
-
|
6
|
-
sampleData1 = {
|
7
|
-
"fonts": ['Arial', 'Verdana'],
|
8
|
-
"hardware": {
|
9
|
-
"cpu": 'Intel Core i7',
|
10
|
-
"gpu": 'NVIDIA GTX 1080',
|
11
|
-
"ram": 16384
|
12
|
-
},
|
13
|
-
"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
|
14
|
-
"screen": {
|
15
|
-
"width": 1920,
|
16
|
-
"height": 1080,
|
17
|
-
"colorDepth": 24
|
18
|
-
},
|
19
|
-
"timezone": 'America/New_York',
|
20
|
-
"ip": '157.185.170.244',
|
21
|
-
"languages": ['en-US', 'en'],
|
22
|
-
"plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
|
23
|
-
"canvasHash": random_string().encode('utf-8'),
|
24
|
-
"audioHash": random_string().encode('utf-8'),
|
25
|
-
"webglHash": random_string().encode('utf-8'),
|
26
|
-
}
|
27
|
-
|
28
|
-
sampleData2 = {
|
29
|
-
"fonts": ['Arial', 'Verdana'],
|
30
|
-
"hardware": {
|
31
|
-
"cpu": 'Pentium 4',
|
32
|
-
"gpu": 'Intel HD Graphics',
|
33
|
-
"ram": 4096
|
34
|
-
},
|
35
|
-
"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
|
36
|
-
"screen": {
|
37
|
-
"width": 1280,
|
38
|
-
"height": 720,
|
39
|
-
"colorDepth": 24
|
40
|
-
},
|
41
|
-
"timezone": 'Europe/London',
|
42
|
-
"ip": '178.238.11.6',
|
43
|
-
"languages": ['en-GB', 'en'],
|
44
|
-
"plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
|
45
|
-
"canvasHash": random_string().encode('utf-8'),
|
46
|
-
"audioHash": random_string().encode('utf-8'),
|
47
|
-
"webglHash": random_string().encode('utf-8'),
|
48
|
-
}
|
49
|
-
|
50
|
-
|
51
|
-
class TestHashingData(unittest.TestCase):
|
52
|
-
def test_hash_nonempty(self):
|
53
|
-
"""
|
54
|
-
Test that the hashing function returns a non-empty string for a non-empty input.
|
55
|
-
"""
|
56
|
-
data = str(sampleData1).encode('utf-8')
|
57
|
-
hash_value = get_tlsh_hash(data)
|
58
|
-
self.assertTrue(hash_value, "Hash value should not be empty for non-empty input.")
|
59
|
-
|
60
|
-
def test_hash_identical_inputs(self):
|
61
|
-
"""
|
62
|
-
Test that the hashing function returns the same hash for identical inputs.
|
63
|
-
Also checks that the difference between the hashes is zero.
|
64
|
-
"""
|
65
|
-
data = str(sampleData1).encode('utf-8')
|
66
|
-
hash1 = get_tlsh_hash(data)
|
67
|
-
hash2 = get_tlsh_hash(data)
|
68
|
-
difference = get_hash_difference(hash1, hash2)
|
69
|
-
self.assertEqual(hash1, hash2, "Hash values should be identical for identical inputs.")
|
70
|
-
self.assertEqual(difference, 0, "Hash difference should be zero for identical inputs.")
|
71
|
-
|
72
|
-
def test_hash_distance_when_different(self):
|
73
|
-
"""
|
74
|
-
Test that the hash difference is non-zero for different inputs.
|
75
|
-
Also checks that the difference is large for sufficiently different inputs.
|
76
|
-
"""
|
77
|
-
data1 = str(sampleData1).encode('utf-8')
|
78
|
-
data2 = str(sampleData2).encode('utf-8')
|
79
|
-
hash1 = get_tlsh_hash(data1)
|
80
|
-
hash2 = get_tlsh_hash(data2)
|
81
|
-
difference = get_hash_difference(hash1, hash2)
|
82
|
-
self.assertGreater(difference, 0, "Hash difference should be greater than zero for different inputs.")
|
83
|
-
self.assertGreater(difference, 80, "Hash difference should be large for sufficiently different inputs.")
|
84
|
-
|
85
|
-
def test_hash_distance_when_similar(self):
|
86
|
-
"""
|
87
|
-
Test that the hash difference is small for similar inputs.
|
88
|
-
"""
|
89
|
-
data1 = str(sampleData1).encode('utf-8')
|
90
|
-
random_index = random.randint(0, len(data1) - 4)
|
91
|
-
data2 = data1[:random_index] + random_string(length=4).encode('utf-8') + data1[random_index + 4:]
|
92
|
-
hash1 = get_tlsh_hash(data1)
|
93
|
-
hash2 = get_tlsh_hash(data2)
|
94
|
-
difference = get_hash_difference(hash1, hash2)
|
95
|
-
self.assertLess(difference, 140, "Hash difference should be small for similar inputs.")
|
96
|
-
|
97
|
-
if __name__ == '__main__':
|
98
|
-
unittest.main()
|
@@ -1,66 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
import random
|
3
|
-
from hashing import get_tlsh_hash, get_hash_difference
|
4
|
-
|
5
|
-
def random_string(length=524):
|
6
|
-
"""
|
7
|
-
Generate a random string of specified length.
|
8
|
-
|
9
|
-
Args:
|
10
|
-
length (int): The length of the string to generate. Default is 524.
|
11
|
-
|
12
|
-
Returns:
|
13
|
-
str: A random string of the specified length.
|
14
|
-
"""
|
15
|
-
characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[];!@#$%^&*()-_=+|;:,.<>?"
|
16
|
-
return ''.join(random.choice(characters) for _ in range(length))
|
17
|
-
|
18
|
-
|
19
|
-
class TestHashingMethods(unittest.TestCase):
|
20
|
-
def test_hash_nonempty(self):
|
21
|
-
"""
|
22
|
-
Test that the hashing function returns a non-empty string for a non-empty input.
|
23
|
-
"""
|
24
|
-
data = random_string()
|
25
|
-
hash_value = get_tlsh_hash(data.encode('utf-8'))
|
26
|
-
self.assertTrue(hash_value, "Hash value should not be empty for non-empty input.")
|
27
|
-
|
28
|
-
def test_hash_identical_inputs(self):
|
29
|
-
"""
|
30
|
-
Test that the hashing function returns the same hash for identical inputs.
|
31
|
-
Also checks that the difference between the hashes is zero.
|
32
|
-
"""
|
33
|
-
data = random_string()
|
34
|
-
hash1 = get_tlsh_hash(data.encode('utf-8'))
|
35
|
-
hash2 = get_tlsh_hash(data.encode('utf-8'))
|
36
|
-
difference = get_hash_difference(hash1, hash2)
|
37
|
-
self.assertEqual(hash1, hash2, "Hash values should be identical for identical inputs.")
|
38
|
-
self.assertEqual(difference, 0, "Hash difference should be zero for identical inputs.")
|
39
|
-
|
40
|
-
def test_hash_distance_when_different(self):
|
41
|
-
"""
|
42
|
-
Test that the hash difference is non-zero for different inputs.
|
43
|
-
Also checks that the difference is large for sufficiently different inputs.
|
44
|
-
"""
|
45
|
-
data1 = random_string()
|
46
|
-
data2 = random_string()
|
47
|
-
hash1 = get_tlsh_hash(data1.encode('utf-8'))
|
48
|
-
hash2 = get_tlsh_hash(data2.encode('utf-8'))
|
49
|
-
difference = get_hash_difference(hash1, hash2)
|
50
|
-
self.assertGreater(difference, 0, "Hash difference should be greater than zero for different inputs.")
|
51
|
-
self.assertGreater(difference, 180, "Hash difference should be large for sufficiently different inputs.")
|
52
|
-
|
53
|
-
def test_hash_distance_when_similar(self):
|
54
|
-
"""
|
55
|
-
Test that the hash difference is small for similar inputs.
|
56
|
-
"""
|
57
|
-
data1 = random_string()
|
58
|
-
random_index = random.randint(0, len(data1) - 4)
|
59
|
-
data2 = data1[:random_index] + random_string(length=4) + data1[random_index + 4:]
|
60
|
-
hash1 = get_tlsh_hash(data1.encode('utf-8'))
|
61
|
-
hash2 = get_tlsh_hash(data2.encode('utf-8'))
|
62
|
-
difference = get_hash_difference(hash1, hash2)
|
63
|
-
self.assertLess(difference, 200, "Hash difference should be small for similar inputs.")
|
64
|
-
|
65
|
-
if __name__ == '__main__':
|
66
|
-
unittest.main()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|