devicer.py 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: devicer.py
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Open-Source Python Middleware for Digital Fingerprinting
5
5
  Author: One anonymous contributor
6
6
  Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
@@ -29,7 +29,7 @@ FP-Devicer is a digital fingerprinting middleware library designed for ease of u
29
29
 
30
30
  Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
31
31
  ```python
32
- from devicer.confidence import calculate_condifence
32
+ from devicer.confidence import calculate_confidence
33
33
 
34
34
  user1, user2 = {
35
35
  """Collected data goes here"""
@@ -5,7 +5,7 @@ FP-Devicer is a digital fingerprinting middleware library designed for ease of u
5
5
 
6
6
  Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
7
7
  ```python
8
- from devicer.confidence import calculate_condifence
8
+ from devicer.confidence import calculate_confidence
9
9
 
10
10
  user1, user2 = {
11
11
  """Collected data goes here"""
@@ -6,7 +6,7 @@ requires = [
6
6
  build-backend = "setuptools.build_meta"
7
7
  [project]
8
8
  name = "devicer.py"
9
- version = "0.1.1"
9
+ version = "0.1.3"
10
10
  authors = [
11
11
  {name = "Samuel Roux", email = "sam.roux.com@gmail.com"},
12
12
  {name = "Stephen Perso", email = "stephenrperso@gmail.com"},
@@ -1,4 +1,4 @@
1
- from hashing import get_tlsh_hash, get_hash_difference
1
+ from .hashing import get_tlsh_hash, get_hash_difference
2
2
  import math
3
3
 
4
4
  def compare_dictionaries(data1: dict, data2: dict) -> tuple[int, int]:
@@ -38,14 +38,12 @@ def calculate_confidence(data1: dict, data2: dict) -> float:
38
38
  """
39
39
  matches, fields = compare_dictionaries(data1, data2)
40
40
 
41
- if fields == 0:
42
- return 0.0
41
+ if fields == 0 or matches == 0:
42
+ return 0
43
43
 
44
44
  hash1 = get_tlsh_hash(str(data1).encode('utf-8'))
45
45
  hash2 = get_tlsh_hash(str(data2).encode('utf-8'))
46
46
  difference_score = get_hash_difference(hash1, hash2)
47
-
48
- print(f"Matches: {matches}, Fields: {fields}, Difference Score: {difference_score}")
49
47
 
50
48
  inverse_match_score = 1 - (matches / fields)
51
49
  x = (difference_score / 1.5) * inverse_match_score
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: devicer.py
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Open-Source Python Middleware for Digital Fingerprinting
5
5
  Author: One anonymous contributor
6
6
  Author-email: Samuel Roux <sam.roux.com@gmail.com>, Stephen Perso <stephenrperso@gmail.com>
@@ -29,7 +29,7 @@ FP-Devicer is a digital fingerprinting middleware library designed for ease of u
29
29
 
30
30
  Importing and using the library to compare fingerprints between users is as simple as collecting some user data and running the `calculate_confidence` function.
31
31
  ```python
32
- from devicer.confidence import calculate_condifence
32
+ from devicer.confidence import calculate_confidence
33
33
 
34
34
  user1, user2 = {
35
35
  """Collected data goes here"""
@@ -3,10 +3,7 @@ license.txt
3
3
  pyproject.toml
4
4
  src/devicer/__init__.py
5
5
  src/devicer/confidence.py
6
- src/devicer/confidence_test.py
7
- src/devicer/data_test.py
8
6
  src/devicer/hashing.py
9
- src/devicer/hashing_test.py
10
7
  src/devicer.py.egg-info/PKG-INFO
11
8
  src/devicer.py.egg-info/SOURCES.txt
12
9
  src/devicer.py.egg-info/dependency_links.txt
@@ -1,112 +0,0 @@
1
- import unittest
2
- from confidence import calculate_confidence
3
- from hashing_test import random_string
4
-
5
- sampleData1 = {
6
- "fonts": ['Arial', 'Verdana'],
7
- "hardware": {
8
- "cpu": 'Intel Core i7',
9
- "gpu": 'NVIDIA GTX 1080',
10
- "ram": 16384
11
- },
12
- "userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
13
- "screen": {
14
- "width": 1920,
15
- "height": 1080,
16
- "colorDepth": 24
17
- },
18
- "timezone": 'America/New_York',
19
- "ip": '157.185.170.244',
20
- "languages": ['en-US', 'en'],
21
- "plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
22
- "canvasHash": random_string().encode('utf-8'),
23
- "audioHash": random_string().encode('utf-8'),
24
- "webglHash": random_string().encode('utf-8'),
25
- }
26
-
27
- sampleData2 = {
28
- "fonts": ['Arial', 'Verdana'],
29
- "hardware": {
30
- "cpu": 'Pentium 4',
31
- "gpu": 'Intel HD Graphics',
32
- "ram": 4096
33
- },
34
- "userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
35
- "screen": {
36
- "width": 1280,
37
- "height": 720,
38
- "colorDepth": 24
39
- },
40
- "timezone": 'Europe/London',
41
- "ip": '178.238.11.6',
42
- "languages": ['en-GB', 'en'],
43
- "plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
44
- "canvasHash": random_string().encode('utf-8'),
45
- "audioHash": random_string().encode('utf-8'),
46
- "webglHash": random_string().encode('utf-8'),
47
- }
48
-
49
-
50
- class TestConfidenceCalculation(unittest.TestCase):
51
- def test_confidence_range(self):
52
- """
53
- Test that the confidence score is between 0 and 100.
54
- """
55
- confidence = calculate_confidence(sampleData1, sampleData2)
56
- self.assertGreaterEqual(confidence, 0, "Confidence score should be at least 0.")
57
- self.assertLessEqual(confidence, 100, "Confidence score should not exceed 100.")
58
-
59
- def test_confidence_identical_data(self):
60
- """
61
- Test that the confidence score is 100 when both data dictionaries are identical.
62
- """
63
- confidence = calculate_confidence(sampleData1, sampleData1)
64
- self.assertEqual(confidence, 100, "Confidence score should be 100 for identical data.")
65
-
66
- def test_confidence_different_data(self):
67
- """
68
- Test that the confidence score is less than 10 when data dictionaries are different.
69
- """
70
- confidence = calculate_confidence(sampleData1, sampleData2)
71
- self.assertLess(confidence, 10, "Confidence score should be less than 10 for different data.")
72
-
73
- def test_confidence_similar_data(self):
74
- """
75
- Test that the confidence score is greater than 80 when data dictionaries are similar.
76
- """
77
- similar_data = sampleData2.copy()
78
- similar_data['hardware']['ram'] = 8192
79
- confidence = calculate_confidence(sampleData2, similar_data)
80
- self.assertGreater(confidence, 80, "Confidence score should be greater than 80 for similar data.")
81
-
82
- def test_confidence_partial_data(self):
83
- """
84
- Test that the confidence score is calculated correctly when some fields match.
85
- """
86
- partial_data = sampleData1.copy()
87
- partial_data['hardware']['cpu'] = 'Pentium 4'
88
- partial_data['hardware']['gpu'] = 'Intel HD Graphics'
89
- partial_data['hardware']['ram'] = 4096
90
- partial_data['timezone'] = 'Europe/London'
91
- partial_data['ip'] = '178.238.11.6'
92
- partial_data['languages'] = ['en-GB', 'en']
93
- partial_data['userAgent'] = 'Mozilla/5.0 (compatible; Konqueror/2.2.2-3; Linux)'
94
- confidence = calculate_confidence(sampleData1, partial_data)
95
- self.assertGreater(confidence, 10, "Confidence score should be greater than 10 for partially matching data.")
96
- self.assertLess(confidence, 95, "Confidence score should be less than 95 for partially matching data.")
97
-
98
- def test_confidence_empty_data(self):
99
- """
100
- Test that the confidence score is 0 when one of the data dictionaries is empty.
101
- """
102
- confidence = calculate_confidence({}, sampleData2)
103
- self.assertEqual(confidence, 0, "Confidence score should be 0 for empty data.")
104
-
105
- confidence = calculate_confidence(sampleData1, {})
106
- self.assertEqual(confidence, 0, "Confidence score should be 0 for empty data.")
107
-
108
- confidence = calculate_confidence({}, {})
109
- self.assertEqual(confidence, 0, "Confidence score should be 0 for both data dictionaries being empty.")
110
-
111
- if __name__ == '__main__':
112
- unittest.main()
@@ -1,98 +0,0 @@
1
- import unittest
2
- import random
3
- from hashing import get_tlsh_hash, get_hash_difference
4
- from hashing_test import random_string
5
-
6
- sampleData1 = {
7
- "fonts": ['Arial', 'Verdana'],
8
- "hardware": {
9
- "cpu": 'Intel Core i7',
10
- "gpu": 'NVIDIA GTX 1080',
11
- "ram": 16384
12
- },
13
- "userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
14
- "screen": {
15
- "width": 1920,
16
- "height": 1080,
17
- "colorDepth": 24
18
- },
19
- "timezone": 'America/New_York',
20
- "ip": '157.185.170.244',
21
- "languages": ['en-US', 'en'],
22
- "plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
23
- "canvasHash": random_string().encode('utf-8'),
24
- "audioHash": random_string().encode('utf-8'),
25
- "webglHash": random_string().encode('utf-8'),
26
- }
27
-
28
- sampleData2 = {
29
- "fonts": ['Arial', 'Verdana'],
30
- "hardware": {
31
- "cpu": 'Pentium 4',
32
- "gpu": 'Intel HD Graphics',
33
- "ram": 4096
34
- },
35
- "userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
36
- "screen": {
37
- "width": 1280,
38
- "height": 720,
39
- "colorDepth": 24
40
- },
41
- "timezone": 'Europe/London',
42
- "ip": '178.238.11.6',
43
- "languages": ['en-GB', 'en'],
44
- "plugins": ['Chrome PDF Viewer', 'Shockwave Flash'],
45
- "canvasHash": random_string().encode('utf-8'),
46
- "audioHash": random_string().encode('utf-8'),
47
- "webglHash": random_string().encode('utf-8'),
48
- }
49
-
50
-
51
- class TestHashingData(unittest.TestCase):
52
- def test_hash_nonempty(self):
53
- """
54
- Test that the hashing function returns a non-empty string for a non-empty input.
55
- """
56
- data = str(sampleData1).encode('utf-8')
57
- hash_value = get_tlsh_hash(data)
58
- self.assertTrue(hash_value, "Hash value should not be empty for non-empty input.")
59
-
60
- def test_hash_identical_inputs(self):
61
- """
62
- Test that the hashing function returns the same hash for identical inputs.
63
- Also checks that the difference between the hashes is zero.
64
- """
65
- data = str(sampleData1).encode('utf-8')
66
- hash1 = get_tlsh_hash(data)
67
- hash2 = get_tlsh_hash(data)
68
- difference = get_hash_difference(hash1, hash2)
69
- self.assertEqual(hash1, hash2, "Hash values should be identical for identical inputs.")
70
- self.assertEqual(difference, 0, "Hash difference should be zero for identical inputs.")
71
-
72
- def test_hash_distance_when_different(self):
73
- """
74
- Test that the hash difference is non-zero for different inputs.
75
- Also checks that the difference is large for sufficiently different inputs.
76
- """
77
- data1 = str(sampleData1).encode('utf-8')
78
- data2 = str(sampleData2).encode('utf-8')
79
- hash1 = get_tlsh_hash(data1)
80
- hash2 = get_tlsh_hash(data2)
81
- difference = get_hash_difference(hash1, hash2)
82
- self.assertGreater(difference, 0, "Hash difference should be greater than zero for different inputs.")
83
- self.assertGreater(difference, 80, "Hash difference should be large for sufficiently different inputs.")
84
-
85
- def test_hash_distance_when_similar(self):
86
- """
87
- Test that the hash difference is small for similar inputs.
88
- """
89
- data1 = str(sampleData1).encode('utf-8')
90
- random_index = random.randint(0, len(data1) - 4)
91
- data2 = data1[:random_index] + random_string(length=4).encode('utf-8') + data1[random_index + 4:]
92
- hash1 = get_tlsh_hash(data1)
93
- hash2 = get_tlsh_hash(data2)
94
- difference = get_hash_difference(hash1, hash2)
95
- self.assertLess(difference, 140, "Hash difference should be small for similar inputs.")
96
-
97
- if __name__ == '__main__':
98
- unittest.main()
@@ -1,66 +0,0 @@
1
- import unittest
2
- import random
3
- from hashing import get_tlsh_hash, get_hash_difference
4
-
5
- def random_string(length=524):
6
- """
7
- Generate a random string of specified length.
8
-
9
- Args:
10
- length (int): The length of the string to generate. Default is 524.
11
-
12
- Returns:
13
- str: A random string of the specified length.
14
- """
15
- characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[];!@#$%^&*()-_=+|;:,.<>?"
16
- return ''.join(random.choice(characters) for _ in range(length))
17
-
18
-
19
- class TestHashingMethods(unittest.TestCase):
20
- def test_hash_nonempty(self):
21
- """
22
- Test that the hashing function returns a non-empty string for a non-empty input.
23
- """
24
- data = random_string()
25
- hash_value = get_tlsh_hash(data.encode('utf-8'))
26
- self.assertTrue(hash_value, "Hash value should not be empty for non-empty input.")
27
-
28
- def test_hash_identical_inputs(self):
29
- """
30
- Test that the hashing function returns the same hash for identical inputs.
31
- Also checks that the difference between the hashes is zero.
32
- """
33
- data = random_string()
34
- hash1 = get_tlsh_hash(data.encode('utf-8'))
35
- hash2 = get_tlsh_hash(data.encode('utf-8'))
36
- difference = get_hash_difference(hash1, hash2)
37
- self.assertEqual(hash1, hash2, "Hash values should be identical for identical inputs.")
38
- self.assertEqual(difference, 0, "Hash difference should be zero for identical inputs.")
39
-
40
- def test_hash_distance_when_different(self):
41
- """
42
- Test that the hash difference is non-zero for different inputs.
43
- Also checks that the difference is large for sufficiently different inputs.
44
- """
45
- data1 = random_string()
46
- data2 = random_string()
47
- hash1 = get_tlsh_hash(data1.encode('utf-8'))
48
- hash2 = get_tlsh_hash(data2.encode('utf-8'))
49
- difference = get_hash_difference(hash1, hash2)
50
- self.assertGreater(difference, 0, "Hash difference should be greater than zero for different inputs.")
51
- self.assertGreater(difference, 180, "Hash difference should be large for sufficiently different inputs.")
52
-
53
- def test_hash_distance_when_similar(self):
54
- """
55
- Test that the hash difference is small for similar inputs.
56
- """
57
- data1 = random_string()
58
- random_index = random.randint(0, len(data1) - 4)
59
- data2 = data1[:random_index] + random_string(length=4) + data1[random_index + 4:]
60
- hash1 = get_tlsh_hash(data1.encode('utf-8'))
61
- hash2 = get_tlsh_hash(data2.encode('utf-8'))
62
- difference = get_hash_difference(hash1, hash2)
63
- self.assertLess(difference, 200, "Hash difference should be small for similar inputs.")
64
-
65
- if __name__ == '__main__':
66
- unittest.main()
File without changes
File without changes