deeplotx 0.2.21__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deeplotx/__init__.py CHANGED
@@ -3,7 +3,7 @@ import os
3
3
 
4
4
  __ROOT__ = os.path.dirname(os.path.abspath(__file__))
5
5
 
6
- from .encoder import BertEncoder, LongTextEncoder
6
+ from .encoder import BertEncoder, LongTextEncoder, LongformerEncoder
7
7
  from .nn import LinearRegression, LogisticRegression, SoftmaxRegression
8
8
  from .trainer import TextBinaryClassifierTrainer
9
9
 
@@ -1,2 +1,3 @@
1
1
  from .bert_encoder import BertEncoder
2
2
  from .long_text_encoder import LongTextEncoder
3
+ from .longformer_encoder import LongformerEncoder
@@ -0,0 +1,17 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ bias = 1e-12
5
+
6
+
7
+ def ndarray_adapter(*args) -> tuple | np.ndarray:
8
+ args = list(args)
9
+ for i, arg in enumerate(args):
10
+ match arg.__class__:
11
+ case torch.Tensor:
12
+ args[i] = arg.detach().cpu().numpy()
13
+ case List:
14
+ args[i] = np.asarray(arg)
15
+ if len(args) > 1:
16
+ return tuple(args)
17
+ return args[0]
@@ -0,0 +1,32 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from deeplotx.similarity import bias, ndarray_adapter
5
+
6
+
7
def cross_entropy(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the cross entropy of ``q`` relative to ``p``.

    Args:
        p: Reference values (tensor or array).
        q: Predicted values; clipped to ``[bias, 1 - bias]`` before the
            logarithm is taken.

    Returns:
        The negated sum of ``p * log(q)`` divided by the length of the first
        axis of ``p``.
    """
    target, predicted = ndarray_adapter(p, q)
    # Keep predictions strictly inside (0, 1) so the logarithm stays finite.
    predicted = np.clip(predicted, bias, 1 - bias)
    weighted_log = target * np.log(predicted)
    return -1 * (np.sum(weighted_log) / target.shape[0])
11
+
12
+
13
def kl_divergence(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the Kullback-Leibler divergence ``KL(p || q)``.

    Both inputs are rescaled to sum to one before the divergence is computed.

    Args:
        p: Reference weights (tensor or array).
        q: Approximating weights; exact zeros are replaced by ``bias`` so the
            ratio ``p / q`` stays finite.

    Returns:
        The sum of ``p * log(p / q)`` over all entries, where entries with
        ``p == 0`` contribute zero.
    """
    p, q = ndarray_adapter(p, q)
    q = np.where(q == 0, bias, q)
    p = p / np.sum(p)
    q = q / np.sum(q)
    # 0 * log(0) evaluates to NaN in floating point; substituting 1 for the
    # zero entries inside the logarithm keeps it finite, and the outer
    # multiplication by p (== 0 there) makes those terms contribute 0.
    safe_p = np.where(p == 0, 1.0, p)
    return np.sum(p * np.log(safe_p / q))
19
+
20
+
21
def js_divergence(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the Jensen-Shannon divergence between ``p`` and ``q``.

    Args:
        p: First set of weights (tensor or array).
        q: Second set of weights (tensor or array).

    Returns:
        The mean of ``kl_divergence(p, m)`` and ``kl_divergence(q, m)``, where
        ``m`` is the equal-weight mixture of the normalised inputs.
    """
    p, q = ndarray_adapter(p, q)
    # Normalise before mixing: averaging raw inputs would weight the mixture
    # by each input's total mass instead of giving both sides equal weight.
    p = p / np.sum(p)
    q = q / np.sum(q)
    m = (p + q) / 2
    return (kl_divergence(p, m) + kl_divergence(q, m)) / 2
25
+
26
+
27
def hellinger_distance(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the Hellinger distance between ``p`` and ``q``.

    Both inputs are divided by their own sum before the distance is computed.

    Args:
        p: First set of weights (tensor or array).
        q: Second set of weights (tensor or array).

    Returns:
        The Euclidean norm of ``sqrt(p) - sqrt(q)`` divided by ``sqrt(2)``.
    """
    p_arr, q_arr = ndarray_adapter(p, q)
    p_prob = p_arr / np.sum(p_arr)
    q_prob = q_arr / np.sum(q_arr)
    root_gap = np.sqrt(p_prob) - np.sqrt(q_prob)
    return np.sqrt(np.sum(root_gap ** 2)) / np.sqrt(2)
@@ -0,0 +1,19 @@
1
+ from deeplotx.similarity import bias
2
+
3
+
4
def jaccard_similarity(set1: set, set2: set) -> float:
    """Return the Jaccard index: intersection size over union size.

    ``bias`` is added to both numerator and denominator, which avoids a
    division by zero when both sets are empty.
    """
    common = set1.intersection(set2)
    combined = set1.union(set2)
    return (len(common) + bias) / (len(combined) + bias)
6
+
7
+
8
def overlap_coefficient(set1: set, set2: set) -> float:
    """Return the overlap coefficient: intersection size over the smaller set's size.

    ``bias`` is added to both numerator and denominator, which avoids a
    division by zero when either set is empty.
    """
    shared_count = len(set1.intersection(set2))
    smaller_size = min(len(set1), len(set2))
    return (shared_count + bias) / (smaller_size + bias)
10
+
11
+
12
def dice_coefficient(set1: set, set2: set) -> float:
    """Return the Dice coefficient: twice the intersection size over the summed set sizes.

    ``bias`` is added to both numerator and denominator, which avoids a
    division by zero when both sets are empty.
    """
    shared_count = len(set1.intersection(set2))
    total_size = len(set1) + len(set2)
    return (2 * shared_count + bias) / (total_size + bias)
14
+
15
+
16
def ochiai_similarity(set1: set, set2: set) -> float:
    """Return the Ochiai coefficient of two sets.

    This is the intersection size divided by the product of the square roots
    of the two set sizes. ``bias`` is added to both numerator and denominator,
    which avoids a division by zero when either set is empty.
    """
    shared_count = len(set1.intersection(set2))
    root_size_product = len(set1) ** 0.5 * len(set2) ** 0.5
    numerator = shared_count + bias
    denominator = root_size_product + bias
    return numerator / denominator
@@ -0,0 +1,36 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from deeplotx.similarity import ndarray_adapter
5
+
6
+
7
def l2_normalize(x: torch.Tensor | np.ndarray) -> np.ndarray:
    """Scale ``x`` by the inverse of its L2 norm.

    The norm is taken over all entries of ``x``, not per row or column.

    Args:
        x: Tensor or array to normalise.

    Returns:
        ``x`` divided by the square root of the sum of its squared entries.
    """
    vec = ndarray_adapter(x)
    norm = np.sqrt(np.sum(vec * vec))
    return vec / norm
10
+
11
+
12
def z_score_normalize(x: torch.Tensor | np.ndarray) -> np.ndarray:
    """Standardise ``x`` using its mean and standard deviation.

    The mean and standard deviation are taken over all entries of ``x``.

    Args:
        x: Tensor or array to standardise.

    Returns:
        ``(x - mean(x)) / std(x)``.
    """
    values = ndarray_adapter(x)
    centered = values - np.mean(values)
    return centered / np.std(values)
17
+
18
+
19
def euclidean_similarity(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the Euclidean (L2) distance between ``p`` and ``q``.

    Despite the name, the value is a distance: it is 0 for identical inputs
    and grows as the inputs move apart.

    Args:
        p: First tensor or array.
        q: Second tensor or array.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    left, right = ndarray_adapter(p, q)
    delta = left - right
    return np.sqrt(np.sum(delta * delta))
24
+
25
+
26
def cosine_similarity(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return one minus the cosine of the angle between ``p`` and ``q``.

    Despite the name, the value behaves like a distance: it is 0 when the
    inputs point in the same direction.

    Args:
        p: First tensor or array.
        q: Second tensor or array.

    Returns:
        ``1 - (p^T q) / (||p|| * ||q||)``, with the norms taken over all
        entries of each input.
    """
    left, right = ndarray_adapter(p, q)
    dot = np.transpose(left) @ right
    left_norm = np.sqrt(np.sum(left * left))
    right_norm = np.sqrt(np.sum(right * right))
    return 1 - (dot / (left_norm * right_norm))
32
+
33
+
34
def chebyshev_similarity(p: torch.Tensor | np.ndarray, q: torch.Tensor | np.ndarray) -> np.float32:
    """Return the Chebyshev (L-infinity) distance between ``p`` and ``q``.

    Despite the name, the value is a distance: it is 0 for identical inputs.

    Args:
        p: First tensor or array.
        q: Second tensor or array.

    Returns:
        The largest absolute element-wise difference.
    """
    left, right = ndarray_adapter(p, q)
    gaps = np.abs(left - right)
    return np.max(gaps)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deeplotx
3
- Version: 0.2.21
3
+ Version: 0.3.1
4
4
  Summary: Easy-2-use long text classifier trainers.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,5 +1,5 @@
1
- deeplotx/__init__.py,sha256=Bhxc6HRnuhPZCMNlBc6oKcFTpJbWRGrZmt00vVOsNf0,916
2
- deeplotx/encoder/__init__.py,sha256=x7k8IE0FXvDl7kCJGWPsetOHFdvNCiCXHbYOdvo7_JQ,87
1
+ deeplotx/__init__.py,sha256=C6N717chqnk3jqh9nuh9oM5hPldX9mCusCn-LqGWJJg,935
2
+ deeplotx/encoder/__init__.py,sha256=EM-xrTsHoGaiiFpj-iFAxilMHXC_sQKWYrcq1qCnI3U,138
3
3
  deeplotx/encoder/bert_encoder.py,sha256=A-B7Gj94xv6UhvsFTBH7tnkAdGHRhfUZA2QjSnTKB6c,1970
4
4
  deeplotx/encoder/long_text_encoder.py,sha256=V6VxaHW6bMMaZHgU1UZ8n19UfSIV2f2sarWXquiFffQ,3018
5
5
  deeplotx/encoder/longformer_encoder.py,sha256=mZpC5TrGHQo98-ydGtVQQ9KRHgCGl1sRoxcQs7r4SSo,1409
@@ -8,14 +8,18 @@ deeplotx/nn/base_neural_network.py,sha256=Rkwu58mXXcuusf-59yLX89MywQx-EvTsSXOvlz
8
8
  deeplotx/nn/linear_regression.py,sha256=D4mEWVOq6q1Fm2otm57rgZ_E06HJLZBV5k636PprAf4,1520
9
9
  deeplotx/nn/logistic_regression.py,sha256=QAtZp2oyqOW8-1pJWVcahsSM83bzfA68EHObg-wSHHY,463
10
10
  deeplotx/nn/softmax_regression.py,sha256=eUn3mVNlye9ewVdw3McPHZuKbUvvaamsUgFIJMVMgBU,487
11
+ deeplotx/similarity/__init__.py,sha256=JA1om2zeDcQVS1R04nDMdP6yegxdLJ14WF63pSTL3oo,418
12
+ deeplotx/similarity/distribution.py,sha256=wQGouuuW531pZeBRKBujXsdsoz4fDnPw7_GW81jwepc,1066
13
+ deeplotx/similarity/set.py,sha256=zhGFxtSIXlWqvipBYzoiPahp4g0boAIoUiMfG0wl07A,686
14
+ deeplotx/similarity/vector.py,sha256=WVbDHqykt-fvuILVrhUCtIFAOEjY_zvttrXGM9eylG0,1125
11
15
  deeplotx/trainer/__init__.py,sha256=Fl5DR9UecQc5VtBcczU9sx_HtPNoFohpuELOh-Jrsks,77
12
16
  deeplotx/trainer/base_trainer.py,sha256=z0MeAT-rRYmjeBXt0ckt7J1itYArR0Cx02wHesXUoZE,385
13
17
  deeplotx/trainer/text_binary_classification_trainer.py,sha256=5O-5dwVMCj5EDX9gjJwCA468OR4UozJ7V8b-JxeUB0s,4080
14
18
  deeplotx/util/__init__.py,sha256=JxqAK_WOOHcYVSTHBT1-WuBwWrPEVDTV3titeVWvNUM,74
15
19
  deeplotx/util/hash.py,sha256=wwsC6kOQvbpuvwKsNQOARd78_wePmW9i3oaUuXRUnpc,352
16
20
  deeplotx/util/read_file.py,sha256=ptzouvEQeeW8KU5BrWNJlXw-vFXVrpS9SkAUxsu6A8A,612
17
- deeplotx-0.2.21.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
18
- deeplotx-0.2.21.dist-info/METADATA,sha256=mNUcUO4dSccX1Sz8868nrbq3qWo3cINJXPVv8XtVpzY,1617
19
- deeplotx-0.2.21.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
20
- deeplotx-0.2.21.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
21
- deeplotx-0.2.21.dist-info/RECORD,,
21
+ deeplotx-0.3.1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
22
+ deeplotx-0.3.1.dist-info/METADATA,sha256=GzIi1llGFDTLvHDLF3GOQ4G6MafM10M-7IeosZwZlaY,1616
23
+ deeplotx-0.3.1.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
24
+ deeplotx-0.3.1.dist-info/top_level.txt,sha256=hKg4pVDXZ-WWxkRfJFczRIll1Sv7VyfKCmzHLXbuh1U,9
25
+ deeplotx-0.3.1.dist-info/RECORD,,