ddi-fw 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/embedding_generator_new.py +18 -6
- ddi_fw/utils/__init__.py +1 -1
- ddi_fw/utils/py7zr_helper.py +4 -3
- ddi_fw/utils/utils.py +17 -3
- {ddi_fw-0.0.48.dist-info → ddi_fw-0.0.50.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.48.dist-info → ddi_fw-0.0.50.dist-info}/RECORD +8 -8
- {ddi_fw-0.0.48.dist-info → ddi_fw-0.0.50.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.48.dist-info → ddi_fw-0.0.50.dist-info}/top_level.txt +0 -0
@@ -17,7 +17,7 @@ from sentence_transformers import SentenceTransformer, util
|
|
17
17
|
|
18
18
|
from typing import Any, Dict, List, Optional
|
19
19
|
from langchain_core.embeddings import Embeddings
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, SecretStr
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, SecretStr, computed_field
|
21
21
|
from langchain.embeddings import SentenceTransformerEmbeddings
|
22
22
|
|
23
23
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -56,7 +56,7 @@ class SumPoolingStrategy(PoolingStrategy):
|
|
56
56
|
return np.sum(embeddings, axis=0)
|
57
57
|
|
58
58
|
|
59
|
-
class SentenceTransformerDecorator(
|
59
|
+
class SentenceTransformerDecorator(Embeddings):
|
60
60
|
def __init__(self, model_name="all-MiniLM-L6-v2", **kwargs: Any):
|
61
61
|
self.embeddings = SentenceTransformerEmbeddings(model_name=model_name)
|
62
62
|
|
@@ -67,7 +67,7 @@ class SentenceTransformerDecorator(BaseModel, Embeddings):
|
|
67
67
|
return self.embeddings.embed_query(text)
|
68
68
|
|
69
69
|
|
70
|
-
class PretrainedEmbeddings(
|
70
|
+
class PretrainedEmbeddings( Embeddings):
|
71
71
|
def __init__(self, model_name):
|
72
72
|
self.mmodel_name = model_name
|
73
73
|
self.model = AutoModel.from_pretrained(model_name)
|
@@ -88,11 +88,23 @@ class PretrainedEmbeddings(BaseModel, Embeddings):
|
|
88
88
|
|
89
89
|
|
90
90
|
class SBertEmbeddings(BaseModel, Embeddings):
|
91
|
-
|
92
|
-
|
91
|
+
# class Config:
|
92
|
+
# arbitrary_types_allowed = True
|
93
|
+
|
94
|
+
model_config = ConfigDict(
|
95
|
+
arbitrary_types_allowed = True,
|
96
|
+
protected_namespaces=()
|
97
|
+
)
|
98
|
+
|
99
|
+
model_name:str
|
100
|
+
|
101
|
+
@computed_field
|
102
|
+
@property
|
103
|
+
def model(self) -> SentenceTransformer:
|
104
|
+
return SentenceTransformer(self.model_name)
|
93
105
|
|
94
106
|
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
95
|
-
return self.model.encode(texts)
|
107
|
+
return self.model.encode(texts).tolist()
|
96
108
|
|
97
109
|
def embed_query(self, text: str) -> List[float]:
|
98
110
|
return self.embed_documents([text])[0]
|
ddi_fw/utils/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from .utils import create_folder_if_not_exists, utc_time_as_string,utc_time_as_string_simple_format, compress_and_save_data
|
1
|
+
from .utils import clear_directory,create_folder_if_not_exists, utc_time_as_string,utc_time_as_string_simple_format, compress_and_save_data
|
2
2
|
from .zip_helper import ZipHelper
|
3
3
|
from .py7zr_helper import Py7ZipHelper
|
4
4
|
from .enums import UMLSCodeTypes, DrugBankTextDataTypes
|
ddi_fw/utils/py7zr_helper.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from collections import defaultdict
|
2
2
|
import math
|
3
|
-
from ddi_fw.utils.utils import create_folder_if_not_exists
|
3
|
+
from ddi_fw.utils.utils import clear_directory, create_folder_if_not_exists
|
4
4
|
import py7zr
|
5
5
|
import os
|
6
6
|
from os.path import basename
|
@@ -24,9 +24,10 @@ class Py7ZipHelper:
|
|
24
24
|
os.path.join(folder_path, '..')))
|
25
25
|
# archive.write(root+"/"+file)
|
26
26
|
|
27
|
-
def create_archive_multiparts(self, zip_name, file_path, output_path, chunk_size):
|
27
|
+
def create_archive_multiparts(self, zip_name, file_path, output_path, chunk_size, delete_existing_files:True):
|
28
28
|
parent_folder = os.path.dirname(file_path)
|
29
|
-
|
29
|
+
if delete_existing_files:
|
30
|
+
clear_directory(output_path)
|
30
31
|
# parts_path = f"{parent_folder}/parts"
|
31
32
|
create_folder_if_not_exists(output_path)
|
32
33
|
# file_name, file_extension = os.path.splitext(file_path)
|
ddi_fw/utils/utils.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
import gzip
|
2
2
|
import json
|
3
3
|
import os
|
4
|
-
|
5
4
|
from datetime import datetime, timezone
|
6
|
-
|
7
5
|
from matplotlib import pyplot as plt
|
8
|
-
|
6
|
+
import shutil
|
9
7
|
|
10
8
|
def create_folder_if_not_exists(path):
|
11
9
|
if not os.path.exists(path):
|
@@ -51,6 +49,22 @@ def decompress(gzip_file):
|
|
51
49
|
return data
|
52
50
|
|
53
51
|
|
52
|
+
def clear_directory(directory_path):
    """Delete every entry inside ``directory_path`` but keep the directory itself.

    Regular files, symbolic links (including dangling ones and links that
    point at directories) and any other non-directory entries are unlinked;
    real sub-directories are removed recursively. Targets of symbolic links
    are never followed or deleted.

    Args:
        directory_path: Path of the directory whose contents should be removed.

    Returns:
        None. A status message is printed in both the "cleared" and the
        "does not exist" case.
    """
    # isdir() already implies that the path exists, so one check is enough.
    if not os.path.isdir(directory_path):
        print(f"The directory does not exist: {directory_path}")
        return

    for item in os.listdir(directory_path):
        item_path = os.path.join(directory_path, item)
        # Test for symlinks first: a link to a directory satisfies isdir(),
        # but shutil.rmtree refuses to operate on symlinks, and a dangling
        # link is neither a file nor a directory and would be left behind.
        if os.path.islink(item_path) or not os.path.isdir(item_path):
            os.remove(item_path)  # Remove file / link without following it
        else:
            shutil.rmtree(item_path)  # Remove real directory recursively
    print(f"Cleared contents of directory: {directory_path}")
|
66
|
+
|
67
|
+
|
54
68
|
if __name__ == "__main__":
|
55
69
|
# json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
|
56
70
|
# file_data = open(json_file, "r", 1).read()
|
@@ -2,7 +2,7 @@ ddi_fw/datasets/__init__.py,sha256=CqDrx7Ov83pXRh-n0ylembBmzhlW_yFWiheBcISrKdg,5
|
|
2
2
|
ddi_fw/datasets/core.py,sha256=CFmnK0_cUxQAlTHjC2LEuKX0DNVAhGjSyQaV4jURYdI,18932
|
3
3
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
4
4
|
ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
|
5
|
-
ddi_fw/datasets/embedding_generator_new.py,sha256=
|
5
|
+
ddi_fw/datasets/embedding_generator_new.py,sha256=GOE-Io6-DBwiUJSkgmxw9ZM1exCYYVu9KyP2dH3gf1o,7506
|
6
6
|
ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
|
7
7
|
ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
8
8
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
@@ -75,12 +75,12 @@ ddi_fw/test/sklearn-tfidf.py,sha256=cjtg27vLskEMXgrsqUR_EapRGVd4xgwOQ9zYsu72zjs,
|
|
75
75
|
ddi_fw/test/test.py,sha256=zJh9ZBcZl-vZIFDvuftcRrRV8WAwtiFVhPPd6Et4OU4,2997
|
76
76
|
ddi_fw/test/torch_cuda_test.py,sha256=R-4VGVErl_Ufk54DoZbgL_YXWoCYFyanIVWd6P39IEk,312
|
77
77
|
ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TGY0O8,766
|
78
|
-
ddi_fw/utils/__init__.py,sha256=
|
78
|
+
ddi_fw/utils/__init__.py,sha256=x1ypYJRKJlbF9x4psHYGXj-YbDD8T_c28gXZkr03cdE,273
|
79
79
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
80
|
-
ddi_fw/utils/py7zr_helper.py,sha256=
|
81
|
-
ddi_fw/utils/utils.py,sha256=
|
80
|
+
ddi_fw/utils/py7zr_helper.py,sha256=8U3fq9U6Y6vSx-InQ444SxFLHvOQNFFe8bXJgBrPC-Y,4570
|
81
|
+
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
82
82
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
83
|
-
ddi_fw-0.0.
|
84
|
-
ddi_fw-0.0.
|
85
|
-
ddi_fw-0.0.
|
86
|
-
ddi_fw-0.0.
|
83
|
+
ddi_fw-0.0.50.dist-info/METADATA,sha256=C4QuZ2hRXteDRlwN_SlebxopvrcRCo362XsgkWqU1KA,1565
|
84
|
+
ddi_fw-0.0.50.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
85
|
+
ddi_fw-0.0.50.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
86
|
+
ddi_fw-0.0.50.dist-info/RECORD,,
|
File without changes
|
File without changes
|