ddi-fw 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,7 @@ from sentence_transformers import SentenceTransformer, util
17
17
 
18
18
  from typing import Any, Dict, List, Optional
19
19
  from langchain_core.embeddings import Embeddings
20
- from pydantic import BaseModel, ConfigDict, Field, SecretStr
20
+ from pydantic import BaseModel, ConfigDict, Field, SecretStr, computed_field
21
21
  from langchain.embeddings import SentenceTransformerEmbeddings
22
22
 
23
23
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -56,7 +56,7 @@ class SumPoolingStrategy(PoolingStrategy):
56
56
  return np.sum(embeddings, axis=0)
57
57
 
58
58
 
59
- class SentenceTransformerDecorator(BaseModel, Embeddings):
59
+ class SentenceTransformerDecorator(Embeddings):
60
60
  def __init__(self, model_name="all-MiniLM-L6-v2", **kwargs: Any):
61
61
  self.embeddings = SentenceTransformerEmbeddings(model_name=model_name)
62
62
 
@@ -67,7 +67,7 @@ class SentenceTransformerDecorator(BaseModel, Embeddings):
67
67
  return self.embeddings.embed_query(text)
68
68
 
69
69
 
70
- class PretrainedEmbeddings(BaseModel, Embeddings):
70
+ class PretrainedEmbeddings( Embeddings):
71
71
  def __init__(self, model_name):
72
72
  self.mmodel_name = model_name
73
73
  self.model = AutoModel.from_pretrained(model_name)
@@ -88,11 +88,23 @@ class PretrainedEmbeddings(BaseModel, Embeddings):
88
88
 
89
89
 
90
90
  class SBertEmbeddings(BaseModel, Embeddings):
91
- def __init__(self, model_name):
92
- self.model = SentenceTransformer(model_name)
91
+ # class Config:
92
+ # arbitrary_types_allowed = True
93
+
94
+ model_config = ConfigDict(
95
+ arbitrary_types_allowed = True,
96
+ protected_namespaces=()
97
+ )
98
+
99
+ model_name:str
100
+
101
+ @computed_field
102
+ @property
103
+ def model(self) -> SentenceTransformer:
104
+ return SentenceTransformer(self.model_name)
93
105
 
94
106
  def embed_documents(self, texts: List[str]) -> List[List[float]]:
95
- return self.model.encode(texts)
107
+ return self.model.encode(texts).tolist()
96
108
 
97
109
  def embed_query(self, text: str) -> List[float]:
98
110
  return self.embed_documents([text])[0]
ddi_fw/utils/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .utils import create_folder_if_not_exists, utc_time_as_string,utc_time_as_string_simple_format, compress_and_save_data
1
+ from .utils import clear_directory,create_folder_if_not_exists, utc_time_as_string,utc_time_as_string_simple_format, compress_and_save_data
2
2
  from .zip_helper import ZipHelper
3
3
  from .py7zr_helper import Py7ZipHelper
4
4
  from .enums import UMLSCodeTypes, DrugBankTextDataTypes
@@ -1,6 +1,6 @@
1
1
  from collections import defaultdict
2
2
  import math
3
- from ddi_fw.utils.utils import create_folder_if_not_exists
3
+ from ddi_fw.utils.utils import clear_directory, create_folder_if_not_exists
4
4
  import py7zr
5
5
  import os
6
6
  from os.path import basename
@@ -24,9 +24,10 @@ class Py7ZipHelper:
24
24
  os.path.join(folder_path, '..')))
25
25
  # archive.write(root+"/"+file)
26
26
 
27
- def create_archive_multiparts(self, zip_name, file_path, output_path, chunk_size):
27
+ def create_archive_multiparts(self, zip_name, file_path, output_path, chunk_size, delete_existing_files:True):
28
28
  parent_folder = os.path.dirname(file_path)
29
-
29
+ if delete_existing_files:
30
+ clear_directory(output_path)
30
31
  # parts_path = f"{parent_folder}/parts"
31
32
  create_folder_if_not_exists(output_path)
32
33
  # file_name, file_extension = os.path.splitext(file_path)
ddi_fw/utils/utils.py CHANGED
@@ -1,11 +1,9 @@
1
1
  import gzip
2
2
  import json
3
3
  import os
4
-
5
4
  from datetime import datetime, timezone
6
-
7
5
  from matplotlib import pyplot as plt
8
-
6
+ import shutil
9
7
 
10
8
  def create_folder_if_not_exists(path):
11
9
  if not os.path.exists(path):
@@ -51,6 +49,22 @@ def decompress(gzip_file):
51
49
  return data
52
50
 
53
51
 
52
+ def clear_directory(directory_path):
53
+ # Check if the directory exists
54
+ if os.path.exists(directory_path) and os.path.isdir(directory_path):
55
+ # Iterate through all files and directories in the directory
56
+ for item in os.listdir(directory_path):
57
+ item_path = os.path.join(directory_path, item)
58
+ # Check if it's a file or a directory and remove it
59
+ if os.path.isfile(item_path):
60
+ os.remove(item_path) # Remove file
61
+ elif os.path.isdir(item_path):
62
+ shutil.rmtree(item_path) # Remove directory
63
+ print(f"Cleared contents of directory: {directory_path}")
64
+ else:
65
+ print(f"The directory does not exist: {directory_path}")
66
+
67
+
54
68
  if __name__ == "__main__":
55
69
  # json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
56
70
  # file_data = open(json_file, "r", 1).read()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.48
3
+ Version: 0.0.50
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -2,7 +2,7 @@ ddi_fw/datasets/__init__.py,sha256=CqDrx7Ov83pXRh-n0ylembBmzhlW_yFWiheBcISrKdg,5
2
2
  ddi_fw/datasets/core.py,sha256=CFmnK0_cUxQAlTHjC2LEuKX0DNVAhGjSyQaV4jURYdI,18932
3
3
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
4
4
  ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
5
- ddi_fw/datasets/embedding_generator_new.py,sha256=bdZ7dSrm0oJTRh84MyltB_hhgL_IQyvusO6aPfW8JxU,7257
5
+ ddi_fw/datasets/embedding_generator_new.py,sha256=GOE-Io6-DBwiUJSkgmxw9ZM1exCYYVu9KyP2dH3gf1o,7506
6
6
  ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
7
7
  ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
8
8
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -75,12 +75,12 @@ ddi_fw/test/sklearn-tfidf.py,sha256=cjtg27vLskEMXgrsqUR_EapRGVd4xgwOQ9zYsu72zjs,
75
75
  ddi_fw/test/test.py,sha256=zJh9ZBcZl-vZIFDvuftcRrRV8WAwtiFVhPPd6Et4OU4,2997
76
76
  ddi_fw/test/torch_cuda_test.py,sha256=R-4VGVErl_Ufk54DoZbgL_YXWoCYFyanIVWd6P39IEk,312
77
77
  ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TGY0O8,766
78
- ddi_fw/utils/__init__.py,sha256=cbvKldLWGINnVWIVwRQ04zdJpqOf-Ci9X03I8OgmJcw,257
78
+ ddi_fw/utils/__init__.py,sha256=x1ypYJRKJlbF9x4psHYGXj-YbDD8T_c28gXZkr03cdE,273
79
79
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
80
- ddi_fw/utils/py7zr_helper.py,sha256=rWhz3Trj6bk6B24hMuff1fKdRbC0yuvaHFlL8-eaAhg,4450
81
- ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
80
+ ddi_fw/utils/py7zr_helper.py,sha256=8U3fq9U6Y6vSx-InQ444SxFLHvOQNFFe8bXJgBrPC-Y,4570
81
+ ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
82
82
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
83
- ddi_fw-0.0.48.dist-info/METADATA,sha256=NT2xu52EFUO2Rtk4hDZtCftemzeHYBTLZ7ktOHnEUxM,1565
84
- ddi_fw-0.0.48.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
85
- ddi_fw-0.0.48.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
86
- ddi_fw-0.0.48.dist-info/RECORD,,
83
+ ddi_fw-0.0.50.dist-info/METADATA,sha256=C4QuZ2hRXteDRlwN_SlebxopvrcRCo362XsgkWqU1KA,1565
84
+ ddi_fw-0.0.50.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
85
+ ddi_fw-0.0.50.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
86
+ ddi_fw-0.0.50.dist-info/RECORD,,