ddi-fw 0.0.258__tar.gz → 0.0.260__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/PKG-INFO +1 -1
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/pyproject.toml +1 -1
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/faiss_storage.py +7 -2
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/kaggle.py +2 -1
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/PKG-INFO +1 -1
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/README.md +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/setup.cfg +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/core.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/db_utils.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/setup_._py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/chroma_storage.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/embeddings.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/storage.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/evaluation_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/ml_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/model_wrapper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/tracking_service.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/mmlrestclient.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/ner.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/pipeline.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/enums.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/json_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/numpy_utils.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/package_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/py7zr_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/utils.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/zip_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/__init__.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/idf_helper.py +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/requires.txt +0 -0
- {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import faiss
|
2
|
+
from langchain_text_splitters import TextSplitter
|
2
3
|
import pandas as pd
|
3
4
|
from uuid import uuid4
|
4
5
|
from langchain_community.vectorstores.faiss import FAISS
|
@@ -45,6 +46,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
45
46
|
persist_directory: str = Field(default="./embeddings/faiss")
|
46
47
|
index: Any = None
|
47
48
|
vector_store: Optional[FAISS] | None = None
|
49
|
+
text_splitter: Optional[TextSplitter] = None
|
48
50
|
class Config:
|
49
51
|
arbitrary_types_allowed = True
|
50
52
|
# def generate_vector_store(self, docs):
|
@@ -75,6 +77,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
75
77
|
dict: A dictionary with the structure {type: {drugbank_id: [embedding]}}.
|
76
78
|
"""
|
77
79
|
self.load(self.persist_directory)
|
80
|
+
# df = self.as_dataframe(formatter_fn=custom_formatter)
|
78
81
|
df = self.as_dataframe(formatter_fn=custom_formatter)
|
79
82
|
type_dict = {}
|
80
83
|
for drug_type, group in df.groupby('type'):
|
@@ -111,7 +114,9 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
111
114
|
|
112
115
|
valid_docs = []
|
113
116
|
valid_ids = []
|
114
|
-
|
117
|
+
if self.text_splitter:
|
118
|
+
docs = self.text_splitter.split_documents(docs)
|
119
|
+
|
115
120
|
for doc in docs:
|
116
121
|
content = doc.page_content if hasattr(doc, 'page_content') else ""
|
117
122
|
if content and content.strip():
|
@@ -254,7 +259,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
254
259
|
|
255
260
|
def custom_formatter(document: Document, vector: np.ndarray) -> Dict[str, Any]:
|
256
261
|
return {
|
257
|
-
"id": document.metadata.get("
|
262
|
+
"id": document.metadata.get("id", None),
|
258
263
|
"type": document.metadata.get("type", None),
|
259
264
|
"embedding": vector
|
260
265
|
}
|
@@ -22,7 +22,8 @@ def create_kaggle_dataset(base_path: str, collections: list):
|
|
22
22
|
# Step 2: Get metadata for the current folder
|
23
23
|
model_info = next((c for c in collections if c['id'] == folder_name), None)
|
24
24
|
if model_info is None:
|
25
|
-
|
25
|
+
raise FileNotFoundError(f"Model info for {folder_name} not exists")
|
26
|
+
# continue # Skip if model info is not found
|
26
27
|
|
27
28
|
title = model_info['kaggle_title']
|
28
29
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|