ddi-fw 0.0.259__py3-none-any.whl → 0.0.261__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/langchain/chroma_storage.py +7 -1
- ddi_fw/langchain/faiss_storage.py +10 -4
- {ddi_fw-0.0.259.dist-info → ddi_fw-0.0.261.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.259.dist-info → ddi_fw-0.0.261.dist-info}/RECORD +6 -6
- {ddi_fw-0.0.259.dist-info → ddi_fw-0.0.261.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.259.dist-info → ddi_fw-0.0.261.dist-info}/top_level.txt +0 -0
@@ -267,7 +267,13 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
|
|
267
267
|
# Ensure all lists are not None and have the same length
|
268
268
|
docs = results.get('documents', []) or []
|
269
269
|
metadatas = results.get('metadatas', []) or []
|
270
|
-
embeddings = results.get('embeddings', []) or []
|
270
|
+
# embeddings = results.get('embeddings', []) or []
|
271
|
+
|
272
|
+
embeddings = results.get('embeddings')
|
273
|
+
if isinstance(embeddings, np.ndarray):
|
274
|
+
pass # Keep as-is
|
275
|
+
elif embeddings is None:
|
276
|
+
embeddings = []
|
271
277
|
|
272
278
|
# Check if all lists have the same length
|
273
279
|
if not (len(docs) == len(metadatas) == len(embeddings)):
|
@@ -14,6 +14,7 @@ from langchain_core.embeddings import Embeddings
|
|
14
14
|
from langchain_core.vectorstores import VectorStore
|
15
15
|
from ddi_fw.utils import get_import
|
16
16
|
from langchain.document_loaders import DataFrameLoader
|
17
|
+
from collections import defaultdict
|
17
18
|
|
18
19
|
class BaseVectorStoreManager(BaseModel):
|
19
20
|
embeddings: Optional[Embeddings] = None
|
@@ -77,10 +78,15 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
77
78
|
dict: A dictionary with the structure {type: {drugbank_id: [embedding]}}.
|
78
79
|
"""
|
79
80
|
self.load(self.persist_directory)
|
81
|
+
# df = self.as_dataframe(formatter_fn=custom_formatter)
|
80
82
|
df = self.as_dataframe(formatter_fn=custom_formatter)
|
81
|
-
type_dict =
|
82
|
-
|
83
|
-
|
83
|
+
type_dict = defaultdict(lambda: defaultdict(list))
|
84
|
+
|
85
|
+
grouped = df.groupby(['type', 'id'])['embedding'].apply(list)
|
86
|
+
|
87
|
+
for (drug_type, id), embeddings in grouped.items():
|
88
|
+
type_dict[drug_type][id] = embeddings
|
89
|
+
|
84
90
|
return type_dict
|
85
91
|
|
86
92
|
def generate_vector_store(self, docs, handle_empty='zero'):
|
@@ -258,7 +264,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
258
264
|
|
259
265
|
def custom_formatter(document: Document, vector: np.ndarray) -> Dict[str, Any]:
|
260
266
|
return {
|
261
|
-
"id": document.metadata.get("
|
267
|
+
"id": document.metadata.get("id", None),
|
262
268
|
"type": document.metadata.get("type", None),
|
263
269
|
"embedding": vector
|
264
270
|
}
|
@@ -4,9 +4,9 @@ ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl
|
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
6
6
|
ddi_fw/langchain/__init__.py,sha256=Kk2Yr7vemjy9MNB_ImAWET808zt1JkLsWqsgEXpVPJk,421
|
7
|
-
ddi_fw/langchain/chroma_storage.py,sha256=
|
7
|
+
ddi_fw/langchain/chroma_storage.py,sha256=63_UojxGLbytgm4g2BZWdo2hvnWiVjrs4mZjxNxdkV8,15837
|
8
8
|
ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
|
9
|
-
ddi_fw/langchain/faiss_storage.py,sha256=
|
9
|
+
ddi_fw/langchain/faiss_storage.py,sha256=1G_lJ4_pKGEp5SlKBYUZWxEnGHuq3JGmvKeDaEztX8w,18646
|
10
10
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
11
11
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
12
12
|
ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
|
@@ -38,7 +38,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
38
38
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
39
39
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=92bhZw4Qxh0hqPK-bPHm9bUO7pg2p4cStQYtVrOtetE,7919
|
40
40
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
41
|
-
ddi_fw-0.0.
|
42
|
-
ddi_fw-0.0.
|
43
|
-
ddi_fw-0.0.
|
44
|
-
ddi_fw-0.0.
|
41
|
+
ddi_fw-0.0.261.dist-info/METADATA,sha256=P0xexzVAJfggUMTjYeClVcqkap4mhjRzzLJCfDR86II,2623
|
42
|
+
ddi_fw-0.0.261.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
43
|
+
ddi_fw-0.0.261.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
44
|
+
ddi_fw-0.0.261.dist-info/RECORD,,
|
File without changes
|
File without changes
|