ddi-fw 0.0.95__py3-none-any.whl → 0.0.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/pipeline/pipeline.py +11 -9
- {ddi_fw-0.0.95.dist-info → ddi_fw-0.0.97.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.95.dist-info → ddi_fw-0.0.97.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.95.dist-info → ddi_fw-0.0.97.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.95.dist-info → ddi_fw-0.0.97.dist-info}/top_level.txt +0 -0
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -47,7 +47,7 @@ class Pipeline:
|
|
47
47
|
self.combinations = combinations
|
48
48
|
self.model = model
|
49
49
|
|
50
|
-
def __create_or_update_embeddings__(embedding_dict, vector_db_persist_directory, vector_db_collection_name, column):
|
50
|
+
def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
|
51
51
|
"""
|
52
52
|
Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
|
53
53
|
|
@@ -56,8 +56,6 @@ class Pipeline:
|
|
56
56
|
- vector_db_collection_name (str): The name of the collection to query.
|
57
57
|
- embedding_dict (dict): The existing dictionary to update with embeddings.
|
58
58
|
|
59
|
-
Returns:
|
60
|
-
- embedding_dict (dict): The updated dictionary where embeddings are grouped by 'type' and 'id'.
|
61
59
|
"""
|
62
60
|
if vector_db_persist_directory:
|
63
61
|
# Initialize the Chroma client and get the collection
|
@@ -67,9 +65,11 @@ class Pipeline:
|
|
67
65
|
|
68
66
|
# Fetch the embeddings and metadata
|
69
67
|
if column == None:
|
70
|
-
dictionary = collection.get(
|
68
|
+
dictionary = collection.get(
|
69
|
+
include=['embeddings', 'metadatas'])
|
71
70
|
else:
|
72
|
-
dictionary = collection.get(include=['embeddings', 'metadatas'], where=
|
71
|
+
dictionary = collection.get(include=['embeddings', 'metadatas'], where={
|
72
|
+
"type": {"$eq": f"{column}"}})
|
73
73
|
# Populate the embedding dictionary with embeddings from the vector database
|
74
74
|
for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
|
75
75
|
embedding_dict[metadata["type"]
|
@@ -79,7 +79,7 @@ class Pipeline:
|
|
79
79
|
else:
|
80
80
|
raise ValueError(
|
81
81
|
"Persistent directory for the vector DB is not specified.")
|
82
|
-
|
82
|
+
|
83
83
|
def build(self):
|
84
84
|
# 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
|
85
85
|
kwargs = {"columns": self.columns}
|
@@ -91,14 +91,16 @@ class Pipeline:
|
|
91
91
|
if self.vector_db_persist_directory:
|
92
92
|
self.__create_or_update_embeddings__(
|
93
93
|
embedding_dict, self.vector_db_persist_directory, self.vector_db_collection_name)
|
94
|
-
|
94
|
+
|
95
95
|
if self.column_embedding_configs:
|
96
96
|
for item in self.column_embedding_configs:
|
97
97
|
col = item["column"]
|
98
98
|
col_db_dir = item["vector_db_persist_directory"]
|
99
99
|
col_db_collection = item["vector_db_collection_name"]
|
100
|
-
self.__create_or_update_embeddings__(
|
101
|
-
|
100
|
+
self.__create_or_update_embeddings__(
|
101
|
+
embedding_dict, col_db_dir, col_db_collection, col)
|
102
|
+
print(
|
103
|
+
f"Embedings of {col} is calculated from {col_db_collection}")
|
102
104
|
|
103
105
|
# if self.embedding_dict == None:
|
104
106
|
# if self.vector_db_persist_directory:
|
@@ -72,7 +72,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
|
|
72
72
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=qIst7vxHaOAhRv4lgozszwa3b1QE4aIrN74t41Xnvr4,1637
|
73
73
|
ddi_fw/pipeline/multi_pipeline.py,sha256=t_Z7d7xRfDnhpQTlqCf7c0isZ5hZlyXavKhC7ePsnJY,5903
|
74
74
|
ddi_fw/pipeline/ner_pipeline.py,sha256=wB7hz4YCOv7UAz6bGE6sSpPXXIdoOflOVK5UCc1fO-o,5586
|
75
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
75
|
+
ddi_fw/pipeline/pipeline.py,sha256=VX3GcoTjY7_ehX_rmpa-uh0NyBFoxF24AxbwruEWw4A,8457
|
76
76
|
ddi_fw/test/basic_test.py,sha256=fEOGcZm1ObnsDvMiXNmdmz6YCeUrGc8V0DwlSwGhsq8,376
|
77
77
|
ddi_fw/test/combination_test.py,sha256=TWNE8sf-DSh1Q9-yRaRBc774Sn1kSMGXLwQhd2_Qynk,324
|
78
78
|
ddi_fw/test/compress_json_test.py,sha256=BGny56YqiG-pzhMoDzLKQBQI1E7o3jU0S7VYWtclAx4,1045
|
@@ -89,7 +89,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
|
89
89
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
90
90
|
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
91
91
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
92
|
-
ddi_fw-0.0.
|
93
|
-
ddi_fw-0.0.
|
94
|
-
ddi_fw-0.0.
|
95
|
-
ddi_fw-0.0.
|
92
|
+
ddi_fw-0.0.97.dist-info/METADATA,sha256=wdTxARxGIxFpSzXZMgU22Arwx7NiomfNFMYR4NTxXlk,1966
|
93
|
+
ddi_fw-0.0.97.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
94
|
+
ddi_fw-0.0.97.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
95
|
+
ddi_fw-0.0.97.dist-info/RECORD,,
|
File without changes
|
File without changes
|