ddi-fw 0.0.126__py3-none-any.whl → 0.0.128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/langchain/storage.py +11 -8
- {ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/top_level.txt +0 -0
ddi_fw/langchain/storage.py
CHANGED
```diff
@@ -137,20 +137,23 @@ class DataFrameToVectorDB:
         total = 0
         partial_dfs = split_dataframe(col_df, min_size=partial_df_size)
         for partial_df in partial_dfs:
-            import torch
+            # import torch

             documents = []
             loader = DataFrameLoader(
                 data_frame=partial_df, page_content_column=page_content_column)
             loaded_docs = loader.load()
             # print(loaded_docs)
-
+            documents.extend(self.__split_docs(loaded_docs))
+            split_docs_chunked = self.__split_list(
+                documents, self.batch_size)
+            for split_docs_chunk in split_docs_chunked:
+                print("entered chunks")
+                self.vectordb.add_documents(loaded_docs)
+                self.vectordb.persist()
             total += len(partial_df)
-
-            self.vectordb.add_documents(loaded_docs)
-            self.vectordb.persist()
             print(f"{page_content_column}: {total}/{len(col_df)}")
-            torch.cuda.empty_cache()
+            # torch.cuda.empty_cache()
             # time.sleep(30)  # The GPU will not be used during this period

         # split_docs_chunked = self.__split_list(
@@ -171,8 +174,8 @@ class DataFrameToVectorDB:
         split_docs_chunked = self.__split_list(
             documents, self.batch_size)
         for split_docs_chunk in split_docs_chunked:
-            import torch
-            torch.cuda.empty_cache()
+            # import torch
+            # torch.cuda.empty_cache()
             self.vectordb.add_documents(split_docs_chunk)
             self.vectordb.persist()
             print(f"{page_content_column}, size:{len(split_docs_chunk)}")
```
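In words: instead of pushing each partial DataFrame's documents to the vector store in one call and emptying the CUDA cache afterwards, 0.0.128 splits the documents produced by `DataFrameLoader` into batches of `self.batch_size` and calls `add_documents`/`persist` once per batch, with the `torch` import and `torch.cuda.empty_cache()` calls commented out. The snippet below is a minimal, dependency-light sketch of that chunked-insert pattern; `split_dataframe`, `split_list`, `FakeVectorStore`, and `store_column` are illustrative stand-ins rather than the package's actual implementations, and a real run would use `DataFrameLoader` plus a concrete store such as Chroma.

```python
from typing import Iterator, List

import pandas as pd


def split_dataframe(df: pd.DataFrame, min_size: int) -> List[pd.DataFrame]:
    """Illustrative stand-in: cut a DataFrame into partial frames of ~min_size rows."""
    return [df.iloc[i:i + min_size] for i in range(0, len(df), min_size)]


def split_list(items: List[str], batch_size: int) -> Iterator[List[str]]:
    """Yield successive batches of at most batch_size items."""
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]


class FakeVectorStore:
    """Stub that mimics the add_documents/persist surface used in storage.py."""

    def __init__(self) -> None:
        self.docs: List[str] = []

    def add_documents(self, documents: List[str]) -> None:
        self.docs.extend(documents)

    def persist(self) -> None:
        pass  # a real store (e.g. Chroma) would flush to disk here


def store_column(df: pd.DataFrame, page_content_column: str,
                 vectordb: FakeVectorStore, batch_size: int = 64,
                 partial_df_size: int = 1000) -> None:
    """Chunked inserts: one add_documents/persist call per batch of documents."""
    total = 0
    for partial_df in split_dataframe(df, min_size=partial_df_size):
        # In storage.py the texts come from DataFrameLoader(...).load();
        # here we read the column directly to keep the sketch dependency-free.
        documents = partial_df[page_content_column].astype(str).tolist()
        for chunk in split_list(documents, batch_size):
            vectordb.add_documents(chunk)
            vectordb.persist()
        total += len(partial_df)
        print(f"{page_content_column}: {total}/{len(df)}")


if __name__ == "__main__":
    frame = pd.DataFrame({"description": [f"interaction {i}" for i in range(2500)]})
    store = FakeVectorStore()
    store_column(frame, "description", store, batch_size=128, partial_df_size=1000)
```

Unlike the first hunk above, the sketch passes each batch (`chunk`) to `add_documents` rather than the full `loaded_docs`, mirroring what the second hunk does with `split_docs_chunk`, so every insert call stays bounded by `batch_size`.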
{ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/RECORD
CHANGED
```diff
@@ -58,7 +58,7 @@ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHr
 ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,269
 ddi_fw/langchain/embeddings.py,sha256=lU64a5AZ62jP8U3hTSwK0kXt7gThbwPACLfJMZ1baPA,7538
 ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
-ddi_fw/langchain/storage.py,sha256=
+ddi_fw/langchain/storage.py,sha256=yElFDwLaz2g0dXVYtc_ANzGOzt5Xs4qGQAu59uhnLN4,11158
 ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
 ddi_fw/ml/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
 ddi_fw/ml/ml_helper.py,sha256=fySjIAFzkeEOvaLJhDwtCOgRhgYQ7H106eqaP16GhDY,4489
@@ -91,7 +91,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
 ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
 ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
 ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
-ddi_fw-0.0.
-ddi_fw-0.0.
-ddi_fw-0.0.
-ddi_fw-0.0.
+ddi_fw-0.0.128.dist-info/METADATA,sha256=pwm1OITkNjTUEGoATkiYnfljebCP7sBSQXyuSxEwOLA,1965
+ddi_fw-0.0.128.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ddi_fw-0.0.128.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.128.dist-info/RECORD,,
```
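Each RECORD line has the form `path,sha256=<digest>,<size>`, where the digest is the file's SHA-256 hash encoded as URL-safe base64 with the trailing `=` padding stripped and the size is in bytes (so `storage.py` above is 11158 bytes in 0.0.128). The helper below reproduces such an entry for a local file; the function name `record_entry` is invented for this sketch.

```python
import base64
import hashlib
import os


def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path, urlsafe-base64 SHA-256 (no '=' padding), byte size."""
    with open(path, "rb") as fh:
        digest = hashlib.sha256(fh.read()).digest()
    encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"{path},sha256={encoded},{os.path.getsize(path)}"


# e.g. record_entry("ddi_fw/langchain/storage.py") run against an unpacked
# 0.0.128 wheel should reproduce the RECORD line shown above.
```

The RECORD file itself is listed as `ddi_fw-0.0.128.dist-info/RECORD,,` with empty hash and size fields, since it cannot contain its own digest.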
{ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/WHEEL
File without changes
{ddi_fw-0.0.126.dist-info → ddi_fw-0.0.128.dist-info}/top_level.txt
File without changes