ddi-fw 0.0.256__py3-none-any.whl → 0.0.258__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- ddi_fw/langchain/chroma_storage.py +4 -2
- ddi_fw/langchain/faiss_storage.py +13 -2
- {ddi_fw-0.0.256.dist-info → ddi_fw-0.0.258.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.256.dist-info → ddi_fw-0.0.258.dist-info}/RECORD +6 -6
- {ddi_fw-0.0.256.dist-info → ddi_fw-0.0.258.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.256.dist-info → ddi_fw-0.0.258.dist-info}/top_level.txt +0 -0
@@ -86,7 +86,7 @@ def split_dataframe_indices(df, min_size=512):
|
|
86
86
|
|
87
87
|
class ChromaVectorStoreManager(BaseVectorStoreManager):
|
88
88
|
collection_name: str = Field(default="default")
|
89
|
-
persist_directory: str = Field(default="
|
89
|
+
persist_directory: str = Field(default=".embeddings/chroma_db")
|
90
90
|
text_splitter: Optional[TextSplitter] = None
|
91
91
|
batch_size: int = Field(default=1024)
|
92
92
|
vector_store: Optional[Chroma] | None = None
|
@@ -152,7 +152,9 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
|
|
152
152
|
# self.vector_store.persist()
|
153
153
|
# print(f"{page_content_column}, size:{len(split_docs_chunk)}")
|
154
154
|
|
155
|
-
|
155
|
+
@staticmethod
|
156
|
+
def get_persist_dir(base_dir, suffix, config=None):
|
157
|
+
return f"{base_dir}"
|
156
158
|
|
157
159
|
def generate_vector_store(self, docs: List[Document]):
|
158
160
|
self.vector_store = Chroma(
|
@@ -35,6 +35,11 @@ class BaseVectorStoreManager(BaseModel):
|
|
35
35
|
|
36
36
|
def as_dataframe(self, formatter_fn: Optional[Callable[[Document, np.ndarray], Dict[str, Any]]] = None) -> pd.DataFrame:
|
37
37
|
raise NotImplementedError("This method should be implemented by subclasses.")
|
38
|
+
|
39
|
+
@staticmethod
|
40
|
+
def get_persist_dir(base_dir, suffix, config=None):
|
41
|
+
raise NotImplementedError("Subclasses must implement get_persist_dir.")
|
42
|
+
|
38
43
|
|
39
44
|
class FaissVectorStoreManager(BaseVectorStoreManager):
|
40
45
|
persist_directory: str = Field(default="./embeddings/faiss")
|
@@ -57,6 +62,10 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
|
|
57
62
|
# uuids = [str(uuid4()) for _ in range(len(docs))]
|
58
63
|
# self.vector_store.add_documents(documents=docs, ids=uuids)
|
59
64
|
|
65
|
+
@staticmethod
|
66
|
+
def get_persist_dir(base_dir, suffix, config=None):
|
67
|
+
return f"{base_dir}/{suffix}"
|
68
|
+
|
60
69
|
def initialize_embedding_dict(self, **kwargs):
|
61
70
|
"""
|
62
71
|
Initializes a dictionary where keys are types (e.g., 'description', 'indication'),
|
@@ -353,7 +362,7 @@ def load_configuration(config_file):
|
|
353
362
|
# # Optionally persist/save
|
354
363
|
# vector_store_manager.save(persist_dir)
|
355
364
|
|
356
|
-
|
365
|
+
# persist_directory should be taken from config
|
357
366
|
def generate_embeddings(
|
358
367
|
docs,
|
359
368
|
vector_store_manager_type:Type[BaseVectorStoreManager],
|
@@ -421,7 +430,9 @@ def generate_embeddings(
|
|
421
430
|
for text_splitter, suffix in zip(text_splitters, text_splitters_suffixes):
|
422
431
|
print(f"{id}_{suffix}")
|
423
432
|
# persist_dir = f'{persist_directory}/{id}/{suffix}'
|
424
|
-
persist_dir = f'{persist_directory}/{suffix}'
|
433
|
+
# persist_dir = f'{persist_directory}/{suffix}'
|
434
|
+
persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, suffix, collection_config)
|
435
|
+
|
425
436
|
# Prepare manager parameters
|
426
437
|
manager_params = {
|
427
438
|
"collection_name": f"{id}_{suffix}",
|
@@ -4,9 +4,9 @@ ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl
|
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
6
6
|
ddi_fw/langchain/__init__.py,sha256=Kk2Yr7vemjy9MNB_ImAWET808zt1JkLsWqsgEXpVPJk,421
|
7
|
-
ddi_fw/langchain/chroma_storage.py,sha256=
|
7
|
+
ddi_fw/langchain/chroma_storage.py,sha256=Xh0p2d8bI9U-NYkhuu7fLtsgbExP7phkqPE17PBYWd4,15634
|
8
8
|
ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
|
9
|
-
ddi_fw/langchain/faiss_storage.py,sha256=
|
9
|
+
ddi_fw/langchain/faiss_storage.py,sha256=kFnvsRCE1Kdv77XgC2jLlYrAHZ9Qoz-ygs-IPB2Sg6g,18275
|
10
10
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
11
11
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
12
12
|
ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
|
@@ -38,7 +38,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
38
38
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
39
39
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=92bhZw4Qxh0hqPK-bPHm9bUO7pg2p4cStQYtVrOtetE,7919
|
40
40
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
41
|
-
ddi_fw-0.0.
|
42
|
-
ddi_fw-0.0.
|
43
|
-
ddi_fw-0.0.
|
44
|
-
ddi_fw-0.0.
|
41
|
+
ddi_fw-0.0.258.dist-info/METADATA,sha256=3r-JrOhNUwt3sUXiIj0Ua8a92c67V324tVHDxTBMM10,2623
|
42
|
+
ddi_fw-0.0.258.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
43
|
+
ddi_fw-0.0.258.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
44
|
+
ddi_fw-0.0.258.dist-info/RECORD,,
|
File without changes
|
File without changes
|