ddi-fw 0.0.255__tar.gz → 0.0.257__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/datasets/core.py +1 -1
  4. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/chroma_storage.py +4 -2
  5. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/faiss_storage.py +16 -4
  6. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  7. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/README.md +0 -0
  8. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/setup.cfg +0 -0
  9. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/datasets/__init__.py +0 -0
  10. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  11. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/datasets/db_utils.py +0 -0
  12. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/datasets/setup_._py +0 -0
  13. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/__init__.py +0 -0
  14. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/embeddings.py +0 -0
  15. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  16. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/langchain/storage.py +0 -0
  17. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/__init__.py +0 -0
  18. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  19. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/ml_helper.py +0 -0
  20. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/model_wrapper.py +0 -0
  21. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  22. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  23. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ml/tracking_service.py +0 -0
  24. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ner/__init__.py +0 -0
  25. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  26. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/ner/ner.py +0 -0
  27. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/__init__.py +0 -0
  28. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  29. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  30. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
  31. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  32. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/pipeline/pipeline.py +0 -0
  33. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/__init__.py +0 -0
  34. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  35. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/enums.py +0 -0
  36. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/json_helper.py +0 -0
  37. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/kaggle.py +0 -0
  38. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/numpy_utils.py +0 -0
  39. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/package_helper.py +0 -0
  40. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  41. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/utils.py +0 -0
  42. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/utils/zip_helper.py +0 -0
  43. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/vectorization/__init__.py +0 -0
  44. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  45. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  46. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  47. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  48. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw.egg-info/requires.txt +0 -0
  49. {ddi_fw-0.0.255 → ddi_fw-0.0.257}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.255
3
+ Version: 0.0.257
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "ddi_fw"
9
- version = "0.0.255"
9
+ version = "0.0.257"
10
10
  description = "Do not use :)"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -379,7 +379,7 @@ class TextDatasetMixin(BaseModel):
379
379
  if self.vector_store_manager is not None:
380
380
  self.embedding_dict = self.vector_store_manager.initialize_embedding_dict()
381
381
  # self.embedding_dict = self.__initialize_embedding_dict()
382
- self.__calculate_embedding_size()
382
+ self.__calculate_embedding_size()
383
383
 
384
384
 
385
385
 
@@ -86,7 +86,7 @@ def split_dataframe_indices(df, min_size=512):
86
86
 
87
87
  class ChromaVectorStoreManager(BaseVectorStoreManager):
88
88
  collection_name: str = Field(default="default")
89
- persist_directory: str = Field(default="./chroma_db")
89
+ persist_directory: str = Field(default=".embeddings/chroma_db")
90
90
  text_splitter: Optional[TextSplitter] = None
91
91
  batch_size: int = Field(default=1024)
92
92
  vector_store: Optional[Chroma] | None = None
@@ -152,7 +152,9 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
152
152
  # self.vector_store.persist()
153
153
  # print(f"{page_content_column}, size:{len(split_docs_chunk)}")
154
154
 
155
-
155
+ @staticmethod
156
+ def get_persist_dir(base_dir, suffix, config=None):
157
+ return f"{base_dir}"
156
158
 
157
159
  def generate_vector_store(self, docs: List[Document]):
158
160
  self.vector_store = Chroma(
@@ -35,9 +35,14 @@ class BaseVectorStoreManager(BaseModel):
35
35
 
36
36
  def as_dataframe(self, formatter_fn: Optional[Callable[[Document, np.ndarray], Dict[str, Any]]] = None) -> pd.DataFrame:
37
37
  raise NotImplementedError("This method should be implemented by subclasses.")
38
+
39
+ @staticmethod
40
+ def get_persist_dir(base_dir, suffix, config=None):
41
+ raise NotImplementedError("Subclasses must implement get_persist_dir.")
42
+
38
43
 
39
44
  class FaissVectorStoreManager(BaseVectorStoreManager):
40
- persist_directory: str = Field(default="./embeddings")
45
+ persist_directory: str = Field(default="./embeddings/faiss")
41
46
  index: Any = None
42
47
  vector_store: Optional[FAISS] | None = None
43
48
  class Config:
@@ -57,6 +62,10 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
57
62
  # uuids = [str(uuid4()) for _ in range(len(docs))]
58
63
  # self.vector_store.add_documents(documents=docs, ids=uuids)
59
64
 
65
+ @staticmethod
66
+ def get_persist_dir(base_dir, suffix, config=None):
67
+ return f"{base_dir}/{suffix}"
68
+
60
69
  def initialize_embedding_dict(self, **kwargs):
61
70
  """
62
71
  Initializes a dictionary where keys are types (e.g., 'description', 'indication'),
@@ -353,14 +362,14 @@ def load_configuration(config_file):
353
362
  # # Optionally persist/save
354
363
  # vector_store_manager.save(persist_dir)
355
364
 
356
-
365
+ # persist_directory config'den alınsın
357
366
  def generate_embeddings(
358
367
  docs,
359
368
  vector_store_manager_type:Type[BaseVectorStoreManager],
360
369
  config_file:Optional[str],
361
370
  new_model_names:Optional[List],
362
371
  collections:Optional[Dict],
363
- persist_directory="embeddings",
372
+ persist_directory="./embeddings",
364
373
  ):
365
374
  """
366
375
  Generate embeddings for collections based on a configuration file.
@@ -420,7 +429,10 @@ def generate_embeddings(
420
429
 
421
430
  for text_splitter, suffix in zip(text_splitters, text_splitters_suffixes):
422
431
  print(f"{id}_{suffix}")
423
- persist_dir = f'{persist_directory}/{id}/{suffix}'
432
+ # persist_dir = f'{persist_directory}/{id}/{suffix}'
433
+ # persist_dir = f'{persist_directory}/{suffix}'
434
+ persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, suffix, collection_config)
435
+
424
436
  # Prepare manager parameters
425
437
  manager_params = {
426
438
  "collection_name": f"{id}_{suffix}",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.255
3
+ Version: 0.0.257
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes