ddi-fw 0.0.258__tar.gz → 0.0.260__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (49)
  1. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/faiss_storage.py +7 -2
  4. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/kaggle.py +2 -1
  5. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  6. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/README.md +0 -0
  7. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/setup.cfg +0 -0
  8. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/__init__.py +0 -0
  9. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/core.py +0 -0
  10. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  11. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/db_utils.py +0 -0
  12. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/datasets/setup_._py +0 -0
  13. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/__init__.py +0 -0
  14. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/chroma_storage.py +0 -0
  15. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/embeddings.py +0 -0
  16. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  17. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/langchain/storage.py +0 -0
  18. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/__init__.py +0 -0
  19. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  20. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/ml_helper.py +0 -0
  21. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/model_wrapper.py +0 -0
  22. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  23. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  24. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ml/tracking_service.py +0 -0
  25. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/__init__.py +0 -0
  26. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  27. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/ner/ner.py +0 -0
  28. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/__init__.py +0 -0
  29. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  30. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  31. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
  32. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  33. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/pipeline/pipeline.py +0 -0
  34. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/__init__.py +0 -0
  35. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  36. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/enums.py +0 -0
  37. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/json_helper.py +0 -0
  38. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/numpy_utils.py +0 -0
  39. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/package_helper.py +0 -0
  40. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  41. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/utils.py +0 -0
  42. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/utils/zip_helper.py +0 -0
  43. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/__init__.py +0 -0
  44. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  45. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  46. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  47. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  48. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/requires.txt +0 -0
  49. {ddi_fw-0.0.258 → ddi_fw-0.0.260}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.258
3
+ Version: 0.0.260
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "ddi_fw"
9
- version = "0.0.258"
9
+ version = "0.0.260"
10
10
  description = "Do not use :)"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -1,4 +1,5 @@
1
1
  import faiss
2
+ from langchain_text_splitters import TextSplitter
2
3
  import pandas as pd
3
4
  from uuid import uuid4
4
5
  from langchain_community.vectorstores.faiss import FAISS
@@ -45,6 +46,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
45
46
  persist_directory: str = Field(default="./embeddings/faiss")
46
47
  index: Any = None
47
48
  vector_store: Optional[FAISS] | None = None
49
+ text_splitter: Optional[TextSplitter] = None
48
50
  class Config:
49
51
  arbitrary_types_allowed = True
50
52
  # def generate_vector_store(self, docs):
@@ -75,6 +77,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
75
77
  dict: A dictionary with the structure {type: {drugbank_id: [embedding]}}.
76
78
  """
77
79
  self.load(self.persist_directory)
80
+ # df = self.as_dataframe(formatter_fn=custom_formatter)
78
81
  df = self.as_dataframe(formatter_fn=custom_formatter)
79
82
  type_dict = {}
80
83
  for drug_type, group in df.groupby('type'):
@@ -111,7 +114,9 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
111
114
 
112
115
  valid_docs = []
113
116
  valid_ids = []
114
-
117
+ if self.text_splitter:
118
+ docs = self.text_splitter.split_documents(docs)
119
+
115
120
  for doc in docs:
116
121
  content = doc.page_content if hasattr(doc, 'page_content') else ""
117
122
  if content and content.strip():
@@ -254,7 +259,7 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
254
259
 
255
260
  def custom_formatter(document: Document, vector: np.ndarray) -> Dict[str, Any]:
256
261
  return {
257
- "id": document.metadata.get("drugbank_id", None),
262
+ "id": document.metadata.get("id", None),
258
263
  "type": document.metadata.get("type", None),
259
264
  "embedding": vector
260
265
  }
@@ -22,7 +22,8 @@ def create_kaggle_dataset(base_path: str, collections: list):
22
22
  # Step 2: Get metadata for the current folder
23
23
  model_info = next((c for c in collections if c['id'] == folder_name), None)
24
24
  if model_info is None:
25
- continue # Skip if model info is not found
25
+ raise FileNotFoundError(f"Model info for {folder_name} not exists")
26
+ # continue # Skip if model info is not found
26
27
 
27
28
  title = model_info['kaggle_title']
28
29
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.258
3
+ Version: 0.0.260
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes