ddi-fw 0.0.254__py3-none-any.whl → 0.0.256__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -379,7 +379,7 @@ class TextDatasetMixin(BaseModel):
379
379
  if self.vector_store_manager is not None:
380
380
  self.embedding_dict = self.vector_store_manager.initialize_embedding_dict()
381
381
  # self.embedding_dict = self.__initialize_embedding_dict()
382
- self.__calculate_embedding_size()
382
+ self.__calculate_embedding_size()
383
383
 
384
384
 
385
385
 
@@ -37,7 +37,7 @@ class BaseVectorStoreManager(BaseModel):
37
37
  raise NotImplementedError("This method should be implemented by subclasses.")
38
38
 
39
39
  class FaissVectorStoreManager(BaseVectorStoreManager):
40
- persist_directory: str = Field(default="./embeddings")
40
+ persist_directory: str = Field(default="./embeddings/faiss")
41
41
  index: Any = None
42
42
  vector_store: Optional[FAISS] | None = None
43
43
  class Config:
@@ -360,7 +360,7 @@ def generate_embeddings(
360
360
  config_file:Optional[str],
361
361
  new_model_names:Optional[List],
362
362
  collections:Optional[Dict],
363
- persist_directory="embeddings",
363
+ persist_directory="./embeddings",
364
364
  ):
365
365
  """
366
366
  Generate embeddings for collections based on a configuration file.
@@ -390,7 +390,7 @@ def generate_embeddings(
390
390
  partial_df_size = collection_config.get('partial_dataframe_size')
391
391
  columns = collection_config.get('columns')
392
392
  page_content_columns = collection_config.get('page_content_columns')
393
- persist_dir = f'{persist_directory}/{id}'
393
+
394
394
 
395
395
  # Load embedding model
396
396
  try:
@@ -417,10 +417,11 @@ def generate_embeddings(
417
417
  text_splitters_suffixes.append(suffix)
418
418
  except Exception as e:
419
419
  raise Exception(f"Unknown text splitter: {text_splitter_type}") from e
420
-
420
+
421
421
  for text_splitter, suffix in zip(text_splitters, text_splitters_suffixes):
422
422
  print(f"{id}_{suffix}")
423
-
423
+ # persist_dir = f'{persist_directory}/{id}/{suffix}'
424
+ persist_dir = f'{persist_directory}/{suffix}'
424
425
  # Prepare manager parameters
425
426
  manager_params = {
426
427
  "collection_name": f"{id}_{suffix}",
@@ -3,7 +3,6 @@ import numpy as np
3
3
  import pandas as pd
4
4
  from scipy.spatial.distance import pdist, squareform
5
5
  from sklearn.preprocessing import MultiLabelBinarizer
6
- import cupy as cp
7
6
 
8
7
  # TODO: use pd.unique
9
8
  def find_distinct_elements(frame):
@@ -97,6 +96,12 @@ class SimilarityMatrixGenerator:
97
96
  Returns:
98
97
  cp.ndarray: A 2D CuPy array containing the pairwise Jaccard similarity.
99
98
  """
99
+ try:
100
+ import cupy as cp
101
+ except ImportError:
102
+ raise ImportError("cupy is required for GPU Jaccard similarity computation. Please install it with 'pip install cupy'.")
103
+
104
+
100
105
  if not ((matrix == 0) | (matrix == 1)).all():
101
106
  raise ValueError("Input matrix must be binary (contain only 0s and 1s).")
102
107
  matrix = cp.asarray(matrix)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.254
3
+ Version: 0.0.256
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=NozQvXPYIS01U0srZmcKhiqJgRDkD-C-VXHL6sKrFSw,166
2
- ddi_fw/datasets/core.py,sha256=4KtFYjf7uLsbto2_InY2DWNN0AUoAb8Yr0jYn0to4ME,17413
2
+ ddi_fw/datasets/core.py,sha256=UnbCDoWXdxeiAb0e0anhDqXiVFGUi02VA9sKl6NVBZU,17409
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
6
6
  ddi_fw/langchain/__init__.py,sha256=Kk2Yr7vemjy9MNB_ImAWET808zt1JkLsWqsgEXpVPJk,421
7
7
  ddi_fw/langchain/chroma_storage.py,sha256=7LSUhdiPdQHZvKC_NapOeVbHtS71iE5ABZVTrI0YQ-A,15520
8
8
  ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
9
- ddi_fw/langchain/faiss_storage.py,sha256=e6WbjKAlCbFZUq9P3LTjTeXmWTAh6Oxp_NMVbvfK-kc,17727
9
+ ddi_fw/langchain/faiss_storage.py,sha256=LG2cf6upCEHBBF4Jixnq_diA_t3A0OQZBqPc9DwmILc,17825
10
10
  ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
11
11
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
12
12
  ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
@@ -36,9 +36,9 @@ ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,
36
36
  ddi_fw/utils/utils.py,sha256=PY-zDawREKoXQfzX7lVkxBLVFQPkfvr9385kHCjaNXo,4391
37
37
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
38
38
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
39
- ddi_fw/vectorization/feature_vector_generation.py,sha256=QQQGhCti653BdU343Ag1bH_g1fzi2hlic7dgNy7otjE,7694
39
+ ddi_fw/vectorization/feature_vector_generation.py,sha256=92bhZw4Qxh0hqPK-bPHm9bUO7pg2p4cStQYtVrOtetE,7919
40
40
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
41
- ddi_fw-0.0.254.dist-info/METADATA,sha256=i_KV8aGISMG8quQkllnwlqFi8lkGe9eEQgmVAjBEdDw,2623
42
- ddi_fw-0.0.254.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- ddi_fw-0.0.254.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
- ddi_fw-0.0.254.dist-info/RECORD,,
41
+ ddi_fw-0.0.256.dist-info/METADATA,sha256=4Gwtn1oiXo2cJ8osjOXRRu7JIQuP8i3PWjOsE7B56t0,2623
42
+ ddi_fw-0.0.256.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ ddi_fw-0.0.256.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
+ ddi_fw-0.0.256.dist-info/RECORD,,