ddi-fw 0.0.253__py3-none-any.whl → 0.0.255__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
2
2
  from .sentence_splitter import SentenceSplitter
3
- from .storage import DataFrameToVectorDB, generate_embeddings
4
- from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager
3
+ # from .storage import DataFrameToVectorDB, generate_embeddings
4
+ from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager,generate_embeddings
5
5
  from .chroma_storage import ChromaVectorStoreManager
@@ -390,7 +390,7 @@ def generate_embeddings(
390
390
  partial_df_size = collection_config.get('partial_dataframe_size')
391
391
  columns = collection_config.get('columns')
392
392
  page_content_columns = collection_config.get('page_content_columns')
393
- persist_dir = f'{persist_directory}/{id}'
393
+
394
394
 
395
395
  # Load embedding model
396
396
  try:
@@ -417,10 +417,10 @@ def generate_embeddings(
417
417
  text_splitters_suffixes.append(suffix)
418
418
  except Exception as e:
419
419
  raise Exception(f"Unknown text splitter: {text_splitter_type}") from e
420
-
420
+
421
421
  for text_splitter, suffix in zip(text_splitters, text_splitters_suffixes):
422
422
  print(f"{id}_{suffix}")
423
-
423
+ persist_dir = f'{persist_directory}/{id}/{suffix}'
424
424
  # Prepare manager parameters
425
425
  manager_params = {
426
426
  "collection_name": f"{id}_{suffix}",
@@ -3,7 +3,6 @@ import numpy as np
3
3
  import pandas as pd
4
4
  from scipy.spatial.distance import pdist, squareform
5
5
  from sklearn.preprocessing import MultiLabelBinarizer
6
- import cupy as cp
7
6
 
8
7
  # TODO: use pd.unique
9
8
  def find_distinct_elements(frame):
@@ -97,6 +96,12 @@ class SimilarityMatrixGenerator:
97
96
  Returns:
98
97
  cp.ndarray: A 2D CuPy array containing the pairwise Jaccard similarity.
99
98
  """
99
+ try:
100
+ import cupy as cp
101
+ except ImportError:
102
+ raise ImportError("cupy is required for GPU Jaccard similarity computation. Please install it with 'pip install cupy'.")
103
+
104
+
100
105
  if not ((matrix == 0) | (matrix == 1)).all():
101
106
  raise ValueError("Input matrix must be binary (contain only 0s and 1s).")
102
107
  matrix = cp.asarray(matrix)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.253
3
+ Version: 0.0.255
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -3,10 +3,10 @@ ddi_fw/datasets/core.py,sha256=4KtFYjf7uLsbto2_InY2DWNN0AUoAb8Yr0jYn0to4ME,17413
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
6
- ddi_fw/langchain/__init__.py,sha256=xGNaTEZCUxyc_aT1zvzVWGRfsj-9VXqMvPKtV_G7ChA,399
6
+ ddi_fw/langchain/__init__.py,sha256=Kk2Yr7vemjy9MNB_ImAWET808zt1JkLsWqsgEXpVPJk,421
7
7
  ddi_fw/langchain/chroma_storage.py,sha256=7LSUhdiPdQHZvKC_NapOeVbHtS71iE5ABZVTrI0YQ-A,15520
8
8
  ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
9
- ddi_fw/langchain/faiss_storage.py,sha256=e6WbjKAlCbFZUq9P3LTjTeXmWTAh6Oxp_NMVbvfK-kc,17727
9
+ ddi_fw/langchain/faiss_storage.py,sha256=3AihARDZgdsE89trnTWodWDm-yF2NJyfLULmgJ_80Tg,17756
10
10
  ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
11
11
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
12
12
  ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
@@ -36,9 +36,9 @@ ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,
36
36
  ddi_fw/utils/utils.py,sha256=PY-zDawREKoXQfzX7lVkxBLVFQPkfvr9385kHCjaNXo,4391
37
37
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
38
38
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
39
- ddi_fw/vectorization/feature_vector_generation.py,sha256=QQQGhCti653BdU343Ag1bH_g1fzi2hlic7dgNy7otjE,7694
39
+ ddi_fw/vectorization/feature_vector_generation.py,sha256=92bhZw4Qxh0hqPK-bPHm9bUO7pg2p4cStQYtVrOtetE,7919
40
40
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
41
- ddi_fw-0.0.253.dist-info/METADATA,sha256=vYOtl4WgNa-ydlgj2dcuC2eRuFfMRlT-9OubOT3ep2U,2623
42
- ddi_fw-0.0.253.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- ddi_fw-0.0.253.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
- ddi_fw-0.0.253.dist-info/RECORD,,
41
+ ddi_fw-0.0.255.dist-info/METADATA,sha256=YiHU3_WiQ7FhmmBuGWWJCO9ZvHl84O9jbxu6XVP02_4,2623
42
+ ddi_fw-0.0.255.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ ddi_fw-0.0.255.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
44
+ ddi_fw-0.0.255.dist-info/RECORD,,