ddi-fw 0.0.253__tar.gz → 0.0.255__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/PKG-INFO +1 -1
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/pyproject.toml +1 -1
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/__init__.py +2 -2
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/faiss_storage.py +3 -3
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/vectorization/feature_vector_generation.py +6 -1
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw.egg-info/PKG-INFO +1 -1
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/README.md +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/setup.cfg +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/datasets/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/datasets/core.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/datasets/db_utils.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/datasets/setup_._py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/chroma_storage.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/embeddings.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/langchain/storage.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/evaluation_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/ml_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/model_wrapper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ml/tracking_service.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ner/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ner/mmlrestclient.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/ner/ner.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/pipeline/pipeline.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/enums.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/json_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/kaggle.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/numpy_utils.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/package_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/py7zr_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/utils.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/utils/zip_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/vectorization/__init__.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw/vectorization/idf_helper.py +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw.egg-info/requires.txt +0 -0
- {ddi_fw-0.0.253 → ddi_fw-0.0.255}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,5 +1,5 @@
 from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
 from .sentence_splitter import SentenceSplitter
-from .storage import DataFrameToVectorDB, generate_embeddings
-from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager
+# from .storage import DataFrameToVectorDB, generate_embeddings
+from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager,generate_embeddings
 from .chroma_storage import ChromaVectorStoreManager
@@ -390,7 +390,7 @@ def generate_embeddings(
 partial_df_size = collection_config.get('partial_dataframe_size')
 columns = collection_config.get('columns')
 page_content_columns = collection_config.get('page_content_columns')
-
+
 
 # Load embedding model
 try:
@@ -417,10 +417,10 @@ def generate_embeddings(
 text_splitters_suffixes.append(suffix)
 except Exception as e:
 raise Exception(f"Unknown text splitter: {text_splitter_type}") from e
-
+
 for text_splitter, suffix in zip(text_splitters, text_splitters_suffixes):
 print(f"{id}_{suffix}")
-
+persist_dir = f'{persist_directory}/{id}/{suffix}'
 # Prepare manager parameters
 manager_params = {
 "collection_name": f"{id}_{suffix}",
@@ -3,7 +3,6 @@ import numpy as np
 import pandas as pd
 from scipy.spatial.distance import pdist, squareform
 from sklearn.preprocessing import MultiLabelBinarizer
-import cupy as cp
 
 # todo pd.unique kullan
 def find_distinct_elements(frame):
@@ -97,6 +96,12 @@ class SimilarityMatrixGenerator:
 Returns:
 cp.ndarray: A 2D CuPy array containing the pairwise Jaccard similarity.
 """
+try:
+import cupy as cp
+except ImportError:
+raise ImportError("cupy is required for GPU Jaccard similarity computation. Please install it with 'pip install cupy'.")
+
+
 if not ((matrix == 0) | (matrix == 1)).all():
 raise ValueError("Input matrix must be binary (contain only 0s and 1s).")
 matrix = cp.asarray(matrix)
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|