ddi-fw 0.0.241__py3-none-any.whl → 0.0.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +70 -65
- ddi_fw/langchain/__init__.py +1 -1
- ddi_fw/langchain/chroma_storage.py +134 -22
- ddi_fw/langchain/faiss_storage.py +25 -9
- ddi_fw/pipeline/multi_pipeline.py +5 -1
- ddi_fw/pipeline/pipeline.py +16 -0
- {ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/RECORD +10 -10
- {ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -3,12 +3,13 @@ from collections import defaultdict
 import glob
 import logging
 from typing import Any, Dict, List, Optional, Type
-import chromadb
+# import chromadb
 # from chromadb.api.types import IncludeEnum
 import numpy as np
 import pandas as pd
 from pydantic import BaseModel, Field, computed_field
 from ddi_fw.datasets.dataset_splitter import DatasetSplitter
+from ddi_fw.langchain.faiss_storage import BaseVectorStoreManager
 from ddi_fw.utils.utils import create_folder_if_not_exists
 
 
@@ -280,6 +281,8 @@ class TextDatasetMixin(BaseModel):
         default_factory=dict, description="Dictionary for embeddings")
     pooling_strategy: PoolingStrategy | None = None
     column_embedding_configs: Optional[List] = None
+    vector_store_manager: BaseVectorStoreManager| None = None # <-- NEW
+
     vector_db_persist_directory: Optional[str] = None
     vector_db_collection_name: Optional[str] = None
     _embedding_size: int
@@ -292,70 +295,70 @@ class TextDatasetMixin(BaseModel):
     class Config:
         arbitrary_types_allowed = True
 
-    def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
-        """
-        Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
-
-        Args:
-        - vector_db_persist_directory (str): The path to the directory where the Chroma vector database is stored.
-        - vector_db_collection_name (str): The name of the collection to query.
-        - embedding_dict (dict): The existing dictionary to update with embeddings.
-
-        """
-        if vector_db_persist_directory:
-            # Initialize the Chroma client and get the collection
-            vector_db = chromadb.PersistentClient(
-                path=vector_db_persist_directory)
-            collection = vector_db.get_collection(vector_db_collection_name)
-            # include = [IncludeEnum.embeddings, IncludeEnum.metadatas]
-            include: chromadb.Include = ["embeddings","metadatas"]
-            dictionary: chromadb.GetResult
-            # Fetch the embeddings and metadata
-            if column == None:
-                dictionary = collection.get(
-                    include=include
-                    # include=['embeddings', 'metadatas']
-                )
-                print(
-                    f"Embeddings are calculated from {vector_db_collection_name}")
-            else:
-                dictionary = collection.get(
-                    include=include,
-                    # include=['embeddings', 'metadatas'],
-                    where={
-                        "type": {"$eq": f"{column}"}})
-                print(
-                    f"Embeddings of {column} are calculated from {vector_db_collection_name}")
-
-            # Populate the embedding dictionary with embeddings from the vector database
-            metadatas = dictionary["metadatas"]
-            embeddings = dictionary["embeddings"]
-            if metadatas is None or embeddings is None:
-                raise ValueError(
-                    "The collection does not contain embeddings or metadatas.")
-            for metadata, embedding in zip(metadatas, embeddings):
-                embedding_dict[metadata["type"]
-                               ][metadata["id"]].append(embedding)
-
-        else:
-            raise ValueError(
-                "Persistent directory for the vector DB is not specified.")
+    # def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
+    #     """
+    #     Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
+
+    #     Args:
+    #     - vector_db_persist_directory (str): The path to the directory where the Chroma vector database is stored.
+    #     - vector_db_collection_name (str): The name of the collection to query.
+    #     - embedding_dict (dict): The existing dictionary to update with embeddings.
+
+    #     """
+    #     if vector_db_persist_directory:
+    #         # Initialize the Chroma client and get the collection
+    #         vector_db = chromadb.PersistentClient(
+    #             path=vector_db_persist_directory)
+    #         collection = vector_db.get_collection(vector_db_collection_name)
+    #         # include = [IncludeEnum.embeddings, IncludeEnum.metadatas]
+    #         include: chromadb.Include = ["embeddings","metadatas"]
+    #         dictionary: chromadb.GetResult
+    #         # Fetch the embeddings and metadata
+    #         if column == None:
+    #             dictionary = collection.get(
+    #                 include=include
+    #                 # include=['embeddings', 'metadatas']
+    #             )
+    #             print(
+    #                 f"Embeddings are calculated from {vector_db_collection_name}")
+    #         else:
+    #             dictionary = collection.get(
+    #                 include=include,
+    #                 # include=['embeddings', 'metadatas'],
+    #                 where={
+    #                     "type": {"$eq": f"{column}"}})
+    #             print(
+    #                 f"Embeddings of {column} are calculated from {vector_db_collection_name}")
+
+    #         # Populate the embedding dictionary with embeddings from the vector database
+    #         metadatas = dictionary["metadatas"]
+    #         embeddings = dictionary["embeddings"]
+    #         if metadatas is None or embeddings is None:
+    #             raise ValueError(
+    #                 "The collection does not contain embeddings or metadatas.")
+    #         for metadata, embedding in zip(metadatas, embeddings):
+    #             embedding_dict[metadata["type"]
+    #                            ][metadata["id"]].append(embedding)
+
+    #     else:
+    #         raise ValueError(
+    #             "Persistent directory for the vector DB is not specified.")
 
-    def __initialize_embedding_dict(self):
-        embedding_dict = defaultdict(lambda: defaultdict(list))
-        if self.column_embedding_configs:
-            for item in self.column_embedding_configs:
-                col = item["column"]
-                col_db_dir = item["vector_db_persist_directory"]
-                col_db_collection = item["vector_db_collection_name"]
-                self.__create_or_update_embeddings__(embedding_dict, col_db_dir, col_db_collection, col)
-        elif self.vector_db_persist_directory:
-            self.__create_or_update_embeddings__(embedding_dict, self.vector_db_persist_directory, self.vector_db_collection_name)
-        else:
-            logging.warning("There is no configuration of Embeddings")
-            raise ValueError(
-                "There is no configuration of Embeddings. Please provide a vector database directory and collection name.")
-        return embedding_dict
+    # def __initialize_embedding_dict(self):
+    #     embedding_dict = defaultdict(lambda: defaultdict(list))
+    #     if self.column_embedding_configs:
+    #         for item in self.column_embedding_configs:
+    #             col = item["column"]
+    #             col_db_dir = item["vector_db_persist_directory"]
+    #             col_db_collection = item["vector_db_collection_name"]
+    #             self.__create_or_update_embeddings__(embedding_dict, col_db_dir, col_db_collection, col)
+    #     elif self.vector_db_persist_directory:
+    #         self.__create_or_update_embeddings__(embedding_dict, self.vector_db_persist_directory, self.vector_db_collection_name)
+    #     else:
+    #         logging.warning("There is no configuration of Embeddings")
+    #         raise ValueError(
+    #             "There is no configuration of Embeddings. Please provide a vector database directory and collection name.")
+    #     return embedding_dict
 
     def __calculate_embedding_size(self):
         if self.embedding_dict is None:
@@ -373,7 +376,9 @@ class TextDatasetMixin(BaseModel):
         # for k, v in self.ner_threshold.items():
         #     kwargs[k] = v
         if self.embedding_dict is None:
-            self.embedding_dict = self.__initialize_embedding_dict()
+            if self.vector_store_manager is not None:
+                self.embedding_dict = self.vector_store_manager.initialize_embedding_dict()
+            # self.embedding_dict = self.__initialize_embedding_dict()
         self.__calculate_embedding_size()
 
 
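The net effect of the core.py changes is an injection point: TextDatasetMixin no longer opens Chroma itself but asks a BaseVectorStoreManager for its embeddings. A minimal sketch of how the new field can be used, assuming a Chroma collection already persisted on disk; the dataset subclass, embedding backend, collection name, and path below are placeholders, not part of this release:

    from langchain_community.embeddings import FakeEmbeddings  # placeholder backend
    from ddi_fw.datasets.core import TextDatasetMixin
    from ddi_fw.langchain.chroma_storage import ChromaVectorStoreManager

    class DemoTextDataset(TextDatasetMixin):  # hypothetical minimal subclass
        pass

    dataset = DemoTextDataset(
        vector_store_manager=ChromaVectorStoreManager(
            embeddings=FakeEmbeddings(size=384),
            collection_name="ddi_texts",      # placeholder
            persist_directory="./chroma_db",  # placeholder
        ),
    )
    # Per the last hunk above, processing now delegates:
    #     self.embedding_dict = self.vector_store_manager.initialize_embedding_dict()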
ddi_fw/langchain/__init__.py
CHANGED
@@ -1,5 +1,5 @@
 from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
 from .sentence_splitter import SentenceSplitter
 from .storage import DataFrameToVectorDB, generate_embeddings
-from .faiss_storage import BaseVectorStoreManager,
+from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager
 from .chroma_storage import ChromaVectorStoreManager
ddi_fw/langchain/chroma_storage.py
CHANGED
@@ -1,3 +1,5 @@
+from collections import defaultdict
+import logging
 import pandas as pd
 from langchain.vectorstores import Chroma
 from langchain_core.embeddings import Embeddings
@@ -5,10 +7,11 @@ from langchain_core.documents import Document
 from langchain.text_splitter import TextSplitter
 from typing import Callable, Optional, Dict, Any, List
 import numpy as np
+from pydantic import Field
 
 from ddi_fw.langchain.faiss_storage import BaseVectorStoreManager
 from langchain.document_loaders import DataFrameLoader
-
+import chromadb
 
 def split_dataframe(df, min_size=512):
     total_size = len(df)
@@ -82,19 +85,13 @@ def split_dataframe_indices(df, min_size=512):
 
 
 class ChromaVectorStoreManager(BaseVectorStoreManager):
-    def __init__(
-        self,
-        embeddings,
-        collection_name,
-        persist_directory,
-        text_splitter,
-        batch_size,
-    ):
-        super().__init__(embeddings)
-        self.collection_name = collection_name
-        self.persist_directory = persist_directory
-        self.text_splitter = text_splitter
-        self.batch_size = batch_size
+    collection_name: str = Field(default="default")
+    persist_directory: str = Field(default="./chroma_db")
+    text_splitter: Optional[TextSplitter] = None
+    batch_size: int = Field(default=1024)
+
+    class Config:
+        arbitrary_types_allowed = True
 
 
 
@@ -176,6 +173,76 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
         # Chroma persists automatically, but you can copy files if needed
         print("ChromaDB persists automatically. No explicit save needed.")
 
+
+    def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
+        """
+        Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
+
+        Args:
+        - vector_db_persist_directory (str): The path to the directory where the Chroma vector database is stored.
+        - vector_db_collection_name (str): The name of the collection to query.
+        - embedding_dict (dict): The existing dictionary to update with embeddings.
+
+        """
+        if vector_db_persist_directory:
+            # Initialize the Chroma client and get the collection
+            vector_db = chromadb.PersistentClient(
+                path=vector_db_persist_directory)
+            collection = vector_db.get_collection(vector_db_collection_name)
+            # include = [IncludeEnum.embeddings, IncludeEnum.metadatas]
+            include: chromadb.Include = ["embeddings","metadatas"]
+            dictionary: chromadb.GetResult
+            # Fetch the embeddings and metadata
+            if column == None:
+                dictionary = collection.get(
+                    include=include
+                    # include=['embeddings', 'metadatas']
+                )
+                print(
+                    f"Embeddings are calculated from {vector_db_collection_name}")
+            else:
+                dictionary = collection.get(
+                    include=include,
+                    # include=['embeddings', 'metadatas'],
+                    where={
+                        "type": {"$eq": f"{column}"}})
+                print(
+                    f"Embeddings of {column} are calculated from {vector_db_collection_name}")
+
+            # Populate the embedding dictionary with embeddings from the vector database
+            metadatas = dictionary["metadatas"]
+            embeddings = dictionary["embeddings"]
+            if metadatas is None or embeddings is None:
+                raise ValueError(
+                    "The collection does not contain embeddings or metadatas.")
+            for metadata, embedding in zip(metadatas, embeddings):
+                embedding_dict[metadata["type"]
+                               ][metadata["id"]].append(embedding)
+
+        else:
+            raise ValueError(
+                "Persistent directory for the vector DB is not specified.")
+
+    def initialize_embedding_dict(self, **kwargs):
+        column_embedding_configs = kwargs.get("column_embedding_configs")
+        vector_db_persist_directory = kwargs.get("vector_db_persist_directory")
+        vector_db_collection_name = kwargs.get("vector_db_collection_name")
+        embedding_dict = defaultdict(lambda: defaultdict(list))
+        if column_embedding_configs:
+            for item in column_embedding_configs:
+                col = item["column"]
+                col_db_dir = item["vector_db_persist_directory"]
+                col_db_collection = item["vector_db_collection_name"]
+                self.__create_or_update_embeddings__(embedding_dict, col_db_dir, col_db_collection, col)
+        elif vector_db_persist_directory:
+            self.__create_or_update_embeddings__(embedding_dict, vector_db_persist_directory, vector_db_collection_name)
+        else:
+            logging.warning("There is no configuration of Embeddings")
+            raise ValueError(
+                "There is no configuration of Embeddings. Please provide a vector database directory and collection name.")
+        return embedding_dict
+
+
     def load(self, path):
         self.vector_store = Chroma(
             collection_name=self.collection_name,
@@ -187,21 +254,66 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
         self,
         formatter_fn: Optional[Callable[[Document, np.ndarray], Dict[str, Any]]] = None
     ) -> pd.DataFrame:
-        # Chroma does not expose direct vector access, so we fetch all docs and embeddings
-        results = self.vector_store.get()
-        docs = results['documents']
-        metadatas = results['metadatas']
-        embeddings = results['embeddings']
+        """
+        Retrieve all documents and their embeddings from the Chroma vector store
+        and return them as a pandas DataFrame.
+        """
+        # Retrieve all data from the collection
+        # include=['embeddings', 'metadatas', 'documents']
+        results = self.vector_store._collection.get(include=['embeddings', 'metadatas', 'documents'])
+
+        # Ensure all lists are not None and have the same length
+        docs = results.get('documents', []) or []
+        metadatas = results.get('metadatas', []) or []
+        embeddings = results.get('embeddings', []) or []
+
+        # Check if all lists have the same length
+        if not (len(docs) == len(metadatas) == len(embeddings)):
+            # This should not happen if Chroma returns consistent results, but as a safeguard
+            raise ValueError(
+                "Inconsistent lengths of documents, metadatas, and embeddings. ")
+            # print("Warning: Inconsistent lengths of documents, metadatas, and embeddings.")
+            # # Find the minimum length to avoid errors
+            # min_len = min(len(docs), len(metadatas), len(embeddings))
+            # docs = docs[:min_len]
+            # metadatas = metadatas[:min_len]
+            # embeddings = embeddings[:min_len]
+
+
         items = []
         for doc, meta, emb in zip(docs, metadatas, embeddings):
             document = Document(page_content=doc, metadata=meta)
             if formatter_fn:
-                item = formatter_fn(document, np.array(emb))
+                formatted_doc = formatter_fn(document, np.array(emb))
             else:
-                item = {"embedding": emb, **meta}
-            items.append(item)
+                formatted_doc = document
+            items.append({
+                'document': formatted_doc,
+                'metadata': meta,
+                'embedding': emb
+            })
+
         return pd.DataFrame(items)
 
+    # def as_dataframe(
+    #     self,
+    #     formatter_fn: Optional[Callable[[Document, np.ndarray], Dict[str, Any]]] = None
+    # ) -> pd.DataFrame:
+    #     # Chroma does not expose direct vector access, so we fetch all docs and embeddings
+    #     results = self.vector_store.get()
+    #     docs = results['documents']
+    #     metadatas = results['metadatas']
+    #     embeddings = results['embeddings']
+    #     items = []
+    #     for doc, meta, emb in zip(docs, metadatas, embeddings):
+    #         document = Document(page_content=doc, metadata=meta)
+    #         if formatter_fn:
+    #             item = formatter_fn(document, np.array(emb))
+    #         else:
+    #             item = {"embedding": emb, **meta}
+    #         items.append(item)
+    #     return pd.DataFrame(items)
+
     def get_data(self, id):
         # Chroma does not use integer IDs, but document IDs (UUIDs)
         results = self.vector_store.get(ids=[id])
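Since initialize_embedding_dict now lives on the manager, the Chroma-backed lookup can be exercised on its own. A short usage sketch under the assumption that a populated collection exists at the given path; all names and paths are placeholders:

    from langchain_community.embeddings import FakeEmbeddings  # placeholder backend
    from ddi_fw.langchain.chroma_storage import ChromaVectorStoreManager

    manager = ChromaVectorStoreManager(
        embeddings=FakeEmbeddings(size=384),
        collection_name="ddi_texts",
        persist_directory="./chroma_db",
    )

    # The method reads its configuration from kwargs, mirroring the arguments
    # of the helper that was removed from TextDatasetMixin:
    embedding_dict = manager.initialize_embedding_dict(
        vector_db_persist_directory="./chroma_db",
        vector_db_collection_name="ddi_texts",
    )
    # embedding_dict[<type>][<id>] -> list of embedding vectors for that entity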
ddi_fw/langchain/faiss_storage.py
CHANGED
@@ -8,9 +8,17 @@ from langchain_core.documents import Document
 import numpy as np # optional, if you're using NumPy vectors
 from langchain_core.embeddings import Embeddings
 
-class BaseVectorStoreManager:
-    def __init__(self, embeddings):
-        self.embeddings = embeddings
+from pydantic import BaseModel, Field
+from langchain_core.embeddings import Embeddings
+
+class BaseVectorStoreManager(BaseModel):
+    embeddings: Embeddings
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    def initialize_embedding_dict(self, **kwargs):
+        raise NotImplementedError("This method should be implemented by subclasses.")
 
     def generate_vector_store(self, docs):
         raise NotImplementedError("This method should be implemented by subclasses.")
@@ -24,12 +32,12 @@ class BaseVectorStoreManager:
     def as_dataframe(self, formatter_fn: Optional[Callable[[Document, np.ndarray], Dict[str, Any]]] = None) -> pd.DataFrame:
         raise NotImplementedError("This method should be implemented by subclasses.")
 
-class VectorStoreManager:
-    def __init__(self, embeddings):
-        self.embeddings = embeddings
-        self.index = None
-        self.vector_store = None
+class FaissVectorStoreManager(BaseVectorStoreManager):
+    index: Any = None
+    vector_store: Any = None
 
+    class Config:
+        arbitrary_types_allowed = True
     # def generate_vector_store(self, docs):
     #     dimension = len(self.embeddings.embed_query("hello world"))
    #     self.index = faiss.IndexFlatL2(dimension)
@@ -45,6 +53,14 @@ class VectorStoreManager:
     #     uuids = [str(uuid4()) for _ in range(len(docs))]
     #     self.vector_store.add_documents(documents=docs, ids=uuids)
 
+    def initialize_embedding_dict(self):
+        df = self.as_dataframe(formatter_fn=custom_formatter )
+        type_dict = (
+            df.groupby('type')
+            .apply(lambda group: dict(zip(group['id'], group['embedding'])))
+            .to_dict()
+        )
+        return type_dict
 
     def generate_vector_store(self, docs, handle_empty='zero'):
         """
@@ -217,7 +233,7 @@ class VectorStoreManager:
 
 def custom_formatter(document: Document, vector: np.ndarray) -> Dict[str, Any]:
     return {
-        "
+        "id": document.metadata.get("drugbank_id", None),
         "type": document.metadata.get("type", None),
         "embedding": vector
     }
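With BaseVectorStoreManager now a pydantic model, a new backend only has to declare its fields and override the hooks. A sketch of a custom subclass, entirely hypothetical, to illustrate the contract:

    from typing import Any, Dict
    from ddi_fw.langchain.faiss_storage import BaseVectorStoreManager

    class InMemoryVectorStoreManager(BaseVectorStoreManager):
        # Hypothetical manager that serves a precomputed {type: {id: embedding}} mapping.
        store: Dict[str, Dict[str, Any]] = {}

        class Config:
            arbitrary_types_allowed = True

        def initialize_embedding_dict(self, **kwargs):
            # Must return the {type: {id: embedding(s)}} shape the datasets expect.
            return self.store

Note that the two shipped implementations differ slightly in that shape: FaissVectorStoreManager.initialize_embedding_dict maps each id to a single embedding through the groupby, while the Chroma version appends embeddings to a list per id.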
ddi_fw/pipeline/multi_pipeline.py
CHANGED
@@ -1,5 +1,6 @@
 import json
-from typing import Optional
+from typing import Optional, Type
+from ddi_fw.langchain.faiss_storage import BaseVectorStoreManager
 from ddi_fw.pipeline.pipeline import Pipeline
 from ddi_fw.pipeline.ner_pipeline import NerParameterSearch
 import importlib
@@ -128,11 +129,13 @@ class MultiPipeline():
 
         # Vector database configuration
         vector_database = config.get("vector_databases", {})
+        vector_store_manager_type:Type[BaseVectorStoreManager]|None = None
         vector_db_persist_directory = None
         vector_db_collection_name = None
         embedding_pooling_strategy = None
         column_embedding_configs = None
         if vector_database:
+            vector_store_manager_type = get_import(vector_database.get("db_type"))
             vector_db_persist_directory = vector_database.get("vector_db_persist_directory")
             vector_db_collection_name = vector_database.get("vector_db_collection_name")
             embedding_pooling_strategy = get_import(vector_database.get("embedding_pooling_strategy"))
@@ -181,6 +184,7 @@ class MultiPipeline():
             dataset_additional_config=additional_config,
             dataset_splitter_type=dataset_splitter_type,
             columns=columns,
+            vector_store_manager_type=vector_store_manager_type,
             column_embedding_configs=column_embedding_configs,
             vector_db_persist_directory=vector_db_persist_directory,
             vector_db_collection_name=vector_db_collection_name,
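MultiPipeline resolves the manager class from the configuration with the same get_import mechanism it already uses for the pooling strategy. A sketch of the corresponding vector_databases entry, assuming get_import accepts a fully qualified class path as it does for embedding_pooling_strategy; all values are placeholders:

    # Fragment of a MultiPipeline config, shown as a Python dict.
    config = {
        "vector_databases": {
            # New in this release: resolved into a BaseVectorStoreManager subclass.
            "db_type": "ddi_fw.langchain.chroma_storage.ChromaVectorStoreManager",
            "vector_db_persist_directory": "./chroma_db",
            "vector_db_collection_name": "ddi_texts",
            "embedding_pooling_strategy": "ddi_fw.langchain.embeddings.MeanPoolingStrategy",
        }
    }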
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -3,6 +3,7 @@ from ddi_fw.datasets.dataset_splitter import DatasetSplitter
 
 from pydantic import BaseModel
 from ddi_fw.datasets.core import TextDatasetMixin
+from ddi_fw.langchain.faiss_storage import BaseVectorStoreManager
 from ddi_fw.ml.tracking_service import TrackingService
 from ddi_fw.langchain.embeddings import PoolingStrategy
 from ddi_fw.datasets import BaseDataset
@@ -26,6 +27,7 @@ class Pipeline(BaseModel):
     vector_db_persist_directory: Optional[str] = None
     vector_db_collection_name: Optional[str] = None
     embedding_pooling_strategy_type: Type[PoolingStrategy] | None = None
+    vector_store_manager_type: Type[BaseVectorStoreManager] | None = None
     combinations: Optional[List[tuple]] = None
     model: Optional[Any] = None
     default_model: Optional[Any] = None
@@ -85,8 +87,22 @@ class Pipeline(BaseModel):
         dataset_splitter = self.dataset_splitter_type()
         pooling_strategy = self.embedding_pooling_strategy_type(
         ) if self.embedding_pooling_strategy_type else None
+
+        params = {}
+
+        if self.embedding_dict is not None:
+            params["embedding_dict"] = self.embedding_dict
+        if self.vector_db_persist_directory is not None:
+            params["persist_directory"] = self.vector_db_persist_directory
+        if self.vector_db_collection_name is not None:
+            params["collection_name"] = self.vector_db_collection_name
+
+
+        vector_store_manager = self.vector_store_manager_type(**params) if self.vector_store_manager_type else None
         if issubclass(self.dataset_type, TextDatasetMixin):
+
             dataset = self.dataset_type(
+                vector_store_manager = vector_store_manager,
                 embedding_dict=self.embedding_dict,
                 pooling_strategy=pooling_strategy,
                 column_embedding_configs=self.column_embedding_configs,
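On the Pipeline side, build() assembles the manager from whichever optional fields are set (embedding_dict, plus vector_db_persist_directory and vector_db_collection_name, renamed to persist_directory and collection_name) and injects it into the dataset. A wiring sketch; the dataset subclass is hypothetical:

    from ddi_fw.datasets.core import TextDatasetMixin
    from ddi_fw.langchain.chroma_storage import ChromaVectorStoreManager
    from ddi_fw.pipeline.pipeline import Pipeline

    class DemoTextDataset(TextDatasetMixin):  # hypothetical dataset type
        pass

    pipeline = Pipeline(
        dataset_type=DemoTextDataset,
        vector_store_manager_type=ChromaVectorStoreManager,
        vector_db_persist_directory="./chroma_db",  # forwarded as persist_directory
        vector_db_collection_name="ddi_texts",      # forwarded as collection_name
    )
    # Per the hunk above, build() then runs, in effect:
    #     vector_store_manager = self.vector_store_manager_type(**params)
    #     dataset = self.dataset_type(vector_store_manager=vector_store_manager, ...)

One caveat visible in the diff: BaseVectorStoreManager declares embeddings as a required pydantic field, and build() never puts it into params, so a manager type constructed this way presumably needs embeddings to be optional or defaulted in practice.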
{ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 ddi_fw/datasets/__init__.py,sha256=NozQvXPYIS01U0srZmcKhiqJgRDkD-C-VXHL6sKrFSw,166
-ddi_fw/datasets/core.py,sha256=
+ddi_fw/datasets/core.py,sha256=FGa_OfM6oHGPYt5TmZczepkZ9F6sNxJPpVoMYa1FiB8,17421
 ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
 ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
 ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
-ddi_fw/langchain/__init__.py,sha256=
-ddi_fw/langchain/chroma_storage.py,sha256=
+ddi_fw/langchain/__init__.py,sha256=xGNaTEZCUxyc_aT1zvzVWGRfsj-9VXqMvPKtV_G7ChA,399
+ddi_fw/langchain/chroma_storage.py,sha256=I8xoqlc2K4gJdOUn5b33mGGMPFKYG3UiptY2HeM34_c,15483
 ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
-ddi_fw/langchain/faiss_storage.py,sha256=
+ddi_fw/langchain/faiss_storage.py,sha256=H--yYOmHX7nr34THNojqP_qhGXd-kMkhzzWDbMMeoqo,8923
 ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
 ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
 ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
@@ -21,10 +21,10 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
 ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
 ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
 ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
-ddi_fw/pipeline/multi_pipeline.py,sha256=
+ddi_fw/pipeline/multi_pipeline.py,sha256=jHjSfQmRQ-zEwh_5ZPdG4MBVYMrRRzlqYgFAMbDZN0g,10206
 ddi_fw/pipeline/multi_pipeline_org.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
 ddi_fw/pipeline/ner_pipeline.py,sha256=1gBk81LeZlU1rhjJ1qBgHbFt_HqOeJ5WLnJ4AkYku4s,8188
-ddi_fw/pipeline/pipeline.py,sha256=
+ddi_fw/pipeline/pipeline.py,sha256=m6pZrhoBK2lUr7PwpmJl6-WEpYcPGGc9N9C1LNJ78NQ,6974
 ddi_fw/utils/__init__.py,sha256=WNxkQXk-694roG50D355TGLXstfdWVb_tUyr-PM-8rg,537
 ddi_fw/utils/categorical_data_encoding_checker.py,sha256=T1X70Rh4atucAuqyUZmz-iFULllY9dY0NRyV9-jTjJ0,3438
 ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -38,7 +38,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
 ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
 ddi_fw/vectorization/feature_vector_generation.py,sha256=QQQGhCti653BdU343Ag1bH_g1fzi2hlic7dgNy7otjE,7694
 ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
-ddi_fw-0.0.
-ddi_fw-0.0.
-ddi_fw-0.0.
-ddi_fw-0.0.
+ddi_fw-0.0.242.dist-info/METADATA,sha256=jq8Op7HG_u5PE0DjELixnPMKwEl6mUkNtPTyQ5uBWU8,2632
+ddi_fw-0.0.242.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ddi_fw-0.0.242.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.242.dist-info/RECORD,,
{ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/WHEEL
File without changes

{ddi_fw-0.0.241.dist-info → ddi_fw-0.0.242.dist-info}/top_level.txt
File without changes