PyPI - ddi-fw - Versions diffs - 0.0.260__tar.gz → 0.0.262__tar.gz - Mend

ddi-fw 0.0.260__tar.gz → 0.0.262__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.260
+Version: 0.0.262
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ddi_fw"
-version = "0.0.260"
+version = "0.0.262"
 description = "Do not use :)"
 readme = "README.md"
 authors = [

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/langchain/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
-from .sentence_splitter import SentenceSplitter
+from .sentence_splitter import SentenceSplitter,PassthroughTextSplitter
 # from .storage import DataFrameToVectorDB, generate_embeddings
 from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager,generate_embeddings
 from .chroma_storage import ChromaVectorStoreManager

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/langchain/chroma_storage.py RENAMED Viewed

@@ -153,8 +153,8 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
     #                 print(f"{page_content_column}, size:{len(split_docs_chunk)}")
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}"
+    def get_persist_dir(base_dir,id, suffix, config=None):
+        return f"{base_dir}/chroma_db/{id}"
     def generate_vector_store(self, docs: List[Document]):
         self.vector_store = Chroma(
@@ -267,7 +267,13 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
         # Ensure all lists are not None and have the same length
         docs = results.get('documents', []) or []
         metadatas = results.get('metadatas', []) or []
-        embeddings = results.get('embeddings', []) or []
+        # embeddings = results.get('embeddings', []) or []
+        embeddings = results.get('embeddings')
+        if isinstance(embeddings, np.ndarray):
+            pass  # Keep as-is
+        elif embeddings is None:
+            embeddings = []
         # Check if all lists have the same length
         if not (len(docs) == len(metadatas) == len(embeddings)):

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/langchain/faiss_storage.py RENAMED Viewed

@@ -14,6 +14,7 @@ from langchain_core.embeddings import Embeddings
 from langchain_core.vectorstores import VectorStore
 from ddi_fw.utils import get_import
 from langchain.document_loaders import DataFrameLoader
+from collections import defaultdict
 class BaseVectorStoreManager(BaseModel):
     embeddings: Optional[Embeddings] = None
@@ -38,7 +39,7 @@ class BaseVectorStoreManager(BaseModel):
         raise NotImplementedError("This method should be implemented by subclasses.")
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
+    def get_persist_dir(base_dir, id ,suffix, config=None):
         raise NotImplementedError("Subclasses must implement get_persist_dir.")
@@ -65,8 +66,8 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
     #     self.vector_store.add_documents(documents=docs, ids=uuids)
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}/{suffix}"
+    def get_persist_dir(base_dir,id, suffix, config=None):
+        return f"{base_dir}/faiss/{id}/{suffix}"
     def initialize_embedding_dict(self, **kwargs):
         """
@@ -79,9 +80,13 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
         self.load(self.persist_directory)
         # df = self.as_dataframe(formatter_fn=custom_formatter)
         df = self.as_dataframe(formatter_fn=custom_formatter)
-        type_dict = {}
-        for drug_type, group in df.groupby('type'):
-            type_dict[drug_type] = dict(zip(group['id'], group['embedding'].apply(lambda x: [x])))
+        type_dict = defaultdict(lambda: defaultdict(list))
+        grouped = df.groupby(['type', 'id'])['embedding'].apply(list)
+        for (drug_type, id), embeddings in grouped.items():
+            type_dict[drug_type][id] = embeddings
         return type_dict
     def generate_vector_store(self, docs, handle_empty='zero'):
@@ -436,7 +441,7 @@ def generate_embeddings(
             print(f"{id}_{suffix}")
             # persist_dir = f'{persist_directory}/{id}/{suffix}'
             # persist_dir = f'{persist_directory}/{suffix}'
-            persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, suffix, collection_config)
+            persist_dir = vector_store_manager_type.get_persist_dir(persist_directory , id, suffix, collection_config)
             # Prepare manager parameters
             manager_params = {

ddi_fw-0.0.262/src/ddi_fw/langchain/sentence_splitter.py ADDED Viewed

@@ -0,0 +1,17 @@
+from typing import List
+import nltk
+from nltk import sent_tokenize
+from langchain_text_splitters.base import TextSplitter
+nltk.download('punkt')
+''' A text splitter that splits text into sentences using NLTK's sentence tokenizer.'''
+class SentenceSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return sent_tokenize(text)
+''' A text splitter that does not split the text at all, returning the entire text as a single chunk.'''
+class PassthroughTextSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return [text]

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.260
+Version: 0.0.262
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>

ddi_fw-0.0.260/src/ddi_fw/langchain/sentence_splitter.py DELETED Viewed

@@ -1,10 +0,0 @@
-from typing import List
-import nltk
-from nltk import sent_tokenize
-from langchain_text_splitters.base import TextSplitter
-nltk.download('punkt')
-class SentenceSplitter(TextSplitter):
-    def split_text(self, text: str) -> List[str]:
-        return sent_tokenize(text)

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/README.md RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/setup.cfg RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/datasets/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/datasets/core.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/datasets/dataset_splitter.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/datasets/db_utils.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/datasets/setup_._py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/langchain/embeddings.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/langchain/storage.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/evaluation_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/ml_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/model_wrapper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/pytorch_wrapper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/tensorflow_wrapper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ml/tracking_service.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ner/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ner/mmlrestclient.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/ner/ner.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_pipeline.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_pipeline_org.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/ner_pipeline.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/pipeline.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/categorical_data_encoding_checker.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/enums.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/json_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/kaggle.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/numpy_utils.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/package_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/py7zr_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/utils.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/utils/zip_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/__init__.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/feature_vector_generation.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/idf_helper.py RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/requires.txt RENAMED Viewed

File without changes

{ddi_fw-0.0.260 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/top_level.txt RENAMED Viewed

File without changes

ddi-fw 0.0.260__tar.gz → 0.0.262__tar.gz

ddi-fw 0.0.260__tar.gz → 0.0.262__tar.gz