ddi-fw 0.0.261__tar.gz → 0.0.263__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/PKG-INFO +1 -1
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/pyproject.toml +1 -1
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/__init__.py +1 -1
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/chroma_storage.py +2 -2
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/faiss_storage.py +6 -5
- ddi_fw-0.0.263/src/ddi_fw/langchain/sentence_splitter.py +17 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/kaggle.py +1 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw.egg-info/PKG-INFO +1 -1
- ddi_fw-0.0.261/src/ddi_fw/langchain/sentence_splitter.py +0 -10
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/README.md +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/setup.cfg +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/datasets/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/datasets/core.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/datasets/db_utils.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/datasets/setup_._py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/embeddings.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/storage.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/evaluation_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/ml_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/model_wrapper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ml/tracking_service.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ner/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ner/mmlrestclient.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/ner/ner.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/pipeline/pipeline.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/enums.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/json_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/numpy_utils.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/package_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/py7zr_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/utils.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/zip_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/vectorization/__init__.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/vectorization/idf_helper.py +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw.egg-info/requires.txt +0 -0
- {ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw.egg-info/top_level.txt +0 -0
{ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/__init__.py

```diff
@@ -1,5 +1,5 @@
 from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
-from .sentence_splitter import SentenceSplitter
+from .sentence_splitter import SentenceSplitter,PassthroughTextSplitter
 # from .storage import DataFrameToVectorDB, generate_embeddings
 from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager,generate_embeddings
 from .chroma_storage import ChromaVectorStoreManager
```
{ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/chroma_storage.py

```diff
@@ -153,8 +153,8 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
             # print(f"{page_content_column}, size:{len(split_docs_chunk)}")

     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}"
+    def get_persist_dir(base_dir,id, suffix, config=None):
+        return f"{base_dir}/chroma_db/{id}"

     def generate_vector_store(self, docs: List[Document]):
         self.vector_store = Chroma(
```
{ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/langchain/faiss_storage.py

```diff
@@ -39,7 +39,7 @@ class BaseVectorStoreManager(BaseModel):
         raise NotImplementedError("This method should be implemented by subclasses.")

     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
+    def get_persist_dir(base_dir, id ,suffix, config=None):
         raise NotImplementedError("Subclasses must implement get_persist_dir.")


```
```diff
@@ -66,8 +66,8 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
         # self.vector_store.add_documents(documents=docs, ids=uuids)

     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}/{suffix}"
+    def get_persist_dir(base_dir,id, suffix, config=None):
+        return f"{base_dir}/faiss/{id}/{suffix}"

     def initialize_embedding_dict(self, **kwargs):
         """
```
```diff
@@ -414,8 +414,9 @@ def generate_embeddings(
     # Load embedding model
     try:
         model_kwargs = collection_config.get('model_kwargs')
+        kwargs = {"model_kwargs":model_kwargs}
         model = get_import(embedding_model_type)(
-            model_name=name, **
+            model_name=name, **kwargs)
     except Exception as e:
         raise Exception(f"Unknown embedding model: {embedding_model_type}") from e

```
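The effect of this hunk is that `model_kwargs` from the collection config is now actually forwarded to the embedding model constructor (the removed line was truncated at `**`). A minimal sketch of the call the new code makes, using `HuggingFaceEmbeddings` as a stand-in for whatever `get_import(embedding_model_type)` resolves to; the model name and config values are hypothetical:

```python
from langchain_huggingface import HuggingFaceEmbeddings  # stand-in for get_import(...)

collection_config = {"model_kwargs": {"device": "cpu"}}  # hypothetical config
name = "sentence-transformers/all-MiniLM-L6-v2"          # hypothetical model name

# Mirrors the new code path in generate_embeddings:
model_kwargs = collection_config.get('model_kwargs')
kwargs = {"model_kwargs": model_kwargs}
model = HuggingFaceEmbeddings(model_name=name, **kwargs)
# i.e. HuggingFaceEmbeddings(model_name=name, model_kwargs={"device": "cpu"})
```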
```diff
@@ -441,7 +442,7 @@ def generate_embeddings(
         print(f"{id}_{suffix}")
         # persist_dir = f'{persist_directory}/{id}/{suffix}'
         # persist_dir = f'{persist_directory}/{suffix}'
-        persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, suffix, collection_config)
+        persist_dir = vector_store_manager_type.get_persist_dir(persist_directory , id, suffix, collection_config)

         # Prepare manager parameters
         manager_params = {
```
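Taken together with the chroma_storage.py hunk above, both concrete managers now key their persist directories by collection id. A minimal sketch of the resulting layout; the base_dir, id, and suffix values are hypothetical, and both methods are plain staticmethods, so no manager instance is needed:

```python
from ddi_fw.langchain import ChromaVectorStoreManager, FaissVectorStoreManager

base_dir = "/tmp/vector_stores"  # hypothetical base directory
collection_id = "drugbank"       # hypothetical collection id
suffix = "mean_pooling"          # hypothetical suffix

# FAISS stores now nest under faiss/<id>/<suffix>:
print(FaissVectorStoreManager.get_persist_dir(base_dir, collection_id, suffix))
# -> /tmp/vector_stores/faiss/drugbank/mean_pooling

# Chroma stores nest under chroma_db/<id> (suffix is accepted but unused):
print(ChromaVectorStoreManager.get_persist_dir(base_dir, collection_id, suffix))
# -> /tmp/vector_stores/chroma_db/drugbank
```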
ddi_fw-0.0.263/src/ddi_fw/langchain/sentence_splitter.py (new file)

```diff
@@ -0,0 +1,17 @@
+from typing import List
+import nltk
+from nltk import sent_tokenize
+from langchain_text_splitters.base import TextSplitter
+
+nltk.download('punkt')
+
+''' A text splitter that splits text into sentences using NLTK's sentence tokenizer.'''
+class SentenceSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return sent_tokenize(text)
+
+
+''' A text splitter that does not split the text at all, returning the entire text as a single chunk.'''
+class PassthroughTextSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return [text]
```
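Since this file is new in 0.0.263, a minimal usage sketch: both classes inherit their constructors from langchain's `TextSplitter` base, the sample text is hypothetical, and NLTK's punkt sentence models are assumed to be available (the module downloads them at import time):

```python
from ddi_fw.langchain import SentenceSplitter, PassthroughTextSplitter

text = "Aspirin inhibits platelet aggregation. Warfarin is an anticoagulant."

# SentenceSplitter delegates to NLTK's sent_tokenize: one chunk per sentence.
print(SentenceSplitter().split_text(text))
# ['Aspirin inhibits platelet aggregation.', 'Warfarin is an anticoagulant.']

# PassthroughTextSplitter returns the input unchanged as a single chunk.
print(PassthroughTextSplitter().split_text(text))
# ['Aspirin inhibits platelet aggregation. Warfarin is an anticoagulant.']
```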
{ddi_fw-0.0.261 → ddi_fw-0.0.263}/src/ddi_fw/utils/kaggle.py

```diff
@@ -37,6 +37,7 @@ def create_kaggle_dataset(base_path: str, collections: list):

         # Ensure title is between 6 and 50 characters
         if not (6 <= len(title) <= 50):
+            raise ValueError(f"Title length for {title} must be between 6 and 50 characters.")
             continue  # Skip if title length is out of the expected range

         # Step 3: Define the metadata content
```
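The new raise executes before the pre-existing `continue`, so out-of-range titles now abort with an error instead of being skipped. A minimal sketch of the guard in isolation, using a hypothetical `check_title` helper:

```python
def check_title(title: str) -> None:
    # Hypothetical helper mirroring the new guard in create_kaggle_dataset.
    if not (6 <= len(title) <= 50):
        raise ValueError(f"Title length for {title} must be between 6 and 50 characters.")

check_title("ddi-fw drug interactions")  # OK: 24 characters
# check_title("abc")                     # would raise ValueError: only 3 characters
```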
ddi_fw-0.0.261/src/ddi_fw/langchain/sentence_splitter.py (removed)

```diff
@@ -1,10 +0,0 @@
-from typing import List
-import nltk
-from nltk import sent_tokenize
-from langchain_text_splitters.base import TextSplitter
-
-nltk.download('punkt')
-
-class SentenceSplitter(TextSplitter):
-    def split_text(self, text: str) -> List[str]:
-        return sent_tokenize(text)
```