ddi-fw 0.0.261__tar.gz → 0.0.262__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/langchain/__init__.py +1 -1
  4. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/langchain/chroma_storage.py +2 -2
  5. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/langchain/faiss_storage.py +4 -4
  6. ddi_fw-0.0.262/src/ddi_fw/langchain/sentence_splitter.py +17 -0
  7. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  8. ddi_fw-0.0.261/src/ddi_fw/langchain/sentence_splitter.py +0 -10
  9. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/README.md +0 -0
  10. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/setup.cfg +0 -0
  11. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/datasets/__init__.py +0 -0
  12. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/datasets/core.py +0 -0
  13. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  14. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/datasets/db_utils.py +0 -0
  15. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/datasets/setup_._py +0 -0
  16. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/langchain/embeddings.py +0 -0
  17. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/langchain/storage.py +0 -0
  18. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/__init__.py +0 -0
  19. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  20. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/ml_helper.py +0 -0
  21. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/model_wrapper.py +0 -0
  22. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  23. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  24. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ml/tracking_service.py +0 -0
  25. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ner/__init__.py +0 -0
  26. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  27. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/ner/ner.py +0 -0
  28. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/__init__.py +0 -0
  29. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  30. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  31. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
  32. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  33. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/pipeline/pipeline.py +0 -0
  34. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/__init__.py +0 -0
  35. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  36. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/enums.py +0 -0
  37. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/json_helper.py +0 -0
  38. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/kaggle.py +0 -0
  39. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/numpy_utils.py +0 -0
  40. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/package_helper.py +0 -0
  41. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  42. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/utils.py +0 -0
  43. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/utils/zip_helper.py +0 -0
  44. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/__init__.py +0 -0
  45. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  46. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  47. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  48. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  49. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/requires.txt +0 -0
  50. {ddi_fw-0.0.261 → ddi_fw-0.0.262}/src/ddi_fw.egg-info/top_level.txt +0 -0
--- ddi_fw-0.0.261/PKG-INFO
+++ ddi_fw-0.0.262/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.261
+Version: 0.0.262
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
--- ddi_fw-0.0.261/pyproject.toml
+++ ddi_fw-0.0.262/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ddi_fw"
-version = "0.0.261"
+version = "0.0.262"
 description = "Do not use :)"
 readme = "README.md"
 authors = [
--- ddi_fw-0.0.261/src/ddi_fw/langchain/__init__.py
+++ ddi_fw-0.0.262/src/ddi_fw/langchain/__init__.py
@@ -1,5 +1,5 @@
 from ..langchain.embeddings import PoolingStrategy,SumPoolingStrategy,MeanPoolingStrategy,SentenceTransformerDecorator,PretrainedEmbeddings,SBertEmbeddings
-from .sentence_splitter import SentenceSplitter
+from .sentence_splitter import SentenceSplitter,PassthroughTextSplitter
 # from .storage import DataFrameToVectorDB, generate_embeddings
 from .faiss_storage import BaseVectorStoreManager, FaissVectorStoreManager,generate_embeddings
 from .chroma_storage import ChromaVectorStoreManager
--- ddi_fw-0.0.261/src/ddi_fw/langchain/chroma_storage.py
+++ ddi_fw-0.0.262/src/ddi_fw/langchain/chroma_storage.py
@@ -153,8 +153,8 @@ class ChromaVectorStoreManager(BaseVectorStoreManager):
             # print(f"{page_content_column}, size:{len(split_docs_chunk)}")
 
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}"
+    def get_persist_dir(base_dir, id, suffix, config=None):
+        return f"{base_dir}/chroma_db/{id}"
 
     def generate_vector_store(self, docs: List[Document]):
         self.vector_store = Chroma(
--- ddi_fw-0.0.261/src/ddi_fw/langchain/faiss_storage.py
+++ ddi_fw-0.0.262/src/ddi_fw/langchain/faiss_storage.py
@@ -39,7 +39,7 @@ class BaseVectorStoreManager(BaseModel):
         raise NotImplementedError("This method should be implemented by subclasses.")
 
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
+    def get_persist_dir(base_dir, id, suffix, config=None):
         raise NotImplementedError("Subclasses must implement get_persist_dir.")
 
 
@@ -66,8 +66,8 @@ class FaissVectorStoreManager(BaseVectorStoreManager):
         # self.vector_store.add_documents(documents=docs, ids=uuids)
 
     @staticmethod
-    def get_persist_dir(base_dir, suffix, config=None):
-        return f"{base_dir}/{suffix}"
+    def get_persist_dir(base_dir, id, suffix, config=None):
+        return f"{base_dir}/faiss/{id}/{suffix}"
 
     def initialize_embedding_dict(self, **kwargs):
         """
@@ -441,7 +441,7 @@ def generate_embeddings(
     print(f"{id}_{suffix}")
     # persist_dir = f'{persist_directory}/{id}/{suffix}'
     # persist_dir = f'{persist_directory}/{suffix}'
-    persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, suffix, collection_config)
+    persist_dir = vector_store_manager_type.get_persist_dir(persist_directory, id, suffix, collection_config)
 
     # Prepare manager parameters
     manager_params = {
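
For reference, a minimal sketch of how the id-aware get_persist_dir signatures introduced above resolve on disk. The base directory and the id/suffix values are invented placeholders; the import path follows the re-exports in ddi_fw/langchain/__init__.py shown earlier.

# Illustrative only: "embeddings", "drugbank", and "abstract" are made-up values.
from ddi_fw.langchain import ChromaVectorStoreManager, FaissVectorStoreManager

# Chroma stores now nest under <base_dir>/chroma_db/<id> (suffix is accepted but unused).
print(ChromaVectorStoreManager.get_persist_dir("embeddings", "drugbank", "abstract"))
# -> embeddings/chroma_db/drugbank

# FAISS stores now nest under <base_dir>/faiss/<id>/<suffix>.
print(FaissVectorStoreManager.get_persist_dir("embeddings", "drugbank", "abstract"))
# -> embeddings/faiss/drugbank/abstract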
--- /dev/null
+++ ddi_fw-0.0.262/src/ddi_fw/langchain/sentence_splitter.py
@@ -0,0 +1,17 @@
+from typing import List
+import nltk
+from nltk import sent_tokenize
+from langchain_text_splitters.base import TextSplitter
+
+nltk.download('punkt')
+
+''' A text splitter that splits text into sentences using NLTK's sentence tokenizer.'''
+class SentenceSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return sent_tokenize(text)
+
+
+''' A text splitter that does not split the text at all, returning the entire text as a single chunk.'''
+class PassthroughTextSplitter(TextSplitter):
+    def split_text(self, text: str) -> List[str]:
+        return [text]
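
A minimal usage sketch for the two splitters added above, assuming nltk and langchain-text-splitters are installed (both classes inherit their constructor from langchain's TextSplitter base class; the sample text is invented):

from ddi_fw.langchain import SentenceSplitter, PassthroughTextSplitter

text = "Aspirin inhibits platelet aggregation. Warfarin is an anticoagulant."

# Sentence-level chunks via NLTK's sent_tokenize.
print(SentenceSplitter().split_text(text))
# -> ['Aspirin inhibits platelet aggregation.', 'Warfarin is an anticoagulant.']

# The whole text returned unchanged as a single one-element chunk list.
print(PassthroughTextSplitter().split_text(text))
# -> ['Aspirin inhibits platelet aggregation. Warfarin is an anticoagulant.']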
--- ddi_fw-0.0.261/src/ddi_fw.egg-info/PKG-INFO
+++ ddi_fw-0.0.262/src/ddi_fw.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.261
+Version: 0.0.262
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
--- ddi_fw-0.0.261/src/ddi_fw/langchain/sentence_splitter.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from typing import List
-import nltk
-from nltk import sent_tokenize
-from langchain_text_splitters.base import TextSplitter
-
-nltk.download('punkt')
-
-class SentenceSplitter(TextSplitter):
-    def split_text(self, text: str) -> List[str]:
-        return sent_tokenize(text)