prevectorchunks-core 0.1.36__tar.gz → 0.1.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {prevectorchunks_core-0.1.36/prevectorchunks_core.egg-info → prevectorchunks_core-0.1.38}/PKG-INFO +3 -3
  2. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/markdown_and_chunk_documents.py +11 -2
  3. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/test_loader.py +2 -2
  4. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38/prevectorchunks_core.egg-info}/PKG-INFO +3 -3
  5. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core.egg-info/requires.txt +2 -2
  6. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/pyproject.toml +3 -3
  7. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/LICENCE +0 -0
  8. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/LICENSE +0 -0
  9. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/README.md +0 -0
  10. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/__init__.py +0 -0
  11. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/config/__init__.py +0 -0
  12. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/config/splitter_config.py +0 -0
  13. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/migrations/__init__.py +0 -0
  14. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/os-llm/__init__.py +0 -0
  15. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/os-llm/llava.py +0 -0
  16. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/__init__.py +0 -0
  17. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/env.py +0 -0
  18. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/inference.py +0 -0
  19. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/model.py +0 -0
  20. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/pretrained/__init__.py +0 -0
  21. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/pretrained/model_info.txt +0 -0
  22. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/pretrained/policy_model.pt +0 -0
  23. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/reward.py +0 -0
  24. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/savepretrained.py +0 -0
  25. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/testpretrained.py +0 -0
  26. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/rlchunker/utils.py +0 -0
  27. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/DocuToImageConverter.py +0 -0
  28. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/DocuToMarkdownExtractor.py +0 -0
  29. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/__init__.py +0 -0
  30. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/audio_processor.py +0 -0
  31. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/chunk_documents_crud_vdb.py +0 -0
  32. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/chunk_to_all_content_mapper.py +0 -0
  33. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/image_processor.py +0 -0
  34. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/propositional_index.py +0 -0
  35. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/services/video_analyser.py +0 -0
  36. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/tests/__init__.py +0 -0
  37. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/tests/test_local.py +0 -0
  38. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/utils/__init__.py +0 -0
  39. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/utils/extract_content.py +0 -0
  40. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/utils/file_loader.py +0 -0
  41. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core/utils/llm_wrapper.py +0 -0
  42. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core.egg-info/SOURCES.txt +0 -0
  43. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core.egg-info/dependency_links.txt +0 -0
  44. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/prevectorchunks_core.egg-info/top_level.txt +0 -0
  45. {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.38}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prevectorchunks-core
3
- Version: 0.1.36
3
+ Version: 0.1.38
4
4
  Summary: A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database
5
5
  Author-email: Zul Al-Kabir <zul.developer.2023@gmail.com>
6
6
  License: MIT License
@@ -23,8 +23,8 @@ Requires-Dist: pytesseract~=0.3.13
23
23
  Requires-Dist: python-docx~=1.2.0
24
24
  Requires-Dist: PyPDF2~=3.0.1
25
25
  Requires-Dist: pillow~=11.3.0
26
- Requires-Dist: torch~=2.6.0
27
- Requires-Dist: torchvision~=0.21.0
26
+ Requires-Dist: torch~=2.2.2
27
+ Requires-Dist: torchvision~=0.17.2
28
28
  Requires-Dist: torchaudio~=2.6.0
29
29
  Requires-Dist: sentence-transformers~=5.1.1
30
30
  Requires-Dist: py-gutenberg~=1.0.3
@@ -244,6 +244,7 @@ class MarkdownAndChunkDocuments:
244
244
  input_bytes: bytes = None,
245
245
  include_image: bool = None,
246
246
  file_name: str = None,
247
+ embedding_client=None
247
248
  ):
248
249
  """Generator version of markdown_and_chunk_documents that yields progress JSON events"""
249
250
 
@@ -268,8 +269,16 @@ class MarkdownAndChunkDocuments:
268
269
 
269
270
  # 4️⃣ Chunking
270
271
  yield from report(55, "Chunking text...")
271
- chunk_client = OpenAI(api_key=self.api_key)
272
- cm = ChunkMapper(chunk_client, markdown_output, embedding_model="text-embedding-3-small")
272
+
273
+ # Chunking and mapping
274
+ # chunk_client = OpenAI(api_key=self.api_key)
275
+ if embedding_client is None:
276
+ embedding_client = OpenAIEmbeddings(
277
+ model="text-embedding-3-small",
278
+ api_key=self.api_key
279
+ )
280
+
281
+ cm = ChunkMapper(embedding_client, markdown_output, embedding_model="text-embedding-3-small")
273
282
 
274
283
  splitter_config = SplitterConfig(
275
284
  chunk_size=300,
@@ -20,7 +20,7 @@ def temp_json_file(tmp_path):
20
20
  return file_path
21
21
 
22
22
 
23
- def test_load_file_and_upsert_chunks_to_vdb(temp_json_file):
23
+ def test_load_file_and_upsert_chunks_to_vdb():
24
24
  splitter_config = SplitterConfig(chunk_size=300, chunk_overlap=0, separators=["\n"],
25
25
  split_type=SplitType.R_PRETRAINED_PROPOSITION.value, min_rl_chunk_size=5,
26
26
  max_rl_chunk_size=50, enableLLMTouchUp=True,llm_structured_output_type=LLM_Structured_Output_Type.STRUCTURED_WITH_VECTOR_DB_ID_GENERATED)
@@ -38,7 +38,7 @@ def test_load_file_and_upsert_chunks_to_vdb(temp_json_file):
38
38
  print(f"Chunk {i + 1}: {c}")
39
39
  print(chunks)
40
40
 
41
- def test_markdown(temp_json_file):
41
+ def test_markdown():
42
42
 
43
43
  client = init_chat_model(
44
44
  model="gpt-4o-mini",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prevectorchunks-core
3
- Version: 0.1.36
3
+ Version: 0.1.38
4
4
  Summary: A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database
5
5
  Author-email: Zul Al-Kabir <zul.developer.2023@gmail.com>
6
6
  License: MIT License
@@ -23,8 +23,8 @@ Requires-Dist: pytesseract~=0.3.13
23
23
  Requires-Dist: python-docx~=1.2.0
24
24
  Requires-Dist: PyPDF2~=3.0.1
25
25
  Requires-Dist: pillow~=11.3.0
26
- Requires-Dist: torch~=2.6.0
27
- Requires-Dist: torchvision~=0.21.0
26
+ Requires-Dist: torch~=2.2.2
27
+ Requires-Dist: torchvision~=0.17.2
28
28
  Requires-Dist: torchaudio~=2.6.0
29
29
  Requires-Dist: sentence-transformers~=5.1.1
30
30
  Requires-Dist: py-gutenberg~=1.0.3
@@ -9,8 +9,8 @@ pytesseract~=0.3.13
9
9
  python-docx~=1.2.0
10
10
  PyPDF2~=3.0.1
11
11
  pillow~=11.3.0
12
- torch~=2.6.0
13
- torchvision~=0.21.0
12
+ torch~=2.2.2
13
+ torchvision~=0.17.2
14
14
  torchaudio~=2.6.0
15
15
  sentence-transformers~=5.1.1
16
16
  py-gutenberg~=1.0.3
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "prevectorchunks-core"
7
- version = "0.1.36"
7
+ version = "0.1.38"
8
8
  description = "A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -25,8 +25,8 @@ dependencies = [
25
25
  "python-docx~=1.2.0",
26
26
  "PyPDF2~=3.0.1",
27
27
  "pillow~=11.3.0",
28
- "torch~=2.6.0",
29
- "torchvision~=0.21.0",
28
+ "torch~=2.2.2",
29
+ "torchvision~=0.17.2",
30
30
  "torchaudio~=2.6.0",
31
31
  "sentence-transformers~=5.1.1",
32
32
  "py-gutenberg~=1.0.3",