prevectorchunks-core 0.1.36__tar.gz → 0.1.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {prevectorchunks_core-0.1.36/prevectorchunks_core.egg-info → prevectorchunks_core-0.1.37}/PKG-INFO +2 -2
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/markdown_and_chunk_documents.py +11 -2
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/test_loader.py +2 -2
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37/prevectorchunks_core.egg-info}/PKG-INFO +2 -2
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core.egg-info/requires.txt +1 -1
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/pyproject.toml +2 -2
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/LICENCE +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/LICENSE +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/README.md +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/config/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/config/splitter_config.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/migrations/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/os-llm/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/os-llm/llava.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/env.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/inference.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/model.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/pretrained/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/pretrained/model_info.txt +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/pretrained/policy_model.pt +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/reward.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/savepretrained.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/testpretrained.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/utils.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/DocuToImageConverter.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/DocuToMarkdownExtractor.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/audio_processor.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/chunk_documents_crud_vdb.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/chunk_to_all_content_mapper.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/image_processor.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/propositional_index.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/services/video_analyser.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/tests/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/tests/test_local.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/utils/__init__.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/utils/extract_content.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/utils/file_loader.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/utils/llm_wrapper.py +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core.egg-info/SOURCES.txt +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core.egg-info/dependency_links.txt +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core.egg-info/top_level.txt +0 -0
- {prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/setup.cfg +0 -0
{prevectorchunks_core-0.1.36/prevectorchunks_core.egg-info → prevectorchunks_core-0.1.37}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: prevectorchunks-core
|
|
3
|
-
Version: 0.1.36
|
|
3
|
+
Version: 0.1.37
|
|
4
4
|
Summary: A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database
|
|
5
5
|
Author-email: Zul Al-Kabir <zul.developer.2023@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -23,7 +23,7 @@ Requires-Dist: pytesseract~=0.3.13
|
|
|
23
23
|
Requires-Dist: python-docx~=1.2.0
|
|
24
24
|
Requires-Dist: PyPDF2~=3.0.1
|
|
25
25
|
Requires-Dist: pillow~=11.3.0
|
|
26
|
-
Requires-Dist: torch~=2.
|
|
26
|
+
Requires-Dist: torch~=2.2.2
|
|
27
27
|
Requires-Dist: torchvision~=0.21.0
|
|
28
28
|
Requires-Dist: torchaudio~=2.6.0
|
|
29
29
|
Requires-Dist: sentence-transformers~=5.1.1
|
|
@@ -244,6 +244,7 @@ class MarkdownAndChunkDocuments:
|
|
|
244
244
|
input_bytes: bytes = None,
|
|
245
245
|
include_image: bool = None,
|
|
246
246
|
file_name: str = None,
|
|
247
|
+
embedding_client=None
|
|
247
248
|
):
|
|
248
249
|
"""Generator version of markdown_and_chunk_documents that yields progress JSON events"""
|
|
249
250
|
|
|
@@ -268,8 +269,16 @@ class MarkdownAndChunkDocuments:
|
|
|
268
269
|
|
|
269
270
|
# 4️⃣ Chunking
|
|
270
271
|
yield from report(55, "Chunking text...")
|
|
271
|
-
|
|
272
|
-
|
|
272
|
+
|
|
273
|
+
# Chunking and mapping
|
|
274
|
+
# chunk_client = OpenAI(api_key=self.api_key)
|
|
275
|
+
if embedding_client is None:
|
|
276
|
+
embedding_client = OpenAIEmbeddings(
|
|
277
|
+
model="text-embedding-3-small",
|
|
278
|
+
api_key=self.api_key
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
cm = ChunkMapper(embedding_client, markdown_output, embedding_model="text-embedding-3-small")
|
|
273
282
|
|
|
274
283
|
splitter_config = SplitterConfig(
|
|
275
284
|
chunk_size=300,
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/test_loader.py
RENAMED
|
@@ -20,7 +20,7 @@ def temp_json_file(tmp_path):
|
|
|
20
20
|
return file_path
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def test_load_file_and_upsert_chunks_to_vdb(temp_json_file):
|
|
23
|
+
def test_load_file_and_upsert_chunks_to_vdb():
|
|
24
24
|
splitter_config = SplitterConfig(chunk_size=300, chunk_overlap=0, separators=["\n"],
|
|
25
25
|
split_type=SplitType.R_PRETRAINED_PROPOSITION.value, min_rl_chunk_size=5,
|
|
26
26
|
max_rl_chunk_size=50, enableLLMTouchUp=True,llm_structured_output_type=LLM_Structured_Output_Type.STRUCTURED_WITH_VECTOR_DB_ID_GENERATED)
|
|
@@ -38,7 +38,7 @@ def test_load_file_and_upsert_chunks_to_vdb(temp_json_file):
|
|
|
38
38
|
print(f"Chunk {i + 1}: {c}")
|
|
39
39
|
print(chunks)
|
|
40
40
|
|
|
41
|
-
def test_markdown(
|
|
41
|
+
def test_markdown():
|
|
42
42
|
|
|
43
43
|
client = init_chat_model(
|
|
44
44
|
model="gpt-4o-mini",
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37/prevectorchunks_core.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: prevectorchunks-core
|
|
3
|
-
Version: 0.1.36
|
|
3
|
+
Version: 0.1.37
|
|
4
4
|
Summary: A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database
|
|
5
5
|
Author-email: Zul Al-Kabir <zul.developer.2023@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -23,7 +23,7 @@ Requires-Dist: pytesseract~=0.3.13
|
|
|
23
23
|
Requires-Dist: python-docx~=1.2.0
|
|
24
24
|
Requires-Dist: PyPDF2~=3.0.1
|
|
25
25
|
Requires-Dist: pillow~=11.3.0
|
|
26
|
-
Requires-Dist: torch~=2.
|
|
26
|
+
Requires-Dist: torch~=2.2.2
|
|
27
27
|
Requires-Dist: torchvision~=0.21.0
|
|
28
28
|
Requires-Dist: torchaudio~=2.6.0
|
|
29
29
|
Requires-Dist: sentence-transformers~=5.1.1
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "prevectorchunks-core"
|
|
7
|
-
version = "0.1.36"
|
|
7
|
+
version = "0.1.37"
|
|
8
8
|
description = "A Python module that allows conversion of a document into chunks to be inserted into Pinecone vector database"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -25,7 +25,7 @@ dependencies = [
|
|
|
25
25
|
"python-docx~=1.2.0",
|
|
26
26
|
"PyPDF2~=3.0.1",
|
|
27
27
|
"pillow~=11.3.0",
|
|
28
|
-
"torch~=2.
|
|
28
|
+
"torch~=2.2.2",
|
|
29
29
|
"torchvision~=0.21.0",
|
|
30
30
|
"torchaudio~=2.6.0",
|
|
31
31
|
"sentence-transformers~=5.1.1",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/__init__.py
RENAMED
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/config/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/os-llm/__init__.py
RENAMED
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/os-llm/llava.py
RENAMED
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/env.py
RENAMED
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/model.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/reward.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/rlchunker/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/tests/__init__.py
RENAMED
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/tests/test_local.py
RENAMED
|
File without changes
|
{prevectorchunks_core-0.1.36 → prevectorchunks_core-0.1.37}/prevectorchunks_core/utils/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|