langroid 0.37.0__tar.gz → 0.37.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.37.0 → langroid-0.37.1}/PKG-INFO +2 -1
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/document_parser.py +12 -18
- langroid-0.37.1/langroid/parsing/pdf_utils.py +55 -0
- {langroid-0.37.0 → langroid-0.37.1}/pyproject.toml +2 -1
- {langroid-0.37.0 → langroid-0.37.1}/.gitignore +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/LICENSE +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/README.md +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/batch.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/chat_document.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/doc_chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/task.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tool_message.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/models.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/exceptions.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/config.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/mytypes.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/parser.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/routing.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/search.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/spider.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/urls.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/web_search.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/dialog.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/templates.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/py.typed +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/configuration.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/constants.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/git_utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/globals.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/logging.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/object_registry.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/citations.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/printing.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/status.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/system.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/types.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/base.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/momento.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/qdrantdb.py +0 -0
- {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.37.0
|
3
|
+
Version: 0.37.1
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
Author-email: Prasad Chalasani <pchalasani@gmail.com>
|
6
6
|
License: MIT
|
@@ -102,6 +102,7 @@ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
|
|
102
102
|
Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'doc-chat'
|
103
103
|
Provides-Extra: docling
|
104
104
|
Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
|
105
|
+
Requires-Dist: pypdf>=5.1.0; extra == 'docling'
|
105
106
|
Provides-Extra: docx
|
106
107
|
Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
|
107
108
|
Provides-Extra: fastembed
|
@@ -3,12 +3,12 @@ from __future__ import annotations
|
|
3
3
|
import itertools
|
4
4
|
import logging
|
5
5
|
import re
|
6
|
-
import tempfile
|
7
6
|
from enum import Enum
|
8
7
|
from io import BytesIO
|
9
8
|
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
|
10
9
|
|
11
10
|
from langroid.exceptions import LangroidImportError
|
11
|
+
from langroid.parsing.pdf_utils import pdf_split_pages
|
12
12
|
from langroid.utils.object_registry import ObjectRegistry
|
13
13
|
|
14
14
|
try:
|
@@ -515,29 +515,23 @@ class DoclingParser(DocumentParser):
|
|
515
515
|
raise LangroidImportError(
|
516
516
|
"docling", ["docling", "pdf-parsers", "all", "doc-chat"]
|
517
517
|
)
|
518
|
-
|
518
|
+
|
519
519
|
from docling.document_converter import ( # type: ignore
|
520
520
|
ConversionResult,
|
521
521
|
DocumentConverter,
|
522
522
|
)
|
523
|
+
from docling_core.types.doc import ImageRefMode # type: ignore
|
523
524
|
|
525
|
+
page_files, tmp_dir = pdf_split_pages(self.doc_bytes)
|
524
526
|
converter = DocumentConverter()
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
for i in range(n_pages):
|
534
|
-
texts = [
|
535
|
-
item[0].text
|
536
|
-
for item in doc.iterate_items(page_no=i + 1)
|
537
|
-
if isinstance(item[0], TextItem)
|
538
|
-
]
|
539
|
-
text = "\n".join(texts)
|
540
|
-
yield i, text
|
527
|
+
for i, page_file in enumerate(page_files):
|
528
|
+
result: ConversionResult = converter.convert(page_file)
|
529
|
+
md_text = result.document.export_to_markdown(
|
530
|
+
image_mode=ImageRefMode.REFERENCED
|
531
|
+
)
|
532
|
+
yield i, md_text
|
533
|
+
|
534
|
+
tmp_dir.cleanup()
|
541
535
|
|
542
536
|
def get_document_from_page(self, page: str) -> Document:
|
543
537
|
"""
|
@@ -0,0 +1,55 @@
|
|
1
|
+
import tempfile
|
2
|
+
from io import BytesIO
|
3
|
+
from pathlib import Path
|
4
|
+
from tempfile import TemporaryDirectory
|
5
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, List, Tuple, Union
|
6
|
+
|
7
|
+
try:
|
8
|
+
import pypdf
|
9
|
+
except ImportError:
|
10
|
+
if not TYPE_CHECKING:
|
11
|
+
pypdf = None
|
12
|
+
|
13
|
+
from langroid.exceptions import LangroidImportError
|
14
|
+
|
15
|
+
if pypdf is None:
|
16
|
+
raise LangroidImportError(
|
17
|
+
"pypdf", ["pypdf", "docling", "all", "pdf-parsers", "doc-chat"]
|
18
|
+
)
|
19
|
+
from pypdf import PdfReader, PdfWriter
|
20
|
+
|
21
|
+
|
22
|
+
def pdf_split_pages(
|
23
|
+
input_pdf: Union[str, Path, BytesIO, BinaryIO],
|
24
|
+
) -> Tuple[List[Path], TemporaryDirectory[Any]]:
|
25
|
+
"""Splits a PDF into individual pages in a temporary directory.
|
26
|
+
|
27
|
+
Args:
|
28
|
+
input_pdf: Input PDF file path or file-like object
|
29
|
+
max_workers: Maximum number of concurrent workers for parallel processing
|
30
|
+
|
31
|
+
Returns:
|
32
|
+
Tuple containing:
|
33
|
+
- List of paths to individual PDF pages
|
34
|
+
- Temporary directory object (caller must call cleanup())
|
35
|
+
|
36
|
+
Example:
|
37
|
+
paths, tmp_dir = split_pdf_temp("input.pdf")
|
38
|
+
# Use paths...
|
39
|
+
tmp_dir.cleanup() # Clean up temp files when done
|
40
|
+
"""
|
41
|
+
tmp_dir = tempfile.TemporaryDirectory()
|
42
|
+
reader = PdfReader(input_pdf)
|
43
|
+
paths = []
|
44
|
+
|
45
|
+
for i in range(len(reader.pages)):
|
46
|
+
writer = PdfWriter()
|
47
|
+
writer.add_page(reader.pages[i])
|
48
|
+
writer.add_metadata(reader.metadata or {})
|
49
|
+
|
50
|
+
output = Path(tmp_dir.name) / f"page_{i+1}.pdf"
|
51
|
+
with open(output, "wb") as f:
|
52
|
+
writer.write(f)
|
53
|
+
paths.append(output)
|
54
|
+
|
55
|
+
return paths, tmp_dir # Return dir object so caller can control cleanup
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "langroid"
|
3
|
-
version = "0.37.0"
|
3
|
+
version = "0.37.1"
|
4
4
|
authors = [
|
5
5
|
{name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
|
6
6
|
]
|
@@ -128,6 +128,7 @@ lancedb = [
|
|
128
128
|
|
129
129
|
docling = [
|
130
130
|
"docling<3.0.0,>=2.16.0",
|
131
|
+
"pypdf>=5.1.0", # needed to split pdf into pages, then use docling
|
131
132
|
]
|
132
133
|
|
133
134
|
pymupdf4llm = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/description_extractors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/hf_formatter.py
RENAMED
File without changes
|
{langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/llama2_formatter.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|