langroid 0.37.0__tar.gz → 0.37.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {langroid-0.37.0 → langroid-0.37.1}/PKG-INFO +2 -1
  2. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/document_parser.py +12 -18
  3. langroid-0.37.1/langroid/parsing/pdf_utils.py +55 -0
  4. {langroid-0.37.0 → langroid-0.37.1}/pyproject.toml +2 -1
  5. {langroid-0.37.0 → langroid-0.37.1}/.gitignore +0 -0
  6. {langroid-0.37.0 → langroid-0.37.1}/LICENSE +0 -0
  7. {langroid-0.37.0 → langroid-0.37.1}/README.md +0 -0
  8. {langroid-0.37.0 → langroid-0.37.1}/langroid/__init__.py +0 -0
  9. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/__init__.py +0 -0
  10. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/base.py +0 -0
  11. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/batch.py +0 -0
  12. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/callbacks/__init__.py +0 -0
  13. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/callbacks/chainlit.py +0 -0
  14. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/chat_agent.py +0 -0
  15. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/chat_document.py +0 -0
  16. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/openai_assistant.py +0 -0
  17. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/__init__.py +0 -0
  18. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/__init__.py +0 -0
  19. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  20. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
  21. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/tools.py +0 -0
  22. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/arangodb/utils.py +0 -0
  23. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/doc_chat_agent.py +0 -0
  24. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  25. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
  26. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  27. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  28. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  29. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/lance_tools.py +0 -0
  30. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/__init__.py +0 -0
  31. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  32. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  33. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
  34. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/neo4j/tools.py +0 -0
  35. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  36. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/retriever_agent.py +0 -0
  37. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/__init__.py +0 -0
  38. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  39. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
  40. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  41. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  42. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
  43. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/sql/utils/tools.py +0 -0
  44. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/special/table_chat_agent.py +0 -0
  45. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/task.py +0 -0
  46. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tool_message.py +0 -0
  47. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/__init__.py +0 -0
  48. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  49. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/file_tools.py +0 -0
  50. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/google_search_tool.py +0 -0
  51. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  52. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/orchestration.py +0 -0
  53. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/recipient_tool.py +0 -0
  54. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/retrieval_tool.py +0 -0
  55. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/rewind_tool.py +0 -0
  56. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
  57. {langroid-0.37.0 → langroid-0.37.1}/langroid/agent/xml_tool_message.py +0 -0
  58. {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/__init__.py +0 -0
  59. {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/base.py +0 -0
  60. {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/momento_cachedb.py +0 -0
  61. {langroid-0.37.0 → langroid-0.37.1}/langroid/cachedb/redis_cachedb.py +0 -0
  62. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/__init__.py +0 -0
  63. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/base.py +0 -0
  64. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/models.py +0 -0
  65. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/__init__.py +0 -0
  66. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  67. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  68. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  69. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  70. {langroid-0.37.0 → langroid-0.37.1}/langroid/embedding_models/remote_embeds.py +0 -0
  71. {langroid-0.37.0 → langroid-0.37.1}/langroid/exceptions.py +0 -0
  72. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/__init__.py +0 -0
  73. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/azure_openai.py +0 -0
  74. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/base.py +0 -0
  75. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/config.py +0 -0
  76. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/mock_lm.py +0 -0
  77. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/openai_gpt.py +0 -0
  78. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  79. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/base.py +0 -0
  80. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  81. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  82. {langroid-0.37.0 → langroid-0.37.1}/langroid/language_models/utils.py +0 -0
  83. {langroid-0.37.0 → langroid-0.37.1}/langroid/mytypes.py +0 -0
  84. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/__init__.py +0 -0
  85. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/agent_chats.py +0 -0
  86. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/code_parser.py +0 -0
  87. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/para_sentence_split.py +0 -0
  88. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/parse_json.py +0 -0
  89. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/parser.py +0 -0
  90. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/repo_loader.py +0 -0
  91. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/routing.py +0 -0
  92. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/search.py +0 -0
  93. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/spider.py +0 -0
  94. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/table_loader.py +0 -0
  95. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/url_loader.py +0 -0
  96. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/urls.py +0 -0
  97. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/utils.py +0 -0
  98. {langroid-0.37.0 → langroid-0.37.1}/langroid/parsing/web_search.py +0 -0
  99. {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/__init__.py +0 -0
  100. {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/dialog.py +0 -0
  101. {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/prompts_config.py +0 -0
  102. {langroid-0.37.0 → langroid-0.37.1}/langroid/prompts/templates.py +0 -0
  103. {langroid-0.37.0 → langroid-0.37.1}/langroid/py.typed +0 -0
  104. {langroid-0.37.0 → langroid-0.37.1}/langroid/pydantic_v1/__init__.py +0 -0
  105. {langroid-0.37.0 → langroid-0.37.1}/langroid/pydantic_v1/main.py +0 -0
  106. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/__init__.py +0 -0
  107. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/algorithms/__init__.py +0 -0
  108. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/algorithms/graph.py +0 -0
  109. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/configuration.py +0 -0
  110. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/constants.py +0 -0
  111. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/git_utils.py +0 -0
  112. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/globals.py +0 -0
  113. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/logging.py +0 -0
  114. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/object_registry.py +0 -0
  115. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/__init__.py +0 -0
  116. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/citations.py +0 -0
  117. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/printing.py +0 -0
  118. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/output/status.py +0 -0
  119. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/pandas_utils.py +0 -0
  120. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/pydantic_utils.py +0 -0
  121. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/system.py +0 -0
  122. {langroid-0.37.0 → langroid-0.37.1}/langroid/utils/types.py +0 -0
  123. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/__init__.py +0 -0
  124. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/base.py +0 -0
  125. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/chromadb.py +0 -0
  126. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/lancedb.py +0 -0
  127. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/meilisearch.py +0 -0
  128. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/momento.py +0 -0
  129. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/qdrantdb.py +0 -0
  130. {langroid-0.37.0 → langroid-0.37.1}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.37.0
3
+ Version: 0.37.1
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -102,6 +102,7 @@ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
102
102
  Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'doc-chat'
103
103
  Provides-Extra: docling
104
104
  Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
105
+ Requires-Dist: pypdf>=5.1.0; extra == 'docling'
105
106
  Provides-Extra: docx
106
107
  Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
107
108
  Provides-Extra: fastembed
@@ -3,12 +3,12 @@ from __future__ import annotations
3
3
  import itertools
4
4
  import logging
5
5
  import re
6
- import tempfile
7
6
  from enum import Enum
8
7
  from io import BytesIO
9
8
  from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
10
9
 
11
10
  from langroid.exceptions import LangroidImportError
11
+ from langroid.parsing.pdf_utils import pdf_split_pages
12
12
  from langroid.utils.object_registry import ObjectRegistry
13
13
 
14
14
  try:
@@ -515,29 +515,23 @@ class DoclingParser(DocumentParser):
515
515
  raise LangroidImportError(
516
516
  "docling", ["docling", "pdf-parsers", "all", "doc-chat"]
517
517
  )
518
- from docling.datamodel.document import TextItem # type: ignore
518
+
519
519
  from docling.document_converter import ( # type: ignore
520
520
  ConversionResult,
521
521
  DocumentConverter,
522
522
  )
523
+ from docling_core.types.doc import ImageRefMode # type: ignore
523
524
 
525
+ page_files, tmp_dir = pdf_split_pages(self.doc_bytes)
524
526
  converter = DocumentConverter()
525
- file_path = self.source
526
- if file_path == "bytes":
527
- with tempfile.NamedTemporaryFile(delete=False) as tmp:
528
- tmp.write(self.doc_bytes.getvalue())
529
- file_path = tmp.name
530
- result: ConversionResult = converter.convert(file_path)
531
- doc = result.document
532
- n_pages = doc.num_pages() # type: ignore
533
- for i in range(n_pages):
534
- texts = [
535
- item[0].text
536
- for item in doc.iterate_items(page_no=i + 1)
537
- if isinstance(item[0], TextItem)
538
- ]
539
- text = "\n".join(texts)
540
- yield i, text
527
+ for i, page_file in enumerate(page_files):
528
+ result: ConversionResult = converter.convert(page_file)
529
+ md_text = result.document.export_to_markdown(
530
+ image_mode=ImageRefMode.REFERENCED
531
+ )
532
+ yield i, md_text
533
+
534
+ tmp_dir.cleanup()
541
535
 
542
536
  def get_document_from_page(self, page: str) -> Document:
543
537
  """
@@ -0,0 +1,55 @@
1
+ import tempfile
2
+ from io import BytesIO
3
+ from pathlib import Path
4
+ from tempfile import TemporaryDirectory
5
+ from typing import TYPE_CHECKING, Any, BinaryIO, List, Tuple, Union
6
+
7
+ try:
8
+ import pypdf
9
+ except ImportError:
10
+ if not TYPE_CHECKING:
11
+ pypdf = None
12
+
13
+ from langroid.exceptions import LangroidImportError
14
+
15
+ if pypdf is None:
16
+ raise LangroidImportError(
17
+ "pypdf", ["pypdf", "docling", "all", "pdf-parsers", "doc-chat"]
18
+ )
19
+ from pypdf import PdfReader, PdfWriter
20
+
21
+
22
def pdf_split_pages(
    input_pdf: Union[str, Path, BytesIO, BinaryIO],
) -> Tuple[List[Path], TemporaryDirectory[Any]]:
    """Split a PDF into single-page PDF files inside a temporary directory.

    Each page of the input is written to its own file named
    ``page_<n>.pdf`` (1-based), and the input's document metadata, if any,
    is copied onto every single-page file.

    Args:
        input_pdf: Input PDF file path or binary file-like object.

    Returns:
        Tuple containing:
            - List of paths to the single-page PDF files, in page order
            - Temporary directory object that holds those files
              (caller must call ``cleanup()`` when done)

    Raises:
        Exception: any error raised while reading the input or writing a
            page is re-raised unchanged, after the temporary directory
            has been removed.

    Example:
        paths, tmp_dir = pdf_split_pages("input.pdf")
        try:
            ...  # use paths
        finally:
            tmp_dir.cleanup()  # clean up temp files when done
    """
    tmp_dir = tempfile.TemporaryDirectory()
    try:
        reader = PdfReader(input_pdf)
        # Document-level metadata is identical for every page; read it once.
        metadata = reader.metadata or {}
        paths: List[Path] = []

        for page_no, page in enumerate(reader.pages, start=1):
            writer = PdfWriter()
            writer.add_page(page)
            writer.add_metadata(metadata)

            output = Path(tmp_dir.name) / f"page_{page_no}.pdf"
            with open(output, "wb") as f:
                writer.write(f)
            paths.append(output)
    except BaseException:
        # On failure the caller never receives tmp_dir, so nobody else can
        # clean it up; remove it here before propagating the error.
        tmp_dir.cleanup()
        raise

    return paths, tmp_dir  # Return dir object so caller can control cleanup
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.37.0"
3
+ version = "0.37.1"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
@@ -128,6 +128,7 @@ lancedb = [
128
128
 
129
129
  docling = [
130
130
  "docling<3.0.0,>=2.16.0",
131
+ "pypdf>=5.1.0", # needed to split pdf into pages, then use docling
131
132
  ]
132
133
 
133
134
  pymupdf4llm = [
File without changes
File without changes
File without changes
File without changes
File without changes