langroid 0.1.211__tar.gz → 0.1.213__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.1.211 → langroid-0.1.213}/PKG-INFO +19 -1
- {langroid-0.1.211 → langroid-0.1.213}/README.md +16 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/base.py +1 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/batch.py +3 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/chat_document.py +1 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/openai_assistant.py +9 -7
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/task.py +1 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/base.py +1 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/__init__.py +4 -2
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/document_parser.py +33 -0
- langroid-0.1.213/langroid/parsing/image_text.py +32 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/parser.py +7 -1
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/repo_loader.py +6 -2
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/url_loader.py +7 -2
- {langroid-0.1.211 → langroid-0.1.213}/pyproject.toml +4 -1
- {langroid-0.1.211 → langroid-0.1.213}/LICENSE +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/helpers.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/junk +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/doc_chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/neo4j/utils/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/neo4j/utils/system_message.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tool_message.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/extract_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/generator_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/run_python_code.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/sciphi_search_rag_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/agent_config.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/cachedb/base.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/embedding_models/base.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/embedding_models/clustering.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/embedding_models/models.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/config.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/openai_assistants.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/utils.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/mytypes.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/code-parsing.md +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/config.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/para_sentence_split.py +0 -0
- /langroid-0.1.211/langroid/parsing/json.py → /langroid-0.1.213/langroid/parsing/parse_json.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/search.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/spider.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/url_loader_cookies.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/urls.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/utils.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/parsing/web_search.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/dialog.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/templates.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/prompts/transforms.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/configuration.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/constants.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/docker.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/globals.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/llms/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/llms/strings.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/logging.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/output/printing.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/system.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/web/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/web/login.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/utils/web/selenium_login.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/base.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/momento.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/qdrant_cloud.py +0 -0
- {langroid-0.1.211 → langroid-0.1.213}/langroid/vector_store/qdrantdb.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.1.211
|
3
|
+
Version: 0.1.213
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
License: MIT
|
6
6
|
Author: Prasad Chalasani
|
@@ -63,6 +63,7 @@ Requires-Dist: nltk (>=3.8.1,<4.0.0)
|
|
63
63
|
Requires-Dist: onnxruntime (==1.16.1)
|
64
64
|
Requires-Dist: openai (>=1.2.3,<2.0.0)
|
65
65
|
Requires-Dist: pandas (>=2.0.3,<3.0.0)
|
66
|
+
Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
|
66
67
|
Requires-Dist: pdfplumber (>=0.10.2,<0.11.0)
|
67
68
|
Requires-Dist: pre-commit (>=3.3.2,<4.0.0)
|
68
69
|
Requires-Dist: prettytable (>=3.8.0,<4.0.0)
|
@@ -75,6 +76,7 @@ Requires-Dist: pymupdf (>=1.23.3,<2.0.0)
|
|
75
76
|
Requires-Dist: pymysql (>=1.1.0,<2.0.0) ; extra == "mysql"
|
76
77
|
Requires-Dist: pyparsing (>=3.0.9,<4.0.0)
|
77
78
|
Requires-Dist: pypdf (>=3.12.2,<4.0.0)
|
79
|
+
Requires-Dist: pytesseract (>=0.3.10,<0.4.0)
|
78
80
|
Requires-Dist: pytest-asyncio (>=0.21.1,<0.22.0)
|
79
81
|
Requires-Dist: pytest-mysql (>=2.4.2,<3.0.0) ; extra == "mysql"
|
80
82
|
Requires-Dist: pytest-postgresql (>=5.0.0,<6.0.0) ; extra == "postgres"
|
@@ -228,6 +230,22 @@ teacher_task.run()
|
|
228
230
|
<details>
|
229
231
|
<summary> <b>Click to expand</b></summary>
|
230
232
|
|
233
|
+
- **Mar 2024:**
|
234
|
+
- **0.1.212:** ImagePdfParser: support for extracting text from image-based PDFs.
|
235
|
+
(this means `DocChatAgent` will now work with image-pdfs).
|
236
|
+
- **0.1.194 - 0.1.211:** Misc fixes, improvements, and features:
|
237
|
+
- Big enhancement in RAG performance (mainly, recall) due to a [fix in Relevance
|
238
|
+
Extractor](https://github.com/langroid/langroid/releases/tag/0.1.209)
|
239
|
+
- `DocChatAgent` [context-window fixes](https://github.com/langroid/langroid/releases/tag/0.1.208)
|
240
|
+
- Anthropic/Claude3 support via Litellm
|
241
|
+
- `URLLoader`: detect file time from header when URL doesn't end with a
|
242
|
+
recognizable suffix like `.pdf`, `.docx`, etc.
|
243
|
+
- Misc lancedb integration fixes
|
244
|
+
- Auto-select embedding config based on whether `sentence_transformer` module is available.
|
245
|
+
- Slim down dependencies, make some heavy ones optional, e.g. `unstructured`,
|
246
|
+
`haystack`, `chromadb`, `mkdocs`, `huggingface-hub`, `sentence-transformers`.
|
247
|
+
- Easier top-level imports from `import langroid as lr`
|
248
|
+
- Improve JSON detection, esp from weak LLMs
|
231
249
|
- **Feb 2024:**
|
232
250
|
- **0.1.193:** Support local LLMs using Ollama's new OpenAI-Compatible server:
|
233
251
|
simply specify `chat_model="ollama/mistral"`. See [release notes](https://github.com/langroid/langroid/releases/tag/0.1.193).
|
@@ -122,6 +122,22 @@ teacher_task.run()
|
|
122
122
|
<details>
|
123
123
|
<summary> <b>Click to expand</b></summary>
|
124
124
|
|
125
|
+
- **Mar 2024:**
|
126
|
+
- **0.1.212:** ImagePdfParser: support for extracting text from image-based PDFs.
|
127
|
+
(this means `DocChatAgent` will now work with image-pdfs).
|
128
|
+
- **0.1.194 - 0.1.211:** Misc fixes, improvements, and features:
|
129
|
+
- Big enhancement in RAG performance (mainly, recall) due to a [fix in Relevance
|
130
|
+
Extractor](https://github.com/langroid/langroid/releases/tag/0.1.209)
|
131
|
+
- `DocChatAgent` [context-window fixes](https://github.com/langroid/langroid/releases/tag/0.1.208)
|
132
|
+
- Anthropic/Claude3 support via Litellm
|
133
|
+
- `URLLoader`: detect file time from header when URL doesn't end with a
|
134
|
+
recognizable suffix like `.pdf`, `.docx`, etc.
|
135
|
+
- Misc lancedb integration fixes
|
136
|
+
- Auto-select embedding config based on whether `sentence_transformer` module is available.
|
137
|
+
- Slim down dependencies, make some heavy ones optional, e.g. `unstructured`,
|
138
|
+
`haystack`, `chromadb`, `mkdocs`, `huggingface-hub`, `sentence-transformers`.
|
139
|
+
- Easier top-level imports from `import langroid as lr`
|
140
|
+
- Improve JSON detection, esp from weak LLMs
|
125
141
|
- **Feb 2024:**
|
126
142
|
- **0.1.193:** Support local LLMs using Ollama's new OpenAI-Compatible server:
|
127
143
|
simply specify `chat_model="ollama/mistral"`. See [release notes](https://github.com/langroid/langroid/releases/tag/0.1.193).
|
@@ -37,7 +37,7 @@ from langroid.language_models.base import (
|
|
37
37
|
)
|
38
38
|
from langroid.language_models.openai_gpt import OpenAIGPTConfig
|
39
39
|
from langroid.mytypes import Entity
|
40
|
-
from langroid.parsing.json import extract_top_level_json
|
40
|
+
from langroid.parsing.parse_json import extract_top_level_json
|
41
41
|
from langroid.parsing.parser import Parser, ParsingConfig
|
42
42
|
from langroid.prompts.prompts_config import PromptsConfig
|
43
43
|
from langroid.utils.configuration import settings
|
@@ -26,6 +26,7 @@ def run_batch_tasks(
|
|
26
26
|
input_map: Callable[[Any], str | ChatDocument] = lambda x: str(x),
|
27
27
|
output_map: Callable[[ChatDocument | None], Any] = lambda x: x,
|
28
28
|
sequential: bool = True,
|
29
|
+
turns: int = -1,
|
29
30
|
) -> List[Any]:
|
30
31
|
"""
|
31
32
|
Run copies of `task` async/concurrently one per item in `items` list.
|
@@ -40,6 +41,7 @@ def run_batch_tasks(
|
|
40
41
|
to final result
|
41
42
|
sequential (bool): whether to run sequentially
|
42
43
|
(e.g. some APIs such as ooba don't support concurrent requests)
|
44
|
+
turns (int): number of turns to run, -1 for infinite
|
43
45
|
|
44
46
|
Returns:
|
45
47
|
List[Any]: list of final results
|
@@ -53,7 +55,7 @@ def run_batch_tasks(
|
|
53
55
|
task_i.agent.llm.set_stream(False)
|
54
56
|
task_i.agent.config.show_stats = False
|
55
57
|
|
56
|
-
result = await task_i.run_async(input)
|
58
|
+
result = await task_i.run_async(input, turns=turns)
|
57
59
|
return output_map(result)
|
58
60
|
|
59
61
|
async def _do_all() -> List[Any]:
|
@@ -13,7 +13,7 @@ from langroid.language_models.base import (
|
|
13
13
|
)
|
14
14
|
from langroid.mytypes import DocMetaData, Document, Entity
|
15
15
|
from langroid.parsing.agent_chats import parse_message
|
16
|
-
from langroid.parsing.json import extract_top_level_json, top_level_json_field
|
16
|
+
from langroid.parsing.parse_json import extract_top_level_json, top_level_json_field
|
17
17
|
from langroid.utils.output.printing import shorten_text
|
18
18
|
|
19
19
|
|
@@ -8,7 +8,7 @@ from enum import Enum
|
|
8
8
|
from typing import Any, Dict, List, Optional, Tuple, Type, cast, no_type_check
|
9
9
|
|
10
10
|
from openai.types.beta import Assistant, Thread
|
11
|
-
from openai.types.beta.threads import Run, ThreadMessage
|
11
|
+
from openai.types.beta.threads import Message, Run
|
12
12
|
from openai.types.beta.threads.runs import RunStep
|
13
13
|
from pydantic import BaseModel
|
14
14
|
from rich import print
|
@@ -41,6 +41,8 @@ class AssistantTool(BaseModel):
|
|
41
41
|
def dct(self) -> Dict[str, Any]:
|
42
42
|
d = super().dict()
|
43
43
|
d["type"] = d["type"].value
|
44
|
+
if self.type != ToolType.FUNCTION:
|
45
|
+
d.pop("function")
|
44
46
|
return d
|
45
47
|
|
46
48
|
|
@@ -257,22 +259,22 @@ class OpenAIAssistant(ChatAgent):
|
|
257
259
|
self.llm.cache.store(assistant_key, self.assistant.id)
|
258
260
|
|
259
261
|
@staticmethod
|
260
|
-
def thread_msg_to_llm_msg(msg:
|
262
|
+
def thread_msg_to_llm_msg(msg: Message) -> LLMMessage:
|
261
263
|
"""
|
262
|
-
Convert a
|
264
|
+
Convert a Message to an LLMMessage
|
263
265
|
"""
|
264
266
|
return LLMMessage(
|
265
267
|
content=msg.content[0].text.value, # type: ignore
|
266
268
|
role=msg.role,
|
267
269
|
)
|
268
270
|
|
269
|
-
def _update_messages_hash(self, msg:
|
271
|
+
def _update_messages_hash(self, msg: Message | LLMMessage) -> None:
|
270
272
|
"""
|
271
273
|
Update the hash-state in the thread with the given message.
|
272
274
|
"""
|
273
275
|
if self.thread is None:
|
274
276
|
raise ValueError("Thread is None")
|
275
|
-
if isinstance(msg,
|
277
|
+
if isinstance(msg, Message):
|
276
278
|
llm_msg = self.thread_msg_to_llm_msg(msg)
|
277
279
|
else:
|
278
280
|
llm_msg = msg
|
@@ -491,7 +493,7 @@ class OpenAIAssistant(ChatAgent):
|
|
491
493
|
LLMMessage(
|
492
494
|
# TODO: could be image, deal with it later
|
493
495
|
content=m.content[0].text.value, # type: ignore
|
494
|
-
role=m.role,
|
496
|
+
role=Role(m.role),
|
495
497
|
)
|
496
498
|
for m in thread_msgs
|
497
499
|
]
|
@@ -646,7 +648,7 @@ class OpenAIAssistant(ChatAgent):
|
|
646
648
|
tool_outputs=tool_outputs, # type: ignore
|
647
649
|
)
|
648
650
|
|
649
|
-
def process_citations(self, thread_msg:
|
651
|
+
def process_citations(self, thread_msg: Message) -> None:
|
650
652
|
"""
|
651
653
|
Process citations in the thread message.
|
652
654
|
Modifies the thread message in-place.
|
@@ -29,7 +29,7 @@ from langroid.agent.chat_document import (
|
|
29
29
|
ChatDocument,
|
30
30
|
)
|
31
31
|
from langroid.mytypes import Entity
|
32
|
-
from langroid.parsing.json import extract_top_level_json
|
32
|
+
from langroid.parsing.parse_json import extract_top_level_json
|
33
33
|
from langroid.utils.configuration import settings
|
34
34
|
from langroid.utils.constants import DONE, NO_ANSWER, PASS, PASS_TO, SEND_TO, USER_QUIT
|
35
35
|
from langroid.utils.logging import RichFileLogger, setup_file_logger
|
@@ -14,7 +14,7 @@ from langroid.cachedb.momento_cachedb import MomentoCacheConfig
|
|
14
14
|
from langroid.cachedb.redis_cachedb import RedisCacheConfig
|
15
15
|
from langroid.mytypes import Document
|
16
16
|
from langroid.parsing.agent_chats import parse_message
|
17
|
-
from langroid.parsing.json import top_level_json_field
|
17
|
+
from langroid.parsing.parse_json import top_level_json_field
|
18
18
|
from langroid.prompts.dialog import collate_chat_history
|
19
19
|
from langroid.prompts.templates import (
|
20
20
|
EXTRACTION_PROMPT_GPT4,
|
@@ -2,7 +2,7 @@ from . import parser
|
|
2
2
|
from . import agent_chats
|
3
3
|
from . import code_parser
|
4
4
|
from . import document_parser
|
5
|
-
from . import json
|
5
|
+
from . import parse_json
|
6
6
|
from . import para_sentence_split
|
7
7
|
from . import repo_loader
|
8
8
|
from . import url_loader
|
@@ -13,12 +13,14 @@ from . import search
|
|
13
13
|
from . import web_search
|
14
14
|
from . import spider
|
15
15
|
|
16
|
+
parse_json # appease mypy
|
17
|
+
|
16
18
|
__all__ = [
|
17
19
|
"parser",
|
18
20
|
"agent_chats",
|
19
21
|
"code_parser",
|
20
22
|
"document_parser",
|
21
|
-
"json",
|
23
|
+
"parse_json.py",
|
22
24
|
"para_sentence_split",
|
23
25
|
"repo_loader",
|
24
26
|
"url_loader",
|
@@ -8,6 +8,7 @@ import fitz
|
|
8
8
|
import pdfplumber
|
9
9
|
import pypdf
|
10
10
|
import requests
|
11
|
+
from PIL import Image
|
11
12
|
|
12
13
|
from langroid.mytypes import DocMetaData, Document
|
13
14
|
from langroid.parsing.parser import Parser, ParsingConfig
|
@@ -53,6 +54,8 @@ class DocumentParser(Parser):
|
|
53
54
|
return PDFPlumberParser(source, config)
|
54
55
|
elif config.pdf.library == "unstructured":
|
55
56
|
return UnstructuredPDFParser(source, config)
|
57
|
+
elif config.pdf.library == "pdf2image":
|
58
|
+
return ImagePdfParser(source, config)
|
56
59
|
else:
|
57
60
|
raise ValueError(
|
58
61
|
f"Unsupported PDF library specified: {config.pdf.library}"
|
@@ -298,6 +301,36 @@ class PDFPlumberParser(DocumentParser):
|
|
298
301
|
return self.fix_text(page.extract_text())
|
299
302
|
|
300
303
|
|
304
|
+
class ImagePdfParser(DocumentParser):
|
305
|
+
"""
|
306
|
+
Parser for processing PDFs that are images, i.e. not "true" PDFs.
|
307
|
+
"""
|
308
|
+
|
309
|
+
def iterate_pages(
|
310
|
+
self,
|
311
|
+
) -> Generator[Tuple[int, Image], None, None]:
|
312
|
+
from pdf2image import convert_from_bytes
|
313
|
+
|
314
|
+
images = convert_from_bytes(self.doc_bytes.getvalue())
|
315
|
+
for i, image in enumerate(images):
|
316
|
+
yield i, image
|
317
|
+
|
318
|
+
def extract_text_from_page(self, page: Image) -> str:
|
319
|
+
"""
|
320
|
+
Extract text from a given `pdf2image` page.
|
321
|
+
|
322
|
+
Args:
|
323
|
+
page (Image): The PIL Image object.
|
324
|
+
|
325
|
+
Returns:
|
326
|
+
str: Extracted text from the image.
|
327
|
+
"""
|
328
|
+
import pytesseract
|
329
|
+
|
330
|
+
text = pytesseract.image_to_string(page)
|
331
|
+
return self.fix_text(text)
|
332
|
+
|
333
|
+
|
301
334
|
class UnstructuredPDFParser(DocumentParser):
|
302
335
|
"""
|
303
336
|
Parser for processing PDF files using the `unstructured` library.
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from typing import Union
|
2
|
+
|
3
|
+
import pytesseract
|
4
|
+
from pdf2image import convert_from_bytes, convert_from_path
|
5
|
+
|
6
|
+
|
7
|
+
def pdf_image_to_text(input_data: Union[str, bytes]) -> str:
|
8
|
+
"""
|
9
|
+
Converts a PDF that contains images to text using OCR.
|
10
|
+
|
11
|
+
Args:
|
12
|
+
input_data (Union[str, bytes]): The file path to the PDF or a bytes-like object
|
13
|
+
of the PDF content.
|
14
|
+
|
15
|
+
Returns:
|
16
|
+
str: The extracted text from the PDF.
|
17
|
+
"""
|
18
|
+
|
19
|
+
# Check if the input is a file path (str) or bytes, and
|
20
|
+
# convert PDF to images accordingly
|
21
|
+
if isinstance(input_data, str):
|
22
|
+
images = convert_from_path(input_data)
|
23
|
+
elif isinstance(input_data, bytes):
|
24
|
+
images = convert_from_bytes(input_data)
|
25
|
+
else:
|
26
|
+
raise ValueError("input_data must be a file path (str) or bytes-like object")
|
27
|
+
|
28
|
+
text = ""
|
29
|
+
for image in images:
|
30
|
+
text += pytesseract.image_to_string(image)
|
31
|
+
|
32
|
+
return text
|
@@ -19,7 +19,13 @@ class Splitter(str, Enum):
|
|
19
19
|
|
20
20
|
|
21
21
|
class PdfParsingConfig(BaseSettings):
|
22
|
-
library: Literal["fitz", "pdfplumber", "pypdf", "unstructured"] = "pdfplumber"
|
22
|
+
library: Literal[
|
23
|
+
"fitz",
|
24
|
+
"pdfplumber",
|
25
|
+
"pypdf",
|
26
|
+
"unstructured",
|
27
|
+
"pdf2image",
|
28
|
+
] = "pdfplumber"
|
23
29
|
|
24
30
|
|
25
31
|
class DocxParsingConfig(BaseSettings):
|
@@ -19,7 +19,7 @@ from github.Repository import Repository
|
|
19
19
|
from pydantic import BaseModel, BaseSettings, Field
|
20
20
|
|
21
21
|
from langroid.mytypes import DocMetaData, Document
|
22
|
-
from langroid.parsing.document_parser import DocumentParser
|
22
|
+
from langroid.parsing.document_parser import DocumentParser, ImagePdfParser
|
23
23
|
from langroid.parsing.parser import Parser, ParsingConfig
|
24
24
|
|
25
25
|
logger = logging.getLogger(__name__)
|
@@ -550,7 +550,11 @@ class RepoLoader:
|
|
550
550
|
file_path,
|
551
551
|
parser.config,
|
552
552
|
)
|
553
|
-
|
553
|
+
new_chunks = doc_parser.get_doc_chunks()
|
554
|
+
if len(new_chunks) == 0 and file_extension.lower() == ".pdf":
|
555
|
+
doc_parser = ImagePdfParser(file_path, parser.config)
|
556
|
+
new_chunks = doc_parser.get_doc_chunks()
|
557
|
+
docs.extend(new_chunks)
|
554
558
|
else:
|
555
559
|
with open(file_path, "r") as f:
|
556
560
|
if lines is not None:
|
@@ -12,7 +12,7 @@ from trafilatura.downloads import (
|
|
12
12
|
)
|
13
13
|
|
14
14
|
from langroid.mytypes import DocMetaData, Document
|
15
|
-
from langroid.parsing.document_parser import DocumentParser
|
15
|
+
from langroid.parsing.document_parser import DocumentParser, ImagePdfParser
|
16
16
|
from langroid.parsing.parser import Parser, ParsingConfig
|
17
17
|
|
18
18
|
logging.getLogger("trafilatura").setLevel(logging.ERROR)
|
@@ -56,7 +56,12 @@ class URLLoader:
|
|
56
56
|
url,
|
57
57
|
self.parser.config,
|
58
58
|
)
|
59
|
-
|
59
|
+
new_chunks = doc_parser.get_doc_chunks()
|
60
|
+
if len(new_chunks) == 0:
|
61
|
+
# If the document is empty, try to extract images
|
62
|
+
img_parser = ImagePdfParser(url, self.parser.config)
|
63
|
+
new_chunks = img_parser.get_doc_chunks()
|
64
|
+
docs.extend(new_chunks)
|
60
65
|
else:
|
61
66
|
# Try to detect content type and handle accordingly
|
62
67
|
headers = requests.head(url).headers
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "langroid"
|
3
|
-
version = "0.1.211"
|
3
|
+
version = "0.1.213"
|
4
4
|
description = "Harness LLMs with Multi-Agent Programming"
|
5
5
|
authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
|
6
6
|
readme = "README.md"
|
@@ -92,6 +92,8 @@ chainlit = {version = "^1.0.400", optional = true}
|
|
92
92
|
python-socketio = {version="^5.11.0", optional=true}
|
93
93
|
duckduckgo-search = "^4.4"
|
94
94
|
huggingface-hub = {version="^0.21.2", optional=true}
|
95
|
+
pdf2image = "^1.17.0"
|
96
|
+
pytesseract = "^0.3.10"
|
95
97
|
|
96
98
|
[tool.poetry.extras]
|
97
99
|
# install these using `poetry install -E [...]` where [...] is one of the extras below
|
@@ -129,6 +131,7 @@ python_version = "3.11"
|
|
129
131
|
#check_untyped_defs = "True"
|
130
132
|
disallow_untyped_defs = "True"
|
131
133
|
ignore_missing_imports = "True"
|
134
|
+
warn_unused_ignores = "False"
|
132
135
|
strict = true
|
133
136
|
exclude = [
|
134
137
|
"docs", ".venv", "venv", "examples", "examples_dev", "langroid/utils/web",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/lance_rag/query_planner_agent.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.1.211 → langroid-0.1.213}/langroid/agent/special/sql/utils/description_extractors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/hf_formatter.py
RENAMED
File without changes
|
{langroid-0.1.211 → langroid-0.1.213}/langroid/language_models/prompt_formatter/llama2_formatter.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
/langroid-0.1.211/langroid/parsing/json.py → /langroid-0.1.213/langroid/parsing/parse_json.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|