langroid 0.1.214__tar.gz → 0.1.217__tar.gz
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {langroid-0.1.214 → langroid-0.1.217}/PKG-INFO +7 -2
- {langroid-0.1.214 → langroid-0.1.217}/README.md +5 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/__init__.py +2 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/base.py +2 -1
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/batch.py +112 -37
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/chat_agent.py +11 -9
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/openai_assistant.py +0 -1
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/doc_chat_agent.py +75 -27
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_doc_chat_agent.py +4 -4
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/csv_kg_chat.py +2 -3
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/relevance_extractor_agent.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/retriever_agent.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/sql_chat_agent.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/populate_metadata.py +3 -3
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/table_chat_agent.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/duckduckgo_search_tool.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/google_search_tool.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/metaphor_search_tool.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/recipient_tool.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/remote_embeds.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/prompt_formatter/hf_formatter.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/utils.py +5 -2
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/output/__init__.py +7 -2
- langroid-0.1.217/langroid/utils/output/status.py +33 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/pandas_utils.py +5 -3
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/chromadb.py +8 -2
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/lancedb.py +5 -2
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/momento.py +1 -0
- {langroid-0.1.214 → langroid-0.1.217}/pyproject.toml +2 -2
- {langroid-0.1.214 → langroid-0.1.217}/LICENSE +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/chat_document.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/helpers.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/junk +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/utils/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/utils/system_message.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/task.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tool_message.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/extract_tool.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/generator_tool.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/run_python_code.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/sciphi_search_rag_tool.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/agent_config.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/cachedb/base.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/base.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/clustering.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/models.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/base.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/config.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/openai_assistants.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/language_models/utils.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/mytypes.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/code-parsing.md +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/config.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/document_parser.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/image_text.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/parser.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/search.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/spider.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/url_loader_cookies.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/urls.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/parsing/web_search.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/dialog.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/templates.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/prompts/transforms.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/configuration.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/constants.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/docker.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/globals.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/llms/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/llms/strings.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/logging.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/output/printing.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/system.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/web/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/utils/web/login.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/base.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/qdrant_cloud.py +0 -0
- {langroid-0.1.214 → langroid-0.1.217}/langroid/vector_store/qdrantdb.py +0 -0
{langroid-0.1.214 → langroid-0.1.217}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.214
+Version: 0.1.217
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -26,7 +26,7 @@ Requires-Dist: agent-search (>=0.0.7,<0.0.8) ; extra == "sciphi"
 Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
 Requires-Dist: async-generator (>=1.10,<2.0)
 Requires-Dist: autopep8 (>=2.0.2,<3.0.0)
-Requires-Dist: black[jupyter] (>=
+Requires-Dist: black[jupyter] (>=24.3.0,<25.0.0)
 Requires-Dist: bs4 (>=0.0.1,<0.0.2)
 Requires-Dist: chainlit (>=1.0.400,<2.0.0) ; extra == "chainlit"
 Requires-Dist: chromadb (>=0.4.21,<=0.4.23) ; extra == "chromadb"
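Note: the `black` bump to 24.x likely explains several formatting-only hunks further down, since black's 2024 stable style parenthesizes long type annotations and keeps nested conditional expressions on one line. An illustrative before/after, mirroring the `tool_ids` hunks below (the `tool_ids` local name is shorthand for the attribute assignment in the diff):

    # 0.1.214 (black 23.x style): nested conditional split across lines
    tool_ids = (
        []
        if isinstance(message, str)
        else message.metadata.tool_ids
        if message is not None
        else []
    )

    # 0.1.217 (black 24.x style): trailing conditional kept on one line
    tool_ids = (
        []
        if isinstance(message, str)
        else message.metadata.tool_ids if message is not None else []
    )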
@@ -232,6 +232,11 @@ teacher_task.run()
 <summary> <b>Click to expand</b></summary>
 
 - **Mar 2024:**
+  - **0.1.216:** Improvements to allow concurrent runs of `DocChatAgent`, see the
+    [`test_doc_chat_agent.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_doc_chat_agent.py)
+    in particular the `test_doc_chat_batch()`;
+    New task run utility: [`run_batch_task_gen`](https://github.com/langroid/langroid/blob/main/langroid/agent/batch.py)
+    where a task generator can be specified, to generate one task per input.
   - **0.1.212:** ImagePdfParser: support for extracting text from image-based PDFs.
     (this means `DocChatAgent` will now work with image-pdfs).
   - **0.1.194 - 0.1.211:** Misc fixes, improvements, and features:
{langroid-0.1.214 → langroid-0.1.217}/README.md

@@ -123,6 +123,11 @@ teacher_task.run()
 <summary> <b>Click to expand</b></summary>
 
 - **Mar 2024:**
+  - **0.1.216:** Improvements to allow concurrent runs of `DocChatAgent`, see the
+    [`test_doc_chat_agent.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_doc_chat_agent.py)
+    in particular the `test_doc_chat_batch()`;
+    New task run utility: [`run_batch_task_gen`](https://github.com/langroid/langroid/blob/main/langroid/agent/batch.py)
+    where a task generator can be specified, to generate one task per input.
   - **0.1.212:** ImagePdfParser: support for extracting text from image-based PDFs.
     (this means `DocChatAgent` will now work with image-pdfs).
   - **0.1.194 - 0.1.211:** Misc fixes, improvements, and features:
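A sketch of the concurrent-DocChatAgent pattern this release note describes (values are illustrative, document ingestion is omitted, and the `Task` keyword arguments are assumed from langroid's task API; see the linked test for the real setup):

    from langroid.agent.batch import run_batch_tasks
    from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig
    from langroid.agent.task import Task

    agent = DocChatAgent(DocChatAgentConfig())  # plus doc ingestion, omitted here
    task = Task(agent, interactive=False, single_round=True)
    # one cloned task per question, run concurrently
    answers = run_batch_tasks(task, ["question 1", "question 2"])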
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/__init__.py

@@ -13,6 +13,7 @@ from . import base
 from . import chat_document
 from . import chat_agent
 from . import task
+from . import batch
 from . import tool_message
 from . import tools
 from . import special

@@ -32,6 +33,7 @@ __all__ = [
     "chat_document",
     "chat_agent",
     "task",
+    "batch",
     "tool_message",
     "tools",
     "special",
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/base.py

@@ -42,6 +42,7 @@ from langroid.parsing.parser import Parser, ParsingConfig
 from langroid.prompts.prompts_config import PromptsConfig
 from langroid.utils.configuration import settings
 from langroid.utils.constants import NO_ANSWER
+from langroid.utils.output import status
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
 
 console = Console(quiet=settings.quiet)

@@ -517,7 +518,7 @@ class Agent(ABC):
         with ExitStack() as stack:  # for conditionally using rich spinner
             if not self.llm.get_stream():
                 # show rich spinner only if not streaming!
-                cm =
+                cm = status("LLM responding to message...")
                 stack.enter_context(cm)
             output_len = self.config.llm.max_output_tokens
             if (
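Context for the `status(...)` calls introduced throughout this release: the helper lives in the new file `langroid/utils/output/status.py` (+33 lines, listed above). The sketch below is an assumption inferred from its call sites (`status(msg)` and `status(msg, log_if_quiet=False)`), not the actual implementation: a context manager that shows a rich spinner normally, and in quiet mode shows nothing, optionally logging the message instead.

    import logging
    from contextlib import nullcontext
    from typing import Any

    from rich.console import Console

    from langroid.utils.configuration import settings

    console = Console()
    logger = logging.getLogger(__name__)


    def status(msg: str, log_if_quiet: bool = True) -> Any:
        # hypothetical sketch: rich spinner normally; in quiet mode,
        # no spinner, optionally log the message instead
        if settings.quiet:
            if log_if_quiet:
                logger.info(msg)
            return nullcontext()
        return console.status(msg)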
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/batch.py

@@ -1,84 +1,159 @@
 import asyncio
 import copy
 import inspect
-from typing import Any, Callable, Coroutine, List
+from typing import Any, Callable, Coroutine, Iterable, List, Optional, TypeVar
 
 from dotenv import load_dotenv
-from rich.console import Console
 
 from langroid.agent.base import Agent
 from langroid.agent.chat_document import ChatDocument
 from langroid.agent.task import Task
-from langroid.utils
+from langroid.parsing.utils import batched
+from langroid.utils.configuration import quiet_mode
 from langroid.utils.logging import setup_colored_logging
-from langroid.utils.output
-
-console = Console(quiet=settings.quiet)
+from langroid.utils.output import SuppressLoggerWarnings, status
 
 setup_colored_logging()
 
 load_dotenv()
 
+T = TypeVar("T")
+U = TypeVar("U")
 
-
-
-
-
-
+
+def run_batch_task_gen(
+    gen_task: Callable[[int], Task],
+    items: list[T],
+    input_map: Callable[[T], str | ChatDocument] = lambda x: str(x),
+    output_map: Callable[[ChatDocument | None], U] = lambda x: x,  # type: ignore
     sequential: bool = True,
+    batch_size: Optional[int] = None,
     turns: int = -1,
-
+    message: Optional[str] = None,
+    handle_exceptions: bool = False,
+) -> list[U]:
     """
-
+    Generate and run copies of a task async/concurrently one per item in `items` list.
     For each item, apply `input_map` to get the initial message to process.
     For each result, apply `output_map` to get the final result.
     Args:
-
-        items (
-        input_map (Callable[[
+        gen_task (Callable[[int], Task]): generates the tasks to run
+        items (list[T]): list of items to process
+        input_map (Callable[[T], str|ChatDocument]): function to map item to
             initial message to process
-        output_map (Callable[[ChatDocument|str],
+        output_map (Callable[[ChatDocument|str], U]): function to map result
             to final result
         sequential (bool): whether to run sequentially
            (e.g. some APIs such as ooba don't support concurrent requests)
+        batch_size (Optional[int]): The number of tasks to run at a time,
+            if None, unbatched
        turns (int): number of turns to run, -1 for infinite
+        message (Optional[str]): optionally overrides the console status messages
+        handle_exceptions: bool: Whether to replace exceptions with outputs of None
 
     Returns:
-
+        list[Any]: list of final results
     """
-
     inputs = [input_map(item) for item in items]
 
-    async def _do_task(input: str | ChatDocument, i: int) ->
-        task_i =
+    async def _do_task(input: str | ChatDocument, i: int) -> Optional[ChatDocument]:
+        task_i = gen_task(i)
         if task_i.agent.llm is not None:
             task_i.agent.llm.set_stream(False)
         task_i.agent.config.show_stats = False
 
         result = await task_i.run_async(input, turns=turns)
-        return
+        return result
 
-    async def _do_all(
-
-
-
-
-
-
-
-
-
+    async def _do_all(
+        inputs: Iterable[str | ChatDocument], start_idx: int = 0
+    ) -> list[U]:
+        results: list[Optional[ChatDocument]] = []
+        if sequential:
+            for i, input in enumerate(inputs):
+                try:
+                    result = await _do_task(input, i + start_idx)
+                except BaseException as e:
+                    if handle_exceptions:
+                        result = None
+                    else:
+                        raise e
+                results.append(result)
+        else:
+            results_with_exceptions = await asyncio.gather(
+                *(_do_task(input, i + start_idx) for i, input in enumerate(inputs)),
+                return_exceptions=handle_exceptions,
             )
 
-
+            results = [
+                r if not isinstance(r, BaseException) else None
+                for r in results_with_exceptions
+            ]
 
-
-
-
+        return list(map(output_map, results))
+
+    results: List[U] = []
+    if batch_size is None:
+        msg = message or f"[bold green]Running {len(items)} tasks:"
+
+        with status(msg), SuppressLoggerWarnings():
+            results = asyncio.run(_do_all(inputs))
+    else:
+        batches = batched(inputs, batch_size)
+
+        for batch in batches:
+            start_idx = len(results)
+            complete_str = f", {start_idx} complete" if start_idx > 0 else ""
+            msg = message or f"[bold green]Running {len(items)} tasks{complete_str}:"
+
+            with status(msg), SuppressLoggerWarnings():
+                results.extend(asyncio.run(_do_all(batch, start_idx=start_idx)))
 
     return results
 
 
+def run_batch_tasks(
+    task: Task,
+    items: list[T],
+    input_map: Callable[[T], str | ChatDocument] = lambda x: str(x),
+    output_map: Callable[[ChatDocument | None], U] = lambda x: x,  # type: ignore
+    sequential: bool = True,
+    batch_size: Optional[int] = None,
+    turns: int = -1,
+) -> List[U]:
+    """
+    Run copies of `task` async/concurrently one per item in `items` list.
+    For each item, apply `input_map` to get the initial message to process.
+    For each result, apply `output_map` to get the final result.
+    Args:
+        task (Task): task to run
+        items (list[T]): list of items to process
+        input_map (Callable[[T], str|ChatDocument]): function to map item to
+            initial message to process
+        output_map (Callable[[ChatDocument|str], U]): function to map result
+            to final result
+        sequential (bool): whether to run sequentially
+            (e.g. some APIs such as ooba don't support concurrent requests)
+        batch_size (Optional[int]): The number of tasks to run at a time,
+            if None, unbatched
+        turns (int): number of turns to run, -1 for infinite
+
+    Returns:
+        list[Any]: list of final results
+    """
+    message = f"[bold green]Running {len(items)} copies of {task.name}..."
+    return run_batch_task_gen(
+        lambda i: task.clone(i),
+        items,
+        input_map,
+        output_map,
+        sequential,
+        batch_size,
+        turns,
+        message,
+    )
+
+
 def run_batch_agent_method(
     agent: Agent,
     method: Callable[

@@ -149,7 +224,7 @@ def run_batch_agent_method(
     )
 
     n = len(items)
-    with
+    with status(f"[bold green]Running {n} copies of {agent_name}..."):
         results = asyncio.run(_do_all())
 
     return results
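A usage sketch for the generator-based entry point above (illustrative; `task` is assumed to be an already-constructed, non-interactive langroid `Task`). `gen_task` receives the item index, so each input can get its own freshly-configured task, and `handle_exceptions=True` turns per-task failures into `None` results instead of aborting the whole batch.

    from langroid.agent.batch import run_batch_task_gen

    questions = ["What is 2+2?", "Capital of France?", "Largest planet?"]

    results = run_batch_task_gen(
        gen_task=lambda i: task.clone(i),  # fresh task copy per input index
        items=questions,
        batch_size=2,  # process inputs in groups of 2 per asyncio.run() round
        handle_exceptions=True,  # a failed task yields None instead of raising
    )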
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/chat_agent.py

@@ -20,8 +20,9 @@ from langroid.language_models.base import (
 )
 from langroid.language_models.openai_gpt import OpenAIGPT
 from langroid.utils.configuration import settings
+from langroid.utils.output import status
 
-console = Console(
+console = Console()
 
 logger = logging.getLogger(__name__)
 
@@ -154,6 +155,8 @@ class ChatAgent(Agent):
         new_agent.llm_functions_handled = self.llm_functions_handled
         new_agent.llm_functions_usable = self.llm_functions_usable
         new_agent.llm_function_force = self.llm_function_force
+        # Caution - we are copying the vector-db, maybe we don't always want this?
+        new_agent.vecdb = self.vecdb
         return new_agent
 
     def _fn_call_available(self) -> bool:
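One consequence of the change above: clones share the parent's vector-db object by reference (per the in-code caution), rather than each getting a copy. An illustrative sketch, assuming `clone` here is the agent-cloning method shown in this hunk and takes an index as in `task.clone(i)`:

    clone_a = agent.clone(1)
    clone_b = agent.clone(2)
    # all three point at the same underlying vector store
    assert clone_a.vecdb is agent.vecdb and clone_b.vecdb is agent.vecdb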
@@ -490,9 +493,7 @@ class ChatAgent(Agent):
         response.metadata.tool_ids = (
             []
             if isinstance(message, str)
-            else message.metadata.tool_ids
-            if message is not None
-            else []
+            else message.metadata.tool_ids if message is not None else []
         )
         return response
 
@@ -515,9 +516,7 @@ class ChatAgent(Agent):
         response.metadata.tool_ids = (
             []
             if isinstance(message, str)
-            else message.metadata.tool_ids
-            if message is not None
-            else []
+            else message.metadata.tool_ids if message is not None else []
         )
         return response
 
@@ -682,9 +681,12 @@ class ChatAgent(Agent):
             streamer = self.callbacks.start_llm_stream()
             self.llm.config.streamer = streamer
         with ExitStack() as stack:  # for conditionally using rich spinner
-            if not self.llm.get_stream()
+            if not self.llm.get_stream():
                 # show rich spinner only if not streaming!
-                cm =
+                cm = status(
+                    "LLM responding to messages...",
+                    log_if_quiet=False,
+                )
                 stack.enter_context(cm)
             if self.llm.get_stream() and not settings.quiet:
                 console.print(f"[green]{self.indent}", end="")
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/doc_chat_agent.py

@@ -12,14 +12,15 @@ langroid with the [hf-embeddings] extra, e.g.:
 pip install "langroid[hf-embeddings]"
 
 """
+
 import logging
 from contextlib import ExitStack
+from functools import cache
 from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check
 
+import nest_asyncio
 import numpy as np
 import pandas as pd
-from rich import print
-from rich.console import Console
 from rich.prompt import Prompt
 
 from langroid.agent.batch import run_batch_tasks
@@ -49,14 +50,18 @@ from langroid.prompts.prompts_config import PromptsConfig
 from langroid.prompts.templates import SUMMARY_ANSWER_PROMPT_GPT4
 from langroid.utils.configuration import settings
 from langroid.utils.constants import NO_ANSWER
-from langroid.utils.output
+from langroid.utils.output import show_if_debug, status
 from langroid.utils.pydantic_utils import dataframe_to_documents, extract_fields
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
 from langroid.vector_store.lancedb import LanceDBConfig
 
-logger = logging.getLogger(__name__)
 
-
+@cache
+def apply_nest_asyncio() -> None:
+    nest_asyncio.apply()
+
+
+logger = logging.getLogger(__name__)
 
 DEFAULT_DOC_CHAT_INSTRUCTIONS = """
 Your task is to answer questions about various documents.
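The `@cache` on `apply_nest_asyncio` is a run-once idiom: `functools.cache` memoizes the zero-argument call, so `nest_asyncio.apply()` executes only on the first invocation and every later call is a cache hit. A self-contained illustration of the idiom (not langroid code):

    from functools import cache

    @cache
    def init_once() -> None:
        print("applying patch")  # runs only on the first call

    init_once()  # prints
    init_once()  # cache hit: the body is skipped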
@@ -248,10 +253,9 @@ class DocChatAgent(ChatAgent):
     def ingest_doc_paths(
         self,
         paths: List[str],
-        metadata: List[Dict[str, Any]]
-        | Dict[str, Any]
-        | DocMetaData
-        | List[DocMetaData] = [],
+        metadata: (
+            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
+        ) = [],
     ) -> List[Document]:
         """Split, ingest docs from specified paths,
         do not add these to config.doc_paths.
@@ -272,9 +276,11 @@ class DocChatAgent(ChatAgent):
         ):
             if isinstance(metadata, list):
                 path2meta = {
-                    p: m
-                    if isinstance(m, dict)
-                    else (isinstance(m, DocMetaData) and m.dict())
+                    p: (
+                        m
+                        if isinstance(m, dict)
+                        else (isinstance(m, DocMetaData) and m.dict())
+                    )  # appease mypy
                     for p, m in zip(all_paths, metadata)
                 }
             elif isinstance(metadata, dict):
@@ -322,10 +328,9 @@ class DocChatAgent(ChatAgent):
         self,
         docs: List[Document],
         split: bool = True,
-        metadata: List[Dict[str, Any]]
-        | Dict[str, Any]
-        | DocMetaData
-        | List[DocMetaData] = [],
+        metadata: (
+            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
+        ) = [],
     ) -> int:
         """
         Chunk docs into pieces, map each chunk to vec-embedding, store in vec-db
@@ -623,7 +628,7 @@ class DocChatAgent(ChatAgent):
             query_str = query_str[1:] if query_str is not None else None
             if self.llm is None:
                 raise ValueError("LLM not set")
-            with StreamingIfAllowed(self.llm):
+            with StreamingIfAllowed(self.llm, self.llm.get_stream()):
                 response = super().llm_response(query_str)
             if query_str is not None:
                 self.update_dialog(
@@ -649,6 +654,49 @@ class DocChatAgent(ChatAgent):
             ),
         )
 
+    async def llm_response_async(
+        self,
+        query: None | str | ChatDocument = None,
+    ) -> Optional[ChatDocument]:
+        apply_nest_asyncio()
+        if not self.llm_can_respond(query):
+            return None
+        query_str: str | None
+        if isinstance(query, ChatDocument):
+            query_str = query.content
+        else:
+            query_str = query
+        if query_str is None or query_str.startswith("!"):
+            # direct query to LLM
+            query_str = query_str[1:] if query_str is not None else None
+            if self.llm is None:
+                raise ValueError("LLM not set")
+            with StreamingIfAllowed(self.llm, self.llm.get_stream()):
+                response = await super().llm_response_async(query_str)
+            if query_str is not None:
+                self.update_dialog(
+                    query_str, "" if response is None else response.content
+                )
+            return response
+        if query_str == "":
+            return None
+        elif query_str == "?" and self.response is not None:
+            return self.justify_response()
+        elif (query_str.startswith(("summar", "?")) and self.response is None) or (
+            query_str == "??"
+        ):
+            return self.summarize_docs()
+        else:
+            self.callbacks.show_start_response(entity="llm")
+            response = self.answer_from_docs(query_str)
+            return ChatDocument(
+                content=response.content,
+                metadata=ChatDocMetaData(
+                    source=response.metadata.source,
+                    sender=Entity.LLM,
+                ),
+            )
+
     @staticmethod
     def doc_string(docs: List[Document]) -> str:
         """
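The new `llm_response_async` mirrors the synchronous `llm_response` and is what lets the batch utilities await many `DocChatAgent` responses concurrently. A minimal call sketch (assumes `agent` is an already-configured `DocChatAgent` with documents ingested):

    import asyncio

    response = asyncio.run(agent.llm_response_async("What are the key findings?"))
    print(response.content if response is not None else "no response")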
@@ -735,7 +783,7 @@ class DocChatAgent(ChatAgent):
     def llm_hypothetical_answer(self, query: str) -> str:
         if self.llm is None:
             raise ValueError("LLM not set")
-        with
+        with status("[cyan]LLM generating hypothetical answer..."):
             with StreamingIfAllowed(self.llm, False):
                 # TODO: provide an easy way to
                 # Adjust this prompt depending on context.
@@ -755,7 +803,7 @@ class DocChatAgent(ChatAgent):
     def llm_rephrase_query(self, query: str) -> List[str]:
         if self.llm is None:
             raise ValueError("LLM not set")
-        with
+        with status("[cyan]LLM generating rephrases of query..."):
             with StreamingIfAllowed(self.llm, False):
                 rephrases = self.llm_response_forget(
                     f"""
@@ -771,7 +819,7 @@ class DocChatAgent(ChatAgent):
     ) -> List[Tuple[Document, float]]:
         # find similar docs using bm25 similarity:
         # these may sometimes be more likely to contain a relevant verbatim extract
-        with
+        with status("[cyan]Searching for similar chunks using bm25..."):
             if self.chunked_docs is None or len(self.chunked_docs) == 0:
                 logger.warning("No chunked docs; cannot use bm25-similarity")
                 return []
@@ -789,7 +837,7 @@ class DocChatAgent(ChatAgent):
     def get_fuzzy_matches(self, query: str, multiple: int) -> List[Document]:
         # find similar docs using fuzzy matching:
         # these may sometimes be more likely to contain a relevant verbatim extract
-        with
+        with status("[cyan]Finding fuzzy matches in chunks..."):
             if self.chunked_docs is None:
                 logger.warning("No chunked docs; cannot use fuzzy matching")
                 return []
@@ -809,7 +857,7 @@ class DocChatAgent(ChatAgent):
     def rerank_with_cross_encoder(
         self, query: str, passages: List[Document]
     ) -> List[Document]:
-        with
+        with status("[cyan]Re-ranking retrieved chunks using cross-encoder..."):
             try:
                 from sentence_transformers import CrossEncoder
             except ImportError:
@@ -954,7 +1002,7 @@ class DocChatAgent(ChatAgent):
         # Note: for dynamic filtering based on a query, users can
         # use the `temp_update` context-manager to pass in a `filter` to self.config,
         # e.g.:
-        # with temp_update(self.config, filter
+        # with temp_update(self.config, {"filter": "metadata.source=='source1'"}):
         #     docs_scores = self.get_semantic_search_results(query, k=k)
         # This avoids having pass the `filter` argument to every function call
         # upstream of this one.
@@ -1002,7 +1050,7 @@ class DocChatAgent(ChatAgent):
         if self.vecdb is None:
             raise ValueError("VecDB not set")
 
-        with
+        with status("[cyan]Searching VecDB for relevant doc passages..."):
             docs_and_scores: List[Tuple[Document, float]] = []
             for q in [query] + query_proxies:
                 docs_and_scores += self.get_semantic_search_results(
|
|
1078
1126
|
# Regardless of whether we are in conversation mode or not,
|
1079
1127
|
# for relevant doc/chunk extraction, we must convert the query
|
1080
1128
|
# to a standalone query to get more relevant results.
|
1081
|
-
with
|
1129
|
+
with status("[cyan]Converting to stand-alone query...[/cyan]"):
|
1082
1130
|
with StreamingIfAllowed(self.llm, False):
|
1083
1131
|
query = self.llm.followup_to_standalone(self.dialog, query)
|
1084
1132
|
print(f"[orange2]New query: {query}")
|
@@ -1097,7 +1145,7 @@ class DocChatAgent(ChatAgent):
         if len(passages) == 0:
             return query, []
 
-        with
+        with status("[cyan]LLM Extracting verbatim passages..."):
             with StreamingIfAllowed(self.llm, False):
                 # these are async calls, one per passage; turn off streaming
                 extracts = self.get_verbatim_extracts(query, passages)
@@ -1198,7 +1246,7 @@ class DocChatAgent(ChatAgent):
         cm = (
             StreamingIfAllowed(self.llm)
             if settings.stream
-            else (
+            else (status("LLM Generating final answer..."))
         )
         stack.enter_context(cm)  # type: ignore
         response = self.get_summary_answer(query, extracts)
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/lance_doc_chat_agent.py

@@ -9,6 +9,7 @@ For usage see:
 - example script `examples/docqa/lance_rag.py`.
 
 """
+
 import json
 import logging
 from typing import Any, Dict, List, Tuple
@@ -162,10 +163,9 @@ class LanceDocChatAgent(DocChatAgent):
         self,
         docs: List[Document],
         split: bool = True,
-        metadata: List[Dict[str, Any]]
-        | Dict[str, Any]
-        | DocMetaData
-        | List[DocMetaData] = [],
+        metadata: (
+            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
+        ) = [],
     ) -> int:
         n = super().ingest_docs(docs, split, metadata)
         tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/neo4j/csv_kg_chat.py

@@ -2,7 +2,6 @@ from typing import List, Optional
 
 import pandas as pd
 import typer
-from rich.console import Console
 
 from langroid.agent.special.neo4j.neo4j_chat_agent import (
     Neo4jChatAgent,

@@ -11,9 +10,9 @@ from langroid.agent.special.neo4j.neo4j_chat_agent import (
 from langroid.agent.tool_message import ToolMessage
 from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
 from langroid.parsing.table_loader import read_tabular_data
+from langroid.utils.output import status
 from langroid.vector_store.base import VectorStoreConfig
 
-console = Console()
 app = typer.Typer()
 
 
@@ -159,7 +158,7 @@ class CSVGraphAgent(Neo4jChatAgent):
         Returns:
             str: A string indicating the success or failure of the operation.
         """
-        with
+        with status("[cyan]Generating graph database..."):
             if self.df is not None and hasattr(self.df, "iterrows"):
                 for counter, (index, row) in enumerate(self.df.iterrows()):
                     row_dict = row.to_dict()
{langroid-0.1.214 → langroid-0.1.217}/langroid/agent/special/sql/utils/populate_metadata.py

@@ -60,9 +60,9 @@ def populate_metadata(
         Dict: A dictionary containing populated metadata information.
     """
     # Fetch basic metadata info using available tools
-    db_info: Dict[
-        str, Dict[str, Union[str, Dict[str, str]]]
-    ] = populate_metadata_with_schema_tools(metadata=metadata, info=info)
+    db_info: Dict[str, Dict[str, Union[str, Dict[str, str]]]] = (
+        populate_metadata_with_schema_tools(metadata=metadata, info=info)
+    )
 
     # Iterate over tables to update column metadata
     for table_name in db_info.keys():