langroid 0.1.196__tar.gz → 0.1.198__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.1.196 → langroid-0.1.198}/PKG-INFO +6 -5
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_agent.py +10 -2
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_document.py +1 -1
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/doc_chat_agent.py +2 -2
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/task.py +7 -3
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tool_message.py +31 -6
- {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/models.py +53 -14
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/hf_formatter.py +28 -2
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/document_parser.py +34 -53
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/json.py +59 -2
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/parser.py +1 -3
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/pydantic_utils.py +47 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/system.py +35 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/chromadb.py +10 -4
- {langroid-0.1.196 → langroid-0.1.198}/pyproject.toml +7 -5
- {langroid-0.1.196 → langroid-0.1.198}/LICENSE +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/README.md +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/batch.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/helpers.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/junk +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/lance_tools.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/utils/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/neo4j/utils/system_message.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/extract_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/generator_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/run_python_code.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/sciphi_search_rag_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/agent_config.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/clustering.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/config.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/openai_assistants.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/utils.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/mytypes.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/code-parsing.md +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/config.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/search.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/spider.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/url_loader_cookies.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/urls.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/utils.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/web_search.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/chat-gpt4-system-prompt.md +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/dialog.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/templates.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/prompts/transforms.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/configuration.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/constants.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/docker.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/globals.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/llms/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/llms/strings.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/logging.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/output/printing.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/login.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/utils/web/selenium_login.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/base.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/momento.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/qdrant_cloud.py +0 -0
- {langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/qdrantdb.py +0 -0
{langroid-0.1.196 → langroid-0.1.198}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.196
+Version: 0.1.198
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -18,20 +18,21 @@ Provides-Extra: mysql
 Provides-Extra: neo4j
 Provides-Extra: postgres
 Provides-Extra: sciphi
+Provides-Extra: transformers
+Provides-Extra: unstructured
 Requires-Dist: agent-search (>=0.0.7,<0.0.8) ; extra == "sciphi"
 Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
 Requires-Dist: async-generator (>=1.10,<2.0)
 Requires-Dist: autopep8 (>=2.0.2,<3.0.0)
 Requires-Dist: black[jupyter] (>=23.3.0,<24.0.0)
 Requires-Dist: bs4 (>=0.0.1,<0.0.2)
-Requires-Dist: chainlit (>=1.0.…
-Requires-Dist: chromadb (…
+Requires-Dist: chainlit (>=1.0.301,<2.0.0) ; extra == "chainlit"
+Requires-Dist: chromadb (>=0.4.21,<=0.4.23)
 Requires-Dist: colorlog (>=6.7.0,<7.0.0)
 Requires-Dist: docstring-parser (>=0.15,<0.16)
 Requires-Dist: duckduckgo-search (>=4.4,<5.0)
 Requires-Dist: faker (>=18.9.0,<19.0.0)
 Requires-Dist: fakeredis (>=2.12.1,<3.0.0)
-Requires-Dist: farm-haystack[file-conversion,ocr,pdf,preprocessing] (>=1.21.1,<2.0.0)
 Requires-Dist: fire (>=0.5.0,<0.6.0)
 Requires-Dist: flake8 (>=6.0.0,<7.0.0)
 Requires-Dist: google-api-python-client (>=2.95.0,<3.0.0)
@@ -95,7 +96,7 @@ Requires-Dist: trafilatura (>=1.5.0,<2.0.0)
 Requires-Dist: typer (>=0.9.0,<0.10.0)
 Requires-Dist: types-redis (>=4.5.5.2,<5.0.0.0)
 Requires-Dist: types-requests (>=2.31.0.1,<3.0.0.0)
-Requires-Dist: unstructured[docx,pdf,pptx] (>=0.10.16,<0.10.18)
+Requires-Dist: unstructured[docx,pdf,pptx] (>=0.10.16,<0.10.18) ; extra == "unstructured"
 Requires-Dist: wget (>=3.2,<4.0)
 Description-Content-Type: text/markdown
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/chat_agent.py
RENAMED

@@ -225,14 +225,22 @@ class ChatAgent(Agent):
         enabled_classes: List[Type[ToolMessage]] = list(self.llm_tools_map.values())
         if len(enabled_classes) == 0:
             return "You can ask questions in natural language."
-
         json_instructions = "\n\n".join(
             [
-                msg_cls.json_instructions()
+                msg_cls.json_instructions(tool=self.config.use_tools)
                 for _, msg_cls in enumerate(enabled_classes)
                 if msg_cls.default_value("request") in self.llm_tools_usable
             ]
         )
+        # if any of the enabled classes has json_group_instructions, then use that,
+        # else fall back to ToolMessage.json_group_instructions
+        for msg_cls in enabled_classes:
+            if hasattr(msg_cls, "json_group_instructions") and callable(
+                getattr(msg_cls, "json_group_instructions")
+            ):
+                return msg_cls.json_group_instructions().format(
+                    json_instructions=json_instructions
+                )
         return ToolMessage.json_group_instructions().format(
             json_instructions=json_instructions
         )
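Since ToolMessage itself defines json_group_instructions, every subclass passes the hasattr/callable test, so in effect the first enabled class's (possibly overridden) template wins. A hedged sketch of supplying a custom group template (GroupedTool is invented; the {json_instructions} placeholder is filled by .format() exactly as above):

from langroid.agent.tool_message import ToolMessage

class GroupedTool(ToolMessage):
    request: str = "grouped_tool"
    purpose: str = "Illustration only"

    @classmethod
    def json_group_instructions(cls) -> str:
        # custom wrapper text; the per-tool JSON instructions are
        # substituted into the placeholder below
        return "Use EXACTLY ONE of these JSON tools:\n{json_instructions}"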
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/special/doc_chat_agent.py
RENAMED

@@ -135,7 +135,7 @@ class DocChatAgentConfig(ChatAgentConfig):
             # NOTE: PDF parsing is extremely challenging, and each library
             # has its own strengths and weaknesses.
             # Try one that works for your use case.
-            # or "…
+            # or "unstructured", "pdfplumber", "fitz", "pypdf"
             library="pdfplumber",
         ),
     )
@@ -156,7 +156,7 @@ class DocChatAgentConfig(ChatAgentConfig):
         collection_name="doc-chat-lancedb",
         replace_collection=True,
         storage_path=".lancedb/data/",
-        embedding=…
+        embedding=oai_embed_config,
     )
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
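With "haystack" gone (see the parser.py hunk below), picking a PDF backend now looks like this; a sketch assuming DocChatAgentConfig exposes the parsing field implied by the hunk above:

from langroid.agent.special.doc_chat_agent import DocChatAgentConfig
from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

# library must now be one of: "fitz", "pdfplumber", "pypdf", "unstructured"
config = DocChatAgentConfig(
    parsing=ParsingConfig(pdf=PdfParsingConfig(library="fitz")),
)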
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/task.py
RENAMED

@@ -2,6 +2,7 @@ from __future__ import annotations

 import copy
 import logging
+import re
 from collections import Counter
 from types import SimpleNamespace
 from typing import (
@@ -781,17 +782,20 @@ class Task:
         # handle routing instruction in result if any,
         # of the form PASS=<recipient>
         content = msg.content if isinstance(msg, ChatDocument) else msg
+        content = content.strip()
         if PASS in content and PASS_TO not in content:
             return True, None
         if PASS_TO in content and content.split(":")[1] != "":
             return True, content.split(":")[1]
-        if SEND_TO in content and …
-            recipient …
+        if SEND_TO in content and (send_parts := re.split(r"[,: ]", content))[1] != "":
+            # assume syntax is SEND_TO:<recipient> <content>
+            # or SEND_TO:<recipient>,<content> or SEND_TO:<recipient>:<content>
+            recipient = send_parts[1].strip()
             # get content to send, clean out routing instruction, and
             # start from 1 char after SEND_TO:<recipient>,
             # because we expect there is either a blank or some other separator
             # after the recipient
-            content_to_send = content.replace(f"{SEND_TO}…
+            content_to_send = content.replace(f"{SEND_TO}{recipient}", "").strip()[1:]
             # if no content then treat same as PASS_TO
             if content_to_send == "":
                 return True, recipient
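The walrus-based split can be traced by hand; a minimal sketch assuming SEND_TO is a prefix constant like "SEND_TO:" (the real value lives in langroid.utils.constants):

import re

SEND_TO = "SEND_TO:"  # hypothetical stand-in for langroid's constant

content = "SEND_TO:alice,please review the draft"
send_parts = re.split(r"[,: ]", content)  # ["SEND_TO", "alice", "please", ...]
recipient = send_parts[1].strip()         # "alice"
# strip the routing prefix, then drop the single separator after the recipient
payload = content.replace(f"{SEND_TO}{recipient}", "").strip()[1:]
print(recipient, "->", payload)           # alice -> please review the draft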
{langroid-0.1.196 → langroid-0.1.198}/langroid/agent/tool_message.py
RENAMED

@@ -16,7 +16,10 @@ from docstring_parser import parse
 from pydantic import BaseModel

 from langroid.language_models.base import LLMFunctionSpec
-from langroid.utils.pydantic_utils import …
+from langroid.utils.pydantic_utils import (
+    _recursive_purge_dict_key,
+    generate_simple_schema,
+)


 class ToolMessage(ABC, BaseModel):
@@ -79,6 +82,9 @@ class ToolMessage(ABC, BaseModel):
         ex = choice(cls.examples())
         return ex.json_example()

+    def to_json(self) -> str:
+        return self.json(indent=4, exclude={"result", "purpose"})
+
     def json_example(self) -> str:
         return self.json(indent=4, exclude={"result", "purpose"})

@@ -101,22 +107,30 @@ class ToolMessage(ABC, BaseModel):
         return properties.get(f, {}).get("default", None)

     @classmethod
-    def json_instructions(cls) -> str:
+    def json_instructions(cls, tool: bool = False) -> str:
         """
         Default Instructions to the LLM showing how to use the tool/function-call.
         Works for GPT4 but override this for weaker LLMs if needed.
+
+        Args:
+            tool: instructions for Langroid-native tool use? (e.g. for non-OpenAI LLM)
+                (or else it would be for OpenAI Function calls)
         Returns:
             str: instructions on how to use the message
         """
+        # TODO: when we attempt to use a "simpler schema"
+        # (i.e. all nested fields explicit without definitions),
+        # we seem to get worse results, so we turn it off for now
+        param_dict = (
+            # cls.simple_schema() if tool else
+            cls.llm_function_schema(request=True).parameters
+        )
         return textwrap.dedent(
             f"""
             TOOL: {cls.default_value("request")}
             PURPOSE: {cls.default_value("purpose")}
             JSON FORMAT: {
-                json.dumps(
-                    cls.llm_function_schema(request=True).parameters,
-                    indent=4,
-                )
+                json.dumps(param_dict, indent=4)
             }
             {"EXAMPLE: " + cls.usage_example() if cls.examples() else ""}
             """.lstrip()
@@ -210,3 +224,14 @@ class ToolMessage(ABC, BaseModel):
             description=cls.default_value("purpose"),
             parameters=parameters,
         )
+
+    @classmethod
+    def simple_schema(cls) -> Dict[str, Any]:
+        """
+        Return a simplified schema for the message, with only the request and
+        required fields.
+        Returns:
+            Dict[str, Any]: simplified schema
+        """
+        schema = generate_simple_schema(cls, exclude=["result", "purpose"])
+        return schema
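A subclass now gets both serialization paths; a hedged sketch (CityTool is invented, request/purpose are the standard ToolMessage fields):

from langroid.agent.tool_message import ToolMessage

class CityTool(ToolMessage):
    request: str = "city_info"
    purpose: str = "To look up facts about a <city>"
    city: str

print(CityTool.json_instructions(tool=True))  # Langroid-native JSON tool style
print(CityTool(city="Paris").to_json())       # instance JSON, minus result/purpose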
{langroid-0.1.196 → langroid-0.1.198}/langroid/embedding_models/models.py
RENAMED

@@ -6,7 +6,6 @@ from dotenv import load_dotenv
 from openai import OpenAI

 from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
-from langroid.language_models.utils import retry_with_exponential_backoff
 from langroid.mytypes import Embeddings
 from langroid.parsing.utils import batched

@@ -26,6 +25,58 @@ class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig):
     context_length: int = 512


+class EmbeddingFunctionCallable:
+    """
+    A callable class designed to generate embeddings for a list of texts using
+    the OpenAI API, with automatic retries on failure.
+
+    Attributes:
+        model (OpenAIEmbeddings): An instance of OpenAIEmbeddings that provides
+            configuration and utilities for generating embeddings.
+
+    Methods:
+        __call__(input: List[str]) -> Embeddings: Generate embeddings for
+            a list of input texts.
+    """
+
+    def __init__(self, model: "OpenAIEmbeddings"):
+        """
+        Initialize the EmbeddingFunctionCallable with a specific model.
+
+        Args:
+            model (OpenAIEmbeddings): An instance of OpenAIEmbeddings to use for
+                generating embeddings.
+        """
+        self.model = model
+
+    def __call__(self, input: List[str]) -> Embeddings:
+        """
+        Generate embeddings for a given list of input texts using the OpenAI API,
+        with retries on failure.
+
+        This method:
+        - Truncates each text in the input list to the model's maximum context length.
+        - Processes the texts in batches to generate embeddings efficiently.
+        - Automatically retries the embedding generation process with exponential
+          backoff in case of failures.
+
+        Args:
+            input (List[str]): A list of input texts to generate embeddings for.
+
+        Returns:
+            Embeddings: A list of embedding vectors corresponding to the input texts.
+        """
+        tokenized_texts = self.model.truncate_texts(input)
+        embeds = []
+        for batch in batched(tokenized_texts, 500):
+            result = self.model.client.embeddings.create(
+                input=batch, model=self.model.config.model_name
+            )
+            batch_embeds = [d.embedding for d in result.data]
+            embeds.extend(batch_embeds)
+        return embeds
+
+
 class OpenAIEmbeddings(EmbeddingModel):
     def __init__(self, config: OpenAIEmbeddingsConfig = OpenAIEmbeddingsConfig()):
         super().__init__()
@@ -56,19 +107,7 @@ class OpenAIEmbeddings(EmbeddingModel):
         ]

     def embedding_fn(self) -> Callable[[List[str]], Embeddings]:
-
-        def fn(texts: List[str]) -> Embeddings:
-            tokenized_texts = self.truncate_texts(texts)
-            embeds = []
-            for batch in batched(tokenized_texts, 500):
-                result = self.client.embeddings.create(
-                    input=batch, model=self.config.model_name
-                )
-                batch_embeds = [d.embedding for d in result.data]
-                embeds.extend(batch_embeds)
-            return embeds
-
-        return fn
+        return EmbeddingFunctionCallable(self)

     @property
     def embedding_dims(self) -> int:
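The diff does not state why the closure became a module-level class, but one plausible reason is picklability: vector stores sometimes persist or fork their embedding function, and a local function cannot be pickled while a top-level callable instance can. A minimal illustration of the difference:

import pickle

class Scale:
    # module-level callable, analogous to EmbeddingFunctionCallable
    def __init__(self, factor: float) -> None:
        self.factor = factor

    def __call__(self, xs: list) -> list:
        return [x * self.factor for x in xs]

def make_scale(factor: float):
    def fn(xs: list) -> list:  # closure over `factor`
        return [x * factor for x in xs]
    return fn

pickle.dumps(Scale(2.0))  # fine
try:
    pickle.dumps(make_scale(2.0))
except AttributeError as e:
    print(e)  # Can't pickle local object 'make_scale.<locals>.fn'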
{langroid-0.1.196 → langroid-0.1.198}/langroid/language_models/prompt_formatter/hf_formatter.py
RENAMED

@@ -6,11 +6,10 @@ models will have the same tokenizer, so we just use the first one.
 """
 import logging
 import re
-from typing import List, Set
+from typing import Any, List, Set, Type

 from huggingface_hub import HfApi, ModelFilter
 from jinja2.exceptions import TemplateError
-from transformers import AutoTokenizer

 from langroid.language_models.base import LanguageModel, LLMMessage, Role
 from langroid.language_models.config import HFPromptFormatterConfig
@@ -19,6 +18,31 @@ from langroid.language_models.prompt_formatter.base import PromptFormatter
 logger = logging.getLogger(__name__)


+def try_import_AutoTokenizer() -> Type[Any]:
+    """
+    Attempts to import the AutoTokenizer class from the transformers package.
+    Returns:
+        The AutoTokenizer class if successful.
+    Raises:
+        ImportError: If the transformers package is not installed.
+    """
+    try:
+        from transformers import AutoTokenizer
+
+        return AutoTokenizer  # type: ignore
+    except ImportError:
+        raise ImportError(
+            """
+            You are trying to use the HuggingFace transformers.AutoTokenizer,
+            but the `transformers` package is not installed
+            by default with Langroid. Please install langroid using the
+            `transformers` extra, like so:
+            pip install "langroid[transformers]"
+            or equivalent.
+            """
+        )
+
+
 def find_hf_formatter(model_name: str) -> str:
     hf_api = HfApi()
     # try to find a matching model, with progressivly shorter prefixes of model_name
@@ -37,6 +61,7 @@ def find_hf_formatter(model_name: str) -> str:
             mdl = next(models)
         except StopIteration:
             continue
+        AutoTokenizer = try_import_AutoTokenizer()
         tokenizer = AutoTokenizer.from_pretrained(mdl.id)
         if tokenizer.chat_template is not None:
             return str(mdl.id)
@@ -60,6 +85,7 @@ class HFFormatter(PromptFormatter):
             mdl = next(models)
         except StopIteration:
             raise ValueError(f"Model {config.model_name} not found on HuggingFace Hub")
+        AutoTokenizer = try_import_AutoTokenizer()
         self.tokenizer = AutoTokenizer.from_pretrained(mdl.id)
         if self.tokenizer.chat_template is None:
             raise ValueError(
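The same deferred-import idiom reappears in document_parser.py below for `unstructured`. Reduced to its essentials it is just this (a generic sketch, not langroid's exact helper):

import importlib

def require(module_name: str, extra: str):
    # Import module_name, or point at the pip extra that provides it.
    try:
        return importlib.import_module(module_name)
    except ImportError:
        raise ImportError(
            f"`{module_name}` is not installed by default with langroid; "
            f'run: pip install "langroid[{extra}]"'
        )

# transformers = require("transformers", "transformers")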
{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/document_parser.py
RENAMED

@@ -11,7 +11,6 @@ import requests

 from langroid.mytypes import DocMetaData, Document
 from langroid.parsing.parser import Parser, ParsingConfig
-from langroid.parsing.urls import url_to_tempfile

 logger = logging.getLogger(__name__)

@@ -54,8 +53,6 @@ class DocumentParser(Parser):
             return PDFPlumberParser(source, config)
         elif config.pdf.library == "unstructured":
             return UnstructuredPDFParser(source, config)
-        elif config.pdf.library == "haystack":
-            return HaystackPDFParser(source, config)
         else:
             raise ValueError(
                 f"Unsupported PDF library specified: {config.pdf.library}"
@@ -301,59 +298,23 @@ class PDFPlumberParser(DocumentParser):
         return self.fix_text(page.extract_text())


-class HaystackPDFParser(DocumentParser):
-    """
-    Parser for processing PDFs using the `haystack` library.
-    """
-
-    def get_doc_chunks(self) -> List[Document]:
-        """
-        Overrides the base class method to use the `haystack` library.
-        See there for more details.
-        """
-
-        from haystack.nodes import PDFToTextConverter, PreProcessor
-
-        converter = PDFToTextConverter(
-            remove_numeric_tables=True,
-        )
-        path = self.source
-        if path.startswith(("http://", "https://")):
-            path = url_to_tempfile(path)
-        doc = converter.convert(file_path=path, meta=None)
-        # note self.config.chunk_size is in token units,
-        # and we use an approximation of 75 words per 100 tokens
-        # to convert to word units
-        preprocessor = PreProcessor(
-            clean_empty_lines=True,
-            clean_whitespace=True,
-            clean_header_footer=False,
-            split_by="word",
-            split_length=int(0.75 * self.config.chunk_size),
-            split_overlap=int(0.75 * self.config.overlap),
-            split_respect_sentence_boundary=True,
-            add_page_number=True,
-        )
-        chunks = preprocessor.process(doc)
-        return [
-            Document(
-                content=chunk.content,
-                metadata=DocMetaData(
-                    source=f"{self.source} page {chunk.meta['page']}",
-                    is_chunk=True,
-                ),
-            )
-            for chunk in chunks
-        ]
-
-
 class UnstructuredPDFParser(DocumentParser):
     """
     Parser for processing PDF files using the `unstructured` library.
     """

     def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-
+        try:
+            from unstructured.partition.pdf import partition_pdf
+        except ImportError:
+            raise ImportError(
+                """
+                The `unstructured` library is not installed by default with langroid.
+                To include this library, please install langroid with the
+                `unstructured` extra by running `pip install "langroid[unstructured]"`
+                or equivalent.
+                """
+            )

         # from unstructured.chunking.title import chunk_by_title

@@ -367,7 +328,7 @@ class UnstructuredPDFParser(DocumentParser):
                 Please try a different library by setting the `library` field
                 in the `pdf` section of the `parsing` field in the config file.
                 Supported libraries are:
-                fitz, pypdf, pdfplumber, unstructured
+                fitz, pypdf, pdfplumber, unstructured
                 """
             )

@@ -406,7 +367,17 @@ class UnstructuredDocxParser(DocumentParser):
     """

     def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-
+        try:
+            from unstructured.partition.docx import partition_docx
+        except ImportError:
+            raise ImportError(
+                """
+                The `unstructured` library is not installed by default with langroid.
+                To include this library, please install langroid with the
+                `unstructured` extra by running `pip install "langroid[unstructured]"`
+                or equivalent.
+                """
+            )

         elements = partition_docx(file=self.doc_bytes, include_page_breaks=True)

@@ -447,7 +418,17 @@ class UnstructuredDocxParser(DocumentParser):

 class UnstructuredDocParser(UnstructuredDocxParser):
     def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
-
+        try:
+            from unstructured.partition.doc import partition_doc
+        except ImportError:
+            raise ImportError(
+                """
+                The `unstructured` library is not installed by default with langroid.
+                To include this library, please install langroid with the
+                `unstructured` extra by running `pip install "langroid[unstructured]"`
+                or equivalent.
+                """
+            )

         elements = partition_doc(filename=self.source, include_page_breaks=True)

{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/json.py
RENAMED

@@ -1,4 +1,5 @@
 import json
+import re
 from typing import Any, Iterator, List

 from pyparsing import nestedExpr, originalTextFor
@@ -44,6 +45,60 @@ def get_json_candidates(s: str) -> List[str]:
     return []


+def replace_undefined(s: str, undefined_placeholder: str = '"<undefined>"') -> str:
+    """
+    Replace undefined values in a potential json str with a placeholder.
+
+    Args:
+        - s (str): The potential JSON string to parse.
+        - undefined_placeholder (str): The placeholder or error message
+            for undefined values.
+
+    Returns:
+        - str: The (potential) JSON string with undefined values
+            replaced by the placeholder.
+    """
+
+    # Preprocess the string to replace undefined values with the placeholder
+    # This regex looks for patterns like ": <identifier>" and replaces them
+    # with the placeholder.
+    # It's a simple approach and might need adjustments for complex cases
+    # This is an attempt to handle cases where a weak LLM may produce
+    # a JSON-like string without quotes around some values, e.g.
+    # {"rent": DO-NOT-KNOW }
+    preprocessed_s = re.sub(
+        r":\s*([a-zA-Z_][a-zA-Z_0-9\-]*)", f": {undefined_placeholder}", s
+    )
+
+    # Now, attempt to parse the preprocessed string as JSON
+    try:
+        return preprocessed_s
+    except Exception:
+        # If parsing fails, return an error message instead
+        # (this should be rare after preprocessing)
+        return s
+
+
+def repair_newlines(s: str) -> str:
+    """
+    Attempt to load as json, and if it fails, try with newlines replaced by space.
+    Intended to handle cases where weak LLMs produce JSON-like strings where
+    some string-values contain explicit newlines, e.g.:
+    {"text": "This is a text\n with a newline"}
+    These would not be valid JSON, so we try to clean them up here.
+    """
+    try:
+        json.loads(s)
+        return s
+    except Exception:
+        try:
+            s = s.replace("\n", " ")
+            json.loads(s)
+            return s
+        except Exception:
+            return s
+
+
 def extract_top_level_json(s: str) -> List[str]:
     """Extract all top-level JSON-formatted substrings from a given string.

@@ -53,15 +108,17 @@ def extract_top_level_json(s: str) -> List[str]:
     Returns:
         List[str]: A list of top-level JSON-formatted substrings.
     """
-    # Find JSON object and array candidates
+    # Find JSON object and array candidates
     json_candidates = get_json_candidates(s)

     normalized_candidates = [
         candidate.replace("\\{", "{").replace("\\}", "}").replace("\\_", "_")
         for candidate in json_candidates
     ]
+    candidates = [replace_undefined(candidate) for candidate in normalized_candidates]
+    candidates = [repair_newlines(candidate) for candidate in candidates]
     top_level_jsons = [
-        candidate for candidate in …
+        candidate for candidate in candidates if is_valid_json(candidate)
     ]

     return top_level_jsons
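A quick check of the two repair helpers on typical weak-LLM output (the results follow directly from the regex and the newline replacement above):

print(replace_undefined('{"rent": DO-NOT-KNOW}'))
# {"rent": "<undefined>"}   -- bare identifier quoted via the placeholder

print(repair_newlines('{"note": "first line\nsecond line"}'))
# {"note": "first line second line"}   -- raw newline made JSON-safe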
{langroid-0.1.196 → langroid-0.1.198}/langroid/parsing/parser.py
RENAMED

@@ -19,9 +19,7 @@ class Splitter(str, Enum):


 class PdfParsingConfig(BaseSettings):
-    library: Literal[
-        "fitz", "pdfplumber", "pypdf", "unstructured", "haystack"
-    ] = "pdfplumber"
+    library: Literal["fitz", "pdfplumber", "pypdf", "unstructured"] = "pdfplumber"


 class DocxParsingConfig(BaseSettings):
|
@@ -135,6 +135,53 @@ def flatten_pydantic_model(
|
|
135
135
|
return create_model("FlatModel", __base__=base_model, **flattened_fields)
|
136
136
|
|
137
137
|
|
138
|
+
def get_field_names(model: Type[BaseModel]) -> List[str]:
|
139
|
+
"""Get all field names from a possibly nested Pydantic model."""
|
140
|
+
mdl = flatten_pydantic_model(model)
|
141
|
+
fields = list(mdl.__fields__.keys())
|
142
|
+
# fields may be like a__b__c , so we only want the last part
|
143
|
+
return [f.split("__")[-1] for f in fields]
|
144
|
+
|
145
|
+
|
146
|
+
def generate_simple_schema(
|
147
|
+
model: Type[BaseModel], exclude: List[str] = []
|
148
|
+
) -> Dict[str, Any]:
|
149
|
+
"""
|
150
|
+
Generates a JSON schema for a Pydantic model,
|
151
|
+
with options to exclude specific fields.
|
152
|
+
|
153
|
+
This function traverses the Pydantic model's fields, including nested models,
|
154
|
+
to generate a dictionary representing the JSON schema. Fields specified in
|
155
|
+
the exclude list will not be included in the generated schema.
|
156
|
+
|
157
|
+
Args:
|
158
|
+
model (Type[BaseModel]): The Pydantic model class to generate the schema for.
|
159
|
+
exclude (List[str]): A list of string field names to be excluded from the
|
160
|
+
generated schema. Defaults to an empty list.
|
161
|
+
|
162
|
+
Returns:
|
163
|
+
Dict[str, Any]: A dictionary representing the JSON schema of the provided model,
|
164
|
+
with specified fields excluded.
|
165
|
+
"""
|
166
|
+
if hasattr(model, "__fields__"):
|
167
|
+
output: Dict[str, Any] = {}
|
168
|
+
for field_name, field in model.__fields__.items():
|
169
|
+
if field_name in exclude:
|
170
|
+
continue # Skip excluded fields
|
171
|
+
|
172
|
+
field_type = field.type_
|
173
|
+
if issubclass(field_type, BaseModel):
|
174
|
+
# Recursively generate schema for nested models
|
175
|
+
output[field_name] = generate_simple_schema(field_type, exclude)
|
176
|
+
else:
|
177
|
+
# Represent the type as a string here
|
178
|
+
output[field_name] = {"type": field_type.__name__}
|
179
|
+
return output
|
180
|
+
else:
|
181
|
+
# Non-model type, return a simplified representation
|
182
|
+
return {"type": model.__name__}
|
183
|
+
|
184
|
+
|
138
185
|
def flatten_pydantic_instance(
|
139
186
|
instance: BaseModel,
|
140
187
|
prefix: str = "",
|
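On a nested model the helper produces a flat, definition-free dict; for example (Pydantic v1 semantics, i.e. __fields__ and field.type_, which langroid used at this point):

from pydantic import BaseModel

class Address(BaseModel):
    city: str
    zip: str

class Person(BaseModel):
    name: str
    address: Address

print(generate_simple_schema(Person, exclude=["zip"]))
# {'name': {'type': 'str'}, 'address': {'city': {'type': 'str'}}}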
{langroid-0.1.196 → langroid-0.1.198}/langroid/utils/system.py
RENAMED

@@ -1,10 +1,12 @@
 import getpass
 import hashlib
+import importlib
 import inspect
 import logging
 import shutil
 import socket
 import traceback
+from typing import Any

 logger = logging.getLogger(__name__)

@@ -15,6 +17,39 @@ DELETION_ALLOWED_PATHS = [
 ]


+class LazyLoad:
+    """Lazy loading of modules or classes."""
+
+    def __init__(self, import_path: str) -> None:
+        self.import_path = import_path
+        self._target = None
+        self._is_target_loaded = False
+
+    def _load_target(self) -> None:
+        if not self._is_target_loaded:
+            try:
+                # Attempt to import as a module
+                self._target = importlib.import_module(self.import_path)  # type: ignore
+            except ImportError:
+                # If module import fails, attempt to import as a
+                # class or function from a module
+                module_path, attr_name = self.import_path.rsplit(".", 1)
+                module = importlib.import_module(module_path)
+                self._target = getattr(module, attr_name)
+            self._is_target_loaded = True
+
+    def __getattr__(self, name: str) -> Any:
+        self._load_target()
+        return getattr(self._target, name)
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        self._load_target()
+        if callable(self._target):
+            return self._target(*args, **kwargs)
+        else:
+            raise TypeError(f"{self.import_path!r} object is not callable")
+
+
 def rmdir(path: str) -> bool:
     """
     Remove a directory recursively.
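Usage follows the two code paths in _load_target; stdlib examples, with nothing imported until first use:

math = LazyLoad("math")         # module path: import deferred
print(math.sqrt(9))             # import happens here -> 3.0

dumps = LazyLoad("json.dumps")  # dotted attribute path: getattr fallback
print(dumps({"a": 1}))          # invoked via __call__ -> '{"a": 1}'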
{langroid-0.1.196 → langroid-0.1.198}/langroid/vector_store/chromadb.py
RENAMED

@@ -141,10 +141,16 @@ class ChromaDB(VectorStore):
         return self._docs_from_results(results)

     def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
-
-
-        results …
-
+        # get them one by one since chroma mangles the order of the results
+        # when fetched from a list of ids.
+        results = [
+            self.collection.get(ids=[id], include=["documents", "metadatas"])
+            for id in ids
+        ]
+        final_results = {}
+        final_results["documents"] = [[r["documents"][0] for r in results]]
+        final_results["metadatas"] = [[r["metadatas"][0] for r in results]]
+        return self._docs_from_results(final_results)

     def delete_collection(self, collection_name: str) -> None:
         self.client.delete_collection(name=collection_name)
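The per-id loop trades N round trips for guaranteed ordering. A hedged alternative with a single bulk call, relying on chroma's get() returning an "ids" list alongside the requested fields:

from typing import Any, Dict, List

def get_in_order(collection: Any, ids: List[str]) -> List[Dict[str, Any]]:
    res = collection.get(ids=ids, include=["documents", "metadatas"])
    by_id = {
        i: {"document": d, "metadata": m}
        for i, d, m in zip(res["ids"], res["documents"], res["metadatas"])
    }
    return [by_id[i] for i in ids]  # restore the caller's ordering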
{langroid-0.1.196 → langroid-0.1.198}/pyproject.toml
RENAMED

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langroid"
-version = "0.1.196"
+version = "0.1.198"
 description = "Harness LLMs with Multi-Agent Programming"
 authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
 readme = "README.md"
@@ -16,7 +16,7 @@ mkdocs-gen-files = "^0.4.0"
 mkdocs-literate-nav = "^0.6.0"
 mkdocs-section-index = "^0.3.5"
 mkdocs-jupyter = "^0.24.1"
-chromadb = "0.…
+chromadb = ">=0.4.21, <=0.4.23"
 onnxruntime = "1.16.1"
 fire = "^0.5.0"
 black = {extras = ["jupyter"], version = "^23.3.0"}
@@ -56,7 +56,7 @@ prettytable = "^3.8.0"
 tantivy = "^0.21.0"
 google-api-python-client = "^2.95.0"
 lxml = "^4.9.3"
-unstructured = {extras = ["docx", "pptx", "pdf"], version = ">=0.10.16,<0.10.18"}
+unstructured = {extras = ["docx", "pptx", "pdf"], version = ">=0.10.16,<0.10.18", optional=true}

 sentence-transformers = {version="2.2.2", optional=true}
 torch = {version="2.0.0", optional=true}
@@ -72,7 +72,6 @@ pymupdf = "^1.23.3"
 jinja2 = "^3.1.2"
 pytest-asyncio = "^0.21.1"
 docstring-parser = "^0.15"
-farm-haystack = {extras = ["ocr", "preprocessing", "file-conversion", "pdf"], version = "^1.21.1"}
 meilisearch = "^0.28.3"
 meilisearch-python-sdk = "^2.2.3"
 litellm = {version = "^1.23.0", optional = true}
@@ -85,7 +84,7 @@ agent-search = {version = "^0.0.7", optional = true}
 python-docx = "^1.1.0"
 aiohttp = "^3.9.1"
 metaphor-python = {version = "^0.1.23", optional = true}
-chainlit = {version = "^1.0.…
+chainlit = {version = "^1.0.301", optional = true}
 python-socketio = {version="^5.11.0", optional=true}
 duckduckgo-search = "^4.4"
@@ -93,6 +92,8 @@ duckduckgo-search = "^4.4"
 # install these using `poetry install -E [...]` where [...] is one of the extras below
 # or install multiple extras using, e.g., `poetry install -E "litellm mysql"
 hf-embeddings = ["sentence-transformers", "torch"]
+transformers = ["transformers"]
+unstructured = ["unstructured"]
 postgres = ["psycopg2", "pytest-postgresql"]
 mysql = ["pymysql", "pytest-mysql"]
 litellm = ["litellm"]
@@ -127,6 +128,7 @@ exclude = [
     "langroid/embedding_models/clustering.py",
     #TODO revisit why mypy keeps failing on gh actions, but works fine locally
     "langroid/agent/callbacks/chainlit.py",
+    "langroid/vector_store/chromadb.py"
 ]
 files=["langroid/*"]
 plugins = [
All remaining files are unchanged between 0.1.196 and 0.1.198 (the +0 -0 entries in the listing above).