langroid 0.8.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +353 -94
- langroid/agent/chat_agent.py +68 -9
- langroid/agent/chat_document.py +16 -7
- langroid/agent/openai_assistant.py +12 -1
- langroid/agent/special/lance_doc_chat_agent.py +25 -18
- langroid/agent/special/lance_rag/critic_agent.py +37 -5
- langroid/agent/special/lance_rag/query_planner_agent.py +102 -63
- langroid/agent/special/lance_tools.py +10 -2
- langroid/agent/task.py +156 -47
- langroid/agent/tool_message.py +12 -3
- langroid/agent/tools/__init__.py +18 -0
- langroid/agent/tools/orchestration.py +216 -0
- langroid/agent/tools/recipient_tool.py +6 -11
- langroid/agent/typed_task.py +19 -0
- langroid/language_models/base.py +3 -2
- langroid/mytypes.py +0 -1
- langroid/parsing/parse_json.py +19 -2
- langroid/utils/pydantic_utils.py +19 -0
- langroid/vector_store/base.py +3 -1
- langroid/vector_store/lancedb.py +2 -0
- {langroid-0.8.0.dist-info → langroid-0.9.1.dist-info}/METADATA +4 -2
- {langroid-0.8.0.dist-info → langroid-0.9.1.dist-info}/RECORD +25 -28
- pyproject.toml +2 -1
- langroid/agent/special/lance_rag_new/__init__.py +0 -9
- langroid/agent/special/lance_rag_new/critic_agent.py +0 -171
- langroid/agent/special/lance_rag_new/lance_rag_task.py +0 -144
- langroid/agent/special/lance_rag_new/query_planner_agent.py +0 -222
- langroid/agent/team.py +0 -1758
- {langroid-0.8.0.dist-info → langroid-0.9.1.dist-info}/LICENSE +0 -0
- {langroid-0.8.0.dist-info → langroid-0.9.1.dist-info}/WHEEL +0 -0
@@ -1,144 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
The LanceRAGTaskCreator.new() method creates a 3-Agent system that uses this agent.
|
3
|
-
It takes a LanceDocChatAgent instance as argument, and adds two more agents:
|
4
|
-
- LanceQueryPlanAgent, which is given the LanceDB schema in LanceDocChatAgent,
|
5
|
-
and based on this schema, for a given user query, creates a Query Plan
|
6
|
-
using the QueryPlanTool, which contains a filter, a rephrased query,
|
7
|
-
and a dataframe_calc.
|
8
|
-
- QueryPlanCritic, which is given the LanceDB schema in LanceDocChatAgent,
|
9
|
-
and gives feedback on the Query Plan and Result using the QueryPlanFeedbackTool.
|
10
|
-
|
11
|
-
The LanceRAGTaskCreator.new() method sets up the given LanceDocChatAgent and
|
12
|
-
QueryPlanCritic as sub-tasks of the LanceQueryPlanAgent's task.
|
13
|
-
|
14
|
-
Langroid's built-in task orchestration ensures that:
|
15
|
-
- the LanceQueryPlanAgent reformulates the plan based
|
16
|
-
on the QueryPlanCritic's feedback,
|
17
|
-
- LLM deviations are corrected via tools and overrides of ChatAgent methods.
|
18
|
-
"""
|
19
|
-
|
20
|
-
import logging
|
21
|
-
|
22
|
-
from langroid.agent.special.lance_tools import (
|
23
|
-
QueryPlanAnswerTool,
|
24
|
-
)
|
25
|
-
from langroid.agent.task import Task
|
26
|
-
from langroid.mytypes import Entity
|
27
|
-
from langroid.utils.constants import NO_ANSWER
|
28
|
-
|
29
|
-
from ..lance_doc_chat_agent import LanceDocChatAgent
|
30
|
-
from .critic_agent import (
|
31
|
-
QueryPlanCritic,
|
32
|
-
QueryPlanCriticConfig,
|
33
|
-
)
|
34
|
-
from .query_planner_agent import (
|
35
|
-
LanceQueryPlanAgent,
|
36
|
-
LanceQueryPlanAgentConfig,
|
37
|
-
)
|
38
|
-
|
39
|
-
logger = logging.getLogger(__name__)
|
40
|
-
|
41
|
-
|
42
|
-
def run_lance_rag_task(
    query: str,
    agent: LanceDocChatAgent,
    interactive: bool = True,
) -> str:
    """
    Answer `query` by driving a planner -> RAG -> critic loop around `agent`.

    A LanceQueryPlanAgent and a QueryPlanCritic are created from the schema
    reported by `agent`, each wrapped in its own Task. Each round:
    the planner produces a QueryPlanTool, `agent` answers it, and the critic
    reviews the (plan, answer) pair. The loop stops as soon as the critic has
    no suggested fix and the answer does not contain NO_ANSWER, or after a
    fixed number of rounds.

    Args:
        query: the user's question.
        agent: the LanceDocChatAgent that answers query plans.
        interactive: passed to the planner Task's `interactive` setting.

    Returns:
        The content of the last answer produced by the RAG task
        (NO_ANSWER if the RAG task returned nothing in the last round).
    """
    doc_agent_name = "LanceRAG"
    critic_name = "QueryPlanCritic"
    # Upper bound on plan/answer/critique rounds. The original loop condition
    # `while i := i + 1 < 5` bound `i` to the boolean `(i + 1 < 5)`, so the
    # counter never advanced and the cap never applied.
    max_rounds = 4

    query_plan_agent_config = LanceQueryPlanAgentConfig(
        critic_name=critic_name,
        doc_agent_name=doc_agent_name,
        doc_schema=agent._get_clean_vecdb_schema(),
    )
    query_plan_agent_config.set_system_message()

    query_planner = LanceQueryPlanAgent(query_plan_agent_config)
    query_plan_task = Task(
        query_planner,
        interactive=interactive,
        restart=False,
        done_if_response=[Entity.AGENT],
    )
    # TODO - figure out how to define the fns so we avoid re-creating
    # agents in each invocation. Right now we are defining the fn
    # inside this context, which may not be great.

    rag_task = Task(
        agent,
        name=doc_agent_name,
        restart=True,  # default; no need to accumulate dialog
        interactive=False,
        done_if_response=[Entity.LLM],  # done when non-null response from LLM
        done_if_no_response=[Entity.LLM],  # done when null response from LLM
    )

    critic_config = QueryPlanCriticConfig(
        doc_schema=agent._get_clean_vecdb_schema(),
    )
    critic_config.set_system_message()

    critic_agent = QueryPlanCritic(critic_config)
    critic_task = Task(
        critic_agent,
        interactive=False,
        restart=True,  # default; no need to accumulate dialog
    )

    no_answer = False
    feedback = None
    for _ in range(max_rounds):
        # query, feedback (QueryPlanFeedbackTool) => ChatDocument[QueryPlanTool]
        if feedback is not None and feedback.suggested_fix != "":
            prompt = f"""
            A Critic has seen your Query Plan and the Answer, and has given the
            following feedback. Take it into account and re-generate your Query Plan
            for the QUERY:

            QUERY: {query}
            FEEDBACK: {feedback.feedback}
            SUGGESTED FIX: {feedback.suggested_fix}
            """
        elif no_answer:
            prompt = f"There was a {NO_ANSWER} response; try a different query plan"
        else:
            prompt = query

        # Keep nudging the planner until it actually emits the tool.
        while True:
            plan_doc = query_plan_task.run(prompt)
            # a missing result is treated like a response without the tool
            if plan_doc is not None and len(plan_doc.tool_messages) > 0:
                break
            # forgot to use QueryPlanTool
            prompt = """You forgot to use the `query_plan` tool/function. Try again."""

        # ChatDocument with QueryPlanTool => ChatDocument with answer
        rag_answer_doc = rag_task.run(plan_doc)
        if rag_answer_doc is None:
            rag_answer_doc = rag_task.agent.create_llm_response(NO_ANSWER)

        # QueryPlan, answer => QueryPlanAnswerTool
        plan_answer_tool = QueryPlanAnswerTool(
            plan=plan_doc.tool_messages[0].plan,
            answer=rag_answer_doc.content,
        )
        # QueryPlanAnswerTool => ChatDocument[QueryPlanAnswerTool]
        plan_answer_doc = agent.create_agent_response(tool_messages=[plan_answer_tool])

        # ChatDocument[QueryPlanAnswerTool] => ChatDocument[QueryPlanFeedbackTool]
        feedback_doc = critic_task.run(plan_answer_doc)
        # ChatDocument[QueryPlanFeedbackTool] => QueryPlanFeedbackTool;
        # if the critic produced nothing usable, treat it as "no feedback"
        # instead of failing on a None/empty result.
        if feedback_doc is not None and feedback_doc.tool_messages:
            feedback = feedback_doc.tool_messages[0]
        else:
            feedback = None

        no_answer = NO_ANSWER in rag_answer_doc.content
        needs_fix = feedback is not None and feedback.suggested_fix != ""
        if not needs_fix and not no_answer:
            break

    return rag_answer_doc.content
|
@@ -1,222 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
LanceQueryPlanAgent is a ChatAgent created with a specific document schema.
|
3
|
-
Given a QUERY, the LLM constructs a Query Plan consisting of:
|
4
|
-
- filter condition if needed (or empty string if no filter is needed)
|
5
|
-
- query - a possibly rephrased query that can be used to match the `content` field
|
6
|
-
- dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
|
7
|
-
- original_query - the original query for reference
|
8
|
-
|
9
|
-
This agent has access to two tools:
|
10
|
-
- QueryPlanTool, which is used to generate the Query Plan, and the handler of
|
11
|
-
this tool simply passes it on to the RAG agent named in config.doc_agent_name.
|
12
|
-
- QueryPlanFeedbackTool, which is used to handle feedback on the Query Plan and
|
13
|
-
Result from the RAG agent. The QueryPlanFeedbackTool is used by
|
14
|
-
the QueryPlanCritic, who inserts feedback into the `feedback` field
|
15
|
-
"""
|
16
|
-
|
17
|
-
import logging
|
18
|
-
|
19
|
-
import langroid as lr
|
20
|
-
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
|
21
|
-
from langroid.agent.chat_document import ChatDocument
|
22
|
-
from langroid.agent.special.lance_tools import (
|
23
|
-
QueryPlan,
|
24
|
-
QueryPlanAnswerTool,
|
25
|
-
QueryPlanFeedbackTool,
|
26
|
-
QueryPlanTool,
|
27
|
-
)
|
28
|
-
from langroid.utils.constants import DONE, NO_ANSWER
|
29
|
-
|
30
|
-
logger = logging.getLogger(__name__)
|
31
|
-
|
32
|
-
|
33
|
-
class LanceQueryPlanAgentConfig(ChatAgentConfig):
    """
    Settings for LanceQueryPlanAgent.

    `system_message` carries a `{doc_schema}` slot that stays unfilled until
    `set_system_message()` is called, so callers must set `doc_schema` and
    then call that method before creating the agent.
    """

    name: str = "LancePlanner"
    # names of the collaborating agents the planner refers to
    critic_name: str = "QueryPlanCritic"
    doc_agent_name: str = "LanceRAG"
    # textual description of the filterable fields; inserted into the prompt
    doc_schema: str = ""
    use_tools = False
    max_retries: int = 5  # max number of retries for query plan
    use_functions_api = True

    # `{{doc_schema}}` survives the f-string as a literal `{doc_schema}`
    # placeholder; NO_ANSWER and DONE are interpolated at class-definition time.
    system_message = f"""
    You will receive a QUERY, to be answered based on an EXTREMELY LARGE collection
    of documents you DO NOT have access to, but your ASSISTANT does.
    You only know that these documents have a special `content` field
    and additional FILTERABLE fields in the SCHEMA below, along with the
    SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.

    {{doc_schema}}

    Based on the QUERY and the above SCHEMA, your task is to determine a QUERY PLAN,
    consisting of:
    - a PANDAS-TYPE FILTER (can be empty string) that would help the ASSISTANT to
        answer the query.
        Remember the FILTER can refer to ANY fields in the above SCHEMA
        EXCEPT the `content` field of the documents.
        ONLY USE A FILTER IF EXPLICITLY MENTIONED IN THE QUERY.
        TO get good results, for STRING MATCHES, consider using LIKE instead of =, e.g.
        "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
        YOUR FILTER MUST BE A PANDAS-TYPE FILTER, respecting the shown DTYPES.
    - a possibly REPHRASED QUERY (CANNOT BE EMPTY) to be answerable given the FILTER.
        Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
        so the REPHRASED QUERY should NOT mention ANY FILTER fields.
        The assistant will answer based on documents whose CONTENTS match the QUERY,
        possibly REPHRASED.
        !!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
    - an OPTIONAL SINGLE-LINE Pandas-dataframe calculation/aggregation string
        that can be used to calculate the answer to the original query,
        e.g. "df["rating"].mean()",
        or "df.groupby("director").mean()["rating"]",
        or EMPTY string if no calc is needed.
        The dataframe calc CAN refer to the `content` field.
        If a DataFrame calculation is NOT needed, leave this field EMPTY.

        IMPORTANT: The DataFrame `df` in this calculation is the result of
        applying the FILTER AND REPHRASED QUERY to the documents.

        WATCH OUT!! When deciding the dataframe calc, if any, CAREFULLY
        note what the query is asking, and ensure that the result of your
        dataframe calc expression would answer the query.


    EXAMPLE:
    -------
    Suppose there is a document-set about crime reports, where:
        CONTENT = crime report,
        Filterable SCHEMA consists of City, Year, num_deaths.

    Then given this ORIGINAL QUERY:

        Total deaths in shoplifting crimes in Los Angeles in 2023?

    A POSSIBLE QUERY PLAN could be:

    FILTER: "City LIKE '%Los Angeles%' AND Year = 2023"
    REPHRASED QUERY: "shoplifting crime" --> this will be used to MATCH content of docs
        [NOTE: we dropped the FILTER fields City and Year since the
        ASSISTANT does not know about them and only uses the query to
        match the CONTENT of the docs.]
    DATAFRAME CALCULATION: "df["num_deaths"].sum()"
        NOTE!!! The DataFrame `df` in this calculation is the result of
        applying the FILTER AND REPHRASED QUERY to the documents,
        hence this computation will give the total deaths in shoplifting crimes.
    ------------- END OF EXAMPLE ----------------

    The FILTER must be a PANDAS-like condition, e.g.
    "year > 2000 AND genre = 'ScienceFiction'".
    To ensure you get useful results, you should make your FILTER
    NOT TOO STRICT, e.g. look for approximate match using LIKE, etc.
    E.g. "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
    Use DOT NOTATION to refer to nested fields, e.g. `metadata.year`, etc.

    You must FIRST present the QUERY PLAN using the `query_plan` tool/function.
    This will be handled by your document assistant, who will produce an ANSWER.

    You may receive FEEDBACK on your QUERY PLAN and received ANSWER,
    from the 'QueryPlanCritic' who may offer suggestions for
    a better FILTER, REPHRASED QUERY, or DATAFRAME CALCULATION.

    If you keep getting feedback or keep getting a {NO_ANSWER} from the assistant
    at least 3 times, then simply say '{DONE} {NO_ANSWER}' and nothing else.

    At the BEGINNING if there is no query, ASK the user what they want to know.
    """

    def set_system_message(self) -> None:
        """Fill the `{doc_schema}` slot of `system_message` with `self.doc_schema`."""
        filled_message = self.system_message.format(doc_schema=self.doc_schema)
        self.system_message = filled_message
|
130
|
-
|
131
|
-
|
132
|
-
class LanceQueryPlanAgent(ChatAgent):
    """
    ChatAgent whose LLM emits a QueryPlanTool and whose handlers validate the
    plan, package the RAG answer for the critic, and react to the critic's
    QueryPlanFeedbackTool.
    """

    def __init__(self, config: LanceQueryPlanAgentConfig):
        super().__init__(config)
        self.config: LanceQueryPlanAgentConfig = config
        # plan most recently accepted by query_plan(); cleared once it has
        # been bundled with an answer in handle_message_fallback()
        self.curr_query_plan: QueryPlan | None = None
        # how many times re-trying query plan in response to feedback:
        self.n_retries: int = 0
        self.result: str = ""  # answer received from LanceRAG
        # This agent should generate the QueryPlanTool
        # as well as handle it for validation
        self.enable_message(QueryPlanTool, use=True, handle=True)
        # feedback is only handled here, never generated by this agent's LLM
        self.enable_message(QueryPlanFeedbackTool, use=False, handle=True)

    def query_plan(self, msg: QueryPlanTool) -> ChatDocument | str:
        """
        Validate a query plan and pass it on.

        Returns:
            An error string asking for a retry when `dataframe_calc` spans
            more than one line; otherwise a ChatDocument whose
            `tool_messages` holds `msg`, so the caller can read the tool
            without parsing.
        """
        if "\n" in msg.plan.dataframe_calc:
            return "DATAFRAME CALCULATION must be a SINGLE LINE; Retry the `query_plan`"
        # save, to be used to assemble QueryPlanAnswerTool
        self.curr_query_plan = msg.plan
        return self.create_agent_response(tool_messages=[msg])

    def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> str:
        """
        Process Critic feedback on QueryPlan + Answer from RAG Agent.

        Returns:
            With no suggested fix: a confirmation string if no answer has
            been stored, NO_ANSWER if the stored answer is NO_ANSWER, else
            DONE followed by the stored answer. With a suggested fix:
            DONE + NO_ANSWER once `config.max_retries` is reached, otherwise
            a prompt relaying the feedback and the suggested fix.
        """
        # We should have saved answer in self.result by this time,
        # since this Agent seeks feedback only after receiving RAG answer.
        if msg.suggested_fix == "":
            self.n_retries = 0
            # This means the Query Plan or Result is good, as judged by Critic
            if self.result == "":
                # This was feedback for query with no result
                return "QUERY PLAN LOOKS GOOD!"
            if self.result == NO_ANSWER:
                return NO_ANSWER
            # non-empty and non-null answer
            return DONE + " " + self.result

        self.n_retries += 1
        if self.n_retries >= self.config.max_retries:
            # bail out to avoid infinite loop
            self.n_retries = 0
            return DONE + " " + NO_ANSWER
        return f"""
        here is FEEDBACK about your QUERY PLAN, and a SUGGESTED FIX.
        Modify the QUERY PLAN if needed:
        FEEDBACK: {msg.feedback}
        SUGGESTED FIX: {msg.suggested_fix}
        """

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """
        Handle a message that no tool handler claimed.

        If a query plan is pending and `msg` is a ChatDocument with a parent,
        `msg` is taken to be the answer from the RAG Agent: it is stored,
        wrapped with the plan in a QueryPlanAnswerTool, and addressed to the
        Critic for feedback. If instead `msg` is an LLM message with no tool
        attempt, a reminder to use the `query_plan` tool is returned.
        Otherwise returns None.
        """
        # TODO we don't need to use this fallback method. instead we can
        # first call result = super().agent_response(), and if result is None,
        # then we know there was no tool, so we run below code
        if not isinstance(msg, ChatDocument):
            return None

        if self.curr_query_plan is not None and msg.metadata.parent is not None:
            # save result, to be used in query_plan_feedback()
            self.result = msg.content
            # assemble QueryPlanAnswerTool...
            query_plan_answer_tool = QueryPlanAnswerTool(  # type: ignore
                plan=self.curr_query_plan,
                answer=self.result,
            )
            # ... and send it in the response
            # (Notice how the Agent is directly sending a tool, not the LLM)
            response = self.create_agent_response(
                tool_messages=[query_plan_answer_tool]
            )
            # set the recipient to the Critic so it can give feedback
            response.metadata.recipient = self.config.critic_name
            self.curr_query_plan = None  # reset
            return response

        if (
            not self.has_tool_message_attempt(msg)
            and msg.metadata.sender == lr.Entity.LLM
        ):
            # remind LLM to use the QueryPlanTool
            return """
            You forgot to use the `query_plan` tool/function.
            Re-try your response using the `query_plan` tool/function.
            """
        return None
|