langroid 0.3.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +42 -6
- langroid/agent/chat_agent.py +2 -2
- langroid/agent/special/doc_chat_agent.py +14 -4
- langroid/agent/special/lance_doc_chat_agent.py +25 -28
- langroid/agent/special/lance_rag/critic_agent.py +16 -6
- langroid/agent/special/lance_rag/query_planner_agent.py +8 -4
- langroid/agent/special/lance_rag_new/__init__.py +9 -0
- langroid/agent/special/lance_rag_new/critic_agent.py +171 -0
- langroid/agent/special/lance_rag_new/lance_rag_task.py +144 -0
- langroid/agent/special/lance_rag_new/query_planner_agent.py +222 -0
- langroid/agent/special/lance_tools.py +14 -8
- langroid/agent/special/neo4j/neo4j_chat_agent.py +1 -1
- langroid/agent/tool_message.py +6 -10
- langroid/utils/pydantic_utils.py +0 -50
- langroid/vector_store/base.py +6 -4
- langroid/vector_store/chromadb.py +4 -2
- langroid/vector_store/lancedb.py +40 -172
- langroid/vector_store/qdrantdb.py +6 -2
- {langroid-0.3.1.dist-info → langroid-0.5.1.dist-info}/METADATA +1 -1
- {langroid-0.3.1.dist-info → langroid-0.5.1.dist-info}/RECORD +23 -19
- pyproject.toml +1 -1
- {langroid-0.3.1.dist-info → langroid-0.5.1.dist-info}/LICENSE +0 -0
- {langroid-0.3.1.dist-info → langroid-0.5.1.dist-info}/WHEEL +0 -0
langroid/agent/base.py
CHANGED
@@ -784,15 +784,51 @@ class Agent(ABC):
|
|
784
784
|
# ]
|
785
785
|
# }
|
786
786
|
|
787
|
+
if not isinstance(json_data, dict):
|
788
|
+
return None
|
789
|
+
|
787
790
|
properties = json_data.get("properties")
|
788
|
-
if properties
|
791
|
+
if isinstance(properties, dict):
|
789
792
|
json_data = properties
|
790
793
|
request = json_data.get("request")
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
794
|
+
|
795
|
+
if request is None:
|
796
|
+
handled = [self.llm_tools_map[r] for r in self.llm_tools_handled]
|
797
|
+
default_keys = set(ToolMessage.__fields__.keys())
|
798
|
+
request_keys = set(json_data.keys())
|
799
|
+
|
800
|
+
def maybe_parse(tool: type[ToolMessage]) -> Optional[ToolMessage]:
|
801
|
+
all_keys = set(tool.__fields__.keys())
|
802
|
+
non_inherited_keys = all_keys.difference(default_keys)
|
803
|
+
# If the request has any keys not valid for the tool and
|
804
|
+
# does not specify some key specific to the type
|
805
|
+
# (e.g. not just `purpose`), the LLM must explicitly specify `request`
|
806
|
+
if not (
|
807
|
+
request_keys.issubset(all_keys)
|
808
|
+
and len(request_keys.intersection(non_inherited_keys)) > 0
|
809
|
+
):
|
810
|
+
return None
|
811
|
+
|
812
|
+
try:
|
813
|
+
return tool.parse_obj(json_data)
|
814
|
+
except ValidationError:
|
815
|
+
return None
|
816
|
+
|
817
|
+
candidate_tools = list(
|
818
|
+
filter(
|
819
|
+
lambda t: t is not None,
|
820
|
+
map(maybe_parse, handled),
|
821
|
+
)
|
822
|
+
)
|
823
|
+
|
824
|
+
# If only one valid candidate exists, we infer
|
825
|
+
# "request" to be the only possible value
|
826
|
+
if len(candidate_tools) == 1:
|
827
|
+
return candidate_tools[0]
|
828
|
+
else:
|
829
|
+
return None
|
830
|
+
|
831
|
+
if not isinstance(request, str) or request not in self.llm_tools_handled:
|
796
832
|
return None
|
797
833
|
|
798
834
|
message_class = self.llm_tools_map.get(request)
|
langroid/agent/chat_agent.py
CHANGED
@@ -427,11 +427,11 @@ class ChatAgent(Agent):
|
|
427
427
|
but the Assistant fn-calling seems to pay attn to these,
|
428
428
|
and if we don't want this, we should set this to False.)
|
429
429
|
"""
|
430
|
+
if require_recipient and message_class is not None:
|
431
|
+
message_class = message_class.require_recipient()
|
430
432
|
super().enable_message_handling(message_class) # enables handling only
|
431
433
|
tools = self._get_tool_list(message_class)
|
432
434
|
if message_class is not None:
|
433
|
-
if require_recipient:
|
434
|
-
message_class = message_class.require_recipient()
|
435
435
|
request = message_class.default_value("request")
|
436
436
|
llm_function = message_class.llm_function_schema(defaults=include_defaults)
|
437
437
|
self.llm_functions_map[request] = llm_function
|
@@ -538,12 +538,13 @@ class DocChatAgent(ChatAgent):
|
|
538
538
|
]
|
539
539
|
|
540
540
|
def get_field_values(self, fields: list[str]) -> Dict[str, str]:
|
541
|
-
"""Get string-listing of possible values of each
|
541
|
+
"""Get string-listing of possible values of each field,
|
542
542
|
e.g.
|
543
543
|
{
|
544
544
|
"genre": "crime, drama, mystery, ... (10 more)",
|
545
545
|
"certificate": "R, PG-13, PG, R",
|
546
546
|
}
|
547
|
+
The field names may have "metadata." prefix, e.g. "metadata.genre".
|
547
548
|
"""
|
548
549
|
field_values: Dict[str, Set[str]] = {}
|
549
550
|
# make empty set for each field
|
@@ -556,8 +557,11 @@ class DocChatAgent(ChatAgent):
|
|
556
557
|
for d in docs:
|
557
558
|
# extract fields from d
|
558
559
|
doc_field_vals = extract_fields(d, fields)
|
559
|
-
|
560
|
-
|
560
|
+
# the `field` returned by extract_fields may contain only the last
|
561
|
+
# part of the field name, e.g. "genre" instead of "metadata.genre",
|
562
|
+
# so we use the orig_field name to fill in the values
|
563
|
+
for (field, val), orig_field in zip(doc_field_vals.items(), fields):
|
564
|
+
field_values[orig_field].add(val)
|
561
565
|
# For each field make a string showing list of possible values,
|
562
566
|
# truncate to 20 values, and if there are more, indicate how many
|
563
567
|
# more there are, e.g. Genre: crime, drama, mystery, ... (20 more)
|
@@ -680,7 +684,13 @@ class DocChatAgent(ChatAgent):
|
|
680
684
|
)
|
681
685
|
return response
|
682
686
|
if query_str == "":
|
683
|
-
return
|
687
|
+
return ChatDocument(
|
688
|
+
content=NO_ANSWER + " since query was empty",
|
689
|
+
metadata=ChatDocMetaData(
|
690
|
+
source="No query provided",
|
691
|
+
sender=Entity.LLM,
|
692
|
+
),
|
693
|
+
)
|
684
694
|
elif query_str == "?" and self.response is not None:
|
685
695
|
return self.justify_response()
|
686
696
|
elif (query_str.startswith(("summar", "?")) and self.response is None) or (
|
@@ -22,7 +22,6 @@ from langroid.mytypes import DocMetaData, Document
|
|
22
22
|
from langroid.parsing.table_loader import describe_dataframe
|
23
23
|
from langroid.utils.constants import DONE, NO_ANSWER
|
24
24
|
from langroid.utils.pydantic_utils import (
|
25
|
-
clean_schema,
|
26
25
|
dataframe_to_documents,
|
27
26
|
)
|
28
27
|
from langroid.vector_store.lancedb import LanceDB
|
@@ -41,24 +40,26 @@ class LanceDocChatAgent(DocChatAgent):
|
|
41
40
|
def _get_clean_vecdb_schema(self) -> str:
|
42
41
|
"""Get a cleaned schema of the vector-db, to pass to the LLM
|
43
42
|
as part of instructions on how to generate a SQL filter."""
|
43
|
+
|
44
|
+
tbl_pandas = (
|
45
|
+
self.vecdb.client.open_table(self.vecdb.config.collection_name)
|
46
|
+
.search()
|
47
|
+
.limit(1)
|
48
|
+
.to_pandas(flatten=True)
|
49
|
+
)
|
44
50
|
if len(self.config.filter_fields) == 0:
|
45
|
-
filterable_fields = (
|
46
|
-
self.vecdb.client.open_table(self.vecdb.config.collection_name)
|
47
|
-
.search()
|
48
|
-
.limit(1)
|
49
|
-
.to_pandas(flatten=True)
|
50
|
-
.columns.tolist()
|
51
|
-
)
|
51
|
+
filterable_fields = tbl_pandas.columns.tolist()
|
52
52
|
# drop id, vector, metadata.id, metadata.window_ids, metadata.is_chunk
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
filterable_fields = list(
|
54
|
+
set(filterable_fields)
|
55
|
+
- {
|
56
|
+
"id",
|
57
|
+
"vector",
|
58
|
+
"metadata.id",
|
59
|
+
"metadata.window_ids",
|
60
|
+
"metadata.is_chunk",
|
61
|
+
}
|
62
|
+
)
|
62
63
|
logger.warning(
|
63
64
|
f"""
|
64
65
|
No filter_fields set in config, so using these fields as filterable fields:
|
@@ -69,15 +70,7 @@ class LanceDocChatAgent(DocChatAgent):
|
|
69
70
|
|
70
71
|
if self.from_dataframe:
|
71
72
|
return self.df_description
|
72
|
-
|
73
|
-
self.vecdb.schema,
|
74
|
-
excludes=["id", "vector"],
|
75
|
-
)
|
76
|
-
# intersect config.filter_fields with schema_dict.keys() in case
|
77
|
-
# there are extraneous fields in config.filter_fields
|
78
|
-
filter_fields_set = set(
|
79
|
-
self.config.filter_fields or schema_dict.keys()
|
80
|
-
).intersection(schema_dict.keys())
|
73
|
+
filter_fields_set = set(self.config.filter_fields)
|
81
74
|
|
82
75
|
# remove 'content' from filter_fields_set, even if it's not in filter_fields_set
|
83
76
|
filter_fields_set.discard("content")
|
@@ -85,10 +78,14 @@ class LanceDocChatAgent(DocChatAgent):
|
|
85
78
|
# possible values of filterable fields
|
86
79
|
filter_field_values = self.get_field_values(list(filter_fields_set))
|
87
80
|
|
81
|
+
schema_dict: Dict[str, Dict[str, Any]] = dict(
|
82
|
+
(field, {}) for field in filter_fields_set
|
83
|
+
)
|
88
84
|
# add field values to schema_dict as another field `values` for each field
|
89
85
|
for field, values in filter_field_values.items():
|
90
|
-
|
91
|
-
|
86
|
+
schema_dict[field]["values"] = values
|
87
|
+
dtype = tbl_pandas[field].dtype.name
|
88
|
+
schema_dict[field]["dtype"] = dtype
|
92
89
|
# if self.config.filter_fields is set, restrict to these:
|
93
90
|
if len(self.config.filter_fields) > 0:
|
94
91
|
schema_dict = {
|
@@ -37,20 +37,30 @@ class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
|
|
37
37
|
system_message = f"""
|
38
38
|
You are an expert at carefully planning a query that needs to be answered
|
39
39
|
based on a large collection of documents. These docs have a special `content` field
|
40
|
-
and additional FILTERABLE fields in the SCHEMA below
|
40
|
+
and additional FILTERABLE fields in the SCHEMA below, along with the
|
41
|
+
SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.
|
41
42
|
|
42
43
|
{{doc_schema}}
|
43
44
|
|
45
|
+
The ORIGINAL QUERY is handled by a QUERY PLANNER who sends the PLAN to an ASSISTANT,
|
46
|
+
who returns an ANSWER.
|
47
|
+
|
44
48
|
You will receive a QUERY PLAN consisting of:
|
45
|
-
- ORIGINAL QUERY,
|
46
|
-
|
49
|
+
- ORIGINAL QUERY from the user, which a QUERY PLANNER processes,
|
50
|
+
to create a QUERY PLAN, to be handled by an ASSISTANT.
|
51
|
+
- PANDAS-LIKE FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
|
47
52
|
FILTER SHOULD ONLY BE USED IF EXPLICITLY REQUIRED BY THE QUERY.
|
48
|
-
- REPHRASED QUERY that will be used to match against the
|
49
|
-
|
53
|
+
- REPHRASED QUERY (CANNOT BE EMPTY) that will be used to match against the
|
54
|
+
CONTENT (not filterable) of the documents.
|
50
55
|
In general the REPHRASED QUERY should be relied upon to match the CONTENT
|
51
56
|
of the docs. Thus the REPHRASED QUERY itself acts like a
|
52
57
|
SEMANTIC/LEXICAL/FUZZY FILTER since the Assistant is able to use it to match
|
53
|
-
the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
|
58
|
+
the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
|
59
|
+
Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
|
60
|
+
so the REPHRASED QUERY should NOT mention ANY FILTER fields.
|
61
|
+
The assistant will answer based on documents whose CONTENTS match the QUERY,
|
62
|
+
possibly REPHRASED.
|
63
|
+
!!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
|
54
64
|
- DATAFRAME CALCULATION, which must be a SINGLE LINE calculation (or empty),
|
55
65
|
[NOTE ==> This calculation is applied AFTER the FILTER and REPHRASED QUERY.],
|
56
66
|
- ANSWER received from an assistant that used this QUERY PLAN.
|
@@ -43,23 +43,27 @@ class LanceQueryPlanAgentConfig(ChatAgentConfig):
|
|
43
43
|
You will receive a QUERY, to be answered based on an EXTREMELY LARGE collection
|
44
44
|
of documents you DO NOT have access to, but your ASSISTANT does.
|
45
45
|
You only know that these documents have a special `content` field
|
46
|
-
and additional FILTERABLE fields in the SCHEMA below
|
46
|
+
and additional FILTERABLE fields in the SCHEMA below, along with the
|
47
|
+
SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.
|
47
48
|
|
48
49
|
{{doc_schema}}
|
49
50
|
|
50
51
|
Based on the QUERY and the above SCHEMA, your task is to determine a QUERY PLAN,
|
51
52
|
consisting of:
|
52
|
-
- a FILTER (can be empty string) that would help the ASSISTANT to
|
53
|
+
- a PANDAS-TYPE FILTER (can be empty string) that would help the ASSISTANT to
|
54
|
+
answer the query.
|
53
55
|
Remember the FILTER can refer to ANY fields in the above SCHEMA
|
54
56
|
EXCEPT the `content` field of the documents.
|
55
57
|
ONLY USE A FILTER IF EXPLICITLY MENTIONED IN THE QUERY.
|
56
58
|
TO get good results, for STRING MATCHES, consider using LIKE instead of =, e.g.
|
57
59
|
"CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
|
58
|
-
|
60
|
+
YOUR FILTER MUST BE A PANDAS-TYPE FILTER, respecting the shown DTYPES.
|
61
|
+
- a possibly REPHRASED QUERY (CANNOT BE EMPTY) to be answerable given the FILTER.
|
59
62
|
Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
|
60
63
|
so the REPHRASED QUERY should NOT mention ANY FILTER fields.
|
61
64
|
The assistant will answer based on documents whose CONTENTS match the QUERY,
|
62
65
|
possibly REPHRASED.
|
66
|
+
!!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
|
63
67
|
- an OPTIONAL SINGLE-LINE Pandas-dataframe calculation/aggregation string
|
64
68
|
that can be used to calculate the answer to the original query,
|
65
69
|
e.g. "df["rating"].mean()",
|
@@ -99,7 +103,7 @@ class LanceQueryPlanAgentConfig(ChatAgentConfig):
|
|
99
103
|
hence this computation will give the total deaths in shoplifting crimes.
|
100
104
|
------------- END OF EXAMPLE ----------------
|
101
105
|
|
102
|
-
The FILTER must be a
|
106
|
+
The FILTER must be a PANDAS-like condition, e.g.
|
103
107
|
"year > 2000 AND genre = 'ScienceFiction'".
|
104
108
|
To ensure you get useful results, you should make your FILTER
|
105
109
|
NOT TOO STRICT, e.g. look for approximate match using LIKE, etc.
|
@@ -0,0 +1,171 @@
|
|
1
|
+
"""
|
2
|
+
QueryPlanCritic is a ChatAgent that is created with a specific document schema.
|
3
|
+
|
4
|
+
Its role is to provide feedback on a Query Plan, which consists of:
|
5
|
+
- filter condition if needed (or empty string if no filter is needed)
|
6
|
+
- query - a possibly rephrased query that can be used to match the `content` field
|
7
|
+
- dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
|
8
|
+
- original_query - the original query for reference
|
9
|
+
- result - the answer received from an assistant that used this QUERY PLAN.
|
10
|
+
|
11
|
+
This agent has access to two tools:
|
12
|
+
- QueryPlanAnswerTool: The handler method for this tool re-writes the query plan
|
13
|
+
in plain text (non-JSON) so the LLM can provide its feedback using the
|
14
|
+
QueryPlanFeedbackTool.
|
15
|
+
- QueryPlanFeedbackTool: LLM uses this tool to provide feedback on the Query Plan
|
16
|
+
"""
|
17
|
+
|
18
|
+
import logging
|
19
|
+
|
20
|
+
from langroid.agent.chat_agent import ChatAgent
|
21
|
+
from langroid.agent.chat_document import ChatDocument
|
22
|
+
from langroid.agent.special.lance_rag.query_planner_agent import (
|
23
|
+
LanceQueryPlanAgentConfig,
|
24
|
+
)
|
25
|
+
from langroid.agent.special.lance_tools import (
|
26
|
+
QueryPlanAnswerTool,
|
27
|
+
QueryPlanFeedbackTool,
|
28
|
+
)
|
29
|
+
from langroid.mytypes import Entity
|
30
|
+
from langroid.utils.constants import DONE, NO_ANSWER
|
31
|
+
|
32
|
+
logger = logging.getLogger(__name__)
|
33
|
+
|
34
|
+
|
35
|
+
class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
    """Configuration (agent name + system prompt) for the QueryPlanCritic agent."""

    name = "QueryPlanCritic"
    # The prompt is an f-string evaluated once, when the class body runs:
    #   - `{NO_ANSWER}` is interpolated immediately;
    #   - `{{doc_schema}}` is an escaped brace pair, so the resulting string
    #     still contains the literal placeholder `{doc_schema}`.
    # Presumably `set_system_message()` (defined outside this view) fills that
    # placeholder from the `doc_schema` config field -- TODO confirm.
    # NOTE(review): the prompt text contains the typos "address the the error"
    # and "the dataframe calculation is find the SIZE"; left untouched here
    # because the text is sent to the LLM at runtime.
    system_message = f"""
You are an expert at carefully planning a query that needs to be answered
based on a large collection of documents. These docs have a special `content` field
and additional FILTERABLE fields in the SCHEMA below, along with the
SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.

{{doc_schema}}

The ORIGINAL QUERY is handled by a QUERY PLANNER who sends the PLAN to an ASSISTANT,
who returns an ANSWER.

You will receive a QUERY PLAN consisting of:
- ORIGINAL QUERY from the user, which a QUERY PLANNER processes,
to create a QUERY PLAN, to be handled by an ASSISTANT.
- PANDAS-LIKE FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
FILTER SHOULD ONLY BE USED IF EXPLICITLY REQUIRED BY THE QUERY.
- REPHRASED QUERY (CANNOT BE EMPTY) that will be used to match against the
CONTENT (not filterable) of the documents.
In general the REPHRASED QUERY should be relied upon to match the CONTENT
of the docs. Thus the REPHRASED QUERY itself acts like a
SEMANTIC/LEXICAL/FUZZY FILTER since the Assistant is able to use it to match
the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
so the REPHRASED QUERY should NOT mention ANY FILTER fields.
The assistant will answer based on documents whose CONTENTS match the QUERY,
possibly REPHRASED.
!!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
- DATAFRAME CALCULATION, which must be a SINGLE LINE calculation (or empty),
[NOTE ==> This calculation is applied AFTER the FILTER and REPHRASED QUERY.],
- ANSWER received from an assistant that used this QUERY PLAN.
NOTE -- the ANSWER will usually NOT contain any references to FILTERING conditions,
and this is ALLOWED, since the ANSWER is based on documents AFTER FILTERING.

In addition to the above SCHEMA fields there is a `content` field which:
- CANNOT appear in a FILTER,
- CAN appear in the DATAFRAME CALCULATION.
THERE ARE NO OTHER FIELDS IN THE DOCUMENTS or in the RESULTING DATAFRAME.

Your job is to act as a CRITIC and provide feedback,
ONLY using the `query_plan_feedback` tool, and DO NOT SAY ANYTHING ELSE.

Here is how you must examine the QUERY PLAN + ANSWER:
- ALL filtering conditions in the original query must be EXPLICITLY
mentioned in the FILTER, and the QUERY field should not be used for filtering.
- If the ANSWER contains an ERROR message, then this means that the query
plan execution FAILED, and your feedback should say INVALID along
with the ERROR message, `suggested_fix` that aims to help the assistant
fix the problem (or simply equals "address the the error shown in feedback")
- Ask yourself, is the ANSWER in the expected form, e.g.
if the question is asking for the name of an ENTITY with max SIZE,
then the answer should be the ENTITY name, NOT the SIZE!!
- It is perfectly FINE if the ANSWER does NOT contain any references to FILTERING
conditions, since the ANSWER is obtained from documents AFTER FILTERING!
- If the ANSWER is in the expected form, then the QUERY PLAN is likely VALID,
and your feedback should say VALID, with empty `suggested_fix`.
===> HOWEVER!!! Watch out for a spurious correct-looking answer, for EXAMPLE:
the query was to find the ENTITY with a maximum SIZE,
but the dataframe calculation is find the SIZE, NOT the ENTITY!!
- If the ANSWER is {NO_ANSWER} or of the wrong form,
then try to DIAGNOSE the problem IN THE FOLLOWING ORDER:
- DATAFRAME CALCULATION -- is it doing the right thing?
Is it finding the Index of a row instead of the value in a column?
Or another example: maybe it is finding the maximum population
rather than the CITY with the maximum population?
If you notice a problem with the DATAFRAME CALCULATION, then
ONLY SUBMIT FEEDBACK ON THE DATAFRAME CALCULATION, and DO NOT
SUGGEST ANYTHING ELSE.
- If the DATAFRAME CALCULATION looks correct, then check if
the REPHRASED QUERY makes sense given the ORIGINAL QUERY and FILTER.
If this is the problem, then ONLY SUBMIT FEEDBACK ON THE REPHRASED QUERY,
and DO NOT SUGGEST ANYTHING ELSE.
- If the REPHRASED QUERY looks correct, then check if the FILTER makes sense.
REMEMBER: A filter should ONLY be used if EXPLICITLY REQUIRED BY THE QUERY.


IMPORTANT!! The DATAFRAME CALCULATION is done AFTER applying the
FILTER and REPHRASED QUERY! Keep this in mind when evaluating
the correctness of the DATAFRAME CALCULATION.

ALWAYS use `query_plan_feedback` tool/fn to present your feedback
in the `feedback` field, and if any fix is suggested,
present it in the `suggested_fix` field.
DO NOT SAY ANYTHING ELSE OUTSIDE THE TOOL/FN.
IF NO REVISION NEEDED, simply leave the `suggested_fix` field EMPTY,
and SAY NOTHING ELSE
and DO NOT EXPLAIN YOURSELF.
"""
|
124
|
+
|
125
|
+
|
126
|
+
def plain_text_query_plan(msg: QueryPlanAnswerTool) -> str:
    """Render a query plan and the answer it produced as labelled plain text.

    Args:
        msg: tool message exposing `plan` (with `original_query`, `filter`,
            `query`, `dataframe_calc`) and `answer`.

    Returns:
        A multi-line string with one labelled line per field, wrapped in a
        leading and a trailing newline.
    """
    query_plan = msg.plan
    return f"""
OriginalQuery: {query_plan.original_query}
Filter: {query_plan.filter}
Rephrased Query: {query_plan.query}
DataframeCalc: {query_plan.dataframe_calc}
Answer: {msg.answer}
"""
|
135
|
+
|
136
|
+
|
137
|
+
class QueryPlanCritic(ChatAgent):
    """
    Critic counterpart of LanceQueryPlanAgent: reviews a query plan together
    with the answer it produced, and reports feedback.
    """

    def __init__(self, cfg: LanceQueryPlanAgentConfig):
        super().__init__(cfg)
        self.config = cfg
        # Answer tool: handle only.  Feedback tool: use and handle.
        self.enable_message(QueryPlanAnswerTool, use=False, handle=True)
        self.enable_message(QueryPlanFeedbackTool, use=True, handle=True)

    def query_plan_answer(self, msg: QueryPlanAnswerTool) -> str:
        """Turn the query plan + answer into plain (non-JSON) text
        so that the LLM can critique it."""
        plan_text = plain_text_query_plan(msg)
        return plan_text

    def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> ChatDocument:
        """Feedback tool was well-formed: wrap it in a DONE agent response
        that carries the tool message back to the caller."""
        response = self.create_agent_response(DONE)
        response.tool_messages = [msg]
        return response

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """Nudge the LLM to use QueryPlanFeedbackTool when it replied without it.

        Returns the reminder text only for a ChatDocument sent by the LLM;
        any other message yields None.
        """
        sent_by_llm = (
            isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM
        )
        if not sent_by_llm:
            return None
        return """
You forgot to use the `query_plan_feedback` tool/function.
Re-try your response using the `query_plan_feedback` tool/function,
remember to provide feedback in the `feedback` field,
and if any fix is suggested, provide it in the `suggested_fix` field.
"""
|
@@ -0,0 +1,144 @@
|
|
1
|
+
"""
|
2
|
+
The LanceRAGTaskCreator.new() method creates a 3-Agent system that uses this agent.
|
3
|
+
It takes a LanceDocChatAgent instance as argument, and adds two more agents:
|
4
|
+
- LanceQueryPlanAgent, which is given the LanceDB schema in LanceDocChatAgent,
|
5
|
+
and based on this schema, for a given user query, creates a Query Plan
|
6
|
+
using the QueryPlanTool, which contains a filter, a rephrased query,
|
7
|
+
and a dataframe_calc.
|
8
|
+
- QueryPlanCritic, which is given the LanceDB schema in LanceDocChatAgent,
|
9
|
+
and gives feedback on the Query Plan and Result using the QueryPlanFeedbackTool.
|
10
|
+
|
11
|
+
The LanceRAGTaskCreator.new() method sets up the given LanceDocChatAgent and
|
12
|
+
QueryPlanCritic as sub-tasks of the LanceQueryPlanAgent's task.
|
13
|
+
|
14
|
+
Langroid's built-in task orchestration ensures that:
|
15
|
+
- the LanceQueryPlanAgent reformulates the plan based
|
16
|
+
on the QueryPlanCritic's feedback,
|
17
|
+
- LLM deviations are corrected via tools and overrides of ChatAgent methods.
|
18
|
+
"""
|
19
|
+
|
20
|
+
import logging
|
21
|
+
|
22
|
+
from langroid.agent.special.lance_tools import (
|
23
|
+
QueryPlanAnswerTool,
|
24
|
+
)
|
25
|
+
from langroid.agent.task import Task
|
26
|
+
from langroid.mytypes import Entity
|
27
|
+
from langroid.utils.constants import NO_ANSWER
|
28
|
+
|
29
|
+
from ..lance_doc_chat_agent import LanceDocChatAgent
|
30
|
+
from .critic_agent import (
|
31
|
+
QueryPlanCritic,
|
32
|
+
QueryPlanCriticConfig,
|
33
|
+
)
|
34
|
+
from .query_planner_agent import (
|
35
|
+
LanceQueryPlanAgent,
|
36
|
+
LanceQueryPlanAgentConfig,
|
37
|
+
)
|
38
|
+
|
39
|
+
logger = logging.getLogger(__name__)
|
40
|
+
|
41
|
+
|
42
|
+
def run_lance_rag_task(
    query: str,
    agent: LanceDocChatAgent,
    interactive: bool = True,
) -> str:
    """
    Answer `query` by running a planner -> RAG -> critic loop around `agent`.

    A LanceQueryPlanAgent and a QueryPlanCritic are created from the schema
    reported by `agent._get_clean_vecdb_schema()`, each wrapped in its own Task.
    In every round:
      1. the planner produces a query plan (re-prompted until its response
         carries a tool message),
      2. the RAG task (wrapping `agent`) runs the plan to get an answer,
      3. the critic reviews plan + answer and returns feedback.
    The loop ends as soon as the critic suggests no fix and the answer does
    not contain NO_ANSWER, or after a fixed number of rounds.

    Args:
        query: the user's original question.
        agent: the LanceDocChatAgent that executes query plans.
        interactive: forwarded to the planner's Task.

    Returns:
        The content of the last answer produced by the RAG task.
    """
    doc_agent_name = "LanceRAG"
    critic_name = "QueryPlanCritic"
    query_plan_agent_config = LanceQueryPlanAgentConfig(
        critic_name=critic_name,
        doc_agent_name=doc_agent_name,
        doc_schema=agent._get_clean_vecdb_schema(),
    )
    query_plan_agent_config.set_system_message()

    query_planner = LanceQueryPlanAgent(query_plan_agent_config)
    query_plan_task = Task(
        query_planner,
        interactive=interactive,
        restart=False,
        done_if_response=[Entity.AGENT],
    )
    # TODO - figure out how to define the fns so we avoid re-creating
    # agents in each invocation. Right now we are defining the fn
    # inside this context, which may not be great.

    rag_task = Task(
        agent,
        name="LanceRAG",
        restart=True,  # default; no need to accumulate dialog
        interactive=False,
        done_if_response=[Entity.LLM],  # done when non-null response from LLM
        done_if_no_response=[Entity.LLM],  # done when null response from LLM
    )

    critic_config = QueryPlanCriticConfig(
        doc_schema=agent._get_clean_vecdb_schema(),
    )
    critic_config.set_system_message()

    critic_agent = QueryPlanCritic(critic_config)
    critic_task = Task(
        critic_agent,
        interactive=False,
        restart=True,  # default; no need to accumulate dialog
    )

    no_answer = False
    feedback = None
    # The original `while i := i + 1 < 5:` bound `i` to the comparison result
    # (always True), so the cap never applied. Four rounds is what
    # `(i := i + 1) < 5` would have allowed.
    max_rounds = 4
    for _ in range(max_rounds):
        # query, feedback (QueryPlanFeedbackTool) => ChatDocument[QueryPlanTool]
        if feedback is not None and feedback.suggested_fix != "":
            prompt = f"""
A Critic has seen your Query Plan and the Answer, and has given the
following feedback. Take it into account and re-generate your Query Plan
for the QUERY:

QUERY: {query}
FEEDBACK: {feedback.feedback}
SUGGESTED FIX: {feedback.suggested_fix}
"""
        elif no_answer:
            prompt = f"There was a {NO_ANSWER} response; try a different query plan"
        else:
            prompt = query

        while True:
            plan_doc = query_plan_task.run(prompt)
            # Task.run is treated as nullable below (rag_task), so guard the
            # planner result the same way instead of crashing on None.
            if plan_doc is not None and len(plan_doc.tool_messages) > 0:
                break
            # forgot to use QueryPlanTool
            prompt = """You forgot to use the `query_plan` tool/function. Try again."""

        # TODO if plan_doc does NOT have a QueryPlan, remind the agent

        # ChatDocument with QueryPlanTool => ChatDocument with answer
        rag_answer_doc = rag_task.run(plan_doc)

        if rag_answer_doc is None:
            rag_answer_doc = rag_task.agent.create_llm_response(NO_ANSWER)
        # QueryPlan, answer => QueryPlanAnswerTool
        plan_answer_tool = QueryPlanAnswerTool(
            plan=plan_doc.tool_messages[0].plan,
            answer=rag_answer_doc.content,
        )
        # QueryPlanAnswerTool => ChatDocument[QueryPlanAnswerTool]
        plan_answer_doc = agent.create_agent_response(tool_messages=[plan_answer_tool])

        # ChatDocument[QueryPlanAnswerTool] => ChatDocument[QueryPlanFeedbackTool]
        # NOTE(review): assumes the critic always returns a document carrying
        # a QueryPlanFeedbackTool -- confirm against QueryPlanCritic/Task.
        feedback_doc = critic_task.run(plan_answer_doc)
        # ChatDocument[QueryPlanFeedbackTool] => QueryPlanFeedbackTool
        feedback = feedback_doc.tool_messages[0]  # QueryPlanFeedbackTool
        no_answer = NO_ANSWER in rag_answer_doc.content
        if feedback.suggested_fix == "" and not no_answer:
            break

    # query_plan_task.add_sub_task([critic_task, rag_task])
    return rag_answer_doc.content
|