langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. langroid/__init__.py +95 -0
  2. langroid/agent/__init__.py +40 -0
  3. langroid/agent/base.py +222 -91
  4. langroid/agent/batch.py +264 -0
  5. langroid/agent/callbacks/chainlit.py +608 -0
  6. langroid/agent/chat_agent.py +247 -101
  7. langroid/agent/chat_document.py +41 -4
  8. langroid/agent/openai_assistant.py +842 -0
  9. langroid/agent/special/__init__.py +50 -0
  10. langroid/agent/special/doc_chat_agent.py +837 -141
  11. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  12. langroid/agent/special/lance_rag/__init__.py +9 -0
  13. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  14. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  15. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  16. langroid/agent/special/lance_tools.py +44 -0
  17. langroid/agent/special/neo4j/__init__.py +0 -0
  18. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  20. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  21. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  22. langroid/agent/special/relevance_extractor_agent.py +127 -0
  23. langroid/agent/special/retriever_agent.py +32 -198
  24. langroid/agent/special/sql/__init__.py +11 -0
  25. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  26. langroid/agent/special/sql/utils/__init__.py +22 -0
  27. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  28. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  29. langroid/agent/special/table_chat_agent.py +43 -9
  30. langroid/agent/task.py +475 -122
  31. langroid/agent/tool_message.py +75 -13
  32. langroid/agent/tools/__init__.py +13 -0
  33. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  34. langroid/agent/tools/google_search_tool.py +11 -0
  35. langroid/agent/tools/metaphor_search_tool.py +67 -0
  36. langroid/agent/tools/recipient_tool.py +16 -29
  37. langroid/agent/tools/run_python_code.py +60 -0
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/agent/tools/segment_extract_tool.py +36 -0
  40. langroid/cachedb/__init__.py +9 -0
  41. langroid/cachedb/base.py +22 -2
  42. langroid/cachedb/momento_cachedb.py +26 -2
  43. langroid/cachedb/redis_cachedb.py +78 -11
  44. langroid/embedding_models/__init__.py +34 -0
  45. langroid/embedding_models/base.py +21 -2
  46. langroid/embedding_models/models.py +120 -18
  47. langroid/embedding_models/protoc/embeddings.proto +19 -0
  48. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  49. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  50. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  51. langroid/embedding_models/remote_embeds.py +153 -0
  52. langroid/language_models/__init__.py +45 -0
  53. langroid/language_models/azure_openai.py +80 -27
  54. langroid/language_models/base.py +117 -12
  55. langroid/language_models/config.py +5 -0
  56. langroid/language_models/openai_assistants.py +3 -0
  57. langroid/language_models/openai_gpt.py +558 -174
  58. langroid/language_models/prompt_formatter/__init__.py +15 -0
  59. langroid/language_models/prompt_formatter/base.py +4 -6
  60. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  61. langroid/language_models/utils.py +18 -21
  62. langroid/mytypes.py +25 -8
  63. langroid/parsing/__init__.py +46 -0
  64. langroid/parsing/document_parser.py +260 -63
  65. langroid/parsing/image_text.py +32 -0
  66. langroid/parsing/parse_json.py +143 -0
  67. langroid/parsing/parser.py +122 -59
  68. langroid/parsing/repo_loader.py +114 -52
  69. langroid/parsing/search.py +68 -63
  70. langroid/parsing/spider.py +3 -2
  71. langroid/parsing/table_loader.py +44 -0
  72. langroid/parsing/url_loader.py +59 -11
  73. langroid/parsing/urls.py +85 -37
  74. langroid/parsing/utils.py +298 -4
  75. langroid/parsing/web_search.py +73 -0
  76. langroid/prompts/__init__.py +11 -0
  77. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  78. langroid/prompts/prompts_config.py +1 -1
  79. langroid/utils/__init__.py +17 -0
  80. langroid/utils/algorithms/__init__.py +3 -0
  81. langroid/utils/algorithms/graph.py +103 -0
  82. langroid/utils/configuration.py +36 -5
  83. langroid/utils/constants.py +4 -0
  84. langroid/utils/globals.py +2 -2
  85. langroid/utils/logging.py +2 -5
  86. langroid/utils/output/__init__.py +21 -0
  87. langroid/utils/output/printing.py +47 -1
  88. langroid/utils/output/status.py +33 -0
  89. langroid/utils/pandas_utils.py +30 -0
  90. langroid/utils/pydantic_utils.py +616 -2
  91. langroid/utils/system.py +98 -0
  92. langroid/vector_store/__init__.py +40 -0
  93. langroid/vector_store/base.py +203 -6
  94. langroid/vector_store/chromadb.py +59 -32
  95. langroid/vector_store/lancedb.py +463 -0
  96. langroid/vector_store/meilisearch.py +10 -7
  97. langroid/vector_store/momento.py +262 -0
  98. langroid/vector_store/qdrantdb.py +104 -22
  99. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
  100. langroid-0.1.219.dist-info/RECORD +127 -0
  101. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
  102. langroid/agent/special/recipient_validator_agent.py +0 -157
  103. langroid/parsing/json.py +0 -64
  104. langroid/utils/web/selenium_login.py +0 -36
  105. langroid-0.1.85.dist-info/RECORD +0 -94
  106. /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
  107. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
@@ -0,0 +1,258 @@
1
+ """
2
+ LanceDocChatAgent is a subclass of DocChatAgent that uses LanceDB as a vector store:
3
+ - Uses the DocChatAgentConfig.filter variable
4
+ (a sql string) in the `where` clause to do filtered vector search.
5
+ - Overrides the get_similar_chunks_bm25() to use LanceDB FTS (Full Text Search).
6
+
7
+ For usage see:
8
+ - `tests/main/test_lance_doc_chat_agent.py`.
9
+ - example script `examples/docqa/lance_rag.py`.
10
+
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from typing import Any, Dict, List, Tuple
16
+
17
+ import pandas as pd
18
+
19
+ from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig
20
+ from langroid.agent.special.lance_tools import QueryPlanTool
21
+ from langroid.mytypes import DocMetaData, Document
22
+ from langroid.parsing.table_loader import describe_dataframe
23
+ from langroid.utils.constants import DONE, NO_ANSWER
24
+ from langroid.utils.pydantic_utils import (
25
+ clean_schema,
26
+ dataframe_to_documents,
27
+ )
28
+ from langroid.vector_store.lancedb import LanceDB
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class LanceDocChatAgent(DocChatAgent):
    """
    DocChatAgent variant that uses LanceDB as its vector store.

    - Handles `QueryPlanTool` messages (filter + rephrased query + optional
      dataframe calculation) via `query_plan()`.
    - Uses `config.filter` (a SQL-like string) as the `where` clause of searches.
    - Overrides `get_similar_chunks_bm25()` to use LanceDB Full Text Search.
    """

    vecdb: LanceDB

    def __init__(self, cfg: DocChatAgentConfig):
        super().__init__(cfg)
        self.config: DocChatAgentConfig = cfg
        # This agent only *handles* query plans; it never generates them.
        self.enable_message(QueryPlanTool, use=False, handle=True)

    def _create_fts_index(self) -> None:
        """(Re)build the LanceDB full-text-search index on the `content` column.

        Does nothing when the table has no top-level `content` field.
        """
        tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
        # We assume "content" is available as top-level field
        if "content" in tbl.schema.names:
            tbl.create_fts_index("content", replace=True)

    def _get_clean_vecdb_schema(self) -> str:
        """Get a cleaned schema of the vector-db, to pass to the LLM
        as part of instructions on how to generate a SQL filter.

        Side effect: when `config.filter_fields` is empty, it is populated
        with the table's (flattened) column names minus internal fields.

        Returns:
            The dataframe description when the data came from
            `ingest_dataframe()`, otherwise a JSON rendering of the schema
            followed by notes on which fields may appear in a filter.
        """
        if len(self.config.filter_fields) == 0:
            all_columns = (
                self.vecdb.client.open_table(self.vecdb.config.collection_name)
                .search()
                .limit(1)
                .to_pandas(flatten=True)
                .columns.tolist()
            )
            # drop id, vector, metadata.id, metadata.window_ids, metadata.is_chunk
            internal_fields = [
                "id",
                "vector",
                "metadata.id",
                "metadata.window_ids",
                "metadata.is_chunk",
            ]
            filterable_fields = [c for c in all_columns if c not in internal_fields]
            logger.warning(
                f"""
                No filter_fields set in config, so using these fields as filterable fields:
                {filterable_fields}
                """
            )
            self.config.filter_fields = filterable_fields

        if self.from_dataframe:
            return self.df_description
        schema_dict = clean_schema(
            self.vecdb.schema,
            excludes=["id", "vector"],
        )
        # intersect config.filter_fields with schema_dict.keys() in case
        # there are extraneous fields in config.filter_fields
        filter_fields_set = set(
            self.config.filter_fields or schema_dict.keys()
        ).intersection(schema_dict.keys())

        # remove 'content' from filter_fields_set, even if it's not in filter_fields_set
        filter_fields_set.discard("content")

        # possible values of filterable fields
        filter_field_values = self.get_field_values(list(filter_fields_set))

        # add field values to schema_dict as another field `values` for each field
        for field, values in filter_field_values.items():
            if field in schema_dict:
                schema_dict[field]["values"] = values
        # if self.config.filter_fields is set, restrict to these:
        if len(self.config.filter_fields) > 0:
            schema_dict = {
                k: v for k, v in schema_dict.items() if k in self.config.filter_fields
            }
        schema = json.dumps(schema_dict, indent=4)

        schema += f"""
        NOTE when creating a filter for a query,
        ONLY the following fields are allowed:
        {",".join(self.config.filter_fields)}
        """
        if len(content_fields := self.config.add_fields_to_content) > 0:
            schema += f"""
            Additional fields added to `content` as key=value pairs:
            NOTE that these CAN Help with matching queries!
            {content_fields}
            """
        return schema

    def query_plan(self, msg: QueryPlanTool) -> str:
        """
        Handle the LLM's use of the QueryPlanTool.

        Sets `config.filter` from the plan (it is not restored afterwards) and
        either returns the final answer, prefixed with DONE, when the plan has
        a dataframe_calc, or returns the rephrased query so the LLM can
        handle it.

        Args:
            msg: tool message whose `plan` carries `filter`, `query` and
                `dataframe_calc`.

        Returns:
            A DONE-prefixed answer or error message, or the plan's query.
        """
        # create document-subset based on this filter
        plan = msg.plan
        try:
            self.setup_documents(filter=plan.filter or None)
        except Exception as e:
            # Deliberately broad: any failure here is reported back to the
            # planner as a possible filter problem rather than crashing.
            logger.error(f"Error setting up documents: {e}")
            # say DONE with err msg so it goes back to the query planner
            return f"""
            {DONE} Possible Filter Error:\n {e}

            Note that only the following fields are allowed in the filter
            of a query plan:
            {", ".join(self.config.filter_fields)}
            """

        # update the filter so it is used in the DocChatAgent
        self.config.filter = plan.filter or None
        if not plan.dataframe_calc:
            # pass on the query so LLM can handle it
            return plan.query

        # we just get relevant docs then do the calculation
        # TODO if calc causes err, it is captured in result,
        # and LLM can correct the calc based on the err,
        # and this will cause retrieval all over again,
        # which may be wasteful if only the calc part is wrong.
        # The calc step can later be done with a separate Agent/Tool.
        if plan.query is None or plan.query.strip() == "":
            if plan.filter is None or plan.filter.strip() == "":
                # use the DONE constant, consistent with every other return here
                return f"""{DONE}
                Cannot execute Query Plan since filter as well as
                rephrased query are empty.
                """
            # no query to match, so just get all docs matching filter
            docs = self.vecdb.get_all_documents(plan.filter)
        else:
            _, docs = self.get_relevant_extracts(plan.query)
        if len(docs) == 0:
            return DONE + " " + NO_ANSWER
        result = self.vecdb.compute_from_docs(docs, plan.dataframe_calc)
        return DONE + " " + result

    def ingest_docs(
        self,
        docs: List[Document],
        split: bool = True,
        metadata: (
            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
        ) = [],
    ) -> int:
        """Ingest documents via the parent class, then refresh the FTS index.

        The `metadata` default is kept as-is for signature compatibility with
        the parent; it is only passed through, never mutated here.

        Returns:
            Whatever the parent's `ingest_docs()` returns.
        """
        n = super().ingest_docs(docs, split, metadata)
        self._create_fts_index()
        return n

    def ingest_dataframe(
        self,
        df: pd.DataFrame,
        content: str = "content",
        metadata: List[str] = [],
    ) -> int:
        """Ingest from a dataframe. Assume we are doing this once, not incrementally

        Args:
            df: rows to ingest; must be non-empty. The caller's frame is
                not modified.
            content: name of the column holding the text content.
            metadata: names of the columns to treat as metadata.

        Returns:
            Number of rows in `df`.

        Raises:
            ValueError: if `df` has no rows.
        """

        self.from_dataframe = True
        if df.shape[0] == 0:
            raise ValueError(
                """
                LanceDocChatAgent.ingest_dataframe() received an empty dataframe.
                """
            )
        n = df.shape[0]

        # If any additional fields need to be added to content,
        # add them as key=value pairs, into the `content` field for all rows.
        # This helps retrieval for table-like data.
        # Note we need to do this at this stage so that the embeddings
        # are computed on the full content with these additional fields.
        fields = [f for f in self.config.add_fields_to_content if f in df.columns]
        if len(fields) > 0:
            # Work on a copy so the caller's dataframe is left untouched
            # (otherwise re-ingesting the same frame would double-prefix it).
            df = df.copy()
            df[content] = df.apply(
                lambda row: (",".join(f"{f}={row[f]}" for f in fields))
                + ", content="
                # str() so non-string cells do not raise TypeError
                + str(row[content]),
                axis=1,
            )

        df, metadata = DocChatAgent.document_compatible_dataframe(df, content, metadata)
        self.df_description = describe_dataframe(
            df,
            filter_fields=self.config.filter_fields,
            n_vals=10,
        )
        self.vecdb.add_dataframe(df, content="content", metadata=metadata)
        self._create_fts_index()

        # We still need to do the below so that
        # other types of searches in DocChatAgent
        # can work, as they require Document objects
        docs = dataframe_to_documents(df, content="content", metadata=metadata)
        self.setup_documents(docs)
        # mark each doc as already-chunked so we don't try to split them further
        # TODO later we may want to split large text-columns
        for d in docs:
            d.metadata.is_chunk = True
        return n  # type: ignore

    def get_similar_chunks_bm25(
        self, query: str, multiple: int
    ) -> List[Tuple[Document, float]]:
        """
        Override the DocChatAgent.get_similar_chunks_bm25()
        to use LanceDB FTS (Full Text Search).

        Args:
            query: free-text query.
            multiple: multiplier applied to `config.parsing.n_similar_docs`
                to get the result limit.

        Returns:
            (document, score) pairs, using the `score` field of each result.
        """
        # Clean up query: replace all newlines with spaces in query,
        # force special search keywords to lower case, remove quotes,
        # so it's not interpreted as search syntax
        query_clean = (
            query.replace("\n", " ")
            .replace("AND", "and")
            .replace("OR", "or")
            .replace("NOT", "not")
            .replace("'", "")
            .replace('"', "")
        )

        tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
        result = (
            tbl.search(query_clean)
            .where(self.config.filter or None)
            .limit(self.config.parsing.n_similar_docs * multiple)
        )
        docs = self.vecdb._lance_result_to_docs(result)
        scores = [r["score"] for r in result.to_list()]
        return list(zip(docs, scores))
@@ -0,0 +1,9 @@
1
+ from . import query_planner_agent
2
+ from . import critic_agent
3
+ from . import lance_rag_task
4
+
5
+ __all__ = [
6
+ "query_planner_agent",
7
+ "critic_agent",
8
+ "lance_rag_task",
9
+ ]
@@ -0,0 +1,136 @@
1
+ """
2
+ QueryPlanCritic is a ChatAgent that is created with a specific document schema.
3
+
4
+ Its role is to provide feedback on a Query Plan, which consists of:
5
+ - filter condition if needed (or empty string if no filter is needed)
6
+ - query - a possibly rephrased query that can be used to match the `content` field
7
+ - dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
8
+ - original_query - the original query for reference
9
+ - result - the answer received from an assistant that used this QUERY PLAN.
10
+
11
+ This agent has access to two tools:
12
+ - QueryPlanTool: The handler method for this tool re-writes the query plan
13
+ in plain text (non-JSON) so the LLM can provide its feedback using the
14
+ QueryPlanFeedbackTool.
15
+ - QueryPlanFeedbackTool: LLM uses this tool to provide feedback on the Query Plan
16
+ """
17
+
18
+ import logging
19
+
20
+ from langroid.agent.chat_agent import ChatAgent
21
+ from langroid.agent.chat_document import ChatDocument
22
+ from langroid.agent.special.lance_rag.query_planner_agent import (
23
+ LanceQueryPlanAgentConfig,
24
+ )
25
+ from langroid.agent.special.lance_tools import (
26
+ QueryPlanAnswerTool,
27
+ QueryPlanFeedbackTool,
28
+ )
29
+ from langroid.mytypes import Entity
30
+ from langroid.utils.constants import DONE, NO_ANSWER, PASS
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
    """Configuration for the QueryPlanCritic agent."""

    name = "QueryPlanCritic"
    # `{{doc_schema}}` survives the f-string as a literal `{doc_schema}`
    # placeholder, to be filled in later by `set_system_message()`;
    # `{NO_ANSWER}` is interpolated right here at class-definition time.
    system_message = f"""
    You are an expert at carefully planning a query that needs to be answered
    based on a large collection of documents. These docs have a special `content` field
    and additional FILTERABLE fields in the SCHEMA below:

    {{doc_schema}}

    You will receive a QUERY PLAN consisting of:
    - ORIGINAL QUERY,
    - SQL-Like FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
      FILTER SHOULD ONLY BE USED IF EXPLICITLY REQUIRED BY THE QUERY.
    - REPHRASED QUERY that will be used to match against the CONTENT (not filterable)
      of the documents.
      In general the REPHRASED QUERY should be relied upon to match the CONTENT
      of the docs. Thus the REPHRASED QUERY itself acts like a
      SEMANTIC/LEXICAL/FUZZY FILTER since the Assistant is able to use it to match
      the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).

    - DATAFRAME CALCULATION, and
    - ANSWER received from an assistant that used this QUERY PLAN.

    In addition to the above SCHEMA fields there is a `content` field which:
    - CANNOT appear in a FILTER,
    - CAN appear in the DATAFRAME CALCULATION.
    THERE ARE NO OTHER FIELDS IN THE DOCUMENTS or in the RESULTING DATAFRAME.

    Your job is to act as a CRITIC and provide feedback,
    ONLY using the `query_plan_feedback` tool, and DO NOT SAY ANYTHING ELSE.

    Here is how you must examine the QUERY PLAN + ANSWER:
    - If the ANSWER is in the expected form, then the QUERY PLAN is likely VALID,
      and your feedback should be EMPTY.
    - If the ANSWER is {NO_ANSWER} or of the wrong form,
      then try to DIAGNOSE the problem IN THE FOLLOWING ORDER:
      - DATAFRAME CALCULATION -- is it doing the right thing?
        Is it finding the Index of a row instead of the value in a column?
        Or another example: maybe it is finding the maximum population
           rather than the CITY with the maximum population?
        If you notice a problem with the DATAFRAME CALCULATION, then
        ONLY SUBMIT FEEDBACK ON THE DATAFRAME CALCULATION, and DO NOT
        SUGGEST ANYTHING ELSE.
      - If the DATAFRAME CALCULATION looks correct, then check if
        the REPHRASED QUERY makes sense given the ORIGINAL QUERY and FILTER.
        If this is the problem, then ONLY SUBMIT FEEDBACK ON THE REPHRASED QUERY,
        and DO NOT SUGGEST ANYTHING ELSE.
      - If the REPHRASED QUERY looks correct, then check if the FILTER makes sense.
        REMEMBER: A filter should ONLY be used if EXPLICITLY REQUIRED BY THE QUERY.


    ALWAYS use `query_plan_feedback` tool/fn to present your feedback!
    and DO NOT SAY ANYTHING ELSE OUTSIDE THE TOOL/FN.
    IF NO REVISION NEEDED, simply give EMPTY FEEDBACK, SAY NOTHING ELSE
    and DO NOT EXPLAIN YOURSELF.

    """
92
+
93
+
94
def plain_text_query_plan(msg: QueryPlanAnswerTool) -> str:
    """Render a query plan and its answer as labelled plain-text lines.

    Args:
        msg: tool message exposing `plan` (with `original_query`, `filter`,
            `query`, `dataframe_calc`) and `answer`.

    Returns:
        A multi-line string with one `Label: value` line per field.
    """
    query_plan = msg.plan
    return f"""
    OriginalQuery: {query_plan.original_query}
    Filter: {query_plan.filter}
    Query: {query_plan.query}
    DataframeCalc: {query_plan.dataframe_calc}
    Answer: {msg.answer}
    """
103
+
104
+
105
class QueryPlanCritic(ChatAgent):
    """
    Critic agent paired with LanceQueryPlanAgent: it receives a query plan
    together with the answer obtained from it, and responds with feedback.
    """

    def __init__(self, cfg: LanceQueryPlanAgentConfig):
        super().__init__(cfg)
        self.config = cfg
        # Incoming plan+answer is handled (not generated) by this agent ...
        self.enable_message(QueryPlanAnswerTool, use=False, handle=True)
        # ... while the feedback tool is both generated by the LLM and handled.
        self.enable_message(QueryPlanFeedbackTool, use=True, handle=True)

    def query_plan_answer(self, msg: QueryPlanAnswerTool) -> str:
        """Convert the plan + answer to plain text (rather than JSON),
        which is what the LLM is asked to critique."""
        return plain_text_query_plan(msg)

    def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> str:
        """The feedback tool was well-formed: signal DONE and pass it along,
        so control returns to the Query Planner."""
        return f"{DONE} {PASS}"

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """Wrap a tool-less LLM reply in a QueryPlanFeedbackTool.

        Only ChatDocuments sent by the LLM are rewritten; anything else
        yields None.
        """
        if not isinstance(msg, ChatDocument) or msg.metadata.sender != Entity.LLM:
            return None
        # The LLM answered in free text instead of using the feedback tool:
        # treat that text as the feedback, then mark the message as DONE.
        msg.tool_messages = [QueryPlanFeedbackTool(feedback=msg.content)]
        msg.content = DONE
        return msg
@@ -0,0 +1,80 @@
1
+ """
2
+ The LanceRAGTaskCreator.new() method creates a 3-Agent system that uses this agent.
3
+ It takes a LanceDocChatAgent instance as argument, and adds two more agents:
4
+ - LanceQueryPlanAgent, which is given the LanceDB schema in LanceDocChatAgent,
5
+ and based on this schema, for a given user query, creates a Query Plan
6
+ using the QueryPlanTool, which contains a filter, a rephrased query,
7
+ and a dataframe_calc.
8
+ - QueryPlanCritic, which is given the LanceDB schema in LanceDocChatAgent,
9
+ and gives feedback on the Query Plan and Result using the QueryPlanFeedbackTool.
10
+
11
+ The LanceRAGTaskCreator.new() method sets up the given LanceDocChatAgent and
12
+ QueryPlanCritic as sub-tasks of the LanceQueryPlanAgent's task.
13
+
14
+ Langroid's built-in task orchestration ensures that:
15
+ - the LanceQueryPlanAgent reformulates the plan based
16
+ on the QueryPlanCritic's feedback,
17
+ - LLM deviations are corrected via tools and overrides of ChatAgent methods.
18
+ """
19
+
20
+ import logging
21
+
22
+ from langroid.agent.special.lance_doc_chat_agent import LanceDocChatAgent
23
+ from langroid.agent.special.lance_rag.critic_agent import (
24
+ QueryPlanCritic,
25
+ QueryPlanCriticConfig,
26
+ )
27
+ from langroid.agent.special.lance_rag.query_planner_agent import (
28
+ LanceQueryPlanAgent,
29
+ LanceQueryPlanAgentConfig,
30
+ )
31
+ from langroid.agent.task import Task
32
+ from langroid.mytypes import Entity
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class LanceRAGTaskCreator:
    """Factory for the planner / critic / RAG three-task setup."""

    @staticmethod
    def new(
        agent: LanceDocChatAgent,
        interactive: bool = True,
    ) -> Task:
        """
        Add a LanceQueryPlanAgent and a QueryPlanCritic to the given
        LanceDocChatAgent, set up the corresponding Tasks, connect them,
        and return the top-level query_plan_task.

        Args:
            agent: the document agent that answers query plans.
            interactive: passed to the top-level planner Task only; the
                critic and RAG tasks are always non-interactive.

        Returns:
            The planner Task, with the critic and RAG tasks as sub-tasks.
        """
        doc_agent_name = "LanceRAG"
        critic_name = "QueryPlanCritic"
        # Compute the schema once and share it between both configs,
        # rather than calling the agent twice for the same result.
        doc_schema = agent._get_clean_vecdb_schema()

        query_plan_agent_config = LanceQueryPlanAgentConfig(
            critic_name=critic_name,
            doc_agent_name=doc_agent_name,
            doc_schema=doc_schema,
        )
        query_plan_agent_config.set_system_message()

        critic_config = QueryPlanCriticConfig(
            # keep the critic's name in sync with what the planner addresses
            name=critic_name,
            doc_schema=doc_schema,
        )
        critic_config.set_system_message()

        query_planner = LanceQueryPlanAgent(query_plan_agent_config)
        query_plan_task = Task(
            query_planner,
            interactive=interactive,
        )
        critic_agent = QueryPlanCritic(critic_config)
        critic_task = Task(
            critic_agent,
            interactive=False,
        )
        rag_task = Task(
            agent,
            # must match the planner config's doc_agent_name
            name=doc_agent_name,
            interactive=False,
            done_if_response=[Entity.LLM],  # done when non-null response from LLM
            done_if_no_response=[Entity.LLM],  # done when null response from LLM
        )
        query_plan_task.add_sub_task([critic_task, rag_task])
        return query_plan_task
@@ -0,0 +1,180 @@
1
+ """
2
+ LanceQueryPlanAgent is a ChatAgent created with a specific document schema.
3
+ Given a QUERY, the LLM constructs a Query Plan consisting of:
4
+ - filter condition if needed (or empty string if no filter is needed)
5
+ - query - a possibly rephrased query that can be used to match the `content` field
6
+ - dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
7
+ - original_query - the original query for reference
8
+
9
+ This agent has access to two tools:
10
+ - QueryPlanTool, which is used to generate the Query Plan, and the handler of
11
+ this tool simply passes it on to the RAG agent named in config.doc_agent_name.
12
+ - QueryPlanFeedbackTool, which is used to handle feedback on the Query Plan and
13
+ Result from the RAG agent. The QueryPlanFeedbackTool is used by
14
+ the QueryPlanCritic, who inserts feedback into the `feedback` field
15
+ """
16
+
17
+ import logging
18
+
19
+ from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
20
+ from langroid.agent.chat_document import ChatDocument
21
+ from langroid.agent.special.lance_tools import (
22
+ QueryPlan,
23
+ QueryPlanAnswerTool,
24
+ QueryPlanFeedbackTool,
25
+ QueryPlanTool,
26
+ )
27
+ from langroid.utils.constants import DONE, NO_ANSWER, PASS_TO
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
class LanceQueryPlanAgentConfig(ChatAgentConfig):
    """Configuration for LanceQueryPlanAgent."""

    name: str = "LancePlanner"
    # name of the critic agent; used as the recipient of plan + answer
    critic_name: str = "QueryPlanCritic"
    # name of the RAG agent that query plans are passed to
    doc_agent_name: str = "LanceRAG"
    # schema text substituted into the system message by set_system_message()
    doc_schema: str = ""
    use_tools = False
    use_functions_api = True

    # `{{doc_schema}}` survives the f-string as a literal `{doc_schema}`
    # placeholder; `{NO_ANSWER}` and `{DONE}` are interpolated right here.
    system_message = f"""
    You will receive a QUERY, to be answered based on an EXTREMELY LARGE collection
    of documents you DO NOT have access to, but your ASSISTANT does.
    You only know that these documents have a special `content` field
    and additional FILTERABLE fields in the SCHEMA below:

    {{doc_schema}}

    Based on the QUERY and the above SCHEMA, your task is to determine a QUERY PLAN,
    consisting of:
    - a FILTER (can be empty string) that would help the ASSISTANT to answer the query.
      Remember the FILTER can refer to ANY fields in the above SCHEMA
      EXCEPT the `content` field of the documents.
      ONLY USE A FILTER IF EXPLICITLY MENTIONED IN THE QUERY.
      TO get good results, for STRING MATCHES, consider using LIKE instead of =, e.g.
      "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
    - a possibly REPHRASED QUERY to be answerable given the FILTER.
      Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
      so the REPHRASED QUERY should NOT mention ANY FILTER fields.
      The ASSISTANT will answer based on documents whose CONTENTS match the QUERY,
      possibly REPHRASED.
    - a Pandas-dataframe calculation/aggregation string that can be used to calculate
      the answer to the original query, e.g. "df["rating"].mean()",
      or "df.groupby("director").mean()["rating"]", etc, or empty string if no calc
      is needed. The dataframe calc CAN refer to the `content` field.


    EXAMPLE:
    -------
    Suppose there is a document-set about crime reports, where:
    CONTENT = crime report,
    Filterable SCHEMA consists of City, Year, num_deaths.

    Then given this ORIGINAL QUERY:

        Total deaths in shoplifting crimes in Los Angeles in 2023?

    A POSSIBLE QUERY PLAN could be:

    FILTER: "City LIKE '%Los Angeles%' AND Year = 2023"
    REPHRASED QUERY: "shoplifting crime" --> this will be used to MATCH content of docs
        [NOTE: we dropped the FILTER fields City and Year since the
        ASSISTANT does not know about them and only uses the query to
        match the CONTENT of the docs.]
    DATAFRAME CALCULATION: "df["num_deaths"].sum()"

    ------------- END OF EXAMPLE ----------------

    The FILTER must be a SQL-like condition, e.g.
    "year > 2000 AND genre = 'ScienceFiction'".
    To ensure you get useful results, you should make your FILTER
    NOT TOO STRICT, e.g. look for approximate match using LIKE, etc.
    E.g. "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
    Use DOT NOTATION to refer to nested fields, e.g. `metadata.year`, etc.

    You must FIRST present the QUERY PLAN using the `query_plan` tool/function.
    This will be handled by your document assistant, who will produce an ANSWER.

    You may receive FEEDBACK on your QUERY PLAN and received ANSWER,
    from the 'QueryPlanCritic' who may offer suggestions for
    a better FILTER, REPHRASED QUERY, or DATAFRAME CALCULATION.

    If you keep getting feedback or keep getting a {NO_ANSWER} from the assistant
    at least 3 times, then simply say '{DONE} {NO_ANSWER}' and nothing else.

    At the BEGINNING if there is no query, ASK the user what they want to know.
    """

    def set_system_message(self) -> None:
        """Substitute `doc_schema` into the `{doc_schema}` placeholder.

        Safe to call more than once: after the first substitution the
        placeholder is gone, so later calls are no-ops. Without this guard a
        second `.format()` would try to interpret the braces of the
        already-inserted schema text as replacement fields and raise.
        """
        if "{doc_schema}" not in self.system_message:
            return
        self.system_message = self.system_message.format(
            doc_schema=self.doc_schema,
        )
112
+
113
+
114
class LanceQueryPlanAgent(ChatAgent):
    """
    Planner agent: emits a QueryPlanTool, passes it to the RAG agent,
    sends the RAG answer to the critic, and reacts to the critic's feedback.
    """

    def __init__(self, config: LanceQueryPlanAgentConfig):
        super().__init__(config)
        self.config: LanceQueryPlanAgentConfig = config
        self.curr_query_plan: QueryPlan | None = None
        self.result: str = ""  # answer received from LanceRAG
        # The LLM generates QueryPlanTool, and this agent also handles it
        # (for validation) before it is forwarded.
        self.enable_message(QueryPlanTool, use=True, handle=True)
        # Feedback is only received here, never generated.
        self.enable_message(QueryPlanFeedbackTool, use=False, handle=True)

    def query_plan(self, msg: QueryPlanTool) -> str:
        """The plan parsed as a valid tool: remember it, then hand it to the
        RAG agent named in the config."""
        # kept so handle_message_fallback() can assemble a QueryPlanAnswerTool
        self.curr_query_plan = msg.plan
        return PASS_TO + self.config.doc_agent_name

    def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> str:
        """React to the Critic's feedback on the QueryPlan + RAG answer.

        Non-empty feedback is relayed to the LLM so it can revise the plan.
        Empty feedback means the critic approves; the reply then depends on
        the answer stored in `self.result`.
        """
        # self.result is expected to be populated already: feedback is only
        # requested after the RAG answer has been received.
        if msg.feedback != "":
            return f"""
        here is FEEDBACK about your QUERY PLAN. Modify it if needed:
        {msg.feedback}
        """
        # Empty feedback: Query Plan or Result is good, as judged by Critic
        if self.result == "":
            # feedback arrived for a query that has no result
            return "QUERY PLAN LOOKS GOOD!"
        if self.result == NO_ANSWER:
            return NO_ANSWER
        # non-empty and non-null answer
        return DONE + " " + self.result

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """
        Treat a tool-less message as the RAG Agent's answer:
        package it with the pending plan into a QueryPlanAnswerTool
        and address that to the Critic for feedback.

        Returns None unless `msg` is a ChatDocument with a parent and a
        query plan is pending.
        """
        # TODO we don't need to use this fallback method. instead we can
        # first call result = super().agent_response(), and if result is None,
        # then we know there was no tool, so we run below code
        is_rag_answer = (
            isinstance(msg, ChatDocument)
            and self.curr_query_plan is not None
            and msg.metadata.parent is not None
        )
        if not is_rag_answer:
            return None

        # save result, to be used in query_plan_feedback()
        self.result = msg.content
        answer_tool = QueryPlanAnswerTool(
            plan=self.curr_query_plan,
            answer=self.result,
        )
        reply = self.agent_response_template()
        # (Notice how the Agent is directly sending a tool, not the LLM)
        reply.tool_messages = [answer_tool]
        # set the recipient to the Critic so it can give feedback
        reply.metadata.recipient = self.config.critic_name
        self.curr_query_plan = None  # reset
        return reply