langroid 0.31.1__py3-none-any.whl → 0.33.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. {langroid-0.31.1.dist-info → langroid-0.33.3.dist-info}/METADATA +150 -124
  2. langroid-0.33.3.dist-info/RECORD +7 -0
  3. {langroid-0.31.1.dist-info → langroid-0.33.3.dist-info}/WHEEL +1 -1
  4. langroid-0.33.3.dist-info/entry_points.txt +4 -0
  5. pyproject.toml +317 -212
  6. langroid/__init__.py +0 -106
  7. langroid/agent/.chainlit/config.toml +0 -121
  8. langroid/agent/.chainlit/translations/bn.json +0 -231
  9. langroid/agent/.chainlit/translations/en-US.json +0 -229
  10. langroid/agent/.chainlit/translations/gu.json +0 -231
  11. langroid/agent/.chainlit/translations/he-IL.json +0 -231
  12. langroid/agent/.chainlit/translations/hi.json +0 -231
  13. langroid/agent/.chainlit/translations/kn.json +0 -231
  14. langroid/agent/.chainlit/translations/ml.json +0 -231
  15. langroid/agent/.chainlit/translations/mr.json +0 -231
  16. langroid/agent/.chainlit/translations/ta.json +0 -231
  17. langroid/agent/.chainlit/translations/te.json +0 -231
  18. langroid/agent/.chainlit/translations/zh-CN.json +0 -229
  19. langroid/agent/__init__.py +0 -41
  20. langroid/agent/base.py +0 -1981
  21. langroid/agent/batch.py +0 -398
  22. langroid/agent/callbacks/__init__.py +0 -0
  23. langroid/agent/callbacks/chainlit.py +0 -598
  24. langroid/agent/chat_agent.py +0 -1899
  25. langroid/agent/chat_document.py +0 -454
  26. langroid/agent/helpers.py +0 -0
  27. langroid/agent/junk +0 -13
  28. langroid/agent/openai_assistant.py +0 -882
  29. langroid/agent/special/__init__.py +0 -59
  30. langroid/agent/special/arangodb/__init__.py +0 -0
  31. langroid/agent/special/arangodb/arangodb_agent.py +0 -656
  32. langroid/agent/special/arangodb/system_messages.py +0 -186
  33. langroid/agent/special/arangodb/tools.py +0 -107
  34. langroid/agent/special/arangodb/utils.py +0 -36
  35. langroid/agent/special/doc_chat_agent.py +0 -1466
  36. langroid/agent/special/lance_doc_chat_agent.py +0 -262
  37. langroid/agent/special/lance_rag/__init__.py +0 -9
  38. langroid/agent/special/lance_rag/critic_agent.py +0 -198
  39. langroid/agent/special/lance_rag/lance_rag_task.py +0 -82
  40. langroid/agent/special/lance_rag/query_planner_agent.py +0 -260
  41. langroid/agent/special/lance_tools.py +0 -61
  42. langroid/agent/special/neo4j/__init__.py +0 -0
  43. langroid/agent/special/neo4j/csv_kg_chat.py +0 -174
  44. langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -433
  45. langroid/agent/special/neo4j/system_messages.py +0 -120
  46. langroid/agent/special/neo4j/tools.py +0 -32
  47. langroid/agent/special/relevance_extractor_agent.py +0 -127
  48. langroid/agent/special/retriever_agent.py +0 -56
  49. langroid/agent/special/sql/__init__.py +0 -17
  50. langroid/agent/special/sql/sql_chat_agent.py +0 -654
  51. langroid/agent/special/sql/utils/__init__.py +0 -21
  52. langroid/agent/special/sql/utils/description_extractors.py +0 -190
  53. langroid/agent/special/sql/utils/populate_metadata.py +0 -85
  54. langroid/agent/special/sql/utils/system_message.py +0 -35
  55. langroid/agent/special/sql/utils/tools.py +0 -64
  56. langroid/agent/special/table_chat_agent.py +0 -263
  57. langroid/agent/structured_message.py +0 -9
  58. langroid/agent/task.py +0 -2093
  59. langroid/agent/tool_message.py +0 -393
  60. langroid/agent/tools/__init__.py +0 -38
  61. langroid/agent/tools/duckduckgo_search_tool.py +0 -50
  62. langroid/agent/tools/file_tools.py +0 -234
  63. langroid/agent/tools/google_search_tool.py +0 -39
  64. langroid/agent/tools/metaphor_search_tool.py +0 -67
  65. langroid/agent/tools/orchestration.py +0 -303
  66. langroid/agent/tools/recipient_tool.py +0 -235
  67. langroid/agent/tools/retrieval_tool.py +0 -32
  68. langroid/agent/tools/rewind_tool.py +0 -137
  69. langroid/agent/tools/segment_extract_tool.py +0 -41
  70. langroid/agent/typed_task.py +0 -19
  71. langroid/agent/xml_tool_message.py +0 -382
  72. langroid/agent_config.py +0 -0
  73. langroid/cachedb/__init__.py +0 -17
  74. langroid/cachedb/base.py +0 -58
  75. langroid/cachedb/momento_cachedb.py +0 -108
  76. langroid/cachedb/redis_cachedb.py +0 -153
  77. langroid/embedding_models/__init__.py +0 -39
  78. langroid/embedding_models/base.py +0 -74
  79. langroid/embedding_models/clustering.py +0 -189
  80. langroid/embedding_models/models.py +0 -461
  81. langroid/embedding_models/protoc/__init__.py +0 -0
  82. langroid/embedding_models/protoc/embeddings.proto +0 -19
  83. langroid/embedding_models/protoc/embeddings_pb2.py +0 -33
  84. langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -50
  85. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -79
  86. langroid/embedding_models/remote_embeds.py +0 -153
  87. langroid/exceptions.py +0 -65
  88. langroid/experimental/team-save.py +0 -391
  89. langroid/language_models/.chainlit/config.toml +0 -121
  90. langroid/language_models/.chainlit/translations/en-US.json +0 -231
  91. langroid/language_models/__init__.py +0 -53
  92. langroid/language_models/azure_openai.py +0 -153
  93. langroid/language_models/base.py +0 -678
  94. langroid/language_models/config.py +0 -18
  95. langroid/language_models/mock_lm.py +0 -124
  96. langroid/language_models/openai_gpt.py +0 -1923
  97. langroid/language_models/prompt_formatter/__init__.py +0 -16
  98. langroid/language_models/prompt_formatter/base.py +0 -40
  99. langroid/language_models/prompt_formatter/hf_formatter.py +0 -132
  100. langroid/language_models/prompt_formatter/llama2_formatter.py +0 -75
  101. langroid/language_models/utils.py +0 -147
  102. langroid/mytypes.py +0 -84
  103. langroid/parsing/__init__.py +0 -52
  104. langroid/parsing/agent_chats.py +0 -38
  105. langroid/parsing/code-parsing.md +0 -86
  106. langroid/parsing/code_parser.py +0 -121
  107. langroid/parsing/config.py +0 -0
  108. langroid/parsing/document_parser.py +0 -718
  109. langroid/parsing/image_text.py +0 -32
  110. langroid/parsing/para_sentence_split.py +0 -62
  111. langroid/parsing/parse_json.py +0 -155
  112. langroid/parsing/parser.py +0 -313
  113. langroid/parsing/repo_loader.py +0 -790
  114. langroid/parsing/routing.py +0 -36
  115. langroid/parsing/search.py +0 -275
  116. langroid/parsing/spider.py +0 -102
  117. langroid/parsing/table_loader.py +0 -94
  118. langroid/parsing/url_loader.py +0 -111
  119. langroid/parsing/url_loader_cookies.py +0 -73
  120. langroid/parsing/urls.py +0 -273
  121. langroid/parsing/utils.py +0 -373
  122. langroid/parsing/web_search.py +0 -155
  123. langroid/prompts/__init__.py +0 -9
  124. langroid/prompts/chat-gpt4-system-prompt.md +0 -68
  125. langroid/prompts/dialog.py +0 -17
  126. langroid/prompts/prompts_config.py +0 -5
  127. langroid/prompts/templates.py +0 -141
  128. langroid/pydantic_v1/__init__.py +0 -10
  129. langroid/pydantic_v1/main.py +0 -4
  130. langroid/utils/.chainlit/config.toml +0 -121
  131. langroid/utils/.chainlit/translations/en-US.json +0 -231
  132. langroid/utils/__init__.py +0 -19
  133. langroid/utils/algorithms/__init__.py +0 -3
  134. langroid/utils/algorithms/graph.py +0 -103
  135. langroid/utils/configuration.py +0 -98
  136. langroid/utils/constants.py +0 -30
  137. langroid/utils/docker.py +0 -37
  138. langroid/utils/git_utils.py +0 -252
  139. langroid/utils/globals.py +0 -49
  140. langroid/utils/llms/__init__.py +0 -0
  141. langroid/utils/llms/strings.py +0 -8
  142. langroid/utils/logging.py +0 -135
  143. langroid/utils/object_registry.py +0 -66
  144. langroid/utils/output/__init__.py +0 -20
  145. langroid/utils/output/citations.py +0 -41
  146. langroid/utils/output/printing.py +0 -99
  147. langroid/utils/output/status.py +0 -40
  148. langroid/utils/pandas_utils.py +0 -30
  149. langroid/utils/pydantic_utils.py +0 -602
  150. langroid/utils/system.py +0 -286
  151. langroid/utils/types.py +0 -93
  152. langroid/utils/web/__init__.py +0 -0
  153. langroid/utils/web/login.py +0 -83
  154. langroid/vector_store/__init__.py +0 -50
  155. langroid/vector_store/base.py +0 -357
  156. langroid/vector_store/chromadb.py +0 -214
  157. langroid/vector_store/lancedb.py +0 -401
  158. langroid/vector_store/meilisearch.py +0 -299
  159. langroid/vector_store/momento.py +0 -278
  160. langroid/vector_store/qdrant_cloud.py +0 -6
  161. langroid/vector_store/qdrantdb.py +0 -468
  162. langroid-0.31.1.dist-info/RECORD +0 -162
  163. {langroid-0.31.1.dist-info → langroid-0.33.3.dist-info/licenses}/LICENSE +0 -0
@@ -1,262 +0,0 @@
1
- """
2
- LanceDocChatAgent is a subclass of DocChatAgent that uses LanceDB as a vector store:
3
- - Uses the DocChatAgentConfig.filter variable
4
- (a sql string) in the `where` clause to do filtered vector search.
5
- - Overrides the get_similar_chunks_bm25() to use LanceDB FTS (Full Text Search).
6
-
7
- For usage see:
8
- - `tests/main/test_lance_doc_chat_agent.py`.
9
- - example script `examples/docqa/lance_rag.py`.
10
-
11
- """
12
-
13
- import json
14
- import logging
15
- from typing import Any, Dict, List, Tuple
16
-
17
- import pandas as pd
18
-
19
- from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig
20
- from langroid.agent.special.lance_tools import AnswerTool, QueryPlanTool
21
- from langroid.agent.tools.orchestration import AgentDoneTool
22
- from langroid.mytypes import DocMetaData, Document
23
- from langroid.parsing.table_loader import describe_dataframe
24
- from langroid.utils.constants import NO_ANSWER
25
- from langroid.utils.pydantic_utils import (
26
- dataframe_to_documents,
27
- )
28
- from langroid.vector_store.lancedb import LanceDB
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
-
33
class LanceDocChatAgent(DocChatAgent):
    """DocChatAgent specialization whose vector store is a LanceDB table.

    On top of the base class this agent:
    - handles `QueryPlanTool` messages (see `query_plan`),
    - (re)builds a LanceDB full-text-search index on the `content` column
      after every ingestion,
    - overrides `get_similar_chunks_bm25` to use LanceDB full-text search.
    """

    vecdb: LanceDB

    def __init__(self, cfg: DocChatAgentConfig):
        """Create the agent and enable handling (not generation) of QueryPlanTool."""
        super().__init__(cfg)
        self.config: DocChatAgentConfig = cfg
        # use=False, handle=True: this agent only handles the tool.
        self.enable_message(QueryPlanTool, use=False, handle=True)

    def _get_clean_vecdb_schema(self) -> str:
        """Get a cleaned schema of the vector-db, to pass to the LLM
        as part of instructions on how to generate a SQL filter.

        Side effect: when `config.filter_fields` is empty, it is populated with
        all flattened table columns except a fixed set of internal ones.

        Returns:
            `self.df_description` when `self.from_dataframe` is truthy;
            otherwise a JSON dump mapping every filterable field to the values
            reported by `get_field_values` and to its pandas dtype name,
            followed by plain-text notes for the LLM.
        """
        # A single row is enough to learn the flattened column names and dtypes.
        tbl_pandas = (
            self.vecdb.client.open_table(self.vecdb.config.collection_name)
            .search()
            .limit(1)
            .to_pandas(flatten=True)
        )
        if not self.config.filter_fields:
            # drop id, vector, metadata.id, metadata.window_ids, metadata.is_chunk
            filterable_fields = list(
                set(tbl_pandas.columns.tolist())
                - {
                    "id",
                    "vector",
                    "metadata.id",
                    "metadata.window_ids",
                    "metadata.is_chunk",
                }
            )
            logger.warning(
                f"""
                No filter_fields set in config, so using these fields as filterable fields:
                {filterable_fields}
                """
            )
            self.config.filter_fields = filterable_fields

        if self.from_dataframe:
            return self.df_description
        filter_fields_set = set(self.config.filter_fields)

        # `content` is never filterable; discard() is a no-op when it is absent.
        filter_fields_set.discard("content")

        # possible values of filterable fields
        filter_field_values = self.get_field_values(list(filter_fields_set))

        schema_dict: Dict[str, Dict[str, Any]] = {
            field: {} for field in filter_fields_set
        }
        # add field values to schema_dict as another field `values` for each field
        for field, values in filter_field_values.items():
            schema_dict[field]["values"] = values
            schema_dict[field]["dtype"] = tbl_pandas[field].dtype.name
        # if self.config.filter_fields is set, restrict to these:
        if self.config.filter_fields:
            schema_dict = {
                k: v for k, v in schema_dict.items() if k in self.config.filter_fields
            }
        schema = json.dumps(schema_dict, indent=4)

        schema += f"""
        NOTE when creating a filter for a query,
        ONLY the following fields are allowed:
        {",".join(self.config.filter_fields)}
        """
        if len(content_fields := self.config.add_fields_to_content) > 0:
            schema += f"""
            Additional fields added to `content` as key=value pairs:
            NOTE that these CAN Help with matching queries!
            {content_fields}
            """
        return schema

    def query_plan(self, msg: QueryPlanTool) -> AgentDoneTool | str:
        """
        Handle a QueryPlanTool message.

        Applies the plan's filter (it is also stored in `config.filter`), then
        either computes `dataframe_calc` over the retrieved documents or
        passes the rephrased query to `llm_response`.

        Args:
            msg: tool message whose `plan` carries `filter`, `query` and
                `dataframe_calc`.

        Returns:
            An AgentDoneTool. It wraps an AnswerTool on success; it carries a
            plain-text `content` when the filter fails, when both filter and
            query are empty, or when no documents match.
        """
        # create document-subset based on this filter
        plan = msg.plan
        try:
            self.setup_documents(filter=plan.filter or None)
        except Exception as e:
            logger.error(f"Error setting up documents: {e}")
            # say DONE with err msg so it goes back to the sender of the plan
            return AgentDoneTool(
                content=f"""
                Possible Filter Error:\n {e}

                Note that only the following fields are allowed in the filter
                of a query plan:
                {", ".join(self.config.filter_fields)}
                """
            )

        # update the filter so it is used in the DocChatAgent
        self.config.filter = plan.filter or None
        if plan.dataframe_calc:
            # we just get relevant docs then do the calculation
            # TODO if calc causes err, it is captured in result,
            # and LLM can correct the calc based on the err,
            # and this will cause retrieval all over again,
            # which may be wasteful if only the calc part is wrong.
            # The calc step can later be done with a separate Agent/Tool.
            if plan.query is None or plan.query.strip() == "":
                if plan.filter is None or plan.filter.strip() == "":
                    return AgentDoneTool(
                        content="""
                        Cannot execute Query Plan since filter as well as
                        rephrased query are empty.
                        """
                    )
                # no query to match, so just get all docs matching filter
                docs = self.vecdb.get_all_documents(plan.filter)
            else:
                _, docs = self.get_relevant_extracts(plan.query)
            if len(docs) == 0:
                return AgentDoneTool(content=NO_ANSWER)
            answer = self.vecdb.compute_from_docs(docs, plan.dataframe_calc)
        else:
            # pass on the query so LLM can handle it
            response = self.llm_response(plan.query)
            answer = NO_ANSWER if response is None else response.content
        return AgentDoneTool(tools=[AnswerTool(answer=answer)])

    def _create_fts_index(self) -> None:
        """(Re)build the full-text-search index on the top-level `content` column.

        Does nothing when the table has no top-level `content` field.
        """
        tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
        # We assume "content" is available as top-level field
        if "content" in tbl.schema.names:
            tbl.create_fts_index("content", replace=True)

    def ingest_docs(
        self,
        docs: List[Document],
        split: bool = True,
        metadata: (
            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
        ) = [],
    ) -> int:
        """Ingest documents via the base class, then refresh the FTS index.

        Args:
            docs: documents to ingest.
            split: forwarded unchanged to `DocChatAgent.ingest_docs`.
            metadata: forwarded unchanged to `DocChatAgent.ingest_docs`
                (the default list is never mutated in this method).

        Returns:
            Whatever `DocChatAgent.ingest_docs` returns.
        """
        n = super().ingest_docs(docs, split, metadata)
        self._create_fts_index()
        return n

    def ingest_dataframe(
        self,
        df: pd.DataFrame,
        content: str = "content",
        metadata: List[str] = [],
    ) -> int:
        """Ingest from a dataframe. Assume we are doing this once, not incrementally

        Args:
            df: rows to ingest; must be non-empty.
            content: name of the column holding the text content.
            metadata: names of the metadata columns (the default list is only
                rebound here, never mutated).

        Returns:
            Number of rows in `df`.

        Raises:
            ValueError: if `df` has no rows.
        """
        # Validate before touching agent state, so a rejected call cannot
        # leave `from_dataframe` set without a matching `df_description`.
        if df.shape[0] == 0:
            raise ValueError(
                """
                LanceDocChatAgent.ingest_dataframe() received an empty dataframe.
                """
            )
        n = df.shape[0]

        # If any additional fields need to be added to content,
        # add them as key=value pairs, into the `content` field for all rows.
        # This helps retrieval for table-like data.
        # Note we need to do this at this stage so that the embeddings
        # are computed on the full content with these additional fields.
        fields = [f for f in self.config.add_fields_to_content if f in df.columns]
        if fields:
            # Work on a copy: rewriting the caller's column in place would
            # double-prefix the content if the same dataframe is ingested again.
            df = df.copy()
            # Formatting the content via f-string (like the other fields) also
            # accepts non-string values instead of raising TypeError.
            df[content] = df.apply(
                lambda row: ",".join(f"{f}={row[f]}" for f in fields)
                + f", content={row[content]}",
                axis=1,
            )

        df, metadata = DocChatAgent.document_compatible_dataframe(df, content, metadata)
        self.df_description = describe_dataframe(
            df,
            filter_fields=self.config.filter_fields,
            n_vals=10,
        )
        # Flag and description are set together: the schema method returns
        # `df_description` whenever `from_dataframe` is truthy.
        self.from_dataframe = True
        self.vecdb.add_dataframe(df, content="content", metadata=metadata)

        self._create_fts_index()
        # We still need to do the below so that
        # other types of searches in DocChatAgent
        # can work, as they require Document objects
        docs = dataframe_to_documents(df, content="content", metadata=metadata)
        self.setup_documents(docs)
        # mark each doc as already-chunked so we don't try to split them further
        # TODO later we may want to split large text-columns
        for d in docs:
            d.metadata.is_chunk = True
        return n  # type: ignore

    def get_similar_chunks_bm25(
        self, query: str, multiple: int
    ) -> List[Tuple[Document, float]]:
        """
        Override the DocChatAgent.get_similar_chunks_bm25()
        to use LanceDB FTS (Full Text Search).

        Args:
            query: free-text query; it is sanitized before being searched.
            multiple: multiplier applied to `config.parsing.n_similar_docs`
                to obtain the result limit.

        Returns:
            (document, score) pairs, using the `score` field of each result row.
        """
        # Clean up query: replace all newlines with spaces in query,
        # force special search keywords to lower case, remove quotes,
        # so it's not interpreted as search syntax
        query_clean = (
            query.replace("\n", " ")
            .replace("AND", "and")
            .replace("OR", "or")
            .replace("NOT", "not")
            .replace("'", "")
            .replace('"', "")
            .replace(":", "--")
        )

        tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
        result = (
            tbl.search(query_clean)
            .where(self.config.filter or None)
            .limit(self.config.parsing.n_similar_docs * multiple)
        )
        docs = self.vecdb._lance_result_to_docs(result)
        scores = [r["score"] for r in result.to_list()]
        return list(zip(docs, scores))
@@ -1,9 +0,0 @@
1
# Import the submodules eagerly so they are available as attributes of this
# package once the package itself has been imported.
from . import query_planner_agent
from . import critic_agent
from . import lance_rag_task

# Names exported by `from <package> import *`: the three submodules above.
__all__ = [
    "query_planner_agent",
    "critic_agent",
    "lance_rag_task",
]
@@ -1,198 +0,0 @@
1
- """
2
- QueryPlanCritic is a ChatAgent that is created with a specific document schema.
3
-
4
- Its role is to provide feedback on a Query Plan, which consists of:
5
- - filter condition if needed (or empty string if no filter is needed)
6
- - query - a possibly rephrased query that can be used to match the `content` field
7
- - dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
8
- - original_query - the original query for reference
9
- - result - the answer received from an assistant that used this QUERY PLAN.
10
-
11
- This agent has access to two tools:
12
- - QueryPlanAnswerTool: The handler method for this tool re-writes the query plan
13
- in plain text (non-JSON) so the LLM can provide its feedback using the
14
- QueryPlanFeedbackTool.
15
- - QueryPlanFeedbackTool: LLM uses this tool to provide feedback on the Query Plan
16
- """
17
-
18
- import logging
19
-
20
- from langroid.agent.chat_agent import ChatAgent
21
- from langroid.agent.chat_document import ChatDocument
22
- from langroid.agent.special.lance_rag.query_planner_agent import (
23
- LanceQueryPlanAgentConfig,
24
- )
25
- from langroid.agent.special.lance_tools import (
26
- QueryPlanAnswerTool,
27
- QueryPlanFeedbackTool,
28
- )
29
- from langroid.agent.tools.orchestration import AgentDoneTool
30
- from langroid.utils.constants import NO_ANSWER
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
class QueryPlanCriticConfig(LanceQueryPlanAgentConfig):
    """Configuration of the QueryPlanCritic agent.

    Fixes the agent name and supplies the critic's system-message template.
    The template is an f-string: `NO_ANSWER` is interpolated when the class is
    defined, while the doubled braces leave a literal `{doc_schema}`
    placeholder in the text (presumably filled in later by
    `set_system_message()` -- confirm against LanceQueryPlanAgentConfig).
    """

    name = "QueryPlanCritic"
    system_message = f"""
    You are an expert at carefully planning a query that needs to be answered
    based on a large collection of documents. These docs have a special `content` field
    and additional FILTERABLE fields in the SCHEMA below, along with the
    SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.

    {{doc_schema}}

    The ORIGINAL QUERY is handled by a QUERY PLANNER who sends the PLAN to an ASSISTANT,
    who returns an ANSWER.

    You will receive a QUERY PLAN consisting of:
    - ORIGINAL QUERY from the user, which a QUERY PLANNER processes,
      to create a QUERY PLAN, to be handled by an ASSISTANT.
    - PANDAS-LIKE FILTER, WHICH CAN BE EMPTY (and it's fine if results sound reasonable)
      FILTER SHOULD ONLY BE USED IF EXPLICITLY REQUIRED BY THE QUERY.
      This filter selects the documents over which the REPHRASED QUERY will be applied,
      thus naturally, the Re-phrased Query should NOT mention any FILTER fields,
      since it applies to the documents AFTER FILTERING.
    - REPHRASED QUERY (CANNOT BE EMPTY) that will be used to match against the
      CONTENT (not filterable) of the documents.
      In general the REPHRASED QUERY should be relied upon to match the CONTENT
      of the docs. Thus the REPHRASED QUERY itself acts like a
      SEMANTIC/LEXICAL/FUZZY FILTER since the Assistant is able to use it to match
      the CONTENT of the docs in various ways (semantic, lexical, fuzzy, etc.).
      Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
      so the REPHRASED QUERY should NOT mention ANY FILTER fields.
      The assistant will answer based on documents whose CONTENTS match the QUERY,
      possibly REPHRASED.
      !!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!


    - DATAFRAME CALCULATION, which must be a SINGLE LINE calculation (or empty),
      [NOTE ==> This calculation is applied AFTER the FILTER and REPHRASED QUERY.],
    - ANSWER received from an assistant that used this QUERY PLAN.
      IT IS TOTALLY FINE FOR THE ANSWER TO NOT MENTION ANY FILTERING CONDITIONS,
      or if the ANSWER STATEMENT is MISSING SOME CRITERIA in the ORIGINAL QUERY.

    Here is an example of a VALID Plan + Answer:

    ORIGINAL QUERY: "Which crime novels were written by Russian authors after 1900?"
    FILTER: "author_nationality == 'Russian' and year_written > 1900"
    REPHRASED QUERY: "crime novel" [NOTICE NO FILTER FIELDS MENTIONED!!!]
    DATAFRAME CALC: ""
    ANSWER: "The Master and Margarita by Mikhail Bulgakov"
    [NOTICE the answer does NOT need to say "crime novel" or "russian author"]


    Other examples of VALID ANSWER for a given ORIGINAL QUERY:

    ORIGINAL QUERY: "Which mountain is taller than 8000 meters?"
    ANSWER: "Mount Everest" [NOTICE no mention of "taller than 8000 meters"]

    ORIGINAL QUERY: "Which country has hosted the most olympics?"
    ANSWER: "United States" [NOTICE no mention of "most olympics"]

    In addition to the above SCHEMA fields there is a `content` field which:
    - CANNOT appear in a FILTER,
    - CAN appear in the DATAFRAME CALCULATION.
    THERE ARE NO OTHER FIELDS IN THE DOCUMENTS or in the RESULTING DATAFRAME.

    Your job is to act as a CRITIC and provide feedback,
    ONLY using the `query_plan_feedback` tool, and DO NOT SAY ANYTHING ELSE.

    Here is how you must examine the QUERY PLAN + ANSWER:
    - ALL filtering conditions in the original query must be EXPLICITLY
      mentioned in the FILTER, and the QUERY field should not be used for filtering.
    - If the ANSWER contains an ERROR message, then this means that the query
      plan execution FAILED, and your feedback should say INVALID along
      with the ERROR message, `suggested_fix` that aims to help the assistant
      fix the problem (or simply equals "address the error shown in feedback")
    - Ask yourself, is the ANSWER in the expected form, e.g.
      if the question is asking for the name of an ENTITY with max SIZE,
      then the answer should be the ENTITY name, NOT the SIZE!!
    - If the ANSWER is in the expected form, then the QUERY PLAN is likely VALID,
      and your feedback should say VALID, with empty `suggested_fix`.
      ===> HOWEVER!!! Watch out for a spurious correct-looking answer, for EXAMPLE:
      the query was to find the ENTITY with a maximum SIZE,
      but the dataframe calculation finds the SIZE, NOT the ENTITY!!
    - If the ANSWER is {NO_ANSWER} or of the wrong form,
      then try to DIAGNOSE the problem IN THE FOLLOWING ORDER:
      - DATAFRAME CALCULATION -- is it doing the right thing?
        Is it finding the Index of a row instead of the value in a column?
        Or another example: maybe it is finding the maximum population
        rather than the CITY with the maximum population?
        If you notice a problem with the DATAFRAME CALCULATION, then
        ONLY SUBMIT FEEDBACK ON THE DATAFRAME CALCULATION, and DO NOT
        SUGGEST ANYTHING ELSE.
      - If the DATAFRAME CALCULATION looks correct, then check if
        the REPHRASED QUERY makes sense given the ORIGINAL QUERY and FILTER.
        If this is the problem, then ONLY SUBMIT FEEDBACK ON THE REPHRASED QUERY,
        and DO NOT SUGGEST ANYTHING ELSE.
      - If the REPHRASED QUERY looks correct, then check if the FILTER makes sense.
        REMEMBER: A filter should ONLY be used if EXPLICITLY REQUIRED BY THE QUERY.


    IMPORTANT!! The DATAFRAME CALCULATION is done AFTER applying the
    FILTER and REPHRASED QUERY! Keep this in mind when evaluating
    the correctness of the DATAFRAME CALCULATION.

    ALWAYS use `query_plan_feedback` tool/fn to present your feedback
    in the `feedback` field, and if any fix is suggested,
    present it in the `suggested_fix` field.
    DO NOT SAY ANYTHING ELSE OUTSIDE THE TOOL/FN.
    IF NO REVISION NEEDED, simply leave the `suggested_fix` field EMPTY,
    and SAY NOTHING ELSE
    and DO NOT EXPLAIN YOURSELF.
    """
145
-
146
-
147
def plain_text_query_plan(msg: QueryPlanAnswerTool) -> str:
    """Render a query plan and its answer as labelled plain-text lines.

    Args:
        msg: tool message providing `plan` (with `original_query`, `filter`,
            `query`, `dataframe_calc`) and `answer`.

    Returns:
        A multi-line string with one `Label: value` line per item.
    """
    query_plan = msg.plan
    return f"""
    OriginalQuery: {query_plan.original_query}
    Filter: {query_plan.filter}
    Rephrased Query: {query_plan.query}
    DataframeCalc: {query_plan.dataframe_calc}
    Answer: {msg.answer}
    """
156
-
157
-
158
class QueryPlanCritic(ChatAgent):
    """
    Agent that reviews a query plan together with the answer it produced,
    and reports its verdict through the QueryPlanFeedbackTool.
    """

    def __init__(self, cfg: LanceQueryPlanAgentConfig):
        super().__init__(cfg)
        self.config = cfg
        # (tool class, may the LLM generate it?) -- every tool is handled here.
        tool_settings = (
            (QueryPlanAnswerTool, False),
            (QueryPlanFeedbackTool, True),
            (AgentDoneTool, False),
        )
        for tool_cls, llm_may_use in tool_settings:
            self.enable_message(tool_cls, use=llm_may_use, handle=True)

    def init_state(self) -> None:
        """Reset state; initially no feedback tool is awaited."""
        super().init_state()
        self.expecting_feedback_tool = False

    def query_plan_answer(self, msg: QueryPlanAnswerTool) -> str:
        """Return the plan + answer as plain text (rather than JSON) for the
        LLM to critique, and note that a feedback tool is now awaited."""
        self.expecting_feedback_tool = True
        return plain_text_query_plan(msg)

    def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> AgentDoneTool:
        """Accept the feedback tool and finish, handing the tool back as the result."""
        self.expecting_feedback_tool = False
        # wrapping the tool in AgentDoneTool signals that this task is done
        return AgentDoneTool(tools=[msg])

    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """If a feedback tool was awaited, nudge the LLM to use it; else do nothing."""
        if not self.expecting_feedback_tool:
            return None
        return """
            You forgot to use the `query_plan_feedback` tool/function.
            Re-try your response using the `query_plan_feedback` tool/function,
            remember to provide feedback in the `feedback` field,
            and if any fix is suggested, provide it in the `suggested_fix` field.
            """
@@ -1,82 +0,0 @@
1
- """
2
- The LanceRAGTaskCreator.new() method creates a 3-Agent system that uses this agent.
3
- It takes a LanceDocChatAgent instance as argument, and adds two more agents:
4
- - LanceQueryPlanAgent, which is given the LanceDB schema in LanceDocChatAgent,
5
- and based on this schema, for a given user query, creates a Query Plan
6
- using the QueryPlanTool, which contains a filter, a rephrased query,
7
- and a dataframe_calc.
8
- - QueryPlanCritic, which is given the LanceDB schema in LanceDocChatAgent,
9
- and gives feedback on the Query Plan and Result using the QueryPlanFeedbackTool.
10
-
11
- The LanceRAGTaskCreator.new() method sets up the given LanceDocChatAgent and
12
- QueryPlanCritic as sub-tasks of the LanceQueryPlanAgent's task.
13
-
14
- Langroid's built-in task orchestration ensures that:
15
- - the LanceQueryPlanAgent reformulates the plan based
16
- on the QueryPlanCritic's feedback,
17
- - LLM deviations are corrected via tools and overrides of ChatAgent methods.
18
- """
19
-
20
- import logging
21
-
22
- from langroid.agent.special.lance_doc_chat_agent import LanceDocChatAgent
23
- from langroid.agent.special.lance_rag.critic_agent import (
24
- QueryPlanCritic,
25
- QueryPlanCriticConfig,
26
- )
27
- from langroid.agent.special.lance_rag.query_planner_agent import (
28
- LanceQueryPlanAgent,
29
- LanceQueryPlanAgentConfig,
30
- )
31
- from langroid.agent.task import Task
32
- from langroid.mytypes import Entity
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
class LanceRAGTaskCreator:
    """Factory that wires a LanceDocChatAgent into a planner/critic task tree."""

    @staticmethod
    def new(
        agent: LanceDocChatAgent,
        interactive: bool = True,
    ) -> Task:
        """
        Add a LanceQueryPlanAgent and a QueryPlanCritic to the LanceDocChatAgent,
        set up the corresponding Tasks, connect them,
        and return the top-level query_plan_task.

        Args:
            agent: the document-chat agent that executes query plans.
            interactive: passed to the top-level planner Task only; the
                critic and RAG sub-tasks are always non-interactive.

        Returns:
            The planner Task, with the critic task and the RAG task attached
            as sub-tasks (in that order).
        """
        doc_agent_name = "LanceRAG"
        critic_name = "QueryPlanCritic"
        # Computed once and shared by both configs: producing the schema
        # queries the table, so there is no reason to do it twice.
        doc_schema = agent._get_clean_vecdb_schema()

        query_plan_agent_config = LanceQueryPlanAgentConfig(
            critic_name=critic_name,
            doc_agent_name=doc_agent_name,
            doc_schema=doc_schema,
            llm=agent.config.llm,
        )
        query_plan_agent_config.set_system_message()

        critic_config = QueryPlanCriticConfig(
            doc_schema=doc_schema,
            llm=agent.config.llm,
        )
        critic_config.set_system_message()

        query_planner = LanceQueryPlanAgent(query_plan_agent_config)
        query_plan_task = Task(
            query_planner,
            interactive=interactive,
        )
        critic_agent = QueryPlanCritic(critic_config)
        critic_task = Task(
            critic_agent,
            interactive=False,
        )
        rag_task = Task(
            agent,
            # Same name the planner was configured with, so the two cannot drift.
            name=doc_agent_name,
            interactive=False,
            done_if_response=[Entity.LLM],  # done when non-null response from LLM
            done_if_no_response=[Entity.LLM],  # done when null response from LLM
        )
        query_plan_task.add_sub_task([critic_task, rag_task])
        return query_plan_task