langroid 0.3.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,222 @@
1
+ """
2
+ LanceQueryPlanAgent is a ChatAgent created with a specific document schema.
3
+ Given a QUERY, the LLM constructs a Query Plan consisting of:
4
+ - filter condition if needed (or empty string if no filter is needed)
5
+ - query - a possibly rephrased query that can be used to match the `content` field
6
+ - dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
7
+ - original_query - the original query for reference
8
+
9
+ This agent has access to two tools:
10
+ - QueryPlanTool, which is used to generate the Query Plan, and the handler of
11
+ this tool simply passes it on to the RAG agent named in config.doc_agent_name.
12
+ - QueryPlanFeedbackTool, which is used to handle feedback on the Query Plan and
13
+ Result from the RAG agent. The QueryPlanFeedbackTool is used by
14
+ the QueryPlanCritic, who inserts feedback into the `feedback` field
15
+ """
16
+
17
+ import logging
18
+
19
+ import langroid as lr
20
+ from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
21
+ from langroid.agent.chat_document import ChatDocument
22
+ from langroid.agent.special.lance_tools import (
23
+ QueryPlan,
24
+ QueryPlanAnswerTool,
25
+ QueryPlanFeedbackTool,
26
+ QueryPlanTool,
27
+ )
28
+ from langroid.utils.constants import DONE, NO_ANSWER
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ class LanceQueryPlanAgentConfig(ChatAgentConfig):
34
+ name: str = "LancePlanner"
35
+ critic_name: str = "QueryPlanCritic"
36
+ doc_agent_name: str = "LanceRAG"
37
+ doc_schema: str = ""
38
+ use_tools = False
39
+ max_retries: int = 5 # max number of retries for query plan
40
+ use_functions_api = True
41
+
42
+ system_message = f"""
43
+ You will receive a QUERY, to be answered based on an EXTREMELY LARGE collection
44
+ of documents you DO NOT have access to, but your ASSISTANT does.
45
+ You only know that these documents have a special `content` field
46
+ and additional FILTERABLE fields in the SCHEMA below, along with the
47
+ SAMPLE VALUES for each field, and the DTYPE in PANDAS TERMINOLOGY.
48
+
49
+ {{doc_schema}}
50
+
51
+ Based on the QUERY and the above SCHEMA, your task is to determine a QUERY PLAN,
52
+ consisting of:
53
+ - a PANDAS-TYPE FILTER (can be empty string) that would help the ASSISTANT to
54
+ answer the query.
55
+ Remember the FILTER can refer to ANY fields in the above SCHEMA
56
+ EXCEPT the `content` field of the documents.
57
+ ONLY USE A FILTER IF EXPLICITLY MENTIONED IN THE QUERY.
58
+ TO get good results, for STRING MATCHES, consider using LIKE instead of =, e.g.
59
+ "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
60
+ YOUR FILTER MUST BE A PANDAS-TYPE FILTER, respecting the shown DTYPES.
61
+ - a possibly REPHRASED QUERY (CANNOT BE EMPTY) to be answerable given the FILTER.
62
+ Keep in mind that the ASSISTANT does NOT know anything about the FILTER fields,
63
+ so the REPHRASED QUERY should NOT mention ANY FILTER fields.
64
+ The assistant will answer based on documents whose CONTENTS match the QUERY,
65
+ possibly REPHRASED.
66
+ !!!!****THE REPHRASED QUERY SHOULD NEVER BE EMPTY****!!!
67
+ - an OPTIONAL SINGLE-LINE Pandas-dataframe calculation/aggregation string
68
+ that can be used to calculate the answer to the original query,
69
+ e.g. "df["rating"].mean()",
70
+ or "df.groupby("director").mean()["rating"]",
71
+ or EMPTY string if no calc is needed.
72
+ The dataframe calc CAN refer to the `content` field.
73
+ If a DataFrame calculation is NOT needed, leave this field EMPTY.
74
+
75
+ IMPORTANT: The DataFrame `df` in this calculation is the result of
76
+ applying the FILTER AND REPHRASED QUERY to the documents.
77
+
78
+ WATCH OUT!! When deciding the dataframe calc, if any, CAREFULLY
79
+ note what the query is asking, and ensure that the result of your
80
+ dataframe calc expression would answer the query.
81
+
82
+
83
+ EXAMPLE:
84
+ -------
85
+ Suppose there is a document-set about crime reports, where:
86
+ CONTENT = crime report,
87
+ Filterable SCHEMA consists of City, Year, num_deaths.
88
+
89
+ Then given this ORIGINAL QUERY:
90
+
91
+ Total deaths in shoplifting crimes in Los Angeles in 2023?
92
+
93
+ A POSSIBLE QUERY PLAN could be:
94
+
95
+ FILTER: "City LIKE '%Los Angeles%' AND Year = 2023"
96
+ REPHRASED QUERY: "shoplifting crime" --> this will be used to MATCH content of docs
97
+ [NOTE: we dropped the FILTER fields City and Year since the
98
+ ASSISTANT does not know about them and only uses the query to
99
+ match the CONTENT of the docs.]
100
+ DATAFRAME CALCULATION: "df["num_deaths"].sum()"
101
+ NOTE!!! The DataFrame `df` in this calculation is the result of
102
+ applying the FILTER AND REPHRASED QUERY to the documents,
103
+ hence this computation will give the total deaths in shoplifting crimes.
104
+ ------------- END OF EXAMPLE ----------------
105
+
106
+ The FILTER must be a PANDAS-like condition, e.g.
107
+ "year > 2000 AND genre = 'ScienceFiction'".
108
+ To ensure you get useful results, you should make your FILTER
109
+ NOT TOO STRICT, e.g. look for approximate match using LIKE, etc.
110
+ E.g. "CEO LIKE '%Jobs%'" instead of "CEO = 'Steve Jobs'"
111
+ Use DOT NOTATION to refer to nested fields, e.g. `metadata.year`, etc.
112
+
113
+ You must FIRST present the QUERY PLAN using the `query_plan` tool/function.
114
+ This will be handled by your document assistant, who will produce an ANSWER.
115
+
116
+ You may receive FEEDBACK on your QUERY PLAN and received ANSWER,
117
+ from the 'QueryPlanCritic' who may offer suggestions for
118
+ a better FILTER, REPHRASED QUERY, or DATAFRAME CALCULATION.
119
+
120
+ If you keep getting feedback or keep getting a {NO_ANSWER} from the assistant
121
+ at least 3 times, then simply say '{DONE} {NO_ANSWER}' and nothing else.
122
+
123
+ At the BEGINNING if there is no query, ASK the user what they want to know.
124
+ """
125
+
126
+ def set_system_message(self) -> None:
127
+ self.system_message = self.system_message.format(
128
+ doc_schema=self.doc_schema,
129
+ )
130
+
131
+
132
+ class LanceQueryPlanAgent(ChatAgent):
133
+ def __init__(self, config: LanceQueryPlanAgentConfig):
134
+ super().__init__(config)
135
+ self.config: LanceQueryPlanAgentConfig = config
136
+ self.curr_query_plan: QueryPlan | None = None
137
+ # how many times re-trying query plan in response to feedback:
138
+ self.n_retries: int = 0
139
+ self.result: str = "" # answer received from LanceRAG
140
+ # This agent should generate the QueryPlanTool
141
+ # as well as handle it for validation
142
+ self.enable_message(QueryPlanTool, use=True, handle=True)
143
+ self.enable_message(QueryPlanFeedbackTool, use=False, handle=True)
144
+
145
+ def query_plan(self, msg: QueryPlanTool) -> ChatDocument:
146
+ """Check that the plan's dataframe_calc is a single line, then forward to RAG Agent"""
147
+ # save, to be used to assemble QueryPlanAnswerTool
148
+ if len(msg.plan.dataframe_calc.split("\n")) > 1:
149
+ return "DATAFRAME CALCULATION must be a SINGLE LINE; Retry the `query_plan`"
150
+ self.curr_query_plan = msg.plan
151
+ # return a ChatDocument with tool_messages set to this tool,
152
+ # so caller can directly get the tool without parsing
153
+ return self.create_agent_response(tool_messages=[msg])
154
+
155
+ def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> str:
156
+ """Process Critic feedback on QueryPlan + Answer from RAG Agent"""
157
+ # We should have saved answer in self.result by this time,
158
+ # since this Agent seeks feedback only after receiving RAG answer.
159
+ if msg.suggested_fix == "":
160
+ self.n_retries = 0
161
+ # This means the Query Plan or Result is good, as judged by Critic
162
+ if self.result == "":
163
+ # This was feedback for query with no result
164
+ return "QUERY PLAN LOOKS GOOD!"
165
+ elif self.result == NO_ANSWER:
166
+ return NO_ANSWER
167
+ else: # non-empty and non-null answer
168
+ return DONE + " " + self.result
169
+ self.n_retries += 1
170
+ if self.n_retries >= self.config.max_retries:
171
+ # bail out to avoid infinite loop
172
+ self.n_retries = 0
173
+ return DONE + " " + NO_ANSWER
174
+ return f"""
175
+ here is FEEDBACK about your QUERY PLAN, and a SUGGESTED FIX.
176
+ Modify the QUERY PLAN if needed:
177
+ FEEDBACK: {msg.feedback}
178
+ SUGGESTED FIX: {msg.suggested_fix}
179
+ """
180
+
181
+ def handle_message_fallback(
182
+ self, msg: str | ChatDocument
183
+ ) -> str | ChatDocument | None:
184
+ """
185
+ Process answer received from RAG Agent:
186
+ Construct a QueryPlanAnswerTool with the answer,
187
+ and forward to Critic for feedback.
188
+ """
189
+ # TODO we don't need to use this fallback method. instead we can
190
+ # first call result = super().agent_response(), and if result is None,
191
+ # then we know there was no tool, so we run below code
192
+ if (
193
+ isinstance(msg, ChatDocument)
194
+ and self.curr_query_plan is not None
195
+ and msg.metadata.parent is not None
196
+ ):
197
+ # save result, to be used in query_plan_feedback()
198
+ self.result = msg.content
199
+ # assemble QueryPlanAnswerTool...
200
+ query_plan_answer_tool = QueryPlanAnswerTool( # type: ignore
201
+ plan=self.curr_query_plan,
202
+ answer=self.result,
203
+ )
204
+ response_tmpl = self.create_agent_response()
205
+ # ... add the QueryPlanAnswerTool to the response
206
+ # (Notice how the Agent is directly sending a tool, not the LLM)
207
+ response_tmpl.tool_messages = [query_plan_answer_tool]
208
+ # set the recipient to the Critic so it can give feedback
209
+ response_tmpl.metadata.recipient = self.config.critic_name
210
+ self.curr_query_plan = None # reset
211
+ return response_tmpl
212
+ if (
213
+ isinstance(msg, ChatDocument)
214
+ and not self.has_tool_message_attempt(msg)
215
+ and msg.metadata.sender == lr.Entity.LLM
216
+ ):
217
+ # remind LLM to use the QueryPlanTool (the `query_plan` tool/function)
218
+ return """
219
+ You forgot to use the `query_plan` tool/function.
220
+ Re-try your response using the `query_plan` tool/function.
221
+ """
222
+ return None
@@ -1,16 +1,21 @@
1
1
  import logging
2
2
 
3
3
  from langroid.agent.tool_message import ToolMessage
4
- from langroid.pydantic_v1 import BaseModel
4
+ from langroid.pydantic_v1 import BaseModel, Field
5
5
 
6
6
  logger = logging.getLogger(__name__)
7
7
 
8
8
 
9
9
  class QueryPlan(BaseModel):
10
- original_query: str
11
- query: str
12
- filter: str
13
- dataframe_calc: str = ""
10
+ original_query: str = Field(..., description="The original query for reference")
11
+ query: str = Field(..., description="A possibly NON-EMPTY rephrased query")
12
+ filter: str = Field(
13
+ "",
14
+ description="Filter condition if needed (or empty if no filter is needed)",
15
+ )
16
+ dataframe_calc: str = Field(
17
+ "", description="An optional Pandas-dataframe calculation/aggregation string"
18
+ )
14
19
 
15
20
 
16
21
  class QueryPlanTool(ToolMessage):
@@ -19,8 +24,9 @@ class QueryPlanTool(ToolMessage):
19
24
  Given a user's query, generate a query <plan> consisting of:
20
25
  - <original_query> - the original query for reference
21
26
  - <filter> condition if needed (or empty string if no filter is needed)
22
- - <query> - a possibly rephrased query that can be used to match the CONTENT
23
- of the documents (can be same as <original_query> if no rephrasing is needed)
27
+ - <query> - a possibly NON-EMPTY rephrased query that can be used to match the
28
+ CONTENT of the documents
29
+ (can be same as <original_query> if no rephrasing is needed)
24
30
  - <dataframe_calc> - a Pandas-dataframe calculation/aggregation string
25
31
  that can be used to calculate the answer
26
32
  (or empty string if no calculation is needed).
@@ -34,7 +40,7 @@ class QueryPlanAnswerTool(ToolMessage):
34
40
  Assemble query <plan> and <answer>
35
41
  """
36
42
  plan: QueryPlan
37
- answer: str
43
+ answer: str = Field(..., description="The answer received from the assistant")
38
44
 
39
45
 
40
46
  class QueryPlanFeedbackTool(ToolMessage):
@@ -363,7 +363,7 @@ class Neo4jChatAgent(ChatAgent):
363
363
  content=content,
364
364
  metadata=ChatDocMetaData(
365
365
  # source=Entity.AGENT,
366
- sender=Entity.LLM,
366
+ sender=Entity.AGENT,
367
367
  sender_name=sender_name,
368
368
  recipient=recipient,
369
369
  ),
@@ -35,12 +35,10 @@ class ToolMessage(ABC, BaseModel):
35
35
  request (str): name of agent method to map to.
36
36
  purpose (str): purpose of agent method, expressed in general terms.
37
37
  (This is used when auto-generating the tool instruction to the LLM)
38
- result (str): example of result of agent method.
39
38
  """
40
39
 
41
40
  request: str
42
41
  purpose: str
43
- result: str = ""
44
42
 
45
43
  class Config:
46
44
  arbitrary_types_allowed = False
@@ -48,7 +46,7 @@ class ToolMessage(ABC, BaseModel):
48
46
  validate_assignment = True
49
47
  # do not include these fields in the generated schema
50
48
  # since we don't require the LLM to specify them
51
- schema_extra = {"exclude": {"purpose", "result"}}
49
+ schema_extra = {"exclude": {"purpose"}}
52
50
 
53
51
  @classmethod
54
52
  def instructions(cls) -> str:
@@ -110,13 +108,13 @@ class ToolMessage(ABC, BaseModel):
110
108
  return "\n\n".join(examples_jsons)
111
109
 
112
110
  def to_json(self) -> str:
113
- return self.json(indent=4, exclude={"result", "purpose"})
111
+ return self.json(indent=4, exclude={"purpose"})
114
112
 
115
113
  def json_example(self) -> str:
116
- return self.json(indent=4, exclude={"result", "purpose"})
114
+ return self.json(indent=4, exclude={"purpose"})
117
115
 
118
116
  def dict_example(self) -> Dict[str, Any]:
119
- return self.dict(exclude={"result", "purpose"})
117
+ return self.dict(exclude={"purpose"})
120
118
 
121
119
  @classmethod
122
120
  def default_value(cls, f: str) -> Any:
@@ -220,9 +218,7 @@ class ToolMessage(ABC, BaseModel):
220
218
  if "description" not in parameters["properties"][name]:
221
219
  parameters["properties"][name]["description"] = description
222
220
 
223
- excludes = (
224
- ["result", "purpose"] if request else ["request", "result", "purpose"]
225
- )
221
+ excludes = ["purpose"] if request else ["request", "purpose"]
226
222
  # exclude 'excludes' from parameters["properties"]:
227
223
  parameters["properties"] = {
228
224
  field: details
@@ -263,5 +259,5 @@ class ToolMessage(ABC, BaseModel):
263
259
  Returns:
264
260
  Dict[str, Any]: simplified schema
265
261
  """
266
- schema = generate_simple_schema(cls, exclude=["result", "purpose"])
262
+ schema = generate_simple_schema(cls, exclude=["purpose"])
267
263
  return schema
@@ -9,8 +9,6 @@ from typing import (
9
9
  Tuple,
10
10
  Type,
11
11
  TypeVar,
12
- get_args,
13
- get_origin,
14
12
  no_type_check,
15
13
  )
16
14
 
@@ -313,54 +311,6 @@ def pydantic_obj_from_flat_dict(
313
311
  return model(**nested_data)
314
312
 
315
313
 
316
- def clean_schema(model: Type[BaseModel], excludes: List[str] = []) -> Dict[str, Any]:
317
- """
318
- Generate a simple schema for a given Pydantic model,
319
- including inherited fields, with an option to exclude certain fields.
320
- Handles cases where fields are Lists or other generic types and includes
321
- field descriptions if available.
322
-
323
- Args:
324
- model (Type[BaseModel]): The Pydantic model class.
325
- excludes (List[str]): A list of field names to exclude.
326
-
327
- Returns:
328
- Dict[str, Any]: A dictionary representing the simple schema.
329
- """
330
- schema = {}
331
-
332
- for field_name, field_info in model.__fields__.items():
333
- if field_name in excludes:
334
- continue
335
-
336
- field_type = field_info.outer_type_
337
- description = field_info.field_info.description or ""
338
-
339
- # Handle generic types like List[...]
340
- if get_origin(field_type):
341
- inner_types = get_args(field_type)
342
- inner_type_names = [
343
- t.__name__ if hasattr(t, "__name__") else str(t) for t in inner_types
344
- ]
345
- field_type_str = (
346
- f"{get_origin(field_type).__name__}" f'[{", ".join(inner_type_names)}]'
347
- )
348
- schema[field_name] = {"type": field_type_str, "description": description}
349
- elif issubclass(field_type, BaseModel):
350
- # Directly use the nested model's schema,
351
- # integrating it into the current level
352
- nested_schema = clean_schema(field_type, excludes)
353
- schema[field_name] = {**nested_schema, "description": description}
354
- else:
355
- # For basic types, use 'type'
356
- schema[field_name] = {
357
- "type": field_type.__name__,
358
- "description": description,
359
- }
360
-
361
- return schema
362
-
363
-
364
314
  @contextmanager
365
315
  def temp_update(
366
316
  pydantic_object: BaseModel, updates: Dict[str, Any]
@@ -1,14 +1,14 @@
1
1
  import copy
2
2
  import logging
3
3
  from abc import ABC, abstractmethod
4
- from typing import Dict, List, Optional, Sequence, Tuple
4
+ from typing import Dict, List, Optional, Sequence, Tuple, Type
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
8
 
9
9
  from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
10
10
  from langroid.embedding_models.models import OpenAIEmbeddingsConfig
11
- from langroid.mytypes import Document
11
+ from langroid.mytypes import DocMetaData, Document
12
12
  from langroid.pydantic_v1 import BaseSettings
13
13
  from langroid.utils.algorithms.graph import components, topological_sort
14
14
  from langroid.utils.configuration import settings
@@ -32,6 +32,9 @@ class VectorStoreConfig(BaseSettings):
32
32
  timeout: int = 60
33
33
  host: str = "127.0.0.1"
34
34
  port: int = 6333
35
+ # used when parsing search results back as Document objects
36
+ document_class: Type[Document] = Document
37
+ metadata_class: Type[DocMetaData] = DocMetaData
35
38
  # compose_file: str = "langroid/vector_store/docker-compose-qdrant.yml"
36
39
 
37
40
 
@@ -113,8 +116,7 @@ class VectorStore(ABC):
113
116
  """
114
117
 
115
118
  self.config.collection_name = collection_name
116
- if collection_name not in self.list_collections() or replace:
117
- self.create_collection(collection_name, replace=replace)
119
+ self.config.replace_collection = replace
118
120
 
119
121
  @abstractmethod
120
122
  def create_collection(self, collection_name: str, replace: bool = False) -> None:
@@ -8,7 +8,7 @@ from langroid.embedding_models.base import (
8
8
  )
9
9
  from langroid.embedding_models.models import OpenAIEmbeddingsConfig
10
10
  from langroid.exceptions import LangroidImportError
11
- from langroid.mytypes import DocMetaData, Document
11
+ from langroid.mytypes import Document
12
12
  from langroid.utils.configuration import settings
13
13
  from langroid.utils.output.printing import print_long_text
14
14
  from langroid.vector_store.base import VectorStore, VectorStoreConfig
@@ -200,7 +200,9 @@ class ChromaDB(VectorStore):
200
200
  else:
201
201
  m["window_ids"] = m["window_ids"].split(",")
202
202
  docs = [
203
- Document(content=d, metadata=DocMetaData(**m))
203
+ self.config.document_class(
204
+ content=d, metadata=self.config.metadata_class(**m)
205
+ )
204
206
  for d, m in zip(contents, metadatas)
205
207
  ]
206
208
  return docs