langroid 0.1.175__py3-none-any.whl → 0.1.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ pip install "langroid[hf-embeddings]"
14
14
  """
15
15
  import logging
16
16
  from contextlib import ExitStack
17
- from typing import Dict, List, Optional, Set, Tuple, no_type_check
17
+ from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check
18
18
 
19
19
  import numpy as np
20
20
  import pandas as pd
@@ -52,7 +52,7 @@ from langroid.utils.configuration import settings
52
52
  from langroid.utils.constants import NO_ANSWER
53
53
  from langroid.utils.output.printing import show_if_debug
54
54
  from langroid.utils.pydantic_utils import dataframe_to_documents, extract_fields
55
- from langroid.vector_store.base import VectorStoreConfig
55
+ from langroid.vector_store.base import VectorStore, VectorStoreConfig
56
56
  from langroid.vector_store.lancedb import LanceDBConfig
57
57
 
58
58
  logger = logging.getLogger(__name__)
@@ -181,16 +181,40 @@ class DocChatAgent(ChatAgent):
181
181
  ):
182
182
  super().__init__(config)
183
183
  self.config: DocChatAgentConfig = config
184
- self.original_docs: None | List[Document] = None
184
+ self.original_docs: List[Document] = []
185
185
  self.original_docs_length = 0
186
186
  self.from_dataframe = False
187
187
  self.df_description = ""
188
- self.chunked_docs: None | List[Document] = None
189
- self.chunked_docs_clean: None | List[Document] = None
188
+ self.chunked_docs: List[Document] = []
189
+ self.chunked_docs_clean: List[Document] = []
190
190
  self.response: None | Document = None
191
191
  if len(config.doc_paths) > 0:
192
192
  self.ingest()
193
193
 
194
def clear(self) -> None:
    """Clear the ingested documents and the vecdb's current collection.

    Resets the in-memory document lists and counter, then deletes the
    collection named in ``self.vecdb.config`` and re-creates the vector
    store from that same config. A failure in either step is logged as a
    warning (best-effort) rather than raised.

    Raises:
        ValueError: If no vector store is attached to this agent.
    """
    if self.vecdb is None:
        raise ValueError("VecDB not set")

    # Forget everything ingested so far.
    self.original_docs = []
    self.original_docs_length = 0
    self.chunked_docs = []
    self.chunked_docs_clean = []

    # Note we may have used a vecdb with a config.collection_name
    # different from the agent's config.vecdb.collection_name!!
    vecdb_config = self.vecdb.config
    collection_name = vecdb_config.collection_name
    if collection_name is None:
        return

    # Track which step is running so the warning names the step that failed,
    # instead of always blaming the deletion.
    stage = "deleting"
    try:
        self.vecdb.delete_collection(collection_name)
        stage = "re-creating"
        self.vecdb = VectorStore.create(vecdb_config)
    except Exception as e:
        logger.warning(
            f"""
            Error while {stage} collection {collection_name}:
            {e}
            """
        )
217
+
194
218
  def ingest(self) -> None:
195
219
  """
196
220
  Chunk + embed + store docs specified by self.config.doc_paths
@@ -211,19 +235,46 @@ class DocChatAgent(ChatAgent):
211
235
  return
212
236
  self.ingest_doc_paths(self.config.doc_paths)
213
237
 
214
- def ingest_doc_paths(self, paths: List[str]) -> None:
238
+ def ingest_doc_paths(
239
+ self,
240
+ paths: List[str],
241
+ metadata: List[Dict[str, Any]] | Dict[str, Any] = [],
242
+ ) -> None:
215
243
  """Split, ingest docs from specified paths,
216
- do not add these to config.doc_paths
244
+ do not add these to config.doc_paths.
245
+
246
+ Args:
247
+ paths: List of file/folder paths or URLs
248
+ metadata: List of metadata dicts, one for each path.
249
+ If a single dict is passed in, it is used for all paths.
217
250
  """
251
+ paths_meta: Dict[str, Any] = {}
252
+ urls_meta: Dict[str, Any] = {}
218
253
  urls, paths = get_urls_and_paths(paths)
254
+ if len(metadata) > 0:
255
+ if isinstance(metadata, list):
256
+ path2meta = {p: m for p, m in zip(paths, metadata)}
257
+ else:
258
+ path2meta = {p: metadata for p in paths}
259
+ urls_meta = {u: path2meta[u] for u in urls}
260
+ paths_meta = {p: path2meta[p] for p in paths}
219
261
  docs: List[Document] = []
220
262
  parser = Parser(self.config.parsing)
221
263
  if len(urls) > 0:
222
- loader = URLLoader(urls=urls, parser=parser)
223
- docs = loader.load()
264
+ for u in urls:
265
+ meta = urls_meta.get(u, {})
266
+ loader = URLLoader(urls=[u], parser=parser)
267
+ docs = loader.load()
268
+ # update metadata of each doc with meta
269
+ for d in docs:
270
+ d.metadata = d.metadata.copy(update=meta)
224
271
  if len(paths) > 0:
225
272
  for p in paths:
273
+ meta = paths_meta.get(p, {})
226
274
  path_docs = RepoLoader.get_documents(p, parser=parser)
275
+ # update metadata of each doc with meta
276
+ for d in path_docs:
277
+ d.metadata = d.metadata.copy(update=meta)
227
278
  docs.extend(path_docs)
228
279
  n_docs = len(docs)
229
280
  n_splits = self.ingest_docs(docs)
@@ -240,7 +291,12 @@ class DocChatAgent(ChatAgent):
240
291
  print("\n".join(urls))
241
292
  print("\n".join(paths))
242
293
 
243
- def ingest_docs(self, docs: List[Document], split: bool = True) -> int:
294
+ def ingest_docs(
295
+ self,
296
+ docs: List[Document],
297
+ split: bool = True,
298
+ metadata: List[Dict[str, Any]] | Dict[str, Any] = [],
299
+ ) -> int:
244
300
  """
245
301
  Chunk docs into pieces, map each chunk to vec-embedding, store in vec-db
246
302
 
@@ -248,8 +304,19 @@ class DocChatAgent(ChatAgent):
248
304
  docs: List of Document objects
249
305
  split: Whether to split docs into chunks. Default is True.
250
306
  If False, docs are treated as "chunks" and are not split.
307
+ metadata: List of metadata dicts, one for each doc, to augment
308
+ whatever metadata is already in the doc.
309
+ [ASSUME no conflicting keys between the two metadata dicts.]
310
+ If a single dict is passed in, it is used for all docs.
251
311
  """
252
- self.original_docs = docs
312
+ if isinstance(metadata, list) and len(metadata) > 0:
313
+ for d, m in zip(docs, metadata):
314
+ d.metadata = d.metadata.copy(update=m)
315
+ elif isinstance(metadata, dict):
316
+ for d in docs:
317
+ d.metadata = d.metadata.copy(update=metadata)
318
+
319
+ self.original_docs.extend(docs)
253
320
  if self.parser is None:
254
321
  raise ValueError("Parser not set")
255
322
  for d in docs:
@@ -374,7 +441,7 @@ class DocChatAgent(ChatAgent):
374
441
  """
375
442
  if filter is None and len(docs) > 0:
376
443
  # no filter, so just use the docs passed in
377
- self.chunked_docs = docs
444
+ self.chunked_docs.extend(docs)
378
445
  else:
379
446
  if self.vecdb is None:
380
447
  raise ValueError("VecDB not set")
@@ -1098,7 +1165,7 @@ class DocChatAgent(ChatAgent):
1098
1165
  """Summarize all docs"""
1099
1166
  if self.llm is None:
1100
1167
  raise ValueError("LLM not set")
1101
- if self.original_docs is None:
1168
+ if len(self.original_docs) == 0:
1102
1169
  logger.warning(
1103
1170
  """
1104
1171
  No docs to summarize! Perhaps you are re-using a previously
@@ -11,7 +11,7 @@ For usage see:
11
11
  """
12
12
  import json
13
13
  import logging
14
- from typing import List, Tuple
14
+ from typing import Any, Dict, List, Tuple
15
15
 
16
16
  import pandas as pd
17
17
 
@@ -112,8 +112,13 @@ class LanceDocChatAgent(DocChatAgent):
112
112
  # pass on the query so LLM can handle it
113
113
  return plan.query
114
114
 
115
- def ingest_docs(self, docs: List[Document], split: bool = True) -> int:
116
- n = super().ingest_docs(docs, split)
115
+ def ingest_docs(
116
+ self,
117
+ docs: List[Document],
118
+ split: bool = True,
119
+ metadata: List[Dict[str, Any]] | Dict[str, Any] = [],
120
+ ) -> int:
121
+ n = super().ingest_docs(docs, split, metadata)
117
122
  tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
118
123
  # We assume "content" is available as top-level field
119
124
  if "content" in tbl.schema.names:
@@ -126,6 +131,8 @@ class LanceDocChatAgent(DocChatAgent):
126
131
  content: str = "content",
127
132
  metadata: List[str] = [],
128
133
  ) -> int:
134
+ """Ingest from a dataframe. Assume we are doing this once, not incrementally"""
135
+
129
136
  self.from_dataframe = True
130
137
  if df.shape[0] == 0:
131
138
  raise ValueError(
@@ -5,6 +5,8 @@ from pydantic import BaseSettings
5
5
  from rich import print
6
6
  from rich.console import Console
7
7
 
8
+ from langroid.agent import ToolMessage
9
+
8
10
  if TYPE_CHECKING:
9
11
  import neo4j
10
12
 
@@ -16,10 +18,6 @@ from langroid.agent.special.neo4j.utils.system_message import (
16
18
  DEFAULT_SYS_MSG,
17
19
  SCHEMA_TOOLS_SYS_MSG,
18
20
  )
19
- from langroid.agent.special.neo4j.utils.tools import (
20
- CypherQueryTool,
21
- GraphSchemaTool,
22
- )
23
21
  from langroid.mytypes import Entity
24
22
 
25
23
  logger = logging.getLogger(__name__)
@@ -37,6 +35,21 @@ not_valid_query_response = [
37
35
  ]
38
36
 
39
37
 
38
+ # TOOLS to be used by the agent
39
+
40
+
41
+ class CypherQueryTool(ToolMessage):
42
+ request: str = "make_query"
43
+ purpose: str = """Use this tool to send the Generated Cypher query based on
44
+ provided text description and schema."""
45
+ cypher_query: str
46
+
47
+
48
+ class GraphSchemaTool(ToolMessage):
49
+ request: str = "get_schema"
50
+ purpose: str = """To get the schema of the graph database."""
51
+
52
+
40
53
  class Neo4jSettings(BaseSettings):
41
54
  uri: str = ""
42
55
  username: str = ""
@@ -133,10 +146,11 @@ class Neo4jChatAgent(ChatAgent):
133
146
  logger.error(f"Cypher Query failed: {query}\nException: {e}")
134
147
 
135
148
  # Construct the error message
136
- error_message_template = f"""\
149
+ error_message_template = f"""
150
+ There were some errors running your Cypher query:
137
151
  {NEO4J_ERROR_MSG}: '{query}'
138
152
  {str(e)}
139
- Run a new query, correcting the errors.
153
+ Send a new query, correcting the errors.
140
154
  """
141
155
 
142
156
  return error_message_template
@@ -155,7 +169,6 @@ class Neo4jChatAgent(ChatAgent):
155
169
  Returns:
156
170
  str: The result of executing the Cypher query.
157
171
  """
158
- response_message = ""
159
172
  if not self.driver:
160
173
  raise ValueError("No database connection is established.")
161
174
 
@@ -271,7 +284,7 @@ class Neo4jChatAgent(ChatAgent):
271
284
  return "The database schema does not have any nodes or relationships."
272
285
 
273
286
  def _init_tool_messages(self) -> None:
274
- """Initialize message tools used for chatting."""
287
+ """Attach ToolMessages to the Agent."""
275
288
  message = self._format_message()
276
289
  self.config.system_message = self.config.system_message.format(mode=message)
277
290
  super().__init__(self.config)
@@ -10,7 +10,7 @@ the code and returns the result as a string.
10
10
  import io
11
11
  import logging
12
12
  import sys
13
- from typing import List, no_type_check
13
+ from typing import List, Optional, no_type_check
14
14
 
15
15
  import numpy as np
16
16
  import pandas as pd
@@ -23,7 +23,7 @@ from langroid.agent.tool_message import ToolMessage
23
23
  from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
24
24
  from langroid.parsing.table_loader import read_tabular_data
25
25
  from langroid.prompts.prompts_config import PromptsConfig
26
- from langroid.utils.constants import DONE, PASS
26
+ from langroid.utils.constants import DONE
27
27
  from langroid.vector_store.base import VectorStoreConfig
28
28
 
29
29
  logger = logging.getLogger(__name__)
@@ -34,7 +34,7 @@ DEFAULT_TABLE_CHAT_SYSTEM_MESSAGE = f"""
34
34
  You are a savvy data scientist, with expertise in analyzing tabular datasets,
35
35
  using Python and the Pandas library for dataframe manipulation.
36
36
  Since you do not have access to the dataframe 'df', you
37
- will need to use the `run_code` tool/function-call to answer the question.
37
+ will need to use the `run_code` tool/function-call to answer my questions.
38
38
  Here is a summary of the dataframe:
39
39
  {{summary}}
40
40
  Do not assume any columns other than those shown.
@@ -48,9 +48,9 @@ If you receive a null or other unexpected result, see if you have made an assump
48
48
  in your code, and try another way, or use `run_code` to explore the dataframe
49
49
  before submitting your final code.
50
50
 
51
- Once you have the answer to the question, say {DONE} and show me the answer.
52
- If you receive an error message, try using the `run_code` tool/function
53
- again with the corrected code.
51
+ Once you have the answer to the question, possibly after a few steps,
52
+ say {DONE} and show me the answer. If you receive an error message,
53
+ try using the `run_code` tool/function again with the corrected code.
54
54
 
55
55
  VERY IMPORTANT: When using the `run_code` tool/function, DO NOT EXPLAIN ANYTHING,
56
56
  SIMPLY USE THE TOOL, with the CODE.
@@ -129,6 +129,7 @@ class RunCodeTool(ToolMessage):
129
129
  purpose: str = """
130
130
  To run <code> on the dataframe 'df' and
131
131
  return the results to answer a question.
132
+ IMPORTANT: ALL the code should be in the <code> field.
132
133
  """
133
134
  code: str
134
135
 
@@ -145,6 +146,8 @@ class TableChatAgent(ChatAgent):
145
146
  Agent for chatting with a collection of documents.
146
147
  """
147
148
 
149
+ sent_code: bool = False
150
+
148
151
  def __init__(self, config: TableChatAgentConfig):
149
152
  if isinstance(config.data, pd.DataFrame):
150
153
  df = config.data
@@ -169,6 +172,15 @@ class TableChatAgent(ChatAgent):
169
172
  # enable the agent to use and handle the RunCodeTool
170
173
  self.enable_message(RunCodeTool)
171
174
 
175
def user_response(
    self,
    msg: Optional[str | ChatDocument] = None,
) -> Optional[ChatDocument]:
    """Get the user's response via the parent class and reset ``sent_code``.

    Args:
        msg: The message to respond to, forwarded unchanged to the parent's
            ``user_response``.

    Returns:
        Whatever the parent's ``user_response`` returned (possibly None).
    """
    reply = super().user_response(msg)
    # Only a real, non-empty user reply clears the flag; presumably this marks
    # the start of a new question for which no code has been sent yet.
    user_said_something = reply is not None and reply.content != ""
    if user_said_something:
        self.sent_code = False
    return reply
183
+
172
184
  def run_code(self, msg: RunCodeTool) -> str:
173
185
  """
174
186
  Handle a RunCodeTool message by running the code and returning the result.
@@ -178,6 +190,7 @@ class TableChatAgent(ChatAgent):
178
190
  Returns:
179
191
  str: The result of running the code along with any print output.
180
192
  """
193
+ self.sent_code = True
181
194
  code = msg.code
182
195
  # Create a dictionary that maps 'df' to the actual DataFrame
183
196
  local_vars = {"df": self.df}
@@ -230,14 +243,13 @@ class TableChatAgent(ChatAgent):
230
243
  ) -> str | ChatDocument | None:
231
244
  """Handle scenario where LLM forgets to say DONE or forgets to use run_code"""
232
245
  if isinstance(msg, ChatDocument) and msg.metadata.sender == lr.Entity.LLM:
233
- return f"""
234
- You either:
235
-
236
- (A) forgot to use the `run_code` tool/function to find the answer,
237
- ==> In this case re-try using the `run_code` tool/function.
238
- OR
239
-
240
- (B) forgot to say {DONE} after you found the answer.
241
- ==> In this case say "{DONE} {PASS}"
242
- """
246
+ if self.sent_code:
247
+ return DONE
248
+ else:
249
+ return """
250
+ You forgot to use the `run_code` tool/function to find the answer.
251
+ Try again using the `run_code` tool/function.
252
+ Remember that ALL your code, including imports,
253
+ should be in the `code` field.
254
+ """
243
255
  return None
langroid/agent/task.py CHANGED
@@ -160,6 +160,8 @@ class Task:
160
160
  self.name = name or agent.config.name
161
161
  self.value: str = self.name
162
162
  self.default_human_response = default_human_response
163
+ if default_human_response is not None and default_human_response == "":
164
+ interactive = False
163
165
  self.interactive = interactive
164
166
  self.message_history_idx = -1
165
167
  if interactive:
@@ -957,8 +959,6 @@ class Task:
957
959
  bool: True if task is done, False otherwise
958
960
  """
959
961
  result = result or self.pending_message
960
- if self.is_done:
961
- return True
962
962
  user_quit = (
963
963
  result is not None
964
964
  and result.content in USER_QUIT
@@ -968,6 +968,9 @@ class Task:
968
968
  # for top-level task, only user can quit out
969
969
  return user_quit
970
970
 
971
+ if self.is_done:
972
+ return True
973
+
971
974
  if self.n_stalled_steps >= self.max_stalled_steps:
972
975
  # we are stuck, so bail to avoid infinite loop
973
976
  logger.warning(
@@ -0,0 +1,44 @@
1
+ """
2
+ A tool to trigger a metaphor search for a given query, and return the top results with
3
+ their titles, links, summaries. Since the tool is stateless (i.e. does not need
4
+ access to agent state), it can be enabled for any agent, without having to define a
5
+ special method inside the agent: `agent.enable_message(MetaphorSearchTool)`
6
+
7
+ NOTE: Using this tool requires setting the METAPHOR_API_KEY environment variable in
8
+ your `.env` file, as explained in the
9
+ [README](https://github.com/langroid/langroid#gear-installation-and-setup).
10
+
11
+ This tool requires installing langroid with the `metaphor` extra, e.g.
12
+ `pip install langroid[metaphor]` or `poetry add langroid[metaphor]`
13
+ (it installs the `metaphor-python` package from pypi).
14
+
15
+ For more information, please refer to the official docs:
16
+ https://metaphor.systems/
17
+ """
18
+
19
+ from langroid.agent.tool_message import ToolMessage
20
+ from langroid.parsing.web_search import metaphor_search
21
+
22
+
23
class MetaphorSearchTool(ToolMessage):
    """Tool message that runs a Metaphor web search for a query.

    The handler needs no agent state: it only reads this message's own
    ``query`` and ``num_results`` fields and delegates to ``metaphor_search``.
    """

    request: str = "metaphor_search"
    purpose: str = """
            To search the web by metaphor api and return up to <num_results>
            links relevant to the given <query>.
            """
    query: str
    # Default mirrors `metaphor_search(query, num_results=5)`, so a message
    # that omits the count is still usable.
    # NOTE(review): assumes ToolMessage treats a field with a default as
    # optional (pydantic-style) — confirm.
    num_results: int = 5

    def handle(self) -> str:
        """
        Conducts a search using the metaphor API based on the provided query
        and number of results by triggering a metaphor_search.

        Returns:
            str: The string form of each search result, joined with
                two newlines between consecutive results.
        """
        search_results = metaphor_search(self.query, self.num_results)
        # One str(result) per hit, with a blank line between hits.
        return "\n\n".join(str(result) for result in search_results)
@@ -60,7 +60,8 @@ class OpenAIChatModel(str, Enum):
60
60
 
61
61
  GPT3_5_TURBO = "gpt-3.5-turbo-1106"
62
62
  GPT4 = "gpt-4"
63
- GPT4_TURBO = "gpt-4-1106-preview"
63
+ GPT4_32K = "gpt-4-32k"
64
+ GPT4_TURBO = "gpt-4-turbo-preview"
64
65
 
65
66
 
66
67
  class OpenAICompletionModel(str, Enum):
@@ -72,8 +73,9 @@ class OpenAICompletionModel(str, Enum):
72
73
 
73
74
  _context_length: Dict[str, int] = {
74
75
  # can add other non-openAI models here
75
- OpenAIChatModel.GPT3_5_TURBO: 4096,
76
+ OpenAIChatModel.GPT3_5_TURBO: 16_385,
76
77
  OpenAIChatModel.GPT4: 8192,
78
+ OpenAIChatModel.GPT4_32K: 32_768,
77
79
  OpenAIChatModel.GPT4_TURBO: 128_000,
78
80
  OpenAICompletionModel.TEXT_DA_VINCI_003: 4096,
79
81
  }
@@ -81,7 +83,7 @@ _context_length: Dict[str, int] = {
81
83
  _cost_per_1k_tokens: Dict[str, Tuple[float, float]] = {
82
84
  # can add other non-openAI models here.
83
85
  # model => (prompt cost, generation cost) in USD
84
- OpenAIChatModel.GPT3_5_TURBO: (0.0015, 0.002),
86
+ OpenAIChatModel.GPT3_5_TURBO: (0.001, 0.002),
85
87
  OpenAIChatModel.GPT4: (0.03, 0.06), # 8K context
86
88
  OpenAIChatModel.GPT4_TURBO: (0.01, 0.03), # 128K context
87
89
  }
@@ -197,7 +199,7 @@ class OpenAIGPTConfig(LLMConfig):
197
199
  api_base: str | None = None # used for local or other non-OpenAI models
198
200
  litellm: bool = False # use litellm api?
199
201
  max_output_tokens: int = 1024
200
- min_output_tokens: int = 64
202
+ min_output_tokens: int = 1
201
203
  use_chat_for_completion = True # do not change this, for OpenAI models!
202
204
  timeout: int = 20
203
205
  temperature: float = 0.2
@@ -77,3 +77,49 @@ def google_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
77
77
  WebSearchResult(result["title"], result["link"], 3500, 300)
78
78
  for result in raw_results
79
79
  ]
80
+
81
+
82
def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Metaphor API for the top links matching ``query`` and return
    them as a list of WebSearchResult objects.

    Args:
        query (str): The search query text.
        num_results (int): Number of top-matching results to request.

    Returns:
        List[WebSearchResult]: One entry per result returned by the client,
            built from that result's title and URL.

    Raises:
        ValueError: If the METAPHOR_API_KEY environment variable is not set.
        ImportError: If the `metaphor_python` module cannot be imported.
    """

    # Presumably picks up METAPHOR_API_KEY from a local `.env` file.
    load_dotenv()

    api_key = os.getenv("METAPHOR_API_KEY")
    if not api_key:
        raise ValueError(
            """
            METAPHOR_API_KEY is not set.
            Please set the METAPHOR_API_KEY environment variable.
            """
        )

    # Imported lazily so the dependency is only needed when this search is used.
    try:
        from metaphor_python import Metaphor
    except ImportError as e:
        # Each fragment ends with a space so the concatenated message reads
        # correctly; `from e` keeps the original import failure attached.
        raise ImportError(
            "You are attempting to use the `metaphor_python` library; "
            "To use it, please install langroid with the `metaphor` extra, e.g. "
            "`pip install langroid[metaphor]` or `poetry add langroid[metaphor]` "
            "(it installs the `metaphor-python` package from pypi)."
        ) from e

    client = Metaphor(api_key=api_key)
    response = client.search(query=query, num_results=num_results)

    # 3500 and 300 are the same fixed arguments `google_search` passes to
    # WebSearchResult; their meaning is defined by that class (not shown here).
    return [
        WebSearchResult(result.title, result.url, 3500, 300)
        for result in response.results
    ]
langroid/utils/logging.py CHANGED
@@ -4,7 +4,6 @@ from typing import no_type_check
4
4
 
5
5
  import colorlog
6
6
  from rich.console import Console
7
- from rich.markup import escape
8
7
 
9
8
 
10
9
  # Define a function to set up the colored logger
@@ -126,6 +125,6 @@ class RichFileLogger:
126
125
  with open(self.log_file, "a") as f:
127
126
  if self.color:
128
127
  console = Console(file=f, force_terminal=True, width=200)
129
- console.print(escape(message))
128
+ console.print(message)
130
129
  else:
131
130
  print(message, file=f)
@@ -6,7 +6,7 @@ import pandas as pd
6
6
  from dotenv import load_dotenv
7
7
  from lancedb.pydantic import LanceModel, Vector
8
8
  from lancedb.query import LanceVectorQueryBuilder
9
- from pydantic import BaseModel, create_model
9
+ from pydantic import BaseModel, ValidationError, create_model
10
10
 
11
11
  from langroid.embedding_models.base import (
12
12
  EmbeddingModel,
@@ -354,7 +354,19 @@ class LanceDB(VectorStore):
354
354
  self.unflattened_schema(**nested_dict_from_flat(rec)) for rec in records
355
355
  ]
356
356
  else:
357
- docs = [self.schema(**rec) for rec in records]
357
+ try:
358
+ docs = [self.schema(**rec) for rec in records]
359
+ except ValidationError as e:
360
+ raise ValueError(
361
+ f"""
362
+ Error validating LanceDB result: {e}
363
+ HINT: This could happen when you're re-using an
364
+ existing LanceDB store with a different schema.
365
+ Try deleting your local lancedb storage at `{self.config.storage_path}`
366
+ re-ingesting your documents and/or replacing the collections.
367
+ """
368
+ )
369
+
358
370
  doc_cls = self.config.document_class
359
371
  doc_cls_field_names = doc_cls.__fields__.keys()
360
372
  return [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.175
3
+ Version: 0.1.177
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Provides-Extra: hf-embeddings
14
14
  Provides-Extra: litellm
15
+ Provides-Extra: metaphor
15
16
  Provides-Extra: mysql
16
17
  Provides-Extra: neo4j
17
18
  Provides-Extra: postgres
@@ -38,6 +39,7 @@ Requires-Dist: litellm (>=1.0.0,<2.0.0) ; extra == "litellm"
38
39
  Requires-Dist: lxml (>=4.9.3,<5.0.0)
39
40
  Requires-Dist: meilisearch (>=0.28.3,<0.29.0)
40
41
  Requires-Dist: meilisearch-python-sdk (>=2.2.3,<3.0.0)
42
+ Requires-Dist: metaphor-python (>=0.1.23,<0.2.0) ; extra == "metaphor"
41
43
  Requires-Dist: mkdocs (>=1.4.2,<2.0.0)
42
44
  Requires-Dist: mkdocs-awesome-pages-plugin (>=2.8.0,<3.0.0)
43
45
  Requires-Dist: mkdocs-gen-files (>=0.4.0,<0.5.0)
@@ -160,11 +162,14 @@ Sponsorship is also accepted via [GitHub Sponsors](https://github.com/sponsors/l
160
162
  This is just a teaser; there's much more, like function-calling/tools,
161
163
  Multi-Agent Collaboration, Structured Information Extraction, DocChatAgent
162
164
  (RAG), SQLChatAgent, non-OpenAI local/remote LLMs, etc. Scroll down or see docs for more.
165
+ See the Langroid Quick-Start [Colab](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)
166
+ that builds up to a 2-agent information-extraction example using the OpenAI ChatCompletion API.
167
+ See also this [version](https://colab.research.google.com/drive/190Tk7t4AdY1P9F_NlZ33-YEoGnHweQQ0) that uses the OpenAI Assistants API instead.
163
168
 
164
- :fire: Just released! Updated Langroid Quick-Start [Colab](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)
165
- that builds up to a 2-agent chat example using the OpenAI ChatCompletion API.
166
- See also this [version](https://colab.research.google.com/drive/190Tk7t4AdY1P9F_NlZ33-YEoGnHweQQ0)
167
- that uses the OpenAI Assistants API instead.
169
+ :fire: Just released! [Example](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-multi-extract-local.py)
170
+ script showing how you can use Langroid multi-agents and tools
171
+ to extract structured information from a document using **only a local LLM**
172
+ (Mistral-7b-instruct-v0.2).
168
173
 
169
174
  ```python
170
175
  import langroid as lr
@@ -215,6 +220,15 @@ teacher_task.run()
215
220
  <summary> <b>Click to expand</b></summary>
216
221
 
217
222
  - **Jan 2024:**
223
+ - **0.1.175**
224
+ - [Neo4jChatAgent](https://github.com/langroid/langroid/tree/main/langroid/agent/special/neo4j) to chat with a neo4j knowledge-graph.
225
+ (Thanks to [Mohannad](https://github.com/Mohannadcse)!). The agent uses tools to query the Neo4j schema and translate user queries to Cypher queries,
226
+ and the tool handler executes these queries, returning them to the LLM to compose
227
+ a natural language response (analogous to how `SQLChatAgent` works).
228
+ See example [script](https://github.com/langroid/langroid/tree/main/examples/kg-chat) using this Agent to answer questions about Python pkg dependencies.
229
+ - Support for `.doc` file parsing (in addition to `.docx`)
230
+ - Specify optional [`formatter` param](https://github.com/langroid/langroid/releases/tag/0.1.171)
231
+ in `OpenAIGPTConfig` to ensure accurate chat formatting for local LLMs.
218
232
  - **[0.1.157](https://github.com/langroid/langroid/releases/tag/0.1.157):** `DocChatAgentConfig`
219
233
  has a new param: `add_fields_to_content`, to specify additional document fields to insert into
220
234
  the main `content` field, to help improve retrieval.
@@ -316,6 +330,8 @@ See [this test](tests/main/test_recipient_tool.py) for example usage.
316
330
  Suppose you want to extract structured information about the key terms
317
331
  of a commercial lease document. You can easily do this with Langroid using a two-agent system,
318
332
  as we show in the [langroid-examples](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py) repo.
333
+ (See [this script](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-multi-extract-local.py)
334
+ for a version with the same functionality using a local Mistral-7b model.)
319
335
  The demo showcases just a few of the many features of Langroid, such as:
320
336
  - Multi-agent collaboration: `LeaseExtractor` is in charge of the task, and its LLM (GPT4) generates questions
321
337
  to be answered by the `DocAgent`.
@@ -332,7 +348,9 @@ Here is what it looks like in action
332
348
 
333
349
 
334
350
  # :zap: Highlights
335
-
351
+ (For a more up-to-date list see the
352
+ [release](https://github.com/langroid/langroid?tab=readme-ov-file#fire-updatesreleases)
353
+ section above)
336
354
  - **Agents as first-class citizens:** The [Agent](https://langroid.github.io/langroid/reference/agent/base/#langroid.agent.base.Agent) class encapsulates LLM conversation state,
337
355
  and optionally a vector-store and tools. Agents are a core abstraction in Langroid;
338
356
  Agents act as _message transformers_, and by default provide 3 _responder_ methods, one corresponding to each entity: LLM, Agent, User.
@@ -8,18 +8,17 @@ langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
9
9
  langroid/agent/openai_assistant.py,sha256=yBtxis64XOnxtJzlkwUoTm-wCyvKr4DGo9-laXYMok0,32654
10
10
  langroid/agent/special/__init__.py,sha256=xj4TvQ_oQX_xYPySbhmQAi2CPhuy_3yQPqqwzb4wsc0,943
11
- langroid/agent/special/doc_chat_agent.py,sha256=S5BNRvELBZLFY-3TwrraGeW2XCYCU5NaOEO36zbSeRQ,45363
12
- langroid/agent/special/lance_doc_chat_agent.py,sha256=5wwmoPtHfc_AfQNwZ7L2GnJvtOONGnDsiKzwjgbmHt8,8168
11
+ langroid/agent/special/doc_chat_agent.py,sha256=zW5qG0N8TpW1RfkK9vPBWcWtFqD0aM9KzcpaUeASMJE,48083
12
+ langroid/agent/special/lance_doc_chat_agent.py,sha256=tUpn6vnbHzlkkpKEHJym7dtxS-s39AwPfH2PqyNJuxY,8371
13
13
  langroid/agent/special/lance_rag/__init__.py,sha256=-pq--upe-8vycYoTwxoomBnuUqrcRFUukmW3uBL1cFM,219
14
14
  langroid/agent/special/lance_rag/critic_agent.py,sha256=9izW4keCxVZEqrFOgyVUHD7N1vTXLkRynXYYd1Vpwzw,5785
15
15
  langroid/agent/special/lance_rag/lance_rag_task.py,sha256=l_HQgrYY-CX2FwIsS961aEF3bYog3GDYo98fj0C0mSk,2889
16
16
  langroid/agent/special/lance_rag/lance_tools.py,sha256=WypIS-3ZMDqY_PZEGB2K80-o4RfS43_OnER0dyFlsDY,1339
17
17
  langroid/agent/special/lance_rag/query_planner_agent.py,sha256=dZXVano2NbRZy91nBcEW6LrvedsHfxL1oNCgMQEHZ-U,8016
18
18
  langroid/agent/special/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- langroid/agent/special/neo4j/neo4j_chat_agent.py,sha256=ytx-FBDCGsn5m2sUzaYj6zHQhwsOGgdX1anuIXtvebI,11391
19
+ langroid/agent/special/neo4j/neo4j_chat_agent.py,sha256=ZLTTGwoKSW-Lw7G8o9Fi7kMiSZGNxsfK3GymanfcJvc,11738
20
20
  langroid/agent/special/neo4j/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  langroid/agent/special/neo4j/utils/system_message.py,sha256=7rwq4CYpb1-1AvGLJBI6-X8UIu5RP5LiWU9re-umXxs,1959
22
- langroid/agent/special/neo4j/utils/tools.py,sha256=dSbL9cNzu6ZUofyK2eaSrHPzFzS_-YEruGodsVvMrQQ,441
23
22
  langroid/agent/special/relevance_extractor_agent.py,sha256=uJ1jS_7S_gnJeuOUxEren-IHi1kfVx475B8L4H8RcEM,4795
24
23
  langroid/agent/special/retriever_agent.py,sha256=uu6vqFg85uCVM-_DrXesYe2gH_-WcoHhlsKRlLuZPXk,1867
25
24
  langroid/agent/special/sql/__init__.py,sha256=qUM-b4FfvIt0gYWP7_niyqR3OwVMMkuK2SyqUYWjyxs,207
@@ -29,13 +28,14 @@ langroid/agent/special/sql/utils/description_extractors.py,sha256=GcQ82IhKPInS_3
29
28
  langroid/agent/special/sql/utils/populate_metadata.py,sha256=zRjw31a1ZXvpx9bcmbtC2mngdHl-bp1ZNHStcPG8_Qk,2712
30
29
  langroid/agent/special/sql/utils/system_message.py,sha256=qKLHkvQWRQodTtPLPxr1GSLUYUFASZU8x-ybV67cB68,1885
31
30
  langroid/agent/special/sql/utils/tools.py,sha256=6uB2424SLtmapui9ggcEr0ZTiB6_dL1-JRGgN8RK9Js,1332
32
- langroid/agent/special/table_chat_agent.py,sha256=Gb4wx27A-L96s0vFbTQoiVPLMKQpif-DDwnnojxnaJU,8609
33
- langroid/agent/task.py,sha256=hW-vDwQkhFUtKvXUHr0Q9gylhtNfh6-bqoxM1c5SLHE,46425
31
+ langroid/agent/special/table_chat_agent.py,sha256=GEUTP-VdtMXq4CcPV80gDQrCEn-ZFb9IhuRMtLN5I1o,9030
32
+ langroid/agent/task.py,sha256=oa6SDHusxwvut2u8CGQ_0AQ-aONexFx5Bk2Yol3LeN8,46538
34
33
  langroid/agent/tool_message.py,sha256=ngmWdiqMYbjF4Am0hsLyA9zK0Q9QF2ziec6FW0lPD90,7399
35
34
  langroid/agent/tools/__init__.py,sha256=q-maq3k2BXhPAU99G0H6-j_ozoRvx15I1RFpPVicQIU,304
36
35
  langroid/agent/tools/extract_tool.py,sha256=u5lL9rKBzaLBOrRyLnTAZ97pQ1uxyLP39XsWMnpaZpw,3789
37
36
  langroid/agent/tools/generator_tool.py,sha256=y0fB0ZObjA0b3L0uSTtrqRCKHDUR95arBftqiUeKD2o,663
38
37
  langroid/agent/tools/google_search_tool.py,sha256=64F9oMNdS237BBOitrvYXN4Il_ES_fNrHkh35tBEDfA,1160
38
+ langroid/agent/tools/metaphor_search_tool.py,sha256=3leL7aqGUu31VZLsQJA8-zUgdMGDhLRpV8MdgX-_oPE,1780
39
39
  langroid/agent/tools/recipient_tool.py,sha256=o4qWuz-Lp2QkOQNy6PkRMT9AEc7btuk8EJXpTg0uPV4,9170
40
40
  langroid/agent/tools/run_python_code.py,sha256=V3mHdHQYn0M0PAtyoHxjNvk6KvWWcQ4ugo0TOKc8HyI,1752
41
41
  langroid/agent/tools/sciphi_search_rag_tool.py,sha256=_BpQJtBui7A92iY0qOHh3iNn7HTszQG9Ddm7YQmHSgk,2533
@@ -54,7 +54,7 @@ langroid/language_models/azure_openai.py,sha256=ncRCbKooqLVOY-PWQUIo9C3yTuKEFbAw
54
54
  langroid/language_models/base.py,sha256=N9Jn-veKBCd_ky9mhHVSmf1iRQ1RBdaxm45yrY9Njl0,20616
55
55
  langroid/language_models/config.py,sha256=5UF3DzO1a-Dfsc3vghE0XGq7g9t_xDsRCsuRiU4dgBg,366
56
56
  langroid/language_models/openai_assistants.py,sha256=9K-DEAL2aSWHeXj2hwCo2RAlK9_1oCPtqX2u1wISCj8,36
57
- langroid/language_models/openai_gpt.py,sha256=yp1OzxqbLAET-kC09dkQGcoldfCV-1XRRVj17lAMjlI,47798
57
+ langroid/language_models/openai_gpt.py,sha256=TnIvA0aNk6NlwgPpjqadf7nF7A-Rvr4kQLc41xDUyno,47864
58
58
  langroid/language_models/prompt_formatter/__init__.py,sha256=9JXFF22QNMmbQV1q4nrIeQVTtA3Tx8tEZABLtLBdFyc,352
59
59
  langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
60
60
  langroid/language_models/prompt_formatter/hf_formatter.py,sha256=_XT5oHM6TmT1x4hU38lGR_b7ej6rAMHEBQUJ-w2nSDY,3587
@@ -78,7 +78,7 @@ langroid/parsing/url_loader.py,sha256=54c6yt9grfUyImauSdM5UM4_ulU4JEz0ehdugAkxKI
78
78
  langroid/parsing/url_loader_cookies.py,sha256=Lg4sNpRz9MByWq2mde6T0hKv68VZSV3mtMjNEHuFeSU,2327
79
79
  langroid/parsing/urls.py,sha256=Nv4yCWQLLBEjaiRdaZZVQNBEl_cfK_V6cVuPm91wGtU,7686
80
80
  langroid/parsing/utils.py,sha256=NVX4D43taqjnQJ0P4tRKB5tX6iXfVXWKxWgGNhQsc5c,10030
81
- langroid/parsing/web_search.py,sha256=hGUVoSJNdpoT5rsm-ikAteMiUropHrzKaxN8EVVqO2U,2496
81
+ langroid/parsing/web_search.py,sha256=rQSDMrJTmhB8z5bXKxaAdt1Y5h13oaGxooRhsOHB2T0,3879
82
82
  langroid/prompts/__init__.py,sha256=B0vpJzIJlMR3mFRtoQwyALsFzBHvLp9f92acD8xJA_0,185
83
83
  langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
84
84
  langroid/prompts/prompts_config.py,sha256=XRQHzod7KBnoKn3B_V878jZiqBA7rcn-CtGPkuAe_yM,131
@@ -93,7 +93,7 @@ langroid/utils/docker.py,sha256=kJQOLTgM0x9j9pgIIqp0dZNZCTvoUDhp6i8tYBq1Jr0,1105
93
93
  langroid/utils/globals.py,sha256=VkTHhlqSz86oOPq65sjul0XU8I52UNaFC5vwybMQ74w,1343
94
94
  langroid/utils/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
95
  langroid/utils/llms/strings.py,sha256=CSAX9Z6FQOLXOzbLMe_Opqtc3ruDAKTTk7cPqc6Blh0,263
96
- langroid/utils/logging.py,sha256=CXe8w2gjFhhvgyGr1iwU--Je916H2rOrDNJosv5Tl3Y,3956
96
+ langroid/utils/logging.py,sha256=R8TN-FqVpwZ4Ajgls9TDMthLvPpQd0QVNXK-PJDj1Z8,3917
97
97
  langroid/utils/output/__init__.py,sha256=Z58-2ZKnGpGNaKw_nEjHV_CHTzjMz-WRSRQnazTLrWU,289
98
98
  langroid/utils/output/printing.py,sha256=5EsYB1O4qKhocW19aebOUzK82RD9U5nygbY21yo8gfg,2872
99
99
  langroid/utils/pandas_utils.py,sha256=nSA1tIgOUTkRDn-IKq7HP8XGJcL6bA110LcPfRF7h8I,707
@@ -105,12 +105,12 @@ langroid/utils/web/selenium_login.py,sha256=mYI6EvVmne34N9RajlsxxRqJQJvV-WG4LGp6
105
105
  langroid/vector_store/__init__.py,sha256=qOa3_BLvf8tjdUBT4Zq7pSLTY9TD2Fgw62UHHJWNu8w,557
106
106
  langroid/vector_store/base.py,sha256=JNk-2f6t_WCavizU332tOoZcXHP73RpobRk88Aus52w,13706
107
107
  langroid/vector_store/chromadb.py,sha256=Y80k6an5sN0cRWtcl78Xr-Ht87nd_hBjvkSU5OdCyY8,7312
108
- langroid/vector_store/lancedb.py,sha256=sKjscEpfDR560_pjkH6UyGiN3kSG_rYu1uxOJOo1zN0,16146
108
+ langroid/vector_store/lancedb.py,sha256=dsdZVHfnvOcs1BhVK99UknbIjbND0j6I6d2_QozuL7A,16671
109
109
  langroid/vector_store/meilisearch.py,sha256=d2huA9P-NoYRuAQ9ZeXJmMKr7ry8u90RUSR28k2ecQg,11340
110
110
  langroid/vector_store/momento.py,sha256=j6Eo6oIDN2fe7lsBOlCXJn3uvvERHHTFL5QJfeREeOM,10044
111
111
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
112
112
  langroid/vector_store/qdrantdb.py,sha256=_egbsP9SWBwmI827EDYSSOqfIQSmwNsmJfFTxrLpWYE,13457
113
- langroid-0.1.175.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
114
- langroid-0.1.175.dist-info/METADATA,sha256=2eqvV9ZkRME_CyHkBqFz8Th76R2Im7qZahnI0qcyIVw,43613
115
- langroid-0.1.175.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
116
- langroid-0.1.175.dist-info/RECORD,,
113
+ langroid-0.1.177.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
114
+ langroid-0.1.177.dist-info/METADATA,sha256=0ID1A5oyoMecA7dcEUQNlw_VQ2uB64VrV0IwjFtJhoQ,45218
115
+ langroid-0.1.177.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
116
+ langroid-0.1.177.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- from langroid.agent.tool_message import ToolMessage
2
-
3
-
4
- class CypherQueryTool(ToolMessage):
5
- request: str = "make_query"
6
- purpose: str = """Use this tool to send me the Generated Cypher query based on
7
- text description and schema that I will provide you."""
8
- cypher_query: str
9
-
10
-
11
- class GraphSchemaTool(ToolMessage):
12
- request: str = "get_schema"
13
- purpose: str = """Use this tool to get me the schema of the graph database."""