langroid 0.33.6__py3-none-any.whl → 0.33.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. langroid/__init__.py +106 -0
  2. langroid/agent/__init__.py +41 -0
  3. langroid/agent/base.py +1983 -0
  4. langroid/agent/batch.py +398 -0
  5. langroid/agent/callbacks/__init__.py +0 -0
  6. langroid/agent/callbacks/chainlit.py +598 -0
  7. langroid/agent/chat_agent.py +1899 -0
  8. langroid/agent/chat_document.py +454 -0
  9. langroid/agent/openai_assistant.py +882 -0
  10. langroid/agent/special/__init__.py +59 -0
  11. langroid/agent/special/arangodb/__init__.py +0 -0
  12. langroid/agent/special/arangodb/arangodb_agent.py +656 -0
  13. langroid/agent/special/arangodb/system_messages.py +186 -0
  14. langroid/agent/special/arangodb/tools.py +107 -0
  15. langroid/agent/special/arangodb/utils.py +36 -0
  16. langroid/agent/special/doc_chat_agent.py +1466 -0
  17. langroid/agent/special/lance_doc_chat_agent.py +262 -0
  18. langroid/agent/special/lance_rag/__init__.py +9 -0
  19. langroid/agent/special/lance_rag/critic_agent.py +198 -0
  20. langroid/agent/special/lance_rag/lance_rag_task.py +82 -0
  21. langroid/agent/special/lance_rag/query_planner_agent.py +260 -0
  22. langroid/agent/special/lance_tools.py +61 -0
  23. langroid/agent/special/neo4j/__init__.py +0 -0
  24. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  25. langroid/agent/special/neo4j/neo4j_chat_agent.py +433 -0
  26. langroid/agent/special/neo4j/system_messages.py +120 -0
  27. langroid/agent/special/neo4j/tools.py +32 -0
  28. langroid/agent/special/relevance_extractor_agent.py +127 -0
  29. langroid/agent/special/retriever_agent.py +56 -0
  30. langroid/agent/special/sql/__init__.py +17 -0
  31. langroid/agent/special/sql/sql_chat_agent.py +654 -0
  32. langroid/agent/special/sql/utils/__init__.py +21 -0
  33. langroid/agent/special/sql/utils/description_extractors.py +190 -0
  34. langroid/agent/special/sql/utils/populate_metadata.py +85 -0
  35. langroid/agent/special/sql/utils/system_message.py +35 -0
  36. langroid/agent/special/sql/utils/tools.py +64 -0
  37. langroid/agent/special/table_chat_agent.py +263 -0
  38. langroid/agent/task.py +2099 -0
  39. langroid/agent/tool_message.py +393 -0
  40. langroid/agent/tools/__init__.py +38 -0
  41. langroid/agent/tools/duckduckgo_search_tool.py +50 -0
  42. langroid/agent/tools/file_tools.py +234 -0
  43. langroid/agent/tools/google_search_tool.py +39 -0
  44. langroid/agent/tools/metaphor_search_tool.py +68 -0
  45. langroid/agent/tools/orchestration.py +303 -0
  46. langroid/agent/tools/recipient_tool.py +235 -0
  47. langroid/agent/tools/retrieval_tool.py +32 -0
  48. langroid/agent/tools/rewind_tool.py +137 -0
  49. langroid/agent/tools/segment_extract_tool.py +41 -0
  50. langroid/agent/xml_tool_message.py +382 -0
  51. langroid/cachedb/__init__.py +17 -0
  52. langroid/cachedb/base.py +58 -0
  53. langroid/cachedb/momento_cachedb.py +108 -0
  54. langroid/cachedb/redis_cachedb.py +153 -0
  55. langroid/embedding_models/__init__.py +39 -0
  56. langroid/embedding_models/base.py +74 -0
  57. langroid/embedding_models/models.py +461 -0
  58. langroid/embedding_models/protoc/__init__.py +0 -0
  59. langroid/embedding_models/protoc/embeddings.proto +19 -0
  60. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  61. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  62. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  63. langroid/embedding_models/remote_embeds.py +153 -0
  64. langroid/exceptions.py +71 -0
  65. langroid/language_models/__init__.py +53 -0
  66. langroid/language_models/azure_openai.py +153 -0
  67. langroid/language_models/base.py +678 -0
  68. langroid/language_models/config.py +18 -0
  69. langroid/language_models/mock_lm.py +124 -0
  70. langroid/language_models/openai_gpt.py +1964 -0
  71. langroid/language_models/prompt_formatter/__init__.py +16 -0
  72. langroid/language_models/prompt_formatter/base.py +40 -0
  73. langroid/language_models/prompt_formatter/hf_formatter.py +132 -0
  74. langroid/language_models/prompt_formatter/llama2_formatter.py +75 -0
  75. langroid/language_models/utils.py +151 -0
  76. langroid/mytypes.py +84 -0
  77. langroid/parsing/__init__.py +52 -0
  78. langroid/parsing/agent_chats.py +38 -0
  79. langroid/parsing/code_parser.py +121 -0
  80. langroid/parsing/document_parser.py +718 -0
  81. langroid/parsing/para_sentence_split.py +62 -0
  82. langroid/parsing/parse_json.py +155 -0
  83. langroid/parsing/parser.py +313 -0
  84. langroid/parsing/repo_loader.py +790 -0
  85. langroid/parsing/routing.py +36 -0
  86. langroid/parsing/search.py +275 -0
  87. langroid/parsing/spider.py +102 -0
  88. langroid/parsing/table_loader.py +94 -0
  89. langroid/parsing/url_loader.py +115 -0
  90. langroid/parsing/urls.py +273 -0
  91. langroid/parsing/utils.py +373 -0
  92. langroid/parsing/web_search.py +156 -0
  93. langroid/prompts/__init__.py +9 -0
  94. langroid/prompts/dialog.py +17 -0
  95. langroid/prompts/prompts_config.py +5 -0
  96. langroid/prompts/templates.py +141 -0
  97. langroid/pydantic_v1/__init__.py +10 -0
  98. langroid/pydantic_v1/main.py +4 -0
  99. langroid/utils/__init__.py +19 -0
  100. langroid/utils/algorithms/__init__.py +3 -0
  101. langroid/utils/algorithms/graph.py +103 -0
  102. langroid/utils/configuration.py +98 -0
  103. langroid/utils/constants.py +30 -0
  104. langroid/utils/git_utils.py +252 -0
  105. langroid/utils/globals.py +49 -0
  106. langroid/utils/logging.py +135 -0
  107. langroid/utils/object_registry.py +66 -0
  108. langroid/utils/output/__init__.py +20 -0
  109. langroid/utils/output/citations.py +41 -0
  110. langroid/utils/output/printing.py +99 -0
  111. langroid/utils/output/status.py +40 -0
  112. langroid/utils/pandas_utils.py +30 -0
  113. langroid/utils/pydantic_utils.py +602 -0
  114. langroid/utils/system.py +286 -0
  115. langroid/utils/types.py +93 -0
  116. langroid/vector_store/__init__.py +50 -0
  117. langroid/vector_store/base.py +359 -0
  118. langroid/vector_store/chromadb.py +214 -0
  119. langroid/vector_store/lancedb.py +406 -0
  120. langroid/vector_store/meilisearch.py +299 -0
  121. langroid/vector_store/momento.py +278 -0
  122. langroid/vector_store/qdrantdb.py +468 -0
  123. {langroid-0.33.6.dist-info → langroid-0.33.8.dist-info}/METADATA +95 -94
  124. langroid-0.33.8.dist-info/RECORD +127 -0
  125. {langroid-0.33.6.dist-info → langroid-0.33.8.dist-info}/WHEEL +1 -1
  126. langroid-0.33.6.dist-info/RECORD +0 -7
  127. langroid-0.33.6.dist-info/entry_points.txt +0 -4
  128. pyproject.toml +0 -356
  129. {langroid-0.33.6.dist-info → langroid-0.33.8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,278 @@
1
+ """
2
+ Momento Vector Index.
3
+ https://docs.momentohq.com/vector-index/develop/api-reference
4
+ DEPRECATED: API is unstable.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import os
11
+ from typing import List, Optional, Sequence, Tuple, no_type_check
12
+
13
+ from dotenv import load_dotenv
14
+
15
+ from langroid.exceptions import LangroidImportError
16
+
17
+ try:
18
+ import momento.responses.vector_index as mvi_response
19
+ from momento import (
20
+ # PreviewVectorIndexClientAsync,
21
+ CredentialProvider,
22
+ PreviewVectorIndexClient,
23
+ VectorIndexConfigurations,
24
+ )
25
+ from momento.requests.vector_index import (
26
+ ALL_METADATA,
27
+ Item,
28
+ SimilarityMetric,
29
+ )
30
+
31
+ has_momento = True
32
+ except ImportError:
33
+ has_momento = False
34
+
35
+
36
+ from langroid.embedding_models.base import (
37
+ EmbeddingModelsConfig,
38
+ )
39
+ from langroid.embedding_models.models import OpenAIEmbeddingsConfig
40
+ from langroid.mytypes import Document, EmbeddingFunction
41
+ from langroid.utils.configuration import settings
42
+ from langroid.utils.pydantic_utils import (
43
+ flatten_pydantic_instance,
44
+ nested_dict_from_flat,
45
+ )
46
+ from langroid.vector_store.base import VectorStore, VectorStoreConfig
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
class MomentoVIConfig(VectorStoreConfig):
    """Settings for the Momento Vector Index store (`MomentoVI`)."""

    # True selects the hosted Momento service; `MomentoVI.__init__` raises
    # NotImplementedError when this is False.
    cloud: bool = True
    # Name of the index to use; when None, `MomentoVI.__init__` does not
    # create a collection.
    collection_name: Optional[str] = "temp"
    # Embedding model configuration; defaults to the OpenAI embeddings config.
    embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
55
+
56
+
57
class MomentoVI(VectorStore):
    """Vector store backed by the hosted Momento Vector Index service.

    Each collection is stored as one Momento index, created with cosine
    similarity and the dimensionality reported by the embedding model.
    """

    def __init__(self, config: MomentoVIConfig = MomentoVIConfig()):
        """
        Connect to Momento and, if a collection name is configured, create it.

        Args:
            config (MomentoVIConfig): store settings.
                NOTE: the default instance is built once, at definition time,
                and is shared by every call that omits `config`;
                `create_collection` writes `collection_name` back into it.

        Raises:
            LangroidImportError: if the `momento` package is not installed.
            ValueError: if `config.cloud` is True and the MOMENTO_API_KEY
                environment variable is not set.
            NotImplementedError: if `config.cloud` is False.
        """
        super().__init__(config)
        if not has_momento:
            raise LangroidImportError("momento", "momento")
        self.distance = SimilarityMetric.COSINE_SIMILARITY
        self.config: MomentoVIConfig = config
        self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
        self.embedding_dim = self.embedding_model.embedding_dims
        self.host = config.host
        self.port = config.port
        # Pick up MOMENTO_API_KEY from a .env file, if one is present.
        load_dotenv()
        api_key = os.getenv("MOMENTO_API_KEY")
        if config.cloud:
            if api_key is None:
                raise ValueError(
                    """MOMENTO_API_KEY env variable must be set to
                    MomentoVI hosted service. Please set this in your .env file.
                    """
                )
            self.client = PreviewVectorIndexClient(
                configuration=VectorIndexConfigurations.Default.latest(),
                credential_provider=CredentialProvider.from_string(api_key),
            )
        else:
            raise NotImplementedError("MomentoVI local not available yet")

        # Note: Only create collection if a non-null collection name is provided.
        # This is useful to delay creation of vecdb until we have a suitable
        # collection name (e.g. we could get it from the url or folder path).
        if config.collection_name is not None:
            self.create_collection(
                config.collection_name, replace=config.replace_collection
            )

    def clear_empty_collections(self) -> int:
        """
        No-op: only logs a warning, since index sizes are not readily available.

        Returns:
            int: always 0 (no indices are deleted).
        """
        logger.warning(
            """
            Momento VI does not yet have a way to easily get size of indices,
            so clear_empty_collections is not deleting any indices.
            """
        )
        return 0

    def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
        """
        Delete every collection whose name starts with `prefix`.

        Args:
            really (bool): safety switch; nothing is deleted unless True.
            prefix (str): only collections whose names start with this string
                are deleted. The empty string matches all collections.

        Returns:
            int: number of collections for which deletion was requested.
        """
        if not really:
            logger.warning("Not deleting all collections, set really=True to confirm")
            return 0
        coll_names = self.list_collections(empty=False)
        coll_names = [name for name in coll_names if name.startswith(prefix)]
        if len(coll_names) == 0:
            logger.warning(f"No collections found with prefix {prefix}")
            return 0
        for name in coll_names:
            self.delete_collection(name)
        logger.warning(
            f"""
            Deleted {len(coll_names)} indices from Momento VI
            """
        )
        return len(coll_names)

    def list_collections(self, empty: bool = False) -> List[str]:
        """
        List the names of all indexes known to the Momento client.

        Args:
            empty (bool, optional): accepted for interface compatibility but
                currently ignored; all index names are returned whether or
                not the index contains any vectors.

        Returns:
            List[str]: names of all indexes.

        Raises:
            LangroidImportError: if the `momento` package is not installed.
            ValueError: if the listing request fails or returns an
                unexpected response.
        """
        if not has_momento:
            raise LangroidImportError("momento", "momento")
        response = self.client.list_indexes()
        if isinstance(response, mvi_response.ListIndexes.Success):
            return [ind.name for ind in response.indexes]
        elif isinstance(response, mvi_response.ListIndexes.Error):
            raise ValueError(f"Error listing collections: {response.message}")
        else:
            raise ValueError(f"Unexpected response: {response}")

    def _create_index(self, collection_name: str):  # type: ignore[no-untyped-def]
        """Issue a single create-index request and return the raw response."""
        return self.client.create_index(
            index_name=collection_name,
            num_dimensions=self.embedding_dim,
            similarity_metric=self.distance,
        )

    def create_collection(self, collection_name: str, replace: bool = False) -> None:
        """
        Create a collection with the given name, optionally replacing an existing
        collection if `replace` is True.

        The name is also recorded in `self.config.collection_name`.

        Args:
            collection_name (str): Name of the collection to create.
            replace (bool): Whether to replace an existing collection
                with the same name. Defaults to False, in which case an
                existing collection is left untouched.

        Raises:
            LangroidImportError: if the `momento` package is not installed.
            ValueError: if the create request returns an error.
        """
        if not has_momento:
            raise LangroidImportError("momento", "momento")
        self.config.collection_name = collection_name
        response = self._create_index(collection_name)
        already_exists = isinstance(
            response, mvi_response.CreateIndex.IndexAlreadyExists
        )
        if replace and already_exists:
            # Honor `replace`: drop the existing index, then create it afresh.
            self.delete_collection(collection_name)
            response = self._create_index(collection_name)

        created = isinstance(response, mvi_response.CreateIndex.Success)
        if created:
            logger.info(f"Created collection {collection_name}")
        elif isinstance(response, mvi_response.CreateIndex.IndexAlreadyExists):
            logger.warning(f"Collection {collection_name} already exists")
        elif isinstance(response, mvi_response.CreateIndex.Error):
            raise ValueError(
                f"Error creating collection {collection_name}: {response.message}"
            )
        if settings.debug and created:
            # Temporarily lower the threshold so the INFO message is emitted
            # in debug mode, then restore the previous level.
            level = logger.getEffectiveLevel()
            logger.setLevel(logging.INFO)
            logger.info(f"Collection {collection_name} created")
            logger.setLevel(level)

    def add_documents(self, documents: Sequence[Document]) -> None:
        """
        Embed `documents` and upsert them into the current collection.

        Args:
            documents (Sequence[Document]): documents to ingest; an empty
                sequence is a no-op.

        Raises:
            ValueError: if no collection name is set, or if an upsert batch
                fails or returns an unexpected response.
        """
        super().maybe_add_ids(documents)
        if len(documents) == 0:
            return
        # Validate before embedding so a misconfigured store does not pay for
        # an embedding call whose result would be thrown away.
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot ingest docs")
        embedding_vecs = self.embedding_fn([doc.content for doc in documents])

        # Only make sure the index exists; never replace it here, otherwise
        # every call would wipe previously ingested documents.
        self.create_collection(self.config.collection_name, replace=False)

        items = [
            Item(
                id=str(d.id()),
                vector=embedding_vecs[i],
                metadata=flatten_pydantic_instance(d, force_str=True),
                # force all values to str since Momento requires it
            )
            for i, d in enumerate(documents)
        ]

        # don't insert all at once, batch in chunks of b,
        # else we get an API error
        b = self.config.batch_size
        for i in range(0, len(documents), b):
            response = self.client.upsert_item_batch(
                index_name=self.config.collection_name,
                items=items[i : i + b],
            )
            if isinstance(response, mvi_response.UpsertItemBatch.Success):
                continue
            elif isinstance(response, mvi_response.UpsertItemBatch.Error):
                raise ValueError(f"Error adding documents: {response.message}")
            else:
                raise ValueError(f"Unexpected response: {response}")

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete the index named `collection_name`.

        Failures are logged as errors rather than raised.

        Args:
            collection_name (str): name of the index to delete.
        """
        delete_response = self.client.delete_index(collection_name)
        if isinstance(delete_response, mvi_response.DeleteIndex.Success):
            logger.warning(f"Deleted index {collection_name}")
        elif isinstance(delete_response, mvi_response.DeleteIndex.Error):
            logger.error(
                f"Error while deleting index {collection_name}: "
                f" {delete_response.message}"
            )

    def _to_int_or_uuid(self, id: str) -> int | str:
        """Return `id` as an int if it parses as one, else unchanged."""
        try:
            return int(id)
        except ValueError:
            return id

    def get_all_documents(self, where: str = "") -> List[Document]:
        """Not supported by this store; always raises NotImplementedError."""
        raise NotImplementedError(
            """
            MomentoVI does not support get_all_documents().
            Please use a different vector database, e.g. qdrant or chromadb.
            """
        )

    def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
        """Not supported by this store; always raises NotImplementedError."""
        raise NotImplementedError(
            """
            MomentoVI does not support get_documents_by_ids.
            Please use a different vector database, e.g. qdrant or chromadb.
            """
        )

    @no_type_check
    def similar_texts_with_scores(
        self,
        text: str,
        k: int = 1,
        where: Optional[str] = None,
        neighbors: int = 0,  # ignored
    ) -> List[Tuple[Document, float]]:
        """
        Return up to `k` stored documents most similar to `text`, with scores.

        Args:
            text (str): query text; it is embedded with `self.embedding_fn`.
            k (int): maximum number of hits to request.
            where (Optional[str]): not used by this implementation.
            neighbors (int): ignored.

        Returns:
            List[Tuple[Document, float]]: (document, score) pairs; empty if
                the search fails, returns an unexpected response, or finds
                no hits.

        Raises:
            ValueError: if no collection name is set.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot search")
        embedding = self.embedding_fn([text])[0]
        response = self.client.search(
            index_name=self.config.collection_name,
            query_vector=embedding,
            top_k=k,
            metadata_fields=ALL_METADATA,
        )

        if isinstance(response, mvi_response.Search.Error):
            logger.warning(
                f"Error while searching on index {self.config.collection_name}:"
                f" {response.message}"
            )
            return []
        elif not isinstance(response, mvi_response.Search.Success):
            logger.warning(f"Unexpected response: {response}")
            return []

        # Filter once so that scores and docs stay aligned pair-for-pair.
        hits = [match for match in response.hits if match is not None]
        if len(hits) == 0:
            logger.warning(f"No matches found for {text}")
            return []
        # NOTE(review): the score is read from each hit's metadata under the
        # "distance" key; confirm the Momento client populates it there.
        scores = [match.metadata["distance"] for match in hits]
        docs = [
            Document.parse_obj(nested_dict_from_flat(match.metadata))
            for match in hits
        ]
        if settings.debug:
            logger.info(f"Found {len(docs)} matches, max score: {max(scores)}")
        doc_score_pairs = list(zip(docs, scores))
        self.show_if_debug(doc_score_pairs)
        return doc_score_pairs