langroid 0.1.263__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +15 -1
- langroid/agent/chat_agent.py +68 -16
- langroid/agent/chat_document.py +57 -3
- langroid/agent/special/doc_chat_agent.py +8 -26
- langroid/agent/task.py +163 -32
- langroid/agent/tools/__init__.py +4 -0
- langroid/agent/tools/rewind_tool.py +136 -0
- langroid/cachedb/redis_cachedb.py +8 -4
- langroid/language_models/__init__.py +3 -0
- langroid/language_models/base.py +23 -4
- langroid/language_models/mock_lm.py +96 -0
- langroid/language_models/utils.py +2 -1
- langroid/mytypes.py +4 -35
- langroid/parsing/document_parser.py +5 -0
- langroid/parsing/parser.py +17 -2
- langroid/utils/__init__.py +2 -0
- langroid/utils/object_registry.py +66 -0
- langroid/utils/system.py +11 -2
- langroid/vector_store/base.py +3 -2
- langroid/vector_store/lancedb.py +32 -23
- {langroid-0.1.263.dist-info → langroid-0.2.0.dist-info}/METADATA +5 -8
- {langroid-0.1.263.dist-info → langroid-0.2.0.dist-info}/RECORD +25 -23
- pyproject.toml +3 -6
- langroid/language_models/openai_assistants.py +0 -3
- {langroid-0.1.263.dist-info → langroid-0.2.0.dist-info}/LICENSE +0 -0
- {langroid-0.1.263.dist-info → langroid-0.2.0.dist-info}/WHEEL +0 -0
langroid/language_models/base.py
CHANGED
@@ -4,7 +4,17 @@ import logging
 from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
-from typing import
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)

 from langroid.cachedb.base import CacheDBConfig
 from langroid.parsing.agent_chats import parse_message
@@ -134,12 +144,15 @@ class LLMMessage(BaseModel):
     content: str
     function_call: Optional[LLMFunctionCall] = None
     timestamp: datetime = Field(default_factory=datetime.utcnow)
+    # link to corresponding chat document, for provenance/rewind purposes
+    chat_document_id: str = ""

     def api_dict(self) -> Dict[str, Any]:
         """
-        Convert to dictionary for API request
-
-
+        Convert to dictionary for API request, keeping ONLY
+        the fields that are expected in an API call!
+        E.g., DROP the tool_id, since it is only for use in the Assistant API,
+        not the completion API.
         Returns:
             dict: dictionary representation of LLM message
         """
@@ -155,8 +168,10 @@ class LLMMessage(BaseModel):
         dict_no_none["function_call"]["arguments"] = json.dumps(
             dict_no_none["function_call"]["arguments"]
         )
+        # IMPORTANT! drop fields that are not expected in API call
         dict_no_none.pop("tool_id", None)
         dict_no_none.pop("timestamp", None)
+        dict_no_none.pop("chat_document_id", None)
         return dict_no_none

     def __str__(self) -> str:
@@ -268,11 +283,15 @@ class LanguageModel(ABC):
             """
         )
         from langroid.language_models.azure_openai import AzureGPT
+        from langroid.language_models.mock_lm import MockLM, MockLMConfig
         from langroid.language_models.openai_gpt import OpenAIGPT

         if config is None or config.type is None:
             return None

+        if config.type == "mock":
+            return MockLM(cast(MockLMConfig, config))
+
         openai: Union[Type[AzureGPT], Type[OpenAIGPT]]

         if config.type == "azure":
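Net effect of the base.py changes: each `LLMMessage` can now carry a `chat_document_id` linking it back to the `ChatDocument` it came from (provenance for the new `rewind_tool`), and `api_dict()` strips this bookkeeping field, along with `tool_id` and `timestamp`, before the message goes out in an API call. A minimal sketch of the intent, assuming `LLMMessage`'s `role` field accepts the usual role strings (the role value is not shown in this diff):

```python
from langroid.language_models.base import LLMMessage

msg = LLMMessage(
    role="user",  # assumed: pydantic coerces this to the Role enum
    content="hello",
    chat_document_id="abc-123",  # internal provenance link, never sent to the API
)

d = msg.api_dict()
assert "chat_document_id" not in d  # dropped, per the pops above
assert "timestamp" not in d
assert d["content"] == "hello"
```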
langroid/language_models/mock_lm.py
ADDED
@@ -0,0 +1,96 @@
+"""Mock Language Model for testing"""
+
+from typing import Dict, List, Optional, Union
+
+import langroid.language_models as lm
+from langroid.language_models import LLMResponse
+from langroid.language_models.base import LanguageModel, LLMConfig
+
+
+class MockLMConfig(LLMConfig):
+    """
+    Mock Language Model Configuration.
+
+    Attributes:
+        response_dict (Dict[str, str]): A "response rule-book", in the form of a
+            dictionary; if last msg in dialog is x,then respond with response_dict[x]
+    """
+
+    response_dict: Dict[str, str] = {}
+    default_response: str = "Mock response"
+    type: str = "mock"
+
+
+class MockLM(LanguageModel):
+
+    def __init__(self, config: MockLMConfig = MockLMConfig()):
+        super().__init__(config)
+        self.config: MockLMConfig = config
+
+    def chat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return lm.LLMResponse(
+            message=self.config.response_dict.get(
+                last_msg,
+                self.config.default_response,
+            ),
+            cached=False,
+        )
+
+    async def achat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return lm.LLMResponse(
+            message=self.config.response_dict.get(
+                last_msg,
+                self.config.default_response,
+            ),
+            cached=False,
+        )
+
+    def generate(self, prompt: str, max_tokens: int = 200) -> lm.LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return lm.LLMResponse(
+            message=self.config.response_dict.get(
+                prompt,
+                self.config.default_response,
+            ),
+            cached=False,
+        )
+
+    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return lm.LLMResponse(
+            message=self.config.response_dict.get(
+                prompt,
+                self.config.default_response,
+            ),
+            cached=False,
+        )
+
+    def get_stream(self) -> bool:
+        return False
+
+    def set_stream(self, stream: bool) -> bool:
+        return False
langroid/language_models/utils.py
CHANGED
@@ -62,7 +62,7 @@ def retry_with_exponential_backoff(
             if num_retries > max_retries:
                 raise Exception(
                     f"Maximum number of retries ({max_retries}) exceeded."
-                    f" Last error: {e}."
+                    f" Last error: {str(e)}."
                 )

             # Increment the delay
@@ -128,6 +128,7 @@ def async_retry_with_exponential_backoff(
             if num_retries > max_retries:
                 raise Exception(
                     f"Maximum number of retries ({max_retries}) exceeded."
+                    f" Last error: {str(e)}."
                 )

             # Increment the delay
langroid/mytypes.py
CHANGED
@@ -1,10 +1,9 @@
-import hashlib
-import uuid
 from enum import Enum
 from textwrap import dedent
 from typing import Any, Callable, Dict, List, Union
+from uuid import uuid4

-from langroid.pydantic_v1 import BaseModel, Extra
+from langroid.pydantic_v1 import BaseModel, Extra, Field

 Number = Union[int, float]
 Embedding = List[Number]
@@ -40,7 +39,7 @@ class DocMetaData(BaseModel):

     source: str = "context"
     is_chunk: bool = False  # if it is a chunk, don't split
-    id: str =
+    id: str = Field(default_factory=lambda: str(uuid4()))
     window_ids: List[str] = []  # for RAG: ids of chunks around this one

     def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
@@ -67,41 +66,11 @@ class Document(BaseModel):
     content: str
     metadata: DocMetaData

-    @staticmethod
-    def hash_id(doc: str) -> str:
-        # Encode the document as UTF-8
-        doc_utf8 = str(doc).encode("utf-8")
-
-        # Create a SHA256 hash object
-        sha256_hash = hashlib.sha256()
-
-        # Update the hash object with the bytes of the document
-        sha256_hash.update(doc_utf8)
-
-        # Get the hexadecimal representation of the hash
-        hash_hex = sha256_hash.hexdigest()
-
-        # Convert the first part of the hash to a UUID
-        hash_uuid = uuid.UUID(hash_hex[:32])
-
-        return str(hash_uuid)
-
-    def _unique_hash_id(self) -> str:
-        return self.hash_id(str(self))
-
     def id(self) -> str:
-        if (
-            hasattr(self.metadata, "id")
-            and self.metadata.id is not None
-            and self.metadata.id != ""
-        ):
-            return self.metadata.id
-        else:
-            return self._unique_hash_id()
+        return self.metadata.id

     def __str__(self) -> str:
         # TODO: make metadata a pydantic model to enforce "source"
-        self.metadata.json()
         return dedent(
             f"""
             CONTENT: {self.content}
langroid/parsing/document_parser.py
CHANGED
@@ -8,6 +8,7 @@ from io import BytesIO
 from typing import TYPE_CHECKING, Any, Generator, List, Tuple

 from langroid.exceptions import LangroidImportError
+from langroid.utils.object_registry import ObjectRegistry

 try:
     import fitz
@@ -341,6 +342,8 @@ class DocumentParser(Parser):
         split = []  # tokens in curr split
         pages: List[str] = []
         docs: List[Document] = []
+        # metadata.id to be shared by ALL chunks of this document
+        common_id = ObjectRegistry.new_id()
         for i, page in self.iterate_pages():
             page_text = self.extract_text_from_page(page)
             split += self.tokenizer.encode(page_text)
@@ -358,6 +361,7 @@ class DocumentParser(Parser):
                     metadata=DocMetaData(
                         source=f"{self.source} pages {pg}",
                         is_chunk=True,
+                        id=common_id,
                     ),
                 )
             )
@@ -372,6 +376,7 @@ class DocumentParser(Parser):
                 metadata=DocMetaData(
                     source=f"{self.source} pages {pg}",
                     is_chunk=True,
+                    id=common_id,
                 ),
             )
         )
langroid/parsing/parser.py
CHANGED
@@ -7,6 +7,7 @@ import tiktoken
 from langroid.mytypes import Document
 from langroid.parsing.para_sentence_split import create_chunks, remove_extra_whitespace
 from langroid.pydantic_v1 import BaseSettings
+from langroid.utils.object_registry import ObjectRegistry

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
@@ -75,11 +76,13 @@ class Parser:
             return
         # The original metadata.id (if any) is ignored since it will be same for all
         # chunks and is useless. We want a distinct id for each chunk.
+        # ASSUMPTION: all chunks c of a doc have same c.metadata.id !
         orig_ids = [c.metadata.id for c in chunks]
-        ids = [
+        ids = [ObjectRegistry.new_id() for c in chunks]
         id2chunk = {id: c for id, c in zip(ids, chunks)}

         # group the ids by orig_id
+        # (each distinct orig_id refers to a different document)
         orig_id_to_ids: Dict[str, List[str]] = {}
         for orig_id, id in zip(orig_ids, ids):
             if orig_id not in orig_id_to_ids:
@@ -108,6 +111,10 @@ class Parser:
             if d.content.strip() == "":
                 continue
             chunks = remove_extra_whitespace(d.content).split(self.config.separators[0])
+            # note we are ensuring we COPY the document metadata into each chunk,
+            # which ensures all chunks of a given doc have same metadata
+            # (and in particular same metadata.id, which is important later for
+            # add_window_ids)
             chunk_docs = [
                 Document(
                     content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
@@ -156,6 +163,10 @@ class Parser:
             if d.content.strip() == "":
                 continue
             chunks = create_chunks(d.content, self.config.chunk_size, self.num_tokens)
+            # note we are ensuring we COPY the document metadata into each chunk,
+            # which ensures all chunks of a given doc have same metadata
+            # (and in particular same metadata.id, which is important later for
+            # add_window_ids)
             chunk_docs = [
                 Document(
                     content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
@@ -171,6 +182,10 @@ class Parser:
         final_docs = []
         for d in docs:
             chunks = self.chunk_tokens(d.content)
+            # note we are ensuring we COPY the document metadata into each chunk,
+            # which ensures all chunks of a given doc have same metadata
+            # (and in particular same metadata.id, which is important later for
+            # add_window_ids)
             chunk_docs = [
                 Document(
                     content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
@@ -274,7 +289,7 @@ class Parser:
         # we need this to distinguish docs later in add_window_ids
         for d in docs:
             if d.metadata.id in [None, ""]:
-                d.metadata.id =
+                d.metadata.id = ObjectRegistry.new_id()
         # some docs are already splits, so don't split them further!
         chunked_docs = [d for d in docs if d.metadata.is_chunk]
         big_docs = [d for d in docs if not d.metadata.is_chunk]
langroid/utils/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from . import logging
 from . import pydantic_utils
 from . import system
 from . import output
+from . import object_registry

 __all__ = [
     "configuration",
@@ -14,4 +15,5 @@ __all__ = [
     "pydantic_utils",
     "system",
     "output",
+    "object_registry",
 ]
langroid/utils/object_registry.py
ADDED
@@ -0,0 +1,66 @@
+import time
+from typing import TYPE_CHECKING, Dict, Optional, TypeAlias, TypeVar
+from uuid import uuid4
+
+from langroid.pydantic_v1 import BaseModel
+
+if TYPE_CHECKING:
+    from langroid.agent.base import Agent
+    from langroid.agent.chat_agent import ChatAgent
+    from langroid.agent.chat_document import ChatDocument
+
+    # any derivative of BaseModel that has an id() method or an id attribute
+    ObjWithId: TypeAlias = ChatDocument | ChatAgent | Agent
+else:
+    ObjWithId = BaseModel
+
+# Define a type variable that can be any subclass of BaseModel
+T = TypeVar("T", bound=BaseModel)
+
+
+class ObjectRegistry:
+    """A global registry to hold id -> object mappings."""
+
+    registry: Dict[str, ObjWithId] = {}
+
+    @classmethod
+    def add(cls, obj: ObjWithId) -> str:
+        """Adds an object to the registry, returning the object's ID."""
+        object_id = obj.id() if callable(obj.id) else obj.id
+        cls.registry[object_id] = obj
+        return object_id
+
+    @classmethod
+    def get(cls, obj_id: str) -> Optional[ObjWithId]:
+        """Retrieves an object by ID if it still exists."""
+        return cls.registry.get(obj_id)
+
+    @classmethod
+    def register_object(cls, obj: ObjWithId) -> str:
+        """Registers an object in the registry, returning the object's ID."""
+        return cls.add(obj)
+
+    @classmethod
+    def remove(cls, obj_id: str) -> None:
+        """Removes an object from the registry."""
+        if obj_id in cls.registry:
+            del cls.registry[obj_id]
+
+    @classmethod
+    def cleanup(cls) -> None:
+        """Cleans up the registry by removing entries where the object is None."""
+        to_remove = [key for key, value in cls.registry.items() if value is None]
+        for key in to_remove:
+            del cls.registry[key]
+
+    @staticmethod
+    def new_id() -> str:
+        """Generates a new unique ID."""
+        return str(uuid4())
+
+
+def scheduled_cleanup(interval: int = 600) -> None:
+    """Periodically cleans up the global registry every 'interval' seconds."""
+    while True:
+        ObjectRegistry.cleanup()
+        time.sleep(interval)
langroid/utils/system.py
CHANGED
@@ -1,6 +1,7 @@
 import getpass
 import hashlib
 import importlib
+import importlib.metadata
 import inspect
 import logging
 import shutil
@@ -18,6 +19,15 @@ DELETION_ALLOWED_PATHS = [
 ]


+def pydantic_major_version() -> int:
+    try:
+        pydantic_version = importlib.metadata.version("pydantic")
+        major_version = int(pydantic_version.split(".")[0])
+        return major_version
+    except importlib.metadata.PackageNotFoundError:
+        return -1
+
+
 class LazyLoad:
     """Lazy loading of modules or classes."""

@@ -171,5 +181,4 @@ def hash(s: str) -> str:

 def generate_unique_id() -> str:
     """Generate a unique ID using UUID4."""
-    unique_id = str(uuid.uuid4())
-    return unique_id
+    return str(uuid.uuid4())
langroid/vector_store/base.py
CHANGED
@@ -12,6 +12,7 @@ from langroid.mytypes import Document
 from langroid.pydantic_v1 import BaseSettings
 from langroid.utils.algorithms.graph import components, topological_sort
 from langroid.utils.configuration import settings
+from langroid.utils.object_registry import ObjectRegistry
 from langroid.utils.output.printing import print_long_text
 from langroid.utils.pandas_utils import stringify

@@ -163,7 +164,7 @@ class VectorStore(ABC):
         vecdbs don't like having blank ids."""
         for d in documents:
             if d.metadata.id in [None, ""]:
-                d.metadata.id =
+                d.metadata.id = ObjectRegistry.new_id()

     @abstractmethod
     def similar_texts_with_scores(
@@ -254,7 +255,7 @@ class VectorStore(ABC):
                 metadata=metadata,
             )
             # make a fresh id since content is in general different
-            document.metadata.id =
+            document.metadata.id = ObjectRegistry.new_id()
             final_docs += [document]
             final_scores += [max(id2max_score[id] for id in w)]
         return list(zip(final_docs, final_scores))
langroid/vector_store/lancedb.py
CHANGED
@@ -38,6 +38,7 @@ from langroid.utils.pydantic_utils import (
     flatten_pydantic_model,
     nested_dict_from_flat,
 )
+from langroid.utils.system import pydantic_major_version
 from langroid.vector_store.base import VectorStore, VectorStoreConfig

 try:
@@ -121,17 +122,21 @@ class LanceDB(VectorStore):
                 else self.unflattened_schema
             )
         except (AttributeError, TypeError) as e:
-
-
-
-
-
-
-
-
-
-
-
+            pydantic_version = pydantic_major_version()
+            if pydantic_version > 1:
+                raise ValueError(
+                    f"""
+                    {e}
+                    ====
+                    You are using Pydantic v{pydantic_version},
+                    which is not yet compatible with Langroid's LanceDB integration.
+                    To use Lancedb with Langroid, please install the
+                    latest pydantic 1.x instead of pydantic v2, e.g.
+                    pip install "pydantic<2.0.0"
+                    """
+                )
+            else:
+                raise e

     def clear_empty_collections(self) -> int:
         coll_names = self.list_collections()
@@ -264,18 +269,22 @@ class LanceDB(VectorStore):
             self.client.create_table(
                 collection_name, schema=self.schema, mode="overwrite"
             )
-        except TypeError as e:
-
-
-
-
-
-
-
-
-
-
-
+        except (AttributeError, TypeError) as e:
+            pydantic_version = pydantic_major_version()
+            if pydantic_version > 1:
+                raise ValueError(
+                    f"""
+                    {e}
+                    ====
+                    You are using Pydantic v{pydantic_version},
+                    which is not yet compatible with Langroid's LanceDB integration.
+                    To use Lancedb with Langroid, please install the
+                    latest pydantic 1.x instead of pydantic v2, e.g.
+                    pip install "pydantic<2.0.0"
+                    """
+                )
+            else:
+                raise e

         if settings.debug:
             level = logger.getEffectiveLevel()
{langroid-0.1.263.dist-info → langroid-0.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.263
+Version: 0.2.0
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -19,7 +19,6 @@ Provides-Extra: docx
 Provides-Extra: hf-embeddings
 Provides-Extra: hf-transformers
 Provides-Extra: lancedb
-Provides-Extra: langroid-pydantic-v1
 Provides-Extra: litellm
 Provides-Extra: meilisearch
 Provides-Extra: metaphor
@@ -53,10 +52,8 @@ Requires-Dist: halo (>=0.0.31,<0.0.32)
 Requires-Dist: huggingface-hub (>=0.21.2,<0.22.0) ; extra == "hf-transformers" or extra == "all" or extra == "transformers"
 Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
 Requires-Dist: lancedb (>=0.8.2,<0.9.0) ; extra == "vecdbs" or extra == "lancedb"
-Requires-Dist: langroid_pydantic_v1 (>=0.1.0,<0.2.0) ; extra == "langroid-pydantic-v1" or extra == "lancedb"
 Requires-Dist: litellm (>=1.30.1,<2.0.0) ; extra == "all" or extra == "litellm"
 Requires-Dist: lxml (>=4.9.3,<5.0.0)
-Requires-Dist: meilisearch (>=0.28.3,<0.29.0) ; extra == "meilisearch"
 Requires-Dist: meilisearch-python-sdk (>=2.2.3,<3.0.0) ; extra == "meilisearch"
 Requires-Dist: metaphor-python (>=0.1.23,<0.2.0) ; extra == "all" or extra == "metaphor"
 Requires-Dist: momento (>=1.10.2,<2.0.0) ; extra == "momento"
@@ -426,7 +423,7 @@ Here is what it looks like in action

 # :zap: Highlights
 (For a more up-to-date list see the
-[
+[Updates/Releases](https://github.com/langroid/langroid?tab=readme-ov-file#fire-updatesreleases)
 section above)
 - **Agents as first-class citizens:** The [Agent](https://langroid.github.io/langroid/reference/agent/base/#langroid.agent.base.Agent) class encapsulates LLM conversation state,
 and optionally a vector-store and tools. Agents are a core abstraction in Langroid;
@@ -442,8 +439,8 @@ section above)
 - **Modularity, Reusability, Loose coupling:** The `Agent` and `Task` abstractions allow users to design
 Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
 - **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
-providers (local/open or remote/commercial) via proxy libraries and local model servers
-such as [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API.
+providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
+such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui), [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API.
 - **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
 [Momento](https://www.gomomento.com/) to cache LLM responses.
 - **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
@@ -453,7 +450,7 @@ such as [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic t
 - **Observability, Logging, Lineage:** Langroid generates detailed logs of multi-agent interactions and
 maintains provenance/lineage of messages, so that you can trace back
 the origin of a message.
-- **Tools/Plugins/Function-calling**: Langroid supports OpenAI's recently
+- **[Tools/Plugins/Function-calling](https://langroid.github.io/langroid/quick-start/chat-agent-tool/)**: Langroid supports OpenAI's recently
 released [function calling](https://platform.openai.com/docs/guides/gpt/function-calling)
 feature. In addition, Langroid has its own native equivalent, which we
 call **tools** (also known as "plugins" in other contexts). Function
|