langroid 0.58.2__py3-none-any.whl → 0.59.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +39 -17
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_document.py +7 -7
- langroid/agent/done_sequence_parser.py +46 -11
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +53 -94
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/xml_tool_message.py +90 -35
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/base.py +6 -4
- langroid/language_models/client_cache.py +64 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/openai_gpt.py +119 -20
- langroid/language_models/provider_params.py +3 -22
- langroid/mytypes.py +11 -4
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/parser.py +22 -7
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/urls.py +5 -4
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +61 -4
- langroid/pydantic_v1/main.py +10 -4
- langroid/utils/configuration.py +13 -11
- langroid/utils/constants.py +1 -1
- langroid/utils/globals.py +21 -5
- langroid/utils/html_logger.py +2 -1
- langroid/utils/object_registry.py +1 -1
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/types.py +2 -2
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.2.dist-info → langroid-0.59.0.dist-info}/METADATA +3 -2
- {langroid-0.58.2.dist-info → langroid-0.59.0.dist-info}/RECORD +64 -64
- {langroid-0.58.2.dist-info → langroid-0.59.0.dist-info}/WHEEL +0 -0
- {langroid-0.58.2.dist-info → langroid-0.59.0.dist-info}/licenses/LICENSE +0 -0
langroid/parsing/url_loader.py
CHANGED
@@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional
|
|
7
7
|
|
8
8
|
import markdownify as md
|
9
9
|
from dotenv import load_dotenv
|
10
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
10
11
|
|
11
12
|
from langroid.exceptions import LangroidImportError
|
12
13
|
from langroid.mytypes import DocMetaData, Document
|
13
14
|
from langroid.parsing.document_parser import DocumentParser, ImagePdfParser
|
14
15
|
from langroid.parsing.parser import Parser, ParsingConfig
|
15
|
-
from langroid.pydantic_v1 import BaseSettings
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from firecrawl import FirecrawlApp
|
@@ -54,20 +54,13 @@ class FirecrawlConfig(BaseCrawlerConfig):
|
|
54
54
|
params: Dict[str, Any] = {}
|
55
55
|
timeout: Optional[int] = None
|
56
56
|
|
57
|
-
|
58
|
-
# Leverage Pydantic's BaseSettings to
|
59
|
-
# allow setting of fields via env vars,
|
60
|
-
# e.g. FIRECRAWL_MODE=scrape and FIRECRAWL_API_KEY=...
|
61
|
-
env_prefix = "FIRECRAWL_"
|
57
|
+
model_config = SettingsConfigDict(env_prefix="FIRECRAWL_")
|
62
58
|
|
63
59
|
|
64
60
|
class ExaCrawlerConfig(BaseCrawlerConfig):
|
65
61
|
api_key: str = ""
|
66
62
|
|
67
|
-
|
68
|
-
# Allow setting of fields via env vars with prefix EXA_
|
69
|
-
# e.g., EXA_API_KEY=your_api_key
|
70
|
-
env_prefix = "EXA_"
|
63
|
+
model_config = SettingsConfigDict(env_prefix="EXA_")
|
71
64
|
|
72
65
|
|
73
66
|
class Crawl4aiConfig(BaseCrawlerConfig):
|
@@ -81,49 +74,22 @@ class Crawl4aiConfig(BaseCrawlerConfig):
|
|
81
74
|
browser_config: Optional["BrowserConfig"] = None
|
82
75
|
run_config: Optional["CrawlerRunConfig"] = None
|
83
76
|
|
84
|
-
|
77
|
+
model_config = SettingsConfigDict(arbitrary_types_allowed=True)
|
85
78
|
|
86
|
-
def __init_subclass__(cls, **kwargs: Any) -> None:
|
87
|
-
"""Resolve forward references when class is first subclassed or instantiated."""
|
88
|
-
super().__init_subclass__(**kwargs)
|
89
|
-
cls._resolve_forward_refs()
|
90
79
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
from crawl4ai.deep_crawling import DeepCrawlStrategy
|
99
|
-
from crawl4ai.extraction_strategy import ExtractionStrategy
|
100
|
-
from crawl4ai.markdown_generation_strategy import (
|
101
|
-
MarkdownGenerationStrategy,
|
102
|
-
)
|
103
|
-
|
104
|
-
# Create namespace for update_forward_refs
|
105
|
-
namespace = {
|
106
|
-
"BrowserConfig": BrowserConfig,
|
107
|
-
"CrawlerRunConfig": CrawlerRunConfig,
|
108
|
-
"ContentScrapingStrategy": ContentScrapingStrategy,
|
109
|
-
"DeepCrawlStrategy": DeepCrawlStrategy,
|
110
|
-
"ExtractionStrategy": ExtractionStrategy,
|
111
|
-
"MarkdownGenerationStrategy": MarkdownGenerationStrategy,
|
112
|
-
}
|
113
|
-
|
114
|
-
cls.update_forward_refs(**namespace)
|
115
|
-
cls._refs_resolved = True
|
116
|
-
except ImportError:
|
117
|
-
# If crawl4ai is not installed, leave forward refs as strings
|
118
|
-
pass
|
119
|
-
|
120
|
-
def __init__(self, **kwargs: Any) -> None:
|
121
|
-
"""Initialize and ensure forward refs are resolved."""
|
122
|
-
self._resolve_forward_refs()
|
123
|
-
super().__init__(**kwargs)
|
80
|
+
# Resolve forward references for Crawl4aiConfig after the class is defined
|
81
|
+
try:
|
82
|
+
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
83
|
+
from crawl4ai.content_scraping_strategy import ContentScrapingStrategy
|
84
|
+
from crawl4ai.deep_crawling import DeepCrawlStrategy
|
85
|
+
from crawl4ai.extraction_strategy import ExtractionStrategy
|
86
|
+
from crawl4ai.markdown_generation_strategy import MarkdownGenerationStrategy
|
124
87
|
|
125
|
-
|
126
|
-
|
88
|
+
# Rebuild the model with resolved references
|
89
|
+
Crawl4aiConfig.model_rebuild()
|
90
|
+
except ImportError:
|
91
|
+
# If crawl4ai is not installed, leave forward refs as strings
|
92
|
+
pass
|
127
93
|
|
128
94
|
|
129
95
|
class BaseCrawler(ABC):
|
@@ -347,7 +313,7 @@ class FirecrawlCrawler(BaseCrawler):
|
|
347
313
|
)
|
348
314
|
processed_urls.add(url)
|
349
315
|
new_pages += 1
|
350
|
-
pbar.update
|
316
|
+
pbar.model_copy(update=new_pages) # Update progress bar with new pages
|
351
317
|
|
352
318
|
# Break if crawl is complete
|
353
319
|
if status["status"] == "completed":
|
langroid/parsing/urls.py
CHANGED
@@ -9,11 +9,10 @@ from urllib.parse import urldefrag, urljoin, urlparse
|
|
9
9
|
import fire
|
10
10
|
import requests
|
11
11
|
from bs4 import BeautifulSoup
|
12
|
+
from pydantic import BaseModel, HttpUrl, TypeAdapter, ValidationError
|
12
13
|
from rich import print
|
13
14
|
from rich.prompt import Prompt
|
14
15
|
|
15
|
-
from langroid.pydantic_v1 import BaseModel, HttpUrl, ValidationError, parse_obj_as
|
16
|
-
|
17
16
|
logger = logging.getLogger(__name__)
|
18
17
|
|
19
18
|
|
@@ -106,7 +105,8 @@ class Url(BaseModel):
|
|
106
105
|
|
107
106
|
def is_url(s: str) -> bool:
|
108
107
|
try:
|
109
|
-
|
108
|
+
url_adapter = TypeAdapter(HttpUrl)
|
109
|
+
Url(url=url_adapter.validate_python(s))
|
110
110
|
return True
|
111
111
|
except ValidationError:
|
112
112
|
return False
|
@@ -133,7 +133,8 @@ def get_urls_paths_bytes_indices(
|
|
133
133
|
byte_list.append(i)
|
134
134
|
continue
|
135
135
|
try:
|
136
|
-
|
136
|
+
url_adapter = TypeAdapter(HttpUrl)
|
137
|
+
Url(url=url_adapter.validate_python(item))
|
137
138
|
urls.append(i)
|
138
139
|
except ValidationError:
|
139
140
|
if os.path.exists(item):
|
langroid/pydantic_v1/__init__.py
CHANGED
@@ -1,10 +1,67 @@
|
|
1
1
|
"""
|
2
|
-
|
2
|
+
Compatibility layer for Langroid's Pydantic migration.
|
3
3
|
|
4
|
-
|
4
|
+
IMPORTANT: You are importing from langroid.pydantic_v1 but getting Pydantic v2 classes!
|
5
|
+
Langroid has fully migrated to Pydantic v2, and this compatibility layer is deprecated.
|
5
6
|
"""
|
6
7
|
|
8
|
+
import warnings
|
9
|
+
import logging
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
# Only show the visual warning, not the standard deprecation warning
|
14
|
+
# The standard warning is too noisy and shows the import line
|
15
|
+
logger.warning(
|
16
|
+
"""
|
17
|
+
╔════════════════════════════════════════════════════════════════════════╗
|
18
|
+
║ ⚠️ DEPRECATION WARNING ⚠️ ║
|
19
|
+
╠════════════════════════════════════════════════════════════════════════╣
|
20
|
+
║ ║
|
21
|
+
║ You are importing from langroid.pydantic_v1, but you're actually ║
|
22
|
+
║ getting Pydantic v2 classes. Langroid has fully migrated to v2. ║
|
23
|
+
║ ║
|
24
|
+
║ Please update your imports: ║
|
25
|
+
║ OLD: from langroid.pydantic_v1 import BaseModel, Field ║
|
26
|
+
║ NEW: from pydantic import BaseModel, Field ║
|
27
|
+
║ ║
|
28
|
+
║ Also ensure your code uses Pydantic v2 patterns: ║
|
29
|
+
║ • Use model_dump() instead of dict() ║
|
30
|
+
║ • Use model_dump_json() instead of json() ║
|
31
|
+
║ • Use ConfigDict instead of class Config ║
|
32
|
+
║ • Use model_validate() instead of parse_obj() ║
|
33
|
+
║ ║
|
34
|
+
║ This compatibility layer will be removed in a future version. ║
|
35
|
+
╚════════════════════════════════════════════════════════════════════════╝
|
36
|
+
"""
|
37
|
+
)
|
38
|
+
|
39
|
+
# Import from pydantic v2 directly (not from pydantic.v1)
|
40
|
+
# This allows existing code to continue working if it's already v2-compatible
|
41
|
+
from pydantic import * # noqa: F403, F401
|
42
|
+
|
43
|
+
# BaseSettings has moved in v2, import it explicitly
|
7
44
|
try:
|
8
|
-
from
|
45
|
+
from pydantic_settings import BaseSettings # noqa: F401
|
9
46
|
except ImportError:
|
10
|
-
|
47
|
+
# Fallback for older pydantic versions
|
48
|
+
from pydantic import BaseSettings # type: ignore[no-redef] # noqa: F401
|
49
|
+
|
50
|
+
# Explicitly export all items for mypy
|
51
|
+
__all__ = [
|
52
|
+
"BaseModel",
|
53
|
+
"BaseSettings",
|
54
|
+
"SettingsConfigDict",
|
55
|
+
"Field",
|
56
|
+
"ConfigDict",
|
57
|
+
"ValidationError",
|
58
|
+
"field_validator",
|
59
|
+
"model_validator",
|
60
|
+
"create_model",
|
61
|
+
"HttpUrl",
|
62
|
+
"AnyUrl",
|
63
|
+
"TypeAdapter",
|
64
|
+
"parse_obj_as",
|
65
|
+
"validator",
|
66
|
+
"root_validator",
|
67
|
+
]
|
langroid/pydantic_v1/main.py
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
"""
|
2
|
+
Compatibility layer for Pydantic v2 migration.
|
3
|
+
|
4
|
+
This module now imports directly from Pydantic v2 since all internal code
|
5
|
+
has been migrated to use Pydantic v2 patterns.
|
6
|
+
"""
|
7
|
+
|
8
|
+
# Import from pydantic.main but don't trigger the warning again
|
9
|
+
# The warning is already shown when importing from langroid.pydantic_v1
|
10
|
+
from pydantic.main import * # noqa: F403, F401
|
langroid/utils/configuration.py
CHANGED
@@ -4,8 +4,7 @@ from contextlib import contextmanager
|
|
4
4
|
from typing import Any, Dict, Iterator, List, Literal, cast
|
5
5
|
|
6
6
|
from dotenv import find_dotenv, load_dotenv
|
7
|
-
|
8
|
-
from langroid.pydantic_v1 import BaseSettings
|
7
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
9
8
|
|
10
9
|
# Global reentrant lock to serialize any modifications to the global settings.
|
11
10
|
_global_lock = threading.RLock()
|
@@ -22,8 +21,7 @@ class Settings(BaseSettings):
|
|
22
21
|
quiet: bool = False # quiet mode (i.e. suppress all output)?
|
23
22
|
notebook: bool = False # running in a notebook?
|
24
23
|
|
25
|
-
|
26
|
-
extra = "forbid"
|
24
|
+
model_config = SettingsConfigDict(extra="forbid")
|
27
25
|
|
28
26
|
|
29
27
|
# Load environment variables from .env file.
|
@@ -60,8 +58,10 @@ class SettingsProxy:
|
|
60
58
|
# Return a dict view of the settings as seen by the caller.
|
61
59
|
# Note that temporary overrides are not “merged” with global settings.
|
62
60
|
if hasattr(_thread_local, "override"):
|
63
|
-
return cast(
|
64
|
-
|
61
|
+
return cast(
|
62
|
+
Dict[str, Any], cast(Settings, _thread_local.override.model_dump())
|
63
|
+
)
|
64
|
+
return _global_settings.model_dump()
|
65
65
|
|
66
66
|
|
67
67
|
settings = SettingsProxy()
|
@@ -76,7 +76,7 @@ def update_global_settings(cfg: BaseSettings, keys: List[str]) -> None:
|
|
76
76
|
|
77
77
|
This updates the global default.
|
78
78
|
"""
|
79
|
-
config_dict = cfg.
|
79
|
+
config_dict = cfg.model_dump()
|
80
80
|
filtered_config = {key: config_dict[key] for key in keys if key in config_dict}
|
81
81
|
new_settings = Settings(**filtered_config)
|
82
82
|
_global_settings.__dict__.update(new_settings.__dict__)
|
@@ -117,7 +117,9 @@ def quiet_mode(quiet: bool = True) -> Iterator[None]:
|
|
117
117
|
if quiet is already True (from an outer context),
|
118
118
|
then it remains True even if a nested context passes quiet=False.
|
119
119
|
"""
|
120
|
-
current_effective =
|
120
|
+
current_effective = (
|
121
|
+
settings.model_dump()
|
122
|
+
) # get the current thread's effective settings
|
121
123
|
# Create a new settings instance from the current effective state.
|
122
124
|
temp = Settings(**current_effective)
|
123
125
|
# Merge the new flag: once quiet is enabled, it stays enabled.
|
@@ -132,6 +134,6 @@ def set_env(settings_instance: BaseSettings) -> None:
|
|
132
134
|
|
133
135
|
Each field in the settings is written to os.environ.
|
134
136
|
"""
|
135
|
-
for field_name, field in settings_instance.__class__.
|
136
|
-
env_var_name = field.
|
137
|
-
os.environ[env_var_name] = str(settings_instance.
|
137
|
+
for field_name, field in settings_instance.__class__.model_fields.items():
|
138
|
+
env_var_name = field.alias or field_name.upper()
|
139
|
+
os.environ[env_var_name] = str(settings_instance.model_dump()[field_name])
|
langroid/utils/constants.py
CHANGED
langroid/utils/globals.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
from typing import Any, Dict, Optional, Type, TypeVar
|
1
|
+
from typing import Any, Dict, Optional, Type, TypeVar, cast
|
2
2
|
|
3
|
-
from
|
3
|
+
from pydantic import BaseModel
|
4
|
+
from pydantic.fields import ModelPrivateAttr
|
5
|
+
from pydantic_core import PydanticUndefined
|
4
6
|
|
5
7
|
T = TypeVar("T", bound="GlobalState")
|
6
8
|
|
@@ -18,9 +20,23 @@ class GlobalState(BaseModel):
|
|
18
20
|
Returns:
|
19
21
|
The global instance of the subclass.
|
20
22
|
"""
|
21
|
-
|
22
|
-
|
23
|
-
|
23
|
+
# Get the actual value from ModelPrivateAttr when accessing on class
|
24
|
+
instance_attr = getattr(cls, "_instance", None)
|
25
|
+
actual_instance: Optional["GlobalState"]
|
26
|
+
if isinstance(instance_attr, ModelPrivateAttr):
|
27
|
+
default_value = instance_attr.default
|
28
|
+
if default_value is PydanticUndefined:
|
29
|
+
actual_instance = None
|
30
|
+
else:
|
31
|
+
actual_instance = cast(Optional["GlobalState"], default_value)
|
32
|
+
else:
|
33
|
+
actual_instance = instance_attr
|
34
|
+
|
35
|
+
if actual_instance is None:
|
36
|
+
new_instance = cls()
|
37
|
+
cls._instance = new_instance
|
38
|
+
return new_instance
|
39
|
+
return actual_instance # type: ignore
|
24
40
|
|
25
41
|
@classmethod
|
26
42
|
def set_values(cls: Type[T], **kwargs: Dict[str, Any]) -> None:
|
langroid/utils/html_logger.py
CHANGED
langroid/utils/pydantic_utils.py
CHANGED
@@ -15,9 +15,9 @@ from typing import (
|
|
15
15
|
|
16
16
|
import numpy as np
|
17
17
|
import pandas as pd
|
18
|
+
from pydantic import BaseModel, ValidationError, create_model
|
18
19
|
|
19
20
|
from langroid.mytypes import DocMetaData, Document
|
20
|
-
from langroid.pydantic_v1 import BaseModel, ValidationError, create_model
|
21
21
|
|
22
22
|
logger = logging.getLogger(__name__)
|
23
23
|
|
@@ -42,7 +42,7 @@ def flatten_dict(
|
|
42
42
|
|
43
43
|
def has_field(model_class: Type[BaseModel], field_name: str) -> bool:
|
44
44
|
"""Check if a Pydantic model class has a field with the given name."""
|
45
|
-
return field_name in model_class.
|
45
|
+
return field_name in model_class.model_fields
|
46
46
|
|
47
47
|
|
48
48
|
def _recursive_purge_dict_key(d: Dict[str, Any], k: str) -> None:
|
@@ -125,29 +125,31 @@ def flatten_pydantic_model(
|
|
125
125
|
while models_to_process:
|
126
126
|
current_model, current_prefix = models_to_process.pop()
|
127
127
|
|
128
|
-
for name, field in current_model.
|
129
|
-
if
|
130
|
-
|
131
|
-
):
|
128
|
+
for name, field in current_model.model_fields.items():
|
129
|
+
field_type = field.annotation if hasattr(field, "annotation") else field
|
130
|
+
if isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
132
131
|
new_prefix = (
|
133
132
|
f"{current_prefix}{name}__" if current_prefix else f"{name}__"
|
134
133
|
)
|
135
|
-
models_to_process.append((
|
134
|
+
models_to_process.append((field_type, new_prefix))
|
136
135
|
else:
|
137
136
|
flattened_name = f"{current_prefix}{name}"
|
138
137
|
|
139
|
-
if
|
138
|
+
if (
|
139
|
+
hasattr(field, "default_factory")
|
140
|
+
and field.default_factory is not None
|
141
|
+
):
|
140
142
|
flattened_fields[flattened_name] = (
|
141
|
-
|
143
|
+
field_type,
|
142
144
|
field.default_factory,
|
143
145
|
)
|
144
|
-
elif field.default is not
|
146
|
+
elif hasattr(field, "default") and field.default is not ...:
|
145
147
|
flattened_fields[flattened_name] = (
|
146
|
-
|
148
|
+
field_type,
|
147
149
|
field.default,
|
148
150
|
)
|
149
151
|
else:
|
150
|
-
flattened_fields[flattened_name] = (
|
152
|
+
flattened_fields[flattened_name] = (field_type, ...)
|
151
153
|
|
152
154
|
return create_model("FlatModel", __base__=base_model, **flattened_fields)
|
153
155
|
|
@@ -155,7 +157,7 @@ def flatten_pydantic_model(
|
|
155
157
|
def get_field_names(model: Type[BaseModel]) -> List[str]:
|
156
158
|
"""Get all field names from a possibly nested Pydantic model."""
|
157
159
|
mdl = flatten_pydantic_model(model)
|
158
|
-
fields = list(mdl.
|
160
|
+
fields = list(mdl.model_fields.keys())
|
159
161
|
# fields may be like a__b__c , so we only want the last part
|
160
162
|
return [f.split("__")[-1] for f in fields]
|
161
163
|
|
@@ -180,19 +182,22 @@ def generate_simple_schema(
|
|
180
182
|
Dict[str, Any]: A dictionary representing the JSON schema of the provided model,
|
181
183
|
with specified fields excluded.
|
182
184
|
"""
|
183
|
-
if hasattr(model, "
|
185
|
+
if hasattr(model, "model_fields"):
|
184
186
|
output: Dict[str, Any] = {}
|
185
|
-
for field_name, field in model.
|
187
|
+
for field_name, field in model.model_fields.items():
|
186
188
|
if field_name in exclude:
|
187
189
|
continue # Skip excluded fields
|
188
190
|
|
189
|
-
field_type = field.
|
190
|
-
if issubclass(field_type, BaseModel):
|
191
|
+
field_type = field.annotation if hasattr(field, "annotation") else field
|
192
|
+
if isinstance(field_type, type) and issubclass(field_type, BaseModel):
|
191
193
|
# Recursively generate schema for nested models
|
192
194
|
output[field_name] = generate_simple_schema(field_type, exclude)
|
193
|
-
|
195
|
+
elif field_type is not None and hasattr(field_type, "__name__"):
|
194
196
|
# Represent the type as a string here
|
195
197
|
output[field_name] = {"type": field_type.__name__}
|
198
|
+
else:
|
199
|
+
# Fallback for complex types
|
200
|
+
output[field_name] = {"type": str(field_type)}
|
196
201
|
return output
|
197
202
|
else:
|
198
203
|
# Non-model type, return a simplified representation
|
@@ -218,14 +223,28 @@ def flatten_pydantic_instance(
|
|
218
223
|
|
219
224
|
"""
|
220
225
|
flat_data: Dict[str, Any] = {}
|
221
|
-
for name, value in instance.
|
226
|
+
for name, value in instance.model_dump().items():
|
222
227
|
# Assuming nested pydantic model will be a dict here
|
223
228
|
if isinstance(value, dict):
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
229
|
+
# Get field info from model_fields
|
230
|
+
field_info = instance.model_fields[name]
|
231
|
+
# Try to get the nested model type from field annotation
|
232
|
+
field_type = (
|
233
|
+
field_info.annotation if hasattr(field_info, "annotation") else None
|
228
234
|
)
|
235
|
+
if (
|
236
|
+
field_type
|
237
|
+
and isinstance(field_type, type)
|
238
|
+
and issubclass(field_type, BaseModel)
|
239
|
+
):
|
240
|
+
nested_flat_data = flatten_pydantic_instance(
|
241
|
+
field_type(**value),
|
242
|
+
prefix=f"{prefix}{name}__",
|
243
|
+
force_str=force_str,
|
244
|
+
)
|
245
|
+
else:
|
246
|
+
# Skip non-Pydantic nested fields for safety
|
247
|
+
continue
|
229
248
|
flat_data.update(nested_flat_data)
|
230
249
|
else:
|
231
250
|
flat_data[f"{prefix}{name}"] = str(value) if force_str else value
|
@@ -531,10 +550,19 @@ def extra_metadata(document: Document, doc_cls: Type[Document] = Document) -> Li
|
|
531
550
|
in the document's metadata.
|
532
551
|
"""
|
533
552
|
# Convert metadata to dict, including extra fields.
|
534
|
-
metadata_fields = set(document.metadata.
|
553
|
+
metadata_fields = set(document.metadata.model_dump().keys())
|
535
554
|
|
536
555
|
# Get defined fields in the metadata of doc_cls
|
537
|
-
|
556
|
+
metadata_field = doc_cls.model_fields["metadata"]
|
557
|
+
metadata_type = (
|
558
|
+
metadata_field.annotation
|
559
|
+
if hasattr(metadata_field, "annotation")
|
560
|
+
else metadata_field
|
561
|
+
)
|
562
|
+
if isinstance(metadata_type, type) and hasattr(metadata_type, "model_fields"):
|
563
|
+
defined_fields = set(metadata_type.model_fields.keys())
|
564
|
+
else:
|
565
|
+
defined_fields = set()
|
538
566
|
|
539
567
|
# Identify extra fields not in defined fields.
|
540
568
|
extra_fields = list(metadata_fields - defined_fields)
|
@@ -561,14 +589,13 @@ def extend_document_class(d: Document) -> Type[Document]:
|
|
561
589
|
# Extract the fields from the original metadata class, including types,
|
562
590
|
# correctly handling special types like List[str].
|
563
591
|
original_metadata_fields = {
|
564
|
-
k: (v.
|
565
|
-
for k, v in DocMetaData.__fields__.items()
|
592
|
+
k: (v.annotation, ...) for k, v in DocMetaData.model_fields.items()
|
566
593
|
}
|
567
594
|
# Extract extra fields from the metadata instance with their types
|
568
595
|
extra_fields = {
|
569
596
|
k: (type(v), ...)
|
570
597
|
for k, v in d.metadata.__dict__.items()
|
571
|
-
if k not in DocMetaData.
|
598
|
+
if k not in DocMetaData.model_fields
|
572
599
|
}
|
573
600
|
|
574
601
|
# Combine original and extra fields for the new metadata class
|
langroid/utils/types.py
CHANGED
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
from inspect import signature
|
4
4
|
from typing import Any, Optional, Type, TypeVar, Union, get_args, get_origin
|
5
5
|
|
6
|
-
from
|
6
|
+
from pydantic import BaseModel
|
7
7
|
|
8
8
|
logger = logging.getLogger(__name__)
|
9
9
|
PrimitiveType = Union[int, float, bool, str]
|
@@ -55,7 +55,7 @@ def to_string(msg: Any) -> str:
|
|
55
55
|
if isinstance(msg, str):
|
56
56
|
return msg
|
57
57
|
if isinstance(msg, BaseModel):
|
58
|
-
return msg.
|
58
|
+
return msg.model_dump_json()
|
59
59
|
# last resort: use json.dumps() or str() to make it a str
|
60
60
|
try:
|
61
61
|
return json.dumps(msg)
|
langroid/vector_store/base.py
CHANGED
@@ -5,11 +5,11 @@ from typing import Dict, List, Optional, Sequence, Tuple, Type
|
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
import pandas as pd
|
8
|
+
from pydantic_settings import BaseSettings
|
8
9
|
|
9
10
|
from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
|
10
11
|
from langroid.embedding_models.models import OpenAIEmbeddingsConfig
|
11
12
|
from langroid.mytypes import DocMetaData, Document, EmbeddingFunction
|
12
|
-
from langroid.pydantic_v1 import BaseSettings
|
13
13
|
from langroid.utils.algorithms.graph import components, topological_sort
|
14
14
|
from langroid.utils.configuration import settings
|
15
15
|
from langroid.utils.object_registry import ObjectRegistry
|
@@ -82,7 +82,7 @@ class VectorStore(ABC):
|
|
82
82
|
else:
|
83
83
|
logger.warning(
|
84
84
|
f"""
|
85
|
-
Unknown vector store config: {config.
|
85
|
+
Unknown vector store config: {config.__class__.__name__},
|
86
86
|
so skipping vector store creation!
|
87
87
|
If you intended to use a vector-store, please set a specific
|
88
88
|
vector-store in your script, typically in the `vecdb` field of a
|
@@ -160,7 +160,7 @@ class VectorStore(ABC):
|
|
160
160
|
If full_eval is True, sanitization is bypassed - use only with trusted input!
|
161
161
|
"""
|
162
162
|
# convert each doc to a dict, using dotted paths for nested fields
|
163
|
-
dicts = [flatten_dict(doc.
|
163
|
+
dicts = [flatten_dict(doc.model_dump(by_alias=True)) for doc in docs]
|
164
164
|
df = pd.DataFrame(dicts)
|
165
165
|
|
166
166
|
try:
|
langroid/vector_store/lancedb.py
CHANGED
@@ -15,8 +15,7 @@ from typing import (
|
|
15
15
|
|
16
16
|
import pandas as pd
|
17
17
|
from dotenv import load_dotenv
|
18
|
-
|
19
|
-
from langroid.pydantic_v1 import BaseModel, ValidationError, create_model
|
18
|
+
from pydantic import BaseModel, ValidationError, create_model
|
20
19
|
|
21
20
|
if TYPE_CHECKING:
|
22
21
|
from lancedb.query import LanceVectorQueryBuilder
|
@@ -175,11 +174,12 @@ class LanceDB(VectorStore):
|
|
175
174
|
fields = {"id": (str, ...), "vector": (Vector(n), ...)}
|
176
175
|
|
177
176
|
sorted_fields = dict(
|
178
|
-
sorted(doc_cls.
|
177
|
+
sorted(doc_cls.model_fields.items(), key=lambda item: item[0])
|
179
178
|
)
|
180
179
|
# Add both statically and dynamically defined fields from doc_cls
|
181
180
|
for field_name, field in sorted_fields.items():
|
182
|
-
|
181
|
+
field_type = field.annotation if hasattr(field, "annotation") else field
|
182
|
+
fields[field_name] = (field_type, field.default)
|
183
183
|
|
184
184
|
# Create the new model with dynamic fields
|
185
185
|
NewModel = create_model(
|
@@ -227,7 +227,7 @@ class LanceDB(VectorStore):
|
|
227
227
|
dict(
|
228
228
|
id=ids[i + j],
|
229
229
|
vector=embedding_vecs[i + j],
|
230
|
-
**doc.
|
230
|
+
**doc.model_dump(),
|
231
231
|
)
|
232
232
|
for j, doc in enumerate(documents[i : i + b])
|
233
233
|
]
|
langroid/vector_store/meilisearch.py
CHANGED
@@ -33,7 +33,7 @@ class MeiliSearchConfig(VectorStoreConfig):
|
|
33
33
|
cloud: bool = False
|
34
34
|
collection_name: str | None = None
|
35
35
|
primary_key: str = "id"
|
36
|
-
port = 7700
|
36
|
+
port: int = 7700
|
37
37
|
|
38
38
|
|
39
39
|
class MeiliSearch(VectorStore):
|
@@ -193,7 +193,7 @@ class MeiliSearch(VectorStore):
|
|
193
193
|
dict(
|
194
194
|
id=d.id(),
|
195
195
|
content=d.content,
|
196
|
-
metadata=d.metadata.
|
196
|
+
metadata=d.metadata.model_dump(),
|
197
197
|
)
|
198
198
|
for d in documents
|
199
199
|
]
|
langroid/vector_store/pineconedb.py
CHANGED
@@ -17,11 +17,11 @@ from typing import (
|
|
17
17
|
|
18
18
|
from dotenv import load_dotenv
|
19
19
|
|
20
|
+
# import dataclass
|
21
|
+
from pydantic import BaseModel
|
22
|
+
|
20
23
|
from langroid import LangroidImportError
|
21
24
|
from langroid.mytypes import Document
|
22
|
-
|
23
|
-
# import dataclass
|
24
|
-
from langroid.pydantic_v1 import BaseModel
|
25
25
|
from langroid.utils.configuration import settings
|
26
26
|
from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
27
27
|
|
@@ -246,7 +246,7 @@ class PineconeDB(VectorStore):
|
|
246
246
|
return
|
247
247
|
|
248
248
|
super().maybe_add_ids(documents)
|
249
|
-
document_dicts = [doc.
|
249
|
+
document_dicts = [doc.model_dump() for doc in documents]
|
250
250
|
document_ids = [doc.id() for doc in documents]
|
251
251
|
embedding_vectors = self.embedding_fn([doc.content for doc in documents])
|
252
252
|
vectors = [
|