langroid 0.58.2__py3-none-any.whl → 0.59.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +39 -17
- langroid/agent/base.py-e +2216 -0
- langroid/agent/callbacks/chainlit.py +2 -1
- langroid/agent/chat_agent.py +73 -55
- langroid/agent/chat_agent.py-e +2086 -0
- langroid/agent/chat_document.py +7 -7
- langroid/agent/chat_document.py-e +513 -0
- langroid/agent/openai_assistant.py +9 -9
- langroid/agent/openai_assistant.py-e +882 -0
- langroid/agent/special/arangodb/arangodb_agent.py +10 -18
- langroid/agent/special/arangodb/arangodb_agent.py-e +648 -0
- langroid/agent/special/arangodb/tools.py +3 -3
- langroid/agent/special/doc_chat_agent.py +16 -14
- langroid/agent/special/lance_rag/critic_agent.py +2 -2
- langroid/agent/special/lance_rag/query_planner_agent.py +4 -4
- langroid/agent/special/lance_tools.py +6 -5
- langroid/agent/special/lance_tools.py-e +61 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +3 -7
- langroid/agent/special/neo4j/neo4j_chat_agent.py-e +430 -0
- langroid/agent/special/relevance_extractor_agent.py +1 -1
- langroid/agent/special/sql/sql_chat_agent.py +11 -3
- langroid/agent/task.py +9 -87
- langroid/agent/task.py-e +2418 -0
- langroid/agent/tool_message.py +33 -17
- langroid/agent/tool_message.py-e +400 -0
- langroid/agent/tools/file_tools.py +4 -2
- langroid/agent/tools/file_tools.py-e +234 -0
- langroid/agent/tools/mcp/fastmcp_client.py +19 -6
- langroid/agent/tools/mcp/fastmcp_client.py-e +584 -0
- langroid/agent/tools/orchestration.py +22 -17
- langroid/agent/tools/orchestration.py-e +301 -0
- langroid/agent/tools/recipient_tool.py +3 -3
- langroid/agent/tools/task_tool.py +22 -16
- langroid/agent/tools/task_tool.py-e +249 -0
- langroid/agent/xml_tool_message.py +90 -35
- langroid/agent/xml_tool_message.py-e +392 -0
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +2 -2
- langroid/embedding_models/models.py +3 -7
- langroid/embedding_models/models.py-e +563 -0
- langroid/exceptions.py +4 -1
- langroid/language_models/azure_openai.py +2 -2
- langroid/language_models/azure_openai.py-e +134 -0
- langroid/language_models/base.py +6 -4
- langroid/language_models/base.py-e +812 -0
- langroid/language_models/client_cache.py +64 -0
- langroid/language_models/config.py +2 -4
- langroid/language_models/config.py-e +18 -0
- langroid/language_models/model_info.py +9 -1
- langroid/language_models/model_info.py-e +483 -0
- langroid/language_models/openai_gpt.py +119 -20
- langroid/language_models/openai_gpt.py-e +2280 -0
- langroid/language_models/provider_params.py +3 -22
- langroid/language_models/provider_params.py-e +153 -0
- langroid/mytypes.py +11 -4
- langroid/mytypes.py-e +132 -0
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/file_attachment.py +1 -1
- langroid/parsing/file_attachment.py-e +246 -0
- langroid/parsing/md_parser.py +14 -4
- langroid/parsing/md_parser.py-e +574 -0
- langroid/parsing/parser.py +22 -7
- langroid/parsing/parser.py-e +410 -0
- langroid/parsing/repo_loader.py +3 -1
- langroid/parsing/repo_loader.py-e +812 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/url_loader.py +17 -51
- langroid/parsing/url_loader.py-e +683 -0
- langroid/parsing/urls.py +5 -4
- langroid/parsing/urls.py-e +279 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +45 -6
- langroid/pydantic_v1/__init__.py-e +36 -0
- langroid/pydantic_v1/main.py +11 -4
- langroid/pydantic_v1/main.py-e +11 -0
- langroid/utils/configuration.py +13 -11
- langroid/utils/configuration.py-e +141 -0
- langroid/utils/constants.py +1 -1
- langroid/utils/constants.py-e +32 -0
- langroid/utils/globals.py +21 -5
- langroid/utils/globals.py-e +49 -0
- langroid/utils/html_logger.py +2 -1
- langroid/utils/html_logger.py-e +825 -0
- langroid/utils/object_registry.py +1 -1
- langroid/utils/object_registry.py-e +66 -0
- langroid/utils/pydantic_utils.py +55 -28
- langroid/utils/pydantic_utils.py-e +602 -0
- langroid/utils/types.py +2 -2
- langroid/utils/types.py-e +113 -0
- langroid/vector_store/base.py +3 -3
- langroid/vector_store/lancedb.py +5 -5
- langroid/vector_store/lancedb.py-e +404 -0
- langroid/vector_store/meilisearch.py +2 -2
- langroid/vector_store/pineconedb.py +4 -4
- langroid/vector_store/pineconedb.py-e +427 -0
- langroid/vector_store/postgres.py +1 -1
- langroid/vector_store/qdrantdb.py +3 -3
- langroid/vector_store/weaviatedb.py +1 -1
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/METADATA +3 -2
- langroid-0.59.0b1.dist-info/RECORD +181 -0
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/mcp/__init__.py +0 -1
- langroid/mcp/server/__init__.py +0 -1
- langroid-0.58.2.dist-info/RECORD +0 -145
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/WHEEL +0 -0
- {langroid-0.58.2.dist-info → langroid-0.59.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ Provider-specific parameter configurations for various LLM providers.
|
|
4
4
|
|
5
5
|
from typing import Any, Dict, Optional
|
6
6
|
|
7
|
-
from
|
7
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
8
8
|
|
9
9
|
# Constants
|
10
10
|
LANGDB_BASE_URL = "https://api.us-east-1.langdb.ai"
|
@@ -24,10 +24,7 @@ class LangDBParams(BaseSettings):
|
|
24
24
|
thread_id: Optional[str] = None
|
25
25
|
base_url: str = LANGDB_BASE_URL
|
26
26
|
|
27
|
-
|
28
|
-
# allow setting of fields via env vars,
|
29
|
-
# e.g. LANGDB_PROJECT_ID=1234
|
30
|
-
env_prefix = "LANGDB_"
|
27
|
+
model_config = SettingsConfigDict(env_prefix="LANGDB_")
|
31
28
|
|
32
29
|
|
33
30
|
class PortkeyParams(BaseSettings):
|
@@ -61,10 +58,7 @@ class PortkeyParams(BaseSettings):
|
|
61
58
|
custom_headers: Optional[Dict[str, str]] = None # Optional: additional headers
|
62
59
|
base_url: str = PORTKEY_BASE_URL
|
63
60
|
|
64
|
-
|
65
|
-
# allow setting of fields via env vars,
|
66
|
-
# e.g. PORTKEY_API_KEY=xxx, PORTKEY_PROVIDER=anthropic
|
67
|
-
env_prefix = "PORTKEY_"
|
61
|
+
model_config = SettingsConfigDict(env_prefix="PORTKEY_")
|
68
62
|
|
69
63
|
def get_headers(self) -> Dict[str, str]:
|
70
64
|
"""Generate Portkey-specific headers from parameters."""
|
@@ -73,7 +67,6 @@ class PortkeyParams(BaseSettings):
|
|
73
67
|
|
74
68
|
headers = {}
|
75
69
|
|
76
|
-
# API key - from params or environment
|
77
70
|
if self.api_key and self.api_key != DUMMY_API_KEY:
|
78
71
|
headers["x-portkey-api-key"] = self.api_key
|
79
72
|
else:
|
@@ -81,45 +74,35 @@ class PortkeyParams(BaseSettings):
|
|
81
74
|
if portkey_key:
|
82
75
|
headers["x-portkey-api-key"] = portkey_key
|
83
76
|
|
84
|
-
# Provider
|
85
77
|
if self.provider:
|
86
78
|
headers["x-portkey-provider"] = self.provider
|
87
79
|
|
88
|
-
# Virtual key
|
89
80
|
if self.virtual_key:
|
90
81
|
headers["x-portkey-virtual-key"] = self.virtual_key
|
91
82
|
|
92
|
-
# Trace ID
|
93
83
|
if self.trace_id:
|
94
84
|
headers["x-portkey-trace-id"] = self.trace_id
|
95
85
|
|
96
|
-
# Metadata
|
97
86
|
if self.metadata:
|
98
87
|
headers["x-portkey-metadata"] = json.dumps(self.metadata)
|
99
88
|
|
100
|
-
# Retry configuration
|
101
89
|
if self.retry:
|
102
90
|
headers["x-portkey-retry"] = json.dumps(self.retry)
|
103
91
|
|
104
|
-
# Cache configuration
|
105
92
|
if self.cache:
|
106
93
|
headers["x-portkey-cache"] = json.dumps(self.cache)
|
107
94
|
|
108
|
-
# Cache force refresh
|
109
95
|
if self.cache_force_refresh is not None:
|
110
96
|
headers["x-portkey-cache-force-refresh"] = str(
|
111
97
|
self.cache_force_refresh
|
112
98
|
).lower()
|
113
99
|
|
114
|
-
# User identifier
|
115
100
|
if self.user:
|
116
101
|
headers["x-portkey-user"] = self.user
|
117
102
|
|
118
|
-
# Organization identifier
|
119
103
|
if self.organization:
|
120
104
|
headers["x-portkey-organization"] = self.organization
|
121
105
|
|
122
|
-
# Add any custom headers
|
123
106
|
if self.custom_headers:
|
124
107
|
headers.update(self.custom_headers)
|
125
108
|
|
@@ -138,7 +121,6 @@ class PortkeyParams(BaseSettings):
|
|
138
121
|
_, provider, model = parts
|
139
122
|
return provider, model
|
140
123
|
else:
|
141
|
-
# Fallback: just remove "portkey/" prefix and return empty provider
|
142
124
|
model = model_string.replace("portkey/", "")
|
143
125
|
return "", model
|
144
126
|
|
@@ -157,7 +139,6 @@ class PortkeyParams(BaseSettings):
|
|
157
139
|
"""
|
158
140
|
import os
|
159
141
|
|
160
|
-
# Common environment variable patterns for different providers
|
161
142
|
env_patterns = [
|
162
143
|
f"{provider.upper()}_API_KEY",
|
163
144
|
f"{provider.upper()}_KEY",
|
@@ -0,0 +1,153 @@
|
|
1
|
+
"""
|
2
|
+
Provider-specific parameter configurations for various LLM providers.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from typing import Any, Dict, Optional
|
6
|
+
|
7
|
+
from pydantic import ConfigDict
|
8
|
+
from pydantic_settings import BaseSettings
|
9
|
+
|
10
|
+
# Constants
|
11
|
+
LANGDB_BASE_URL = "https://api.us-east-1.langdb.ai"
|
12
|
+
PORTKEY_BASE_URL = "https://api.portkey.ai"
|
13
|
+
DUMMY_API_KEY = "xxx"
|
14
|
+
|
15
|
+
|
16
|
+
class LangDBParams(BaseSettings):
|
17
|
+
"""
|
18
|
+
Parameters specific to LangDB integration.
|
19
|
+
"""
|
20
|
+
|
21
|
+
api_key: str = DUMMY_API_KEY
|
22
|
+
project_id: str = ""
|
23
|
+
label: Optional[str] = None
|
24
|
+
run_id: Optional[str] = None
|
25
|
+
thread_id: Optional[str] = None
|
26
|
+
base_url: str = LANGDB_BASE_URL
|
27
|
+
|
28
|
+
model_config = ConfigDict(env_prefix="LANGDB_")
|
29
|
+
|
30
|
+
|
31
|
+
class PortkeyParams(BaseSettings):
|
32
|
+
"""
|
33
|
+
Parameters specific to Portkey integration.
|
34
|
+
|
35
|
+
Portkey is an AI gateway that provides a unified API for multiple LLM providers,
|
36
|
+
with features like automatic retries, fallbacks, load balancing, and observability.
|
37
|
+
|
38
|
+
Example usage:
|
39
|
+
# Use Portkey with Anthropic
|
40
|
+
config = OpenAIGPTConfig(
|
41
|
+
chat_model="portkey/anthropic/claude-3-sonnet-20240229",
|
42
|
+
portkey_params=PortkeyParams(
|
43
|
+
api_key="your-portkey-api-key",
|
44
|
+
provider="anthropic"
|
45
|
+
)
|
46
|
+
)
|
47
|
+
"""
|
48
|
+
|
49
|
+
api_key: str = DUMMY_API_KEY # Portkey API key
|
50
|
+
provider: str = "" # Required: e.g., "openai", "anthropic", "cohere", etc.
|
51
|
+
virtual_key: Optional[str] = None # Optional: virtual key for the provider
|
52
|
+
trace_id: Optional[str] = None # Optional: trace ID for request tracking
|
53
|
+
metadata: Optional[Dict[str, Any]] = None # Optional: metadata for logging
|
54
|
+
retry: Optional[Dict[str, Any]] = None # Optional: retry configuration
|
55
|
+
cache: Optional[Dict[str, Any]] = None # Optional: cache configuration
|
56
|
+
cache_force_refresh: Optional[bool] = None # Optional: force cache refresh
|
57
|
+
user: Optional[str] = None # Optional: user identifier
|
58
|
+
organization: Optional[str] = None # Optional: organization identifier
|
59
|
+
custom_headers: Optional[Dict[str, str]] = None # Optional: additional headers
|
60
|
+
base_url: str = PORTKEY_BASE_URL
|
61
|
+
|
62
|
+
model_config = ConfigDict(env_prefix="PORTKEY_")
|
63
|
+
|
64
|
+
def get_headers(self) -> Dict[str, str]:
|
65
|
+
"""Generate Portkey-specific headers from parameters."""
|
66
|
+
import json
|
67
|
+
import os
|
68
|
+
|
69
|
+
headers = {}
|
70
|
+
|
71
|
+
if self.api_key and self.api_key != DUMMY_API_KEY:
|
72
|
+
headers["x-portkey-api-key"] = self.api_key
|
73
|
+
else:
|
74
|
+
portkey_key = os.getenv("PORTKEY_API_KEY", "")
|
75
|
+
if portkey_key:
|
76
|
+
headers["x-portkey-api-key"] = portkey_key
|
77
|
+
|
78
|
+
if self.provider:
|
79
|
+
headers["x-portkey-provider"] = self.provider
|
80
|
+
|
81
|
+
if self.virtual_key:
|
82
|
+
headers["x-portkey-virtual-key"] = self.virtual_key
|
83
|
+
|
84
|
+
if self.trace_id:
|
85
|
+
headers["x-portkey-trace-id"] = self.trace_id
|
86
|
+
|
87
|
+
if self.metadata:
|
88
|
+
headers["x-portkey-metadata"] = json.dumps(self.metadata)
|
89
|
+
|
90
|
+
if self.retry:
|
91
|
+
headers["x-portkey-retry"] = json.dumps(self.retry)
|
92
|
+
|
93
|
+
if self.cache:
|
94
|
+
headers["x-portkey-cache"] = json.dumps(self.cache)
|
95
|
+
|
96
|
+
if self.cache_force_refresh is not None:
|
97
|
+
headers["x-portkey-cache-force-refresh"] = str(
|
98
|
+
self.cache_force_refresh
|
99
|
+
).lower()
|
100
|
+
|
101
|
+
if self.user:
|
102
|
+
headers["x-portkey-user"] = self.user
|
103
|
+
|
104
|
+
if self.organization:
|
105
|
+
headers["x-portkey-organization"] = self.organization
|
106
|
+
|
107
|
+
if self.custom_headers:
|
108
|
+
headers.update(self.custom_headers)
|
109
|
+
|
110
|
+
return headers
|
111
|
+
|
112
|
+
def parse_model_string(self, model_string: str) -> tuple[str, str]:
|
113
|
+
"""
|
114
|
+
Parse a model string like "portkey/anthropic/claude-3-sonnet"
|
115
|
+
and extract provider and model name.
|
116
|
+
|
117
|
+
Returns:
|
118
|
+
tuple: (provider, model_name)
|
119
|
+
"""
|
120
|
+
parts = model_string.split("/", 2)
|
121
|
+
if len(parts) >= 3 and parts[0] == "portkey":
|
122
|
+
_, provider, model = parts
|
123
|
+
return provider, model
|
124
|
+
else:
|
125
|
+
model = model_string.replace("portkey/", "")
|
126
|
+
return "", model
|
127
|
+
|
128
|
+
def get_provider_api_key(
|
129
|
+
self, provider: str, default_key: str = DUMMY_API_KEY
|
130
|
+
) -> str:
|
131
|
+
"""
|
132
|
+
Get the API key for the provider from environment variables.
|
133
|
+
|
134
|
+
Args:
|
135
|
+
provider: The provider name (e.g., "anthropic", "openai")
|
136
|
+
default_key: Default key to return if not found
|
137
|
+
|
138
|
+
Returns:
|
139
|
+
The API key for the provider
|
140
|
+
"""
|
141
|
+
import os
|
142
|
+
|
143
|
+
env_patterns = [
|
144
|
+
f"{provider.upper()}_API_KEY",
|
145
|
+
f"{provider.upper()}_KEY",
|
146
|
+
]
|
147
|
+
|
148
|
+
for pattern in env_patterns:
|
149
|
+
key = os.getenv(pattern, "")
|
150
|
+
if key:
|
151
|
+
return key
|
152
|
+
|
153
|
+
return default_key
|
langroid/mytypes.py
CHANGED
@@ -3,7 +3,7 @@ from textwrap import dedent
|
|
3
3
|
from typing import Any, Callable, Dict, List, Union
|
4
4
|
from uuid import uuid4
|
5
5
|
|
6
|
-
from
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
7
7
|
|
8
8
|
Number = Union[int, float]
|
9
9
|
Embedding = List[Number]
|
@@ -51,13 +51,21 @@ class DocMetaData(BaseModel):
|
|
51
51
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
52
52
|
window_ids: List[str] = [] # for RAG: ids of chunks around this one
|
53
53
|
|
54
|
+
@field_validator("id", mode="before")
|
55
|
+
@classmethod
|
56
|
+
def convert_id_to_string(cls, v: Any) -> str:
|
57
|
+
"""Convert id to string if it's not already."""
|
58
|
+
if v is None:
|
59
|
+
return str(uuid4())
|
60
|
+
return str(v)
|
61
|
+
|
54
62
|
def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
|
55
63
|
"""
|
56
64
|
Special dict method to convert bool fields to int, to appease some
|
57
65
|
downstream libraries, e.g. Chroma which complains about bool fields in
|
58
66
|
metadata.
|
59
67
|
"""
|
60
|
-
original_dict = super().
|
68
|
+
original_dict = super().model_dump(*args, **kwargs)
|
61
69
|
|
62
70
|
for key, value in original_dict.items():
|
63
71
|
if isinstance(value, bool):
|
@@ -92,8 +100,7 @@ class DocMetaData(BaseModel):
|
|
92
100
|
)
|
93
101
|
return ", ".join(components)
|
94
102
|
|
95
|
-
|
96
|
-
extra = Extra.allow
|
103
|
+
model_config = ConfigDict(extra="allow")
|
97
104
|
|
98
105
|
|
99
106
|
class Document(BaseModel):
|
langroid/mytypes.py-e
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from textwrap import dedent
|
3
|
+
from typing import Any, Callable, Dict, List, Union
|
4
|
+
from uuid import uuid4
|
5
|
+
|
6
|
+
from pydantic import BaseModel, Extra, Field
|
7
|
+
|
8
|
+
Number = Union[int, float]
|
9
|
+
Embedding = List[Number]
|
10
|
+
Embeddings = List[Embedding]
|
11
|
+
EmbeddingFunction = Callable[[List[str]], Embeddings]
|
12
|
+
|
13
|
+
|
14
|
+
class Entity(str, Enum):
|
15
|
+
"""
|
16
|
+
Enum for the different types of entities that can respond to the current message.
|
17
|
+
"""
|
18
|
+
|
19
|
+
AGENT = "Agent"
|
20
|
+
LLM = "LLM"
|
21
|
+
USER = "User"
|
22
|
+
SYSTEM = "System"
|
23
|
+
|
24
|
+
def __eq__(self, other: object) -> bool:
|
25
|
+
"""Allow case-insensitive equality (==) comparison with strings."""
|
26
|
+
if other is None:
|
27
|
+
return False
|
28
|
+
if isinstance(other, str):
|
29
|
+
return self.value.lower() == other.lower()
|
30
|
+
return super().__eq__(other)
|
31
|
+
|
32
|
+
def __ne__(self, other: object) -> bool:
|
33
|
+
"""Allow case-insensitive non-equality (!=) comparison with strings."""
|
34
|
+
return not self.__eq__(other)
|
35
|
+
|
36
|
+
def __hash__(self) -> int:
|
37
|
+
"""Override this to ensure hashability of the enum,
|
38
|
+
so it can be used sets and dictionary keys.
|
39
|
+
"""
|
40
|
+
return hash(self.value.lower())
|
41
|
+
|
42
|
+
|
43
|
+
class DocMetaData(BaseModel):
|
44
|
+
"""Metadata for a document."""
|
45
|
+
|
46
|
+
source: str = "context" # just reference
|
47
|
+
source_content: str = "context" # reference and content
|
48
|
+
title: str = "Unknown Title"
|
49
|
+
published_date: str = "Unknown Date"
|
50
|
+
is_chunk: bool = False # if it is a chunk, don't split
|
51
|
+
id: str = Field(default_factory=lambda: str(uuid4()))
|
52
|
+
window_ids: List[str] = [] # for RAG: ids of chunks around this one
|
53
|
+
|
54
|
+
def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
|
55
|
+
"""
|
56
|
+
Special dict method to convert bool fields to int, to appease some
|
57
|
+
downstream libraries, e.g. Chroma which complains about bool fields in
|
58
|
+
metadata.
|
59
|
+
"""
|
60
|
+
original_dict = super().model_dump(*args, **kwargs)
|
61
|
+
|
62
|
+
for key, value in original_dict.items():
|
63
|
+
if isinstance(value, bool):
|
64
|
+
original_dict[key] = 1 * value
|
65
|
+
|
66
|
+
return original_dict
|
67
|
+
|
68
|
+
def __str__(self) -> str:
|
69
|
+
title_str = (
|
70
|
+
""
|
71
|
+
if "unknown" in self.title.lower() or self.title.strip() == ""
|
72
|
+
else f"Title: {self.title}"
|
73
|
+
)
|
74
|
+
date_str = ""
|
75
|
+
if (
|
76
|
+
"unknown" not in self.published_date.lower()
|
77
|
+
and self.published_date.strip() != ""
|
78
|
+
):
|
79
|
+
try:
|
80
|
+
from dateutil import parser
|
81
|
+
|
82
|
+
# Try to parse the date string
|
83
|
+
date_obj = parser.parse(self.published_date)
|
84
|
+
# Format to include only the date part (year-month-day)
|
85
|
+
date_only = date_obj.strftime("%Y-%m-%d")
|
86
|
+
date_str = f"Date: {date_only}"
|
87
|
+
except (ValueError, ImportError, TypeError):
|
88
|
+
# If parsing fails, just use the original date
|
89
|
+
date_str = f"Date: {self.published_date}"
|
90
|
+
components = [self.source] + (
|
91
|
+
[] if title_str + date_str == "" else [title_str, date_str]
|
92
|
+
)
|
93
|
+
return ", ".join(components)
|
94
|
+
|
95
|
+
model_config = ConfigDict(extra="allow")
|
96
|
+
|
97
|
+
class Document(BaseModel):
|
98
|
+
"""Interface for interacting with a document."""
|
99
|
+
|
100
|
+
content: str
|
101
|
+
metadata: DocMetaData
|
102
|
+
|
103
|
+
def id(self) -> str:
|
104
|
+
return self.metadata.id
|
105
|
+
|
106
|
+
@staticmethod
|
107
|
+
def from_string(
|
108
|
+
content: str,
|
109
|
+
source: str = "context",
|
110
|
+
is_chunk: bool = True,
|
111
|
+
) -> "Document":
|
112
|
+
return Document(
|
113
|
+
content=content,
|
114
|
+
metadata=DocMetaData(source=source, is_chunk=is_chunk),
|
115
|
+
)
|
116
|
+
|
117
|
+
def __str__(self) -> str:
|
118
|
+
return dedent(
|
119
|
+
f"""
|
120
|
+
CONTENT: {self.content}
|
121
|
+
SOURCE:{str(self.metadata)}
|
122
|
+
"""
|
123
|
+
)
|
124
|
+
|
125
|
+
|
126
|
+
class NonToolAction(str, Enum):
|
127
|
+
"""
|
128
|
+
Possible options to handle non-tool msgs from LLM.
|
129
|
+
"""
|
130
|
+
|
131
|
+
FORWARD_USER = "user" # forward msg to user
|
132
|
+
DONE = "done" # task done
|
langroid/parsing/code_parser.py
CHANGED
@@ -2,12 +2,12 @@ from functools import reduce
|
|
2
2
|
from typing import Callable, List
|
3
3
|
|
4
4
|
import tiktoken
|
5
|
+
from pydantic_settings import BaseSettings
|
5
6
|
from pygments import lex
|
6
7
|
from pygments.lexers import get_lexer_by_name
|
7
8
|
from pygments.token import Token
|
8
9
|
|
9
10
|
from langroid.mytypes import Document
|
10
|
-
from langroid.pydantic_v1 import BaseSettings
|
11
11
|
|
12
12
|
|
13
13
|
def chunk_code(
|
@@ -0,0 +1,246 @@
|
|
1
|
+
import base64
|
2
|
+
import mimetypes
|
3
|
+
import uuid
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any, BinaryIO, Dict, Optional, Union
|
6
|
+
from urllib.parse import urlparse
|
7
|
+
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
|
11
|
+
class FileAttachment(BaseModel):
|
12
|
+
"""Represents a file attachment to be sent to an LLM API."""
|
13
|
+
|
14
|
+
content: bytes
|
15
|
+
filename: Optional[str] = None
|
16
|
+
mime_type: str = "application/octet-stream"
|
17
|
+
url: str | None = None
|
18
|
+
detail: str | None = None
|
19
|
+
|
20
|
+
def __init__(self, **data: Any) -> None:
|
21
|
+
"""Initialize with sensible defaults for filename if not provided."""
|
22
|
+
if "filename" not in data or data["filename"] is None:
|
23
|
+
# Generate a more readable unique filename
|
24
|
+
unique_id = str(uuid.uuid4())[:8]
|
25
|
+
data["filename"] = f"attachment_{unique_id}.bin"
|
26
|
+
super().__init__(**data)
|
27
|
+
|
28
|
+
@classmethod
|
29
|
+
def _from_path(
|
30
|
+
cls,
|
31
|
+
file_path: Union[str, Path],
|
32
|
+
detail: Optional[str] = None,
|
33
|
+
) -> "FileAttachment":
|
34
|
+
"""Create a FileAttachment from a file path.
|
35
|
+
|
36
|
+
Args:
|
37
|
+
file_path: Path to the file to attach
|
38
|
+
|
39
|
+
Returns:
|
40
|
+
FileAttachment instance
|
41
|
+
"""
|
42
|
+
path = Path(file_path)
|
43
|
+
with open(path, "rb") as f:
|
44
|
+
content = f.read()
|
45
|
+
|
46
|
+
mime_type, _ = mimetypes.guess_type(path)
|
47
|
+
if mime_type is None:
|
48
|
+
mime_type = "application/octet-stream"
|
49
|
+
|
50
|
+
return cls(
|
51
|
+
content=content,
|
52
|
+
filename=path.name,
|
53
|
+
mime_type=mime_type,
|
54
|
+
detail=detail,
|
55
|
+
)
|
56
|
+
|
57
|
+
@classmethod
|
58
|
+
def _from_url(
|
59
|
+
cls,
|
60
|
+
url: str,
|
61
|
+
content: Optional[bytes] = None,
|
62
|
+
filename: Optional[str] = None,
|
63
|
+
mime_type: Optional[str] = None,
|
64
|
+
detail: Optional[str] = None,
|
65
|
+
) -> "FileAttachment":
|
66
|
+
"""Create a FileAttachment from a URL.
|
67
|
+
|
68
|
+
Args:
|
69
|
+
url: URL to the file
|
70
|
+
content: Optional raw bytes content (if already fetched)
|
71
|
+
filename: Optional name to use for the file
|
72
|
+
mime_type: MIME type of the content, guessed from filename or url
|
73
|
+
|
74
|
+
Returns:
|
75
|
+
FileAttachment instance
|
76
|
+
"""
|
77
|
+
if filename is None and url:
|
78
|
+
# Extract filename from URL if possible
|
79
|
+
|
80
|
+
parsed_url = urlparse(url)
|
81
|
+
path = parsed_url.path
|
82
|
+
filename = path.split("/")[-1] if path else None
|
83
|
+
|
84
|
+
if mime_type is None and filename:
|
85
|
+
mime_type, _ = mimetypes.guess_type(filename)
|
86
|
+
|
87
|
+
return cls(
|
88
|
+
content=content or b"", # Empty bytes if no content provided
|
89
|
+
filename=filename,
|
90
|
+
mime_type=mime_type or "application/octet-stream",
|
91
|
+
url=url,
|
92
|
+
detail=detail,
|
93
|
+
)
|
94
|
+
|
95
|
+
@classmethod
|
96
|
+
def from_path(
|
97
|
+
cls,
|
98
|
+
path: Union[str, Path],
|
99
|
+
detail: str | None = None,
|
100
|
+
) -> "FileAttachment":
|
101
|
+
"""Create a FileAttachment from either a local file path or a URL.
|
102
|
+
|
103
|
+
Args:
|
104
|
+
path_or_url: Path to the file or URL to fetch
|
105
|
+
|
106
|
+
Returns:
|
107
|
+
FileAttachment instance
|
108
|
+
"""
|
109
|
+
# Convert to string if Path object
|
110
|
+
path_str = str(path)
|
111
|
+
|
112
|
+
# Check if it's a URL
|
113
|
+
if path_str.startswith(("http://", "https://", "ftp://")):
|
114
|
+
return cls._from_url(url=path_str, detail=detail)
|
115
|
+
else:
|
116
|
+
# Assume it's a local file path
|
117
|
+
return cls._from_path(path_str, detail=detail)
|
118
|
+
|
119
|
+
@classmethod
|
120
|
+
def from_bytes(
|
121
|
+
cls,
|
122
|
+
content: bytes,
|
123
|
+
filename: Optional[str] = None,
|
124
|
+
mime_type: Optional[str] = None,
|
125
|
+
) -> "FileAttachment":
|
126
|
+
"""Create a FileAttachment from bytes content.
|
127
|
+
|
128
|
+
Args:
|
129
|
+
content: Raw bytes content
|
130
|
+
filename: Optional name to use for the file
|
131
|
+
mime_type: MIME type of the content, guessed from filename if provided
|
132
|
+
|
133
|
+
Returns:
|
134
|
+
FileAttachment instance
|
135
|
+
"""
|
136
|
+
if mime_type is None and filename is not None:
|
137
|
+
mime_type, _ = mimetypes.guess_type(filename)
|
138
|
+
|
139
|
+
return cls(
|
140
|
+
content=content,
|
141
|
+
filename=filename,
|
142
|
+
mime_type=mime_type or "application/octet-stream",
|
143
|
+
)
|
144
|
+
|
145
|
+
@classmethod
|
146
|
+
def from_io(
|
147
|
+
cls,
|
148
|
+
file_obj: BinaryIO,
|
149
|
+
filename: Optional[str] = None,
|
150
|
+
mime_type: Optional[str] = None,
|
151
|
+
) -> "FileAttachment":
|
152
|
+
"""Create a FileAttachment from a file-like object.
|
153
|
+
|
154
|
+
Args:
|
155
|
+
file_obj: File-like object with binary content
|
156
|
+
filename: Optional name to use for the file
|
157
|
+
mime_type: MIME type of the content, guessed from filename if provided
|
158
|
+
|
159
|
+
Returns:
|
160
|
+
FileAttachment instance
|
161
|
+
"""
|
162
|
+
content = file_obj.read()
|
163
|
+
return cls.from_bytes(content, filename, mime_type)
|
164
|
+
|
165
|
+
@classmethod
|
166
|
+
def from_text(
|
167
|
+
cls,
|
168
|
+
text: str,
|
169
|
+
filename: Optional[str] = None,
|
170
|
+
mime_type: str = "text/plain",
|
171
|
+
encoding: str = "utf-8",
|
172
|
+
) -> "FileAttachment":
|
173
|
+
"""Create a FileAttachment from text content.
|
174
|
+
|
175
|
+
Args:
|
176
|
+
text: Text content to include
|
177
|
+
filename: Optional name to use for the file
|
178
|
+
mime_type: MIME type of the content
|
179
|
+
encoding: Text encoding to use
|
180
|
+
|
181
|
+
Returns:
|
182
|
+
FileAttachment instance
|
183
|
+
"""
|
184
|
+
content = text.encode(encoding)
|
185
|
+
return cls(content=content, filename=filename, mime_type=mime_type)
|
186
|
+
|
187
|
+
def to_base64(self) -> str:
|
188
|
+
"""Convert content to base64 encoding.
|
189
|
+
|
190
|
+
Returns:
|
191
|
+
Base64 encoded string
|
192
|
+
"""
|
193
|
+
return base64.b64encode(self.content).decode("utf-8")
|
194
|
+
|
195
|
+
def to_data_uri(self) -> str:
|
196
|
+
"""Convert content to a data URI.
|
197
|
+
|
198
|
+
Returns:
|
199
|
+
A data URI string containing the base64-encoded content with MIME type
|
200
|
+
"""
|
201
|
+
base64_content = self.to_base64()
|
202
|
+
return f"data:{self.mime_type};base64,{base64_content}"
|
203
|
+
|
204
|
+
def to_dict(self, model: str) -> Dict[str, Any]:
|
205
|
+
"""
|
206
|
+
Convert to a dictionary suitable for API requests.
|
207
|
+
Tested only for PDF files.
|
208
|
+
|
209
|
+
Returns:
|
210
|
+
Dictionary with file data
|
211
|
+
"""
|
212
|
+
if (
|
213
|
+
self.mime_type
|
214
|
+
and self.mime_type.startswith("image/")
|
215
|
+
or "gemini" in model.lower()
|
216
|
+
):
|
217
|
+
# for gemini models, we use `image_url` for both pdf-files and images
|
218
|
+
|
219
|
+
image_url_dict = {}
|
220
|
+
|
221
|
+
# If we have a URL and it's a full http/https URL, use it directly
|
222
|
+
if self.url and (
|
223
|
+
self.url.startswith("http://") or self.url.startswith("https://")
|
224
|
+
):
|
225
|
+
image_url_dict["url"] = self.url
|
226
|
+
# Otherwise use base64 data URI
|
227
|
+
else:
|
228
|
+
image_url_dict["url"] = self.to_data_uri()
|
229
|
+
|
230
|
+
# Add detail parameter if specified
|
231
|
+
if self.detail:
|
232
|
+
image_url_dict["detail"] = self.detail
|
233
|
+
|
234
|
+
return dict(
|
235
|
+
type="image_url",
|
236
|
+
image_url=image_url_dict,
|
237
|
+
)
|
238
|
+
else:
|
239
|
+
# For non-image files
|
240
|
+
return dict(
|
241
|
+
type="file",
|
242
|
+
file=dict(
|
243
|
+
filename=self.filename,
|
244
|
+
file_data=self.to_data_uri(),
|
245
|
+
),
|
246
|
+
)
|