remdb 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +310 -0
- rem/agentic/context_builder.py +5 -3
- rem/agentic/mcp/tool_wrapper.py +48 -6
- rem/agentic/providers/phoenix.py +91 -21
- rem/agentic/providers/pydantic_ai.py +77 -43
- rem/api/deps.py +2 -2
- rem/api/main.py +1 -1
- rem/api/mcp_router/server.py +2 -0
- rem/api/mcp_router/tools.py +90 -0
- rem/api/routers/auth.py +208 -4
- rem/api/routers/chat/streaming.py +77 -22
- rem/auth/__init__.py +13 -3
- rem/auth/middleware.py +66 -1
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +50 -49
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +4 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +134 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +459 -0
- rem/services/email/templates.py +360 -0
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +19 -3
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/session/compression.py +113 -50
- rem/services/session/reload.py +14 -7
- rem/settings.py +191 -4
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/vision.py +1 -1
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/METADATA +7 -5
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/RECORD +51 -42
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/WHEEL +0 -0
- {remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/agentic/agents/__init__.py
CHANGED
|
@@ -6,6 +6,8 @@ Use create_agent_from_schema_file() to instantiate agents.
|
|
|
6
6
|
|
|
7
7
|
The SSE Simulator is a special programmatic "agent" that generates
|
|
8
8
|
scripted SSE events for testing and demonstration - it doesn't use an LLM.
|
|
9
|
+
|
|
10
|
+
Agent Manager provides functions for saving/loading user-created agents.
|
|
9
11
|
"""
|
|
10
12
|
|
|
11
13
|
from .sse_simulator import (
|
|
@@ -14,9 +16,23 @@ from .sse_simulator import (
|
|
|
14
16
|
stream_error_demo,
|
|
15
17
|
)
|
|
16
18
|
|
|
19
|
+
from .agent_manager import (
|
|
20
|
+
save_agent,
|
|
21
|
+
get_agent,
|
|
22
|
+
list_agents,
|
|
23
|
+
delete_agent,
|
|
24
|
+
build_agent_spec,
|
|
25
|
+
)
|
|
26
|
+
|
|
17
27
|
__all__ = [
|
|
18
28
|
# SSE Simulator (programmatic, no LLM)
|
|
19
29
|
"stream_simulator_events",
|
|
20
30
|
"stream_minimal_demo",
|
|
21
31
|
"stream_error_demo",
|
|
32
|
+
# Agent Manager
|
|
33
|
+
"save_agent",
|
|
34
|
+
"get_agent",
|
|
35
|
+
"list_agents",
|
|
36
|
+
"delete_agent",
|
|
37
|
+
"build_agent_spec",
|
|
22
38
|
]
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Agent Manager - Save, load, and manage user-created agents.
|
|
3
|
+
|
|
4
|
+
This module provides the core functionality for persisting agent schemas
|
|
5
|
+
to the database with user scoping.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from rem.agentic.agents.agent_manager import save_agent, get_agent, list_agents
|
|
9
|
+
|
|
10
|
+
# Save an agent
|
|
11
|
+
result = await save_agent(
|
|
12
|
+
name="my-assistant",
|
|
13
|
+
description="You are a helpful assistant.",
|
|
14
|
+
user_id="user-123"
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Get an agent
|
|
18
|
+
agent = await get_agent("my-assistant", user_id="user-123")
|
|
19
|
+
|
|
20
|
+
# List user's agents
|
|
21
|
+
agents = await list_agents(user_id="user-123")
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from typing import Any
|
|
25
|
+
from loguru import logger
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
DEFAULT_TOOLS = ["search_rem", "register_metadata"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_agent_spec(
|
|
32
|
+
name: str,
|
|
33
|
+
description: str,
|
|
34
|
+
properties: dict[str, Any] | None = None,
|
|
35
|
+
required: list[str] | None = None,
|
|
36
|
+
tools: list[str] | None = None,
|
|
37
|
+
tags: list[str] | None = None,
|
|
38
|
+
version: str = "1.0.0",
|
|
39
|
+
) -> dict[str, Any]:
|
|
40
|
+
"""
|
|
41
|
+
Build a valid agent schema spec.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
name: Agent name in kebab-case
|
|
45
|
+
description: System prompt for the agent
|
|
46
|
+
properties: Output schema properties
|
|
47
|
+
required: Required property names
|
|
48
|
+
tools: Tool names (defaults to search_rem, register_metadata)
|
|
49
|
+
tags: Categorization tags
|
|
50
|
+
version: Semantic version
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
Valid agent schema spec dict
|
|
54
|
+
"""
|
|
55
|
+
# Default properties
|
|
56
|
+
if properties is None:
|
|
57
|
+
properties = {
|
|
58
|
+
"answer": {
|
|
59
|
+
"type": "string",
|
|
60
|
+
"description": "Natural language response to the user"
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
# Default required
|
|
65
|
+
if required is None:
|
|
66
|
+
required = ["answer"]
|
|
67
|
+
|
|
68
|
+
# Default tools
|
|
69
|
+
if tools is None:
|
|
70
|
+
tools = DEFAULT_TOOLS.copy()
|
|
71
|
+
|
|
72
|
+
return {
|
|
73
|
+
"type": "object",
|
|
74
|
+
"description": description,
|
|
75
|
+
"properties": properties,
|
|
76
|
+
"required": required,
|
|
77
|
+
"json_schema_extra": {
|
|
78
|
+
"kind": "agent",
|
|
79
|
+
"name": name,
|
|
80
|
+
"version": version,
|
|
81
|
+
"tags": tags or [],
|
|
82
|
+
"tools": [{"name": t, "description": f"Tool: {t}"} for t in tools],
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
async def save_agent(
|
|
88
|
+
name: str,
|
|
89
|
+
description: str,
|
|
90
|
+
user_id: str,
|
|
91
|
+
properties: dict[str, Any] | None = None,
|
|
92
|
+
required: list[str] | None = None,
|
|
93
|
+
tools: list[str] | None = None,
|
|
94
|
+
tags: list[str] | None = None,
|
|
95
|
+
version: str = "1.0.0",
|
|
96
|
+
) -> dict[str, Any]:
|
|
97
|
+
"""
|
|
98
|
+
Save an agent schema to the database.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
name: Agent name in kebab-case (e.g., "code-reviewer")
|
|
102
|
+
description: The agent's system prompt
|
|
103
|
+
user_id: User identifier for scoping
|
|
104
|
+
properties: Output schema properties
|
|
105
|
+
required: Required property names
|
|
106
|
+
tools: Tool names
|
|
107
|
+
tags: Categorization tags
|
|
108
|
+
version: Semantic version
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
Dict with status, agent_name, version, message
|
|
112
|
+
|
|
113
|
+
Raises:
|
|
114
|
+
RuntimeError: If database is not available
|
|
115
|
+
"""
|
|
116
|
+
from rem.models.entities import Schema
|
|
117
|
+
from rem.services.postgres import get_postgres_service
|
|
118
|
+
|
|
119
|
+
# Build the spec
|
|
120
|
+
spec = build_agent_spec(
|
|
121
|
+
name=name,
|
|
122
|
+
description=description,
|
|
123
|
+
properties=properties,
|
|
124
|
+
required=required,
|
|
125
|
+
tools=tools,
|
|
126
|
+
tags=tags,
|
|
127
|
+
version=version,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Create Schema entity (user-scoped)
|
|
131
|
+
schema_entity = Schema(
|
|
132
|
+
tenant_id=user_id,
|
|
133
|
+
user_id=user_id,
|
|
134
|
+
name=name,
|
|
135
|
+
spec=spec,
|
|
136
|
+
category="agent",
|
|
137
|
+
metadata={
|
|
138
|
+
"version": version,
|
|
139
|
+
"tags": tags or [],
|
|
140
|
+
"created_via": "agent_manager",
|
|
141
|
+
},
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
# Save to database
|
|
145
|
+
postgres = get_postgres_service()
|
|
146
|
+
if not postgres:
|
|
147
|
+
raise RuntimeError("Database not available")
|
|
148
|
+
|
|
149
|
+
await postgres.connect()
|
|
150
|
+
try:
|
|
151
|
+
await postgres.batch_upsert(
|
|
152
|
+
records=[schema_entity],
|
|
153
|
+
model=Schema,
|
|
154
|
+
table_name="schemas",
|
|
155
|
+
entity_key_field="name",
|
|
156
|
+
generate_embeddings=False,
|
|
157
|
+
)
|
|
158
|
+
logger.info(f"✅ Agent saved: {name} (user={user_id}, version={version})")
|
|
159
|
+
finally:
|
|
160
|
+
await postgres.disconnect()
|
|
161
|
+
|
|
162
|
+
return {
|
|
163
|
+
"status": "success",
|
|
164
|
+
"agent_name": name,
|
|
165
|
+
"version": version,
|
|
166
|
+
"message": f"Agent '{name}' saved successfully.",
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def get_agent(
|
|
171
|
+
name: str,
|
|
172
|
+
user_id: str,
|
|
173
|
+
) -> dict[str, Any] | None:
|
|
174
|
+
"""
|
|
175
|
+
Get an agent schema by name.
|
|
176
|
+
|
|
177
|
+
Checks user's schemas first, then falls back to system schemas.
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
name: Agent name
|
|
181
|
+
user_id: User identifier
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
Agent spec dict if found, None otherwise
|
|
185
|
+
"""
|
|
186
|
+
from rem.services.postgres import get_postgres_service
|
|
187
|
+
|
|
188
|
+
postgres = get_postgres_service()
|
|
189
|
+
if not postgres:
|
|
190
|
+
return None
|
|
191
|
+
|
|
192
|
+
await postgres.connect()
|
|
193
|
+
try:
|
|
194
|
+
query = """
|
|
195
|
+
SELECT spec FROM schemas
|
|
196
|
+
WHERE LOWER(name) = LOWER($1)
|
|
197
|
+
AND category = 'agent'
|
|
198
|
+
AND (user_id = $2 OR user_id IS NULL OR tenant_id = 'system')
|
|
199
|
+
ORDER BY CASE WHEN user_id = $2 THEN 0 ELSE 1 END
|
|
200
|
+
LIMIT 1
|
|
201
|
+
"""
|
|
202
|
+
row = await postgres.fetchrow(query, name, user_id)
|
|
203
|
+
if row:
|
|
204
|
+
return row["spec"]
|
|
205
|
+
return None
|
|
206
|
+
finally:
|
|
207
|
+
await postgres.disconnect()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
async def list_agents(
|
|
211
|
+
user_id: str,
|
|
212
|
+
include_system: bool = True,
|
|
213
|
+
) -> list[dict[str, Any]]:
|
|
214
|
+
"""
|
|
215
|
+
List available agents for a user.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
user_id: User identifier
|
|
219
|
+
include_system: Include system agents
|
|
220
|
+
|
|
221
|
+
Returns:
|
|
222
|
+
List of agent metadata dicts
|
|
223
|
+
"""
|
|
224
|
+
from rem.services.postgres import get_postgres_service
|
|
225
|
+
|
|
226
|
+
postgres = get_postgres_service()
|
|
227
|
+
if not postgres:
|
|
228
|
+
return []
|
|
229
|
+
|
|
230
|
+
await postgres.connect()
|
|
231
|
+
try:
|
|
232
|
+
if include_system:
|
|
233
|
+
query = """
|
|
234
|
+
SELECT name, metadata, user_id, tenant_id
|
|
235
|
+
FROM schemas
|
|
236
|
+
WHERE category = 'agent'
|
|
237
|
+
AND (user_id = $1 OR user_id IS NULL OR tenant_id = 'system')
|
|
238
|
+
ORDER BY name
|
|
239
|
+
"""
|
|
240
|
+
rows = await postgres.fetch(query, user_id)
|
|
241
|
+
else:
|
|
242
|
+
query = """
|
|
243
|
+
SELECT name, metadata, user_id, tenant_id
|
|
244
|
+
FROM schemas
|
|
245
|
+
WHERE category = 'agent'
|
|
246
|
+
AND user_id = $1
|
|
247
|
+
ORDER BY name
|
|
248
|
+
"""
|
|
249
|
+
rows = await postgres.fetch(query, user_id)
|
|
250
|
+
|
|
251
|
+
return [
|
|
252
|
+
{
|
|
253
|
+
"name": row["name"],
|
|
254
|
+
"version": row["metadata"].get("version", "1.0.0") if row["metadata"] else "1.0.0",
|
|
255
|
+
"tags": row["metadata"].get("tags", []) if row["metadata"] else [],
|
|
256
|
+
"is_system": row["tenant_id"] == "system" or row["user_id"] is None,
|
|
257
|
+
}
|
|
258
|
+
for row in rows
|
|
259
|
+
]
|
|
260
|
+
finally:
|
|
261
|
+
await postgres.disconnect()
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
async def delete_agent(
|
|
265
|
+
name: str,
|
|
266
|
+
user_id: str,
|
|
267
|
+
) -> dict[str, Any]:
|
|
268
|
+
"""
|
|
269
|
+
Delete a user's agent.
|
|
270
|
+
|
|
271
|
+
Only allows deleting user-owned agents, not system agents.
|
|
272
|
+
|
|
273
|
+
Args:
|
|
274
|
+
name: Agent name
|
|
275
|
+
user_id: User identifier
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
Dict with status and message
|
|
279
|
+
"""
|
|
280
|
+
from rem.services.postgres import get_postgres_service
|
|
281
|
+
|
|
282
|
+
postgres = get_postgres_service()
|
|
283
|
+
if not postgres:
|
|
284
|
+
raise RuntimeError("Database not available")
|
|
285
|
+
|
|
286
|
+
await postgres.connect()
|
|
287
|
+
try:
|
|
288
|
+
# Only delete user's own agents
|
|
289
|
+
query = """
|
|
290
|
+
DELETE FROM schemas
|
|
291
|
+
WHERE LOWER(name) = LOWER($1)
|
|
292
|
+
AND category = 'agent'
|
|
293
|
+
AND user_id = $2
|
|
294
|
+
RETURNING name
|
|
295
|
+
"""
|
|
296
|
+
row = await postgres.fetchrow(query, name, user_id)
|
|
297
|
+
|
|
298
|
+
if row:
|
|
299
|
+
logger.info(f"🗑️ Agent deleted: {name} (user={user_id})")
|
|
300
|
+
return {
|
|
301
|
+
"status": "success",
|
|
302
|
+
"message": f"Agent '{name}' deleted.",
|
|
303
|
+
}
|
|
304
|
+
else:
|
|
305
|
+
return {
|
|
306
|
+
"status": "error",
|
|
307
|
+
"message": f"Agent '{name}' not found or not owned by you.",
|
|
308
|
+
}
|
|
309
|
+
finally:
|
|
310
|
+
await postgres.disconnect()
|
rem/agentic/context_builder.py
CHANGED
|
@@ -184,13 +184,15 @@ class ContextBuilder:
|
|
|
184
184
|
# Add system context hint
|
|
185
185
|
messages.append(ContextMessage(role="system", content=context_hint))
|
|
186
186
|
|
|
187
|
-
# ALWAYS load session history (if session_id provided)
|
|
187
|
+
# ALWAYS load session history (if session_id provided)
|
|
188
|
+
# - Long assistant messages are compressed on load with REM LOOKUP hints
|
|
189
|
+
# - Tool messages are never compressed (contain structured metadata)
|
|
188
190
|
if context.session_id and settings.postgres.enabled:
|
|
189
191
|
store = SessionMessageStore(user_id=context.user_id or "default")
|
|
190
192
|
session_history = await store.load_session_messages(
|
|
191
193
|
session_id=context.session_id,
|
|
192
194
|
user_id=context.user_id,
|
|
193
|
-
|
|
195
|
+
compress_on_load=True, # Compress long assistant messages
|
|
194
196
|
)
|
|
195
197
|
|
|
196
198
|
# Convert to ContextMessage format
|
|
@@ -202,7 +204,7 @@ class ContextBuilder:
|
|
|
202
204
|
)
|
|
203
205
|
)
|
|
204
206
|
|
|
205
|
-
logger.debug(f"Loaded {len(session_history)}
|
|
207
|
+
logger.debug(f"Loaded {len(session_history)} messages for session {context.session_id}")
|
|
206
208
|
|
|
207
209
|
# Add new messages from request
|
|
208
210
|
if new_messages:
|
rem/agentic/mcp/tool_wrapper.py
CHANGED
|
@@ -107,7 +107,7 @@ def create_mcp_tool_wrapper(
|
|
|
107
107
|
return Tool(tool_func)
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
110
|
+
def create_resource_tool(uri: str, usage: str = "", mcp_server: Any = None) -> Tool:
|
|
111
111
|
"""
|
|
112
112
|
Build a Tool instance from an MCP resource URI.
|
|
113
113
|
|
|
@@ -122,6 +122,9 @@ def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
|
122
122
|
Args:
|
|
123
123
|
uri: The resource URI (concrete or template with {variable} placeholders).
|
|
124
124
|
usage: The description of what this resource provides.
|
|
125
|
+
mcp_server: Optional FastMCP server instance to resolve resources from.
|
|
126
|
+
If provided, resources are resolved from this server's registry.
|
|
127
|
+
If not provided, falls back to REM's built-in load_resource().
|
|
125
128
|
|
|
126
129
|
Returns:
|
|
127
130
|
A Pydantic AI Tool instance that fetches the resource.
|
|
@@ -131,7 +134,7 @@ def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
|
131
134
|
tool = create_resource_tool("rem://schemas", "List all agent schemas")
|
|
132
135
|
|
|
133
136
|
# Template URI -> parameterized tool
|
|
134
|
-
tool = create_resource_tool("patient-profile://field/{field_key}", "Get field definition")
|
|
137
|
+
tool = create_resource_tool("patient-profile://field/{field_key}", "Get field definition", mcp_server=mcp)
|
|
135
138
|
# Agent calls: get_patient_profile_field(field_key="safety.suicidality")
|
|
136
139
|
"""
|
|
137
140
|
import json
|
|
@@ -156,7 +159,29 @@ def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
|
156
159
|
# Template URI -> create parameterized tool
|
|
157
160
|
async def wrapper(**kwargs: Any) -> str:
|
|
158
161
|
"""Fetch MCP resource with substituted parameters."""
|
|
159
|
-
|
|
162
|
+
import asyncio
|
|
163
|
+
import inspect
|
|
164
|
+
|
|
165
|
+
# Try to resolve from MCP server's resource templates first
|
|
166
|
+
if mcp_server is not None:
|
|
167
|
+
try:
|
|
168
|
+
# Get resource templates from MCP server
|
|
169
|
+
templates = await mcp_server.get_resource_templates()
|
|
170
|
+
if uri in templates:
|
|
171
|
+
template = templates[uri]
|
|
172
|
+
# Call the template's underlying function directly
|
|
173
|
+
# The fn expects the template variables as kwargs
|
|
174
|
+
fn_result = template.fn(**kwargs)
|
|
175
|
+
# Handle both sync and async functions
|
|
176
|
+
if inspect.iscoroutine(fn_result):
|
|
177
|
+
fn_result = await fn_result
|
|
178
|
+
if isinstance(fn_result, str):
|
|
179
|
+
return fn_result
|
|
180
|
+
return json.dumps(fn_result, indent=2)
|
|
181
|
+
except Exception as e:
|
|
182
|
+
logger.warning(f"Failed to resolve resource {uri} from MCP server: {e}")
|
|
183
|
+
|
|
184
|
+
# Fallback: substitute template variables and use load_resource
|
|
160
185
|
resolved_uri = uri
|
|
161
186
|
for var in template_vars:
|
|
162
187
|
if var in kwargs:
|
|
@@ -164,9 +189,7 @@ def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
|
164
189
|
else:
|
|
165
190
|
return json.dumps({"error": f"Missing required parameter: {var}"})
|
|
166
191
|
|
|
167
|
-
# Import resource loading here to avoid circular imports
|
|
168
192
|
from rem.api.mcp_router.resources import load_resource
|
|
169
|
-
|
|
170
193
|
result = await load_resource(resolved_uri)
|
|
171
194
|
if isinstance(result, str):
|
|
172
195
|
return result
|
|
@@ -184,11 +207,30 @@ def create_resource_tool(uri: str, usage: str = "") -> Tool:
|
|
|
184
207
|
# Concrete URI -> no-param tool
|
|
185
208
|
async def wrapper(**kwargs: Any) -> str:
|
|
186
209
|
"""Fetch MCP resource and return contents."""
|
|
210
|
+
import asyncio
|
|
211
|
+
import inspect
|
|
212
|
+
|
|
187
213
|
if kwargs:
|
|
188
214
|
logger.warning(f"Resource tool {func_name} called with unexpected kwargs: {list(kwargs.keys())}")
|
|
189
215
|
|
|
216
|
+
# Try to resolve from MCP server's resources first
|
|
217
|
+
if mcp_server is not None:
|
|
218
|
+
try:
|
|
219
|
+
resources = await mcp_server.get_resources()
|
|
220
|
+
if uri in resources:
|
|
221
|
+
resource = resources[uri]
|
|
222
|
+
# Call the resource's underlying function
|
|
223
|
+
fn_result = resource.fn()
|
|
224
|
+
if inspect.iscoroutine(fn_result):
|
|
225
|
+
fn_result = await fn_result
|
|
226
|
+
if isinstance(fn_result, str):
|
|
227
|
+
return fn_result
|
|
228
|
+
return json.dumps(fn_result, indent=2)
|
|
229
|
+
except Exception as e:
|
|
230
|
+
logger.warning(f"Failed to resolve resource {uri} from MCP server: {e}")
|
|
231
|
+
|
|
232
|
+
# Fallback to load_resource
|
|
190
233
|
from rem.api.mcp_router.resources import load_resource
|
|
191
|
-
|
|
192
234
|
result = await load_resource(uri)
|
|
193
235
|
if isinstance(result, str):
|
|
194
236
|
return result
|
rem/agentic/providers/phoenix.py
CHANGED
|
@@ -94,6 +94,82 @@ def _check_phoenix_available() -> bool:
|
|
|
94
94
|
return PHOENIX_AVAILABLE
|
|
95
95
|
|
|
96
96
|
|
|
97
|
+
def validate_evaluator_credentials(
|
|
98
|
+
model_name: str | None = None,
|
|
99
|
+
) -> tuple[bool, str | None]:
|
|
100
|
+
"""Validate that the evaluator's LLM provider has working credentials.
|
|
101
|
+
|
|
102
|
+
Performs a minimal API call to verify credentials before running experiments.
|
|
103
|
+
This prevents running expensive agent tasks only to have evaluations fail.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
model_name: Model to validate (defaults to claude-sonnet-4-5-20250929)
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
Tuple of (success: bool, error_message: str | None)
|
|
110
|
+
- (True, None) if credentials are valid
|
|
111
|
+
- (False, "error description") if validation fails
|
|
112
|
+
|
|
113
|
+
Example:
|
|
114
|
+
>>> success, error = validate_evaluator_credentials()
|
|
115
|
+
>>> if not success:
|
|
116
|
+
... print(f"Evaluator validation failed: {error}")
|
|
117
|
+
... return
|
|
118
|
+
"""
|
|
119
|
+
if not _check_phoenix_available():
|
|
120
|
+
return False, "arize-phoenix package not installed"
|
|
121
|
+
|
|
122
|
+
from phoenix.evals import OpenAIModel, AnthropicModel
|
|
123
|
+
|
|
124
|
+
# Default model (check env var first)
|
|
125
|
+
if model_name is None:
|
|
126
|
+
import os
|
|
127
|
+
model_name = os.environ.get("EVALUATOR_MODEL", "claude-sonnet-4-5-20250929")
|
|
128
|
+
|
|
129
|
+
# Parse provider
|
|
130
|
+
if ":" in model_name:
|
|
131
|
+
provider, phoenix_model_name = model_name.split(":", 1)
|
|
132
|
+
else:
|
|
133
|
+
if model_name.startswith("claude"):
|
|
134
|
+
provider = "anthropic"
|
|
135
|
+
else:
|
|
136
|
+
provider = "openai"
|
|
137
|
+
phoenix_model_name = model_name
|
|
138
|
+
|
|
139
|
+
try:
|
|
140
|
+
# Create LLM wrapper
|
|
141
|
+
if provider.lower() == "anthropic":
|
|
142
|
+
llm = AnthropicModel(
|
|
143
|
+
model=phoenix_model_name,
|
|
144
|
+
temperature=0.0,
|
|
145
|
+
top_p=None,
|
|
146
|
+
)
|
|
147
|
+
else:
|
|
148
|
+
llm = OpenAIModel(model=phoenix_model_name, temperature=0.0)
|
|
149
|
+
|
|
150
|
+
# Test with minimal prompt
|
|
151
|
+
logger.info(f"Validating evaluator credentials for {provider}:{phoenix_model_name}")
|
|
152
|
+
response = llm("Say 'ok' if you can read this.")
|
|
153
|
+
|
|
154
|
+
if response and len(response) > 0:
|
|
155
|
+
logger.info(f"Evaluator credentials validated successfully for {provider}")
|
|
156
|
+
return True, None
|
|
157
|
+
else:
|
|
158
|
+
return False, f"Empty response from {provider} model"
|
|
159
|
+
|
|
160
|
+
except Exception as e:
|
|
161
|
+
error_msg = str(e)
|
|
162
|
+
# Extract meaningful error from common API errors
|
|
163
|
+
if "credit balance is too low" in error_msg.lower():
|
|
164
|
+
return False, f"Anthropic API credits exhausted. Add credits at https://console.anthropic.com/settings/billing"
|
|
165
|
+
elif "api key" in error_msg.lower() or "authentication" in error_msg.lower():
|
|
166
|
+
return False, f"{provider.capitalize()} API key missing or invalid. Set ANTHROPIC_API_KEY or OPENAI_API_KEY environment variable."
|
|
167
|
+
elif "rate limit" in error_msg.lower():
|
|
168
|
+
return False, f"{provider.capitalize()} rate limit exceeded. Wait and retry."
|
|
169
|
+
else:
|
|
170
|
+
return False, f"{provider.capitalize()} API error: {error_msg[:200]}"
|
|
171
|
+
|
|
172
|
+
|
|
97
173
|
# =============================================================================
|
|
98
174
|
# NAME SANITIZATION
|
|
99
175
|
# =============================================================================
|
|
@@ -207,8 +283,9 @@ def create_phoenix_evaluator(
|
|
|
207
283
|
|
|
208
284
|
# Default model (use Claude Sonnet 4.5 for evaluators)
|
|
209
285
|
if model_name is None:
|
|
210
|
-
|
|
211
|
-
|
|
286
|
+
import os
|
|
287
|
+
model_name = os.environ.get("EVALUATOR_MODEL", "claude-sonnet-4-5-20250929")
|
|
288
|
+
logger.debug(f"Using evaluator model: {model_name}")
|
|
212
289
|
|
|
213
290
|
logger.info(f"Creating Phoenix evaluator: {evaluator_name} with model={model_name}")
|
|
214
291
|
|
|
@@ -589,33 +666,26 @@ Please evaluate the agent's answer according to the evaluation criteria."""
|
|
|
589
666
|
|
|
590
667
|
logger.debug(f"Created {len(evaluations)} evaluations")
|
|
591
668
|
|
|
592
|
-
# Phoenix
|
|
593
|
-
#
|
|
594
|
-
from phoenix.experiments.evaluators.base import EvaluationResult
|
|
595
|
-
|
|
669
|
+
# Phoenix client expects a dict with score, label, explanation
|
|
670
|
+
# (not the old EvaluationResult class)
|
|
596
671
|
overall_eval = next(
|
|
597
672
|
(e for e in evaluations if e["name"] == "overall"),
|
|
598
673
|
{"score": 0.0, "label": "unknown", "explanation": None}
|
|
599
674
|
)
|
|
600
675
|
|
|
601
|
-
return
|
|
602
|
-
score
|
|
603
|
-
label
|
|
604
|
-
explanation
|
|
605
|
-
|
|
606
|
-
"evaluations": evaluations,
|
|
607
|
-
"raw_response": response_json,
|
|
608
|
-
}
|
|
609
|
-
)
|
|
676
|
+
return {
|
|
677
|
+
"score": overall_eval.get("score", 0.0),
|
|
678
|
+
"label": overall_eval.get("label", "unknown"),
|
|
679
|
+
"explanation": overall_eval.get("explanation"),
|
|
680
|
+
}
|
|
610
681
|
|
|
611
682
|
except Exception as e:
|
|
612
683
|
logger.error(f"Evaluator error: {e}")
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
)
|
|
684
|
+
return {
|
|
685
|
+
"score": 0.0,
|
|
686
|
+
"label": "error",
|
|
687
|
+
"explanation": f"Evaluator failed: {str(e)}",
|
|
688
|
+
}
|
|
619
689
|
|
|
620
690
|
return evaluator_fn
|
|
621
691
|
|