remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +302 -109
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +70 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +418 -27
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +27 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/registry.py +10 -4
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/service.py +92 -20
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/settings.py +324 -23
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/agentic/context.py
CHANGED
|
@@ -2,10 +2,18 @@
|
|
|
2
2
|
Agent execution context and configuration.
|
|
3
3
|
|
|
4
4
|
Design pattern for session context that can be constructed from:
|
|
5
|
-
- HTTP headers (X-User-Id, X-Session-Id, X-Model-Name)
|
|
5
|
+
- HTTP headers (X-User-Id, X-Session-Id, X-Model-Name, X-Is-Eval, etc.)
|
|
6
6
|
- Direct instantiation for testing/CLI
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Headers Mapping:
|
|
9
|
+
X-User-Id → context.user_id
|
|
10
|
+
X-Tenant-Id → context.tenant_id (default: "default")
|
|
11
|
+
X-Session-Id → context.session_id
|
|
12
|
+
X-Agent-Schema → context.agent_schema_uri (default: "rem")
|
|
13
|
+
X-Model-Name → context.default_model
|
|
14
|
+
X-Is-Eval → context.is_eval (marks session as evaluation)
|
|
15
|
+
|
|
16
|
+
Key Design Pattern:
|
|
9
17
|
- AgentContext is passed to agent factory, not stored in agents
|
|
10
18
|
- Enables session tracking across API, CLI, and test execution
|
|
11
19
|
- Supports header-based configuration override (model, schema URI)
|
|
@@ -66,6 +74,11 @@ class AgentContext(BaseModel):
|
|
|
66
74
|
description="Agent schema URI (e.g., 'rem-agents-query-agent')",
|
|
67
75
|
)
|
|
68
76
|
|
|
77
|
+
is_eval: bool = Field(
|
|
78
|
+
default=False,
|
|
79
|
+
description="Whether this is an evaluation session (set via X-Is-Eval header)",
|
|
80
|
+
)
|
|
81
|
+
|
|
69
82
|
model_config = {"populate_by_name": True}
|
|
70
83
|
|
|
71
84
|
@staticmethod
|
|
@@ -73,43 +86,47 @@ class AgentContext(BaseModel):
|
|
|
73
86
|
user_id: str | None,
|
|
74
87
|
source: str = "context",
|
|
75
88
|
default: str | None = None,
|
|
76
|
-
) -> str:
|
|
89
|
+
) -> str | None:
|
|
77
90
|
"""
|
|
78
|
-
Get user_id or
|
|
91
|
+
Get user_id or return None for anonymous access.
|
|
79
92
|
|
|
80
|
-
|
|
81
|
-
|
|
93
|
+
User ID convention:
|
|
94
|
+
- user_id is a deterministic UUID5 hash of the user's email address
|
|
95
|
+
- Use rem.utils.user_id.email_to_user_id(email) to generate
|
|
96
|
+
- The JWT's `sub` claim is NOT directly used as user_id
|
|
97
|
+
- Authentication middleware extracts email from JWT and hashes it
|
|
98
|
+
|
|
99
|
+
When user_id is None, queries return data with user_id IS NULL
|
|
100
|
+
(shared/public data). This is intentional - no fake user IDs.
|
|
82
101
|
|
|
83
102
|
Args:
|
|
84
|
-
user_id: User identifier (may be None)
|
|
103
|
+
user_id: User identifier (UUID5 hash of email, may be None for anonymous)
|
|
85
104
|
source: Source of the call (for logging clarity)
|
|
86
|
-
default:
|
|
105
|
+
default: Explicit default (only for testing, not auto-generated)
|
|
87
106
|
|
|
88
107
|
Returns:
|
|
89
|
-
user_id if provided,
|
|
108
|
+
user_id if provided, explicit default if provided, otherwise None
|
|
90
109
|
|
|
91
110
|
Example:
|
|
92
|
-
#
|
|
93
|
-
user_id
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# In API endpoint
|
|
98
|
-
user_id = AgentContext.get_user_id_or_default(
|
|
99
|
-
temp_context.user_id, source="chat_completions"
|
|
100
|
-
)
|
|
111
|
+
# Generate user_id from email (done by auth middleware)
|
|
112
|
+
from rem.utils.user_id import email_to_user_id
|
|
113
|
+
user_id = email_to_user_id("alice@example.com")
|
|
114
|
+
# -> "2c5ea4c0-4067-5fef-942d-0a20124e06d8"
|
|
101
115
|
|
|
102
|
-
# In
|
|
116
|
+
# In MCP tool - anonymous user sees shared data
|
|
103
117
|
user_id = AgentContext.get_user_id_or_default(
|
|
104
|
-
|
|
118
|
+
user_id, source="ask_rem_agent"
|
|
105
119
|
)
|
|
120
|
+
# Returns None if not authenticated -> queries WHERE user_id IS NULL
|
|
106
121
|
"""
|
|
107
|
-
if user_id is None:
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
logger.debug(f"
|
|
111
|
-
return
|
|
112
|
-
return
|
|
122
|
+
if user_id is not None:
|
|
123
|
+
return user_id
|
|
124
|
+
if default is not None:
|
|
125
|
+
logger.debug(f"Using explicit default user_id '{default}' from {source}")
|
|
126
|
+
return default
|
|
127
|
+
# No fake user IDs - return None for anonymous/unauthenticated
|
|
128
|
+
logger.debug(f"No user_id from {source}, using None (anonymous/shared data)")
|
|
129
|
+
return None
|
|
113
130
|
|
|
114
131
|
@classmethod
|
|
115
132
|
def from_headers(cls, headers: dict[str, str]) -> "AgentContext":
|
|
@@ -122,6 +139,7 @@ class AgentContext(BaseModel):
|
|
|
122
139
|
- X-Session-Id: Session identifier
|
|
123
140
|
- X-Model-Name: Model override
|
|
124
141
|
- X-Agent-Schema: Agent schema URI
|
|
142
|
+
- X-Is-Eval: Whether this is an evaluation session (true/false)
|
|
125
143
|
|
|
126
144
|
Args:
|
|
127
145
|
headers: Dictionary of HTTP headers (case-insensitive)
|
|
@@ -134,17 +152,23 @@ class AgentContext(BaseModel):
|
|
|
134
152
|
"X-User-Id": "user123",
|
|
135
153
|
"X-Tenant-Id": "acme-corp",
|
|
136
154
|
"X-Session-Id": "sess-456",
|
|
137
|
-
"X-Model-Name": "anthropic:claude-opus-4-20250514"
|
|
155
|
+
"X-Model-Name": "anthropic:claude-opus-4-20250514",
|
|
156
|
+
"X-Is-Eval": "true"
|
|
138
157
|
}
|
|
139
158
|
context = AgentContext.from_headers(headers)
|
|
140
159
|
"""
|
|
141
160
|
# Normalize header keys to lowercase for case-insensitive lookup
|
|
142
161
|
normalized = {k.lower(): v for k, v in headers.items()}
|
|
143
162
|
|
|
163
|
+
# Parse X-Is-Eval header (accepts "true", "1", "yes" as truthy)
|
|
164
|
+
is_eval_str = normalized.get("x-is-eval", "").lower()
|
|
165
|
+
is_eval = is_eval_str in ("true", "1", "yes")
|
|
166
|
+
|
|
144
167
|
return cls(
|
|
145
168
|
user_id=normalized.get("x-user-id"),
|
|
146
169
|
tenant_id=normalized.get("x-tenant-id", "default"),
|
|
147
170
|
session_id=normalized.get("x-session-id"),
|
|
148
171
|
default_model=normalized.get("x-model-name") or settings.llm.default_model,
|
|
149
172
|
agent_schema_uri=normalized.get("x-agent-schema"),
|
|
173
|
+
is_eval=is_eval,
|
|
150
174
|
)
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Provider Model Registry.
|
|
3
|
+
|
|
4
|
+
Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
|
|
5
|
+
Used by the models API endpoint and for validating model requests.
|
|
6
|
+
|
|
7
|
+
Future: Models will be stored in database for dynamic management.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelInfo(BaseModel):
    """Metadata record describing one LLM entry in the model registry.

    ``object`` is pinned to the literal ``"model"``. The description and the
    two token limits are optional and default to ``None``.
    """

    # Identifier in "provider:model" form, e.g. "openai:gpt-4.1".
    id: str = Field(description="Model ID in provider:model format")
    object: Literal["model"] = "model"
    # Seconds since the Unix epoch.
    created: int = Field(description="Unix timestamp of model availability")
    owned_by: str = Field(description="Provider name")
    description: str | None = Field(
        default=None,
        description="Model description",
    )
    context_window: int | None = Field(
        default=None,
        description="Max context tokens",
    )
    max_output_tokens: int | None = Field(
        default=None,
        description="Max output tokens",
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Model definitions with 2025 releases
# `created` values are Unix timestamps (seconds, midnight UTC) of the
# approximate release date noted next to each entry. Timestamps for 2025
# releases are computed for the year 2025 so that the value agrees with the
# date in the comment and in the dated model ID.
AVAILABLE_MODELS: list[ModelInfo] = [
    # ==========================================================================
    # OpenAI Models (2025)
    # ==========================================================================
    # GPT-4.1 series (Released April 14, 2025)
    ModelInfo(
        id="openai:gpt-4.1",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-mini",
        created=1744588800,
        owned_by="openai",
        description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-nano",
        created=1744588800,
        owned_by="openai",
        description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    # GPT-4o (legacy but still supported)
    ModelInfo(
        id="openai:gpt-4o",
        created=1715644800,  # May 13, 2024
        owned_by="openai",
        description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    ModelInfo(
        id="openai:gpt-4o-mini",
        created=1721347200,  # July 18, 2024
        owned_by="openai",
        description="Cost-efficient smaller GPT-4o variant.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    # o1 reasoning models
    ModelInfo(
        id="openai:o1",
        created=1733961600,  # December 12, 2024
        owned_by="openai",
        description="Advanced reasoning model for complex problems. Extended thinking.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    ModelInfo(
        id="openai:o1-mini",
        created=1726099200,  # September 12, 2024
        owned_by="openai",
        description="Smaller reasoning model, fast for coding and math.",
        context_window=128000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="openai:o3-mini",
        created=1738195200,  # January 30, 2025
        owned_by="openai",
        description="Latest mini reasoning model with improved performance.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    # ==========================================================================
    # Anthropic Models (2025)
    # ==========================================================================
    # Claude 4.5 series (Latest - November 2025)
    ModelInfo(
        id="anthropic:claude-opus-4-5-20251124",
        created=1763942400,  # November 24, 2025
        owned_by="anthropic",
        description="Most capable Claude model. World-class coding with 'effort' parameter control.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5-20250929",
        created=1759104000,  # September 29, 2025
        owned_by="anthropic",
        description="Best balance of intelligence and speed. Excellent for coding and agents.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5-20251101",
        created=1761955200,  # November 1, 2025
        owned_by="anthropic",
        description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Claude 4 series
    ModelInfo(
        id="anthropic:claude-opus-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="World's best coding model. Sustained performance on complex agent workflows.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-opus-4-1-20250805",
        created=1754352000,  # August 5, 2025
        owned_by="anthropic",
        description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Aliases for convenience (same timestamps as the dated entries above)
    ModelInfo(
        id="anthropic:claude-opus-4-5",
        created=1763942400,
        owned_by="anthropic",
        description="Alias for latest Claude Opus 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5",
        created=1759104000,
        owned_by="anthropic",
        description="Alias for latest Claude Sonnet 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5",
        created=1761955200,
        owned_by="anthropic",
        description="Alias for latest Claude Haiku 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # ==========================================================================
    # Google Models (2025)
    # ==========================================================================
    # Gemini 3 (Latest)
    ModelInfo(
        id="google:gemini-3-pro",
        created=1761955200,  # November 2025
        owned_by="google",
        description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
        context_window=2000000,
        max_output_tokens=65536,
    ),
    # Gemini 2.5 series
    ModelInfo(
        id="google:gemini-2.5-pro",
        created=1759104000,  # September 2025
        owned_by="google",
        description="High-capability model with adaptive thinking. 1M context window.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash",
        created=1759104000,
        owned_by="google",
        description="Fast and capable. Best for large-scale processing and agentic tasks.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash-lite",
        created=1759104000,
        owned_by="google",
        description="Optimized for massive scale. Balances cost and performance.",
        context_window=1000000,
        max_output_tokens=32768,
    ),
    # Gemini 2.0
    ModelInfo(
        id="google:gemini-2.0-flash",
        created=1733875200,  # December 2024
        owned_by="google",
        description="Fast multimodal model with native tool use.",
        context_window=1000000,
        max_output_tokens=8192,
    ),
    # Gemma open models
    # NOTE(review): no release date is recorded for the Gemma entries; they
    # reuse the 2.5-series / Gemini 3 timestamps - confirm the intended dates.
    ModelInfo(
        id="google:gemma-3",
        created=1759104000,
        owned_by="google",
        description="Open model with text/image input, 140+ languages, 128K context.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="google:gemma-3n",
        created=1761955200,
        owned_by="google",
        description="Efficient open model for low-resource devices. Multimodal input.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    # ==========================================================================
    # Cerebras Models (Ultra-fast inference)
    # ==========================================================================
    ModelInfo(
        id="cerebras:llama-3.3-70b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="cerebras:qwen-3-32b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
        context_window=32000,
        max_output_tokens=8192,
    ),
]

# Set of valid model IDs for fast O(1) lookup
ALLOWED_MODEL_IDS: set[str] = {model.id for model in AVAILABLE_MODELS}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def is_valid_model(model_id: str | None) -> bool:
|
|
266
|
+
"""Check if a model ID is in the allowed list."""
|
|
267
|
+
if model_id is None:
|
|
268
|
+
return False
|
|
269
|
+
return model_id in ALLOWED_MODEL_IDS
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
    """
    Pick the requested model when it is registered, else the fallback.

    Args:
        model_id: The requested model ID (may be None or unregistered)
        default_model: Model ID returned when ``model_id`` is rejected

    Returns:
        ``model_id`` if ``is_valid_model`` accepts it, otherwise
        ``default_model`` (returned as given, without validation)
    """
    if not is_valid_model(model_id):
        return default_model
    # is_valid_model() has already ruled out None at this point.
    return model_id  # type: ignore[return-value]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_model_by_id(model_id: str) -> ModelInfo | None:
    """
    Look up the registry entry for a model ID.

    Args:
        model_id: Model identifier in provider:model format

    Returns:
        The first ModelInfo in AVAILABLE_MODELS whose ``id`` equals
        ``model_id``, or None when no entry matches
    """
    matches = (entry for entry in AVAILABLE_MODELS if entry.id == model_id)
    return next(matches, None)
|
rem/agentic/mcp/tool_wrapper.py
CHANGED
|
@@ -28,7 +28,12 @@ def create_pydantic_tool(func: Callable[..., Any]) -> Tool:
|
|
|
28
28
|
return Tool(func)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def create_mcp_tool_wrapper(
|
|
31
|
+
def create_mcp_tool_wrapper(
|
|
32
|
+
tool_name: str,
|
|
33
|
+
mcp_tool: Any,
|
|
34
|
+
user_id: str | None = None,
|
|
35
|
+
description_suffix: str | None = None,
|
|
36
|
+
) -> Tool:
|
|
32
37
|
"""
|
|
33
38
|
Create a Pydantic AI Tool from a FastMCP FunctionTool.
|
|
34
39
|
|
|
@@ -40,6 +45,8 @@ def create_mcp_tool_wrapper(tool_name: str, mcp_tool: Any, user_id: str | None =
|
|
|
40
45
|
tool_name: Name of the MCP tool
|
|
41
46
|
mcp_tool: The FastMCP FunctionTool object
|
|
42
47
|
user_id: Optional user_id to inject into tool calls
|
|
48
|
+
description_suffix: Optional text to append to the tool's docstring.
|
|
49
|
+
Used to add schema-specific context (e.g., default table for search_rem).
|
|
43
50
|
|
|
44
51
|
Returns:
|
|
45
52
|
A Pydantic AI Tool instance
|
|
@@ -52,7 +59,11 @@ def create_mcp_tool_wrapper(tool_name: str, mcp_tool: Any, user_id: str | None =
|
|
|
52
59
|
sig = inspect.signature(tool_func)
|
|
53
60
|
has_user_id = "user_id" in sig.parameters
|
|
54
61
|
|
|
55
|
-
#
|
|
62
|
+
# Build the docstring with optional suffix
|
|
63
|
+
base_doc = tool_func.__doc__ or ""
|
|
64
|
+
final_doc = base_doc + description_suffix if description_suffix else base_doc
|
|
65
|
+
|
|
66
|
+
# If we need to inject user_id or modify docstring, create a wrapper
|
|
56
67
|
# Otherwise, use the function directly for better signature preservation
|
|
57
68
|
if user_id and has_user_id:
|
|
58
69
|
async def wrapped_tool(**kwargs) -> Any:
|
|
@@ -69,39 +80,123 @@ def create_mcp_tool_wrapper(tool_name: str, mcp_tool: Any, user_id: str | None =
|
|
|
69
80
|
|
|
70
81
|
# Copy signature from original function for Pydantic AI inspection
|
|
71
82
|
wrapped_tool.__name__ = tool_name
|
|
72
|
-
wrapped_tool.__doc__ =
|
|
83
|
+
wrapped_tool.__doc__ = final_doc
|
|
73
84
|
wrapped_tool.__annotations__ = tool_func.__annotations__
|
|
74
85
|
wrapped_tool.__signature__ = sig # Important: preserve full signature
|
|
75
86
|
|
|
76
87
|
logger.debug(f"Creating MCP tool wrapper with user_id injection: {tool_name}")
|
|
77
88
|
return Tool(wrapped_tool)
|
|
89
|
+
elif description_suffix:
|
|
90
|
+
# Need to wrap just for docstring modification
|
|
91
|
+
async def wrapped_tool(**kwargs) -> Any:
|
|
92
|
+
"""Wrapper for docstring modification."""
|
|
93
|
+
valid_params = set(sig.parameters.keys())
|
|
94
|
+
filtered_kwargs = {k: v for k, v in kwargs.items() if k in valid_params}
|
|
95
|
+
return await tool_func(**filtered_kwargs)
|
|
96
|
+
|
|
97
|
+
wrapped_tool.__name__ = tool_name
|
|
98
|
+
wrapped_tool.__doc__ = final_doc
|
|
99
|
+
wrapped_tool.__annotations__ = tool_func.__annotations__
|
|
100
|
+
wrapped_tool.__signature__ = sig
|
|
101
|
+
|
|
102
|
+
logger.debug(f"Creating MCP tool wrapper with description suffix: {tool_name}")
|
|
103
|
+
return Tool(wrapped_tool)
|
|
78
104
|
else:
|
|
79
105
|
# No injection needed - use original function directly
|
|
80
106
|
logger.debug(f"Creating MCP tool wrapper (no injection): {tool_name}")
|
|
81
107
|
return Tool(tool_func)
|
|
82
108
|
|
|
83
109
|
|
|
84
|
-
def create_resource_tool(uri: str, usage: str) -> Tool:
|
|
110
|
+
def create_resource_tool(uri: str, usage: str = "") -> Tool:
    """
    Build a Tool instance from an MCP resource URI.

    Creates a tool that fetches the resource content when called.
    Resources declared in agent YAML become callable tools - this eliminates
    the artificial MCP distinction between tools and resources.

    Supports both:
    - Concrete URIs: "rem://schemas" -> tool with no parameters
    - Template URIs: "patient-profile://field/{field_key}" -> tool with field_key parameter

    For template URIs the wrapper publishes an explicit ``__signature__`` with
    one keyword-only ``str`` parameter per template variable, so signature
    inspection sees the named parameters instead of a bare ``**kwargs``.

    Args:
        uri: The resource URI (concrete or template with {variable} placeholders).
        usage: The description of what this resource provides. When empty,
            "Fetch {uri} resource" is used.

    Returns:
        A Pydantic AI Tool instance that fetches the resource. The tool
        returns the resource as-is when it is a string, otherwise as
        indented JSON. A template tool called without one of its variables
        returns a JSON object with an "error" key instead of raising.

    Example:
        # Concrete URI -> no-param tool
        tool = create_resource_tool("rem://schemas", "List all agent schemas")

        # Template URI -> parameterized tool
        tool = create_resource_tool("patient-profile://field/{field_key}", "Get field definition")
        # Agent calls: get_patient_profile_field(field_key="safety.suicidality")
    """
    import inspect
    import json
    import keyword
    import re

    # Extract template variables from URI (e.g., {field_key}, {domain_name}).
    # dict.fromkeys drops repeats while keeping first-seen order, so a URI
    # that mentions the same variable twice yields a single parameter.
    template_vars = list(dict.fromkeys(re.findall(r'\{([^}]+)\}', uri)))

    # Parse URI to create function name (strip template vars for cleaner name)
    clean_uri = re.sub(r'\{[^}]+\}', '', uri)
    parts = clean_uri.replace("://", "_").replace("-", "_").replace("/", "_").replace(".", "_")
    parts = re.sub(r'_+', '_', parts).strip('_')  # Clean up multiple underscores
    func_name = f"get_{parts}"

    # Build description including parameter info
    description = usage or f"Fetch {uri} resource"
    if template_vars:
        param_desc = ", ".join(template_vars)
        description = f"{description}\n\nParameters: {param_desc}"

    async def _fetch(resolved_uri: str) -> str:
        """Load one resource and normalize the result to a string."""
        # Import resource loading here to avoid circular imports
        from rem.api.mcp_router.resources import load_resource

        result = await load_resource(resolved_uri)
        if isinstance(result, str):
            return result
        return json.dumps(result, indent=2)

    if template_vars:
        # Template URI -> create parameterized tool
        async def wrapper(**kwargs: Any) -> str:
            """Fetch MCP resource with substituted parameters."""
            # Substitute template variables into URI
            resolved_uri = uri
            for var in template_vars:
                if var not in kwargs:
                    return json.dumps({"error": f"Missing required parameter: {var}"})
                resolved_uri = resolved_uri.replace(f"{{{var}}}", str(kwargs[var]))
            return await _fetch(resolved_uri)

        wrapper.__name__ = func_name
        wrapper.__doc__ = description
        # Add type hints for parameters
        wrapper.__annotations__ = {var: str for var in template_vars}
        wrapper.__annotations__['return'] = str

        # Annotations alone do not change what inspect.signature() reports
        # for a **kwargs function, so publish the named parameters explicitly.
        # inspect.Parameter rejects names that are not plain identifiers, so
        # fall back to the bare **kwargs signature for such templates.
        if all(v.isidentifier() and not keyword.iskeyword(v) for v in template_vars):
            wrapper.__signature__ = inspect.Signature(  # type: ignore[attr-defined]
                parameters=[
                    inspect.Parameter(v, inspect.Parameter.KEYWORD_ONLY, annotation=str)
                    for v in template_vars
                ],
                return_annotation=str,
            )
        else:
            logger.warning(
                f"Resource tool {func_name} has non-identifier template variables "
                f"{template_vars}; keeping generic **kwargs signature"
            )

        logger.info(f"Built parameterized resource tool: {func_name} (uri: {uri}, params: {template_vars})")
    else:
        # Concrete URI -> no-param tool
        async def wrapper(**kwargs: Any) -> str:
            """Fetch MCP resource and return contents."""
            if kwargs:
                logger.warning(f"Resource tool {func_name} called with unexpected kwargs: {list(kwargs.keys())}")
            return await _fetch(uri)

        wrapper.__name__ = func_name
        wrapper.__doc__ = description

        logger.info(f"Built resource tool: {func_name} (uri: {uri})")

    return Tool(wrapper)
|