remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +7 -5
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +84 -10
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +70 -22
- rem/api/mcp_router/server.py +8 -1
- rem/api/mcp_router/tools.py +80 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +277 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +123 -14
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +468 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +455 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/ask.py +15 -11
- rem/cli/commands/configure.py +169 -94
- rem/cli/commands/db.py +53 -7
- rem/cli/commands/experiments.py +278 -96
- rem/cli/commands/process.py +8 -7
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +9 -9
- rem/cli/main.py +10 -0
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +206 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +367 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +85 -16
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +20 -13
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +29 -10
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/session/compression.py +17 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +115 -17
- rem/sql/background_indexes.sql +10 -0
- rem/sql/migrations/001_install.sql +152 -2
- rem/sql/migrations/002_install_models.sql +580 -231
- rem/sql/migrations/003_seed_default_user.sql +48 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +273 -14
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Provider Model Registry.
|
|
3
|
+
|
|
4
|
+
Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
|
|
5
|
+
Used by the models API endpoint and for validating model requests.
|
|
6
|
+
|
|
7
|
+
Future: Models will be stored in database for dynamic management.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelInfo(BaseModel):
    """Information about a single model.

    One instance describes one entry of the model registry. Only ``id``,
    ``created`` and ``owned_by`` must be supplied; ``object`` is fixed to the
    literal ``"model"`` and the remaining fields default to ``None`` when the
    information is not recorded.
    """

    # Registry key in "provider:model" form, e.g. "openai:gpt-4.1".
    id: str = Field(description="Model ID in provider:model format")
    # Constant discriminator; the only accepted value is "model".
    object: Literal["model"] = "model"
    # Unix timestamp (seconds) of when the model became available.
    created: int = Field(description="Unix timestamp of model availability")
    # Provider name, e.g. "openai", "anthropic", "google", "cerebras".
    owned_by: str = Field(description="Provider name")
    # Optional human-readable summary of the model.
    description: str | None = Field(default=None, description="Model description")
    # Optional maximum context size, in tokens.
    context_window: int | None = Field(default=None, description="Max context tokens")
    # Optional maximum number of tokens the model may generate.
    max_output_tokens: int | None = Field(default=None, description="Max output tokens")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Model definitions with 2025 releases
|
|
27
|
+
# Using Unix timestamps for created dates (approximate release dates)
|
|
28
|
+
AVAILABLE_MODELS: list[ModelInfo] = [
    # NOTE: every `created` value is the Unix timestamp (seconds) of 00:00 UTC
    # on the date named in its trailing comment. Entries annotated with only a
    # month use a day inside that month. Keep value and comment in sync when
    # editing: a one-year slip is invisible to the eye (the 2024 and 2025
    # values for the same calendar day differ only by 31_536_000).
    # ==========================================================================
    # OpenAI Models (2025)
    # ==========================================================================
    # GPT-4.1 series (Released April 14, 2025)
    ModelInfo(
        id="openai:gpt-4.1",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-mini",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-nano",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    # GPT-4o (legacy but still supported)
    ModelInfo(
        id="openai:gpt-4o",
        created=1715558400,  # May 13, 2024
        owned_by="openai",
        description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    ModelInfo(
        id="openai:gpt-4o-mini",
        created=1721260800,  # July 18, 2024
        owned_by="openai",
        description="Cost-efficient smaller GPT-4o variant.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    # o1 reasoning models
    ModelInfo(
        id="openai:o1",
        created=1733961600,  # December 12, 2024
        owned_by="openai",
        description="Advanced reasoning model for complex problems. Extended thinking.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    ModelInfo(
        id="openai:o1-mini",
        created=1726099200,  # September 12, 2024
        owned_by="openai",
        description="Smaller reasoning model, fast for coding and math.",
        context_window=128000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="openai:o3-mini",
        created=1738195200,  # January 30, 2025
        owned_by="openai",
        description="Latest mini reasoning model with improved performance.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    # ==========================================================================
    # Anthropic Models (2025)
    # ==========================================================================
    # NOTE(review): dated snapshot IDs and token limits below are kept as
    # found; confirm them against the provider's published model list.
    # Claude 4.5 series (Latest - November 2025)
    ModelInfo(
        id="anthropic:claude-opus-4-5-20251124",
        created=1763942400,  # November 24, 2025
        owned_by="anthropic",
        description="Most capable Claude model. World-class coding with 'effort' parameter control.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5-20250929",
        created=1759104000,  # September 29, 2025
        owned_by="anthropic",
        description="Best balance of intelligence and speed. Excellent for coding and agents.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5-20251101",
        created=1761955200,  # November 1, 2025
        owned_by="anthropic",
        description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Claude 4 series
    ModelInfo(
        id="anthropic:claude-opus-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="World's best coding model. Sustained performance on complex agent workflows.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-opus-4-1-20250805",
        created=1754352000,  # August 5, 2025
        owned_by="anthropic",
        description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Aliases for convenience (timestamps mirror the dated snapshots above)
    ModelInfo(
        id="anthropic:claude-opus-4-5",
        created=1763942400,  # November 24, 2025
        owned_by="anthropic",
        description="Alias for latest Claude Opus 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5",
        created=1759104000,  # September 29, 2025
        owned_by="anthropic",
        description="Alias for latest Claude Sonnet 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5",
        created=1761955200,  # November 1, 2025
        owned_by="anthropic",
        description="Alias for latest Claude Haiku 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # ==========================================================================
    # Google Models (2025)
    # ==========================================================================
    # Gemini 3 (Latest)
    ModelInfo(
        id="google:gemini-3-pro",
        created=1761955200,  # November 2025
        owned_by="google",
        description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
        context_window=2000000,
        max_output_tokens=65536,
    ),
    # Gemini 2.5 series
    ModelInfo(
        id="google:gemini-2.5-pro",
        created=1759104000,  # September 2025
        owned_by="google",
        description="High-capability model with adaptive thinking. 1M context window.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash",
        created=1759104000,  # September 2025 (same as gemini-2.5-pro)
        owned_by="google",
        description="Fast and capable. Best for large-scale processing and agentic tasks.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash-lite",
        created=1759104000,  # September 2025 (same as gemini-2.5-pro)
        owned_by="google",
        description="Optimized for massive scale. Balances cost and performance.",
        context_window=1000000,
        max_output_tokens=32768,
    ),
    # Gemini 2.0
    ModelInfo(
        id="google:gemini-2.0-flash",
        created=1733875200,  # December 2024
        owned_by="google",
        description="Fast multimodal model with native tool use.",
        context_window=1000000,
        max_output_tokens=8192,
    ),
    # Gemma open models
    # NOTE(review): the two Gemma timestamps carry no stated release date and
    # resolve to 2024-09-29 and 2024-11-01 UTC; confirm the intended dates.
    ModelInfo(
        id="google:gemma-3",
        created=1727568000,
        owned_by="google",
        description="Open model with text/image input, 140+ languages, 128K context.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="google:gemma-3n",
        created=1730419200,
        owned_by="google",
        description="Efficient open model for low-resource devices. Multimodal input.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    # ==========================================================================
    # Cerebras Models (Ultra-fast inference)
    # ==========================================================================
    ModelInfo(
        id="cerebras:llama-3.3-70b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="cerebras:qwen-3-32b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
        context_window=32000,
        max_output_tokens=8192,
    ),
]
|
|
260
|
+
|
|
261
|
+
# IDs of every registered model, collected once at import time so that
# membership checks are set lookups instead of scans over AVAILABLE_MODELS.
ALLOWED_MODEL_IDS: set[str] = {entry.id for entry in AVAILABLE_MODELS}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def is_valid_model(model_id: str | None) -> bool:
    """Report whether ``model_id`` names a registered model.

    Args:
        model_id: Candidate ID in provider:model format, or None.

    Returns:
        True only when ``model_id`` is not None and is a member of
        ``ALLOWED_MODEL_IDS``; False otherwise.
    """
    return model_id is not None and model_id in ALLOWED_MODEL_IDS
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
    """
    Pick the requested model when it is registered, else the fallback.

    Args:
        model_id: The requested model ID (may be None or unknown)
        default_model: Value returned when ``model_id`` is rejected; it is
            returned as given and is not itself checked against the registry

    Returns:
        ``model_id`` if ``is_valid_model`` accepts it, otherwise ``default_model``
    """
    # Guard clause: a missing or unregistered ID falls back to the default.
    if model_id is None or not is_valid_model(model_id):
        return default_model
    return model_id
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_model_by_id(model_id: str) -> ModelInfo | None:
    """
    Look up a registry entry by its exact ID.

    Args:
        model_id: Model identifier in provider:model format

    Returns:
        The first entry of ``AVAILABLE_MODELS`` whose ``id`` equals
        ``model_id``, or None when no entry matches
    """
    matches = (entry for entry in AVAILABLE_MODELS if entry.id == model_id)
    return next(matches, None)
|
rem/agentic/providers/phoenix.py
CHANGED
|
@@ -128,15 +128,16 @@ def sanitize_tool_name(tool_name: str) -> str:
|
|
|
128
128
|
|
|
129
129
|
|
|
130
130
|
def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
|
|
131
|
-
"""Load evaluator schema
|
|
131
|
+
"""Load evaluator schema using centralized schema loader.
|
|
132
132
|
|
|
133
|
-
|
|
134
|
-
|
|
133
|
+
Uses the same unified search logic as agent schemas:
|
|
134
|
+
- "hello-world/default" → schemas/evaluators/hello-world/default.yaml
|
|
135
|
+
- "lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
|
|
136
|
+
- "rem-lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
|
|
135
137
|
|
|
136
138
|
Args:
|
|
137
|
-
evaluator_name: Evaluator name
|
|
138
|
-
e.g., "
|
|
139
|
-
"rem-lookup-correctness.yaml"
|
|
139
|
+
evaluator_name: Evaluator name or path
|
|
140
|
+
e.g., "hello-world/default", "lookup-correctness"
|
|
140
141
|
|
|
141
142
|
Returns:
|
|
142
143
|
Evaluator schema dictionary with keys:
|
|
@@ -150,43 +151,13 @@ def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
|
|
|
150
151
|
FileNotFoundError: If evaluator schema not found
|
|
151
152
|
|
|
152
153
|
Example:
|
|
153
|
-
>>> schema = load_evaluator_schema("
|
|
154
|
+
>>> schema = load_evaluator_schema("hello-world/default")
|
|
154
155
|
>>> print(schema["description"])
|
|
155
156
|
"""
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
#
|
|
159
|
-
|
|
160
|
-
rem_module_dir = Path(rem.__file__).parent # rem/src/rem
|
|
161
|
-
rem_package_root = rem_module_dir.parent.parent # rem/src/rem -> rem/src -> rem
|
|
162
|
-
schema_dir = rem_package_root / "schemas" / "evaluators"
|
|
163
|
-
|
|
164
|
-
# Try .yaml first (preferred format)
|
|
165
|
-
yaml_path = schema_dir / f"{evaluator_name}.yaml"
|
|
166
|
-
if yaml_path.exists():
|
|
167
|
-
logger.debug(f"Loading evaluator schema from {yaml_path}")
|
|
168
|
-
with open(yaml_path) as f:
|
|
169
|
-
return yaml.safe_load(f)
|
|
170
|
-
|
|
171
|
-
# Try .yml
|
|
172
|
-
yml_path = schema_dir / f"{evaluator_name}.yml"
|
|
173
|
-
if yml_path.exists():
|
|
174
|
-
logger.debug(f"Loading evaluator schema from {yml_path}")
|
|
175
|
-
with open(yml_path) as f:
|
|
176
|
-
return yaml.safe_load(f)
|
|
177
|
-
|
|
178
|
-
# Try .json
|
|
179
|
-
json_path = schema_dir / f"{evaluator_name}.json"
|
|
180
|
-
if json_path.exists():
|
|
181
|
-
logger.debug(f"Loading evaluator schema from {json_path}")
|
|
182
|
-
with open(json_path) as f:
|
|
183
|
-
return json.load(f)
|
|
184
|
-
|
|
185
|
-
raise FileNotFoundError(
|
|
186
|
-
f"Evaluator schema not found: {evaluator_name}\n"
|
|
187
|
-
f"Searched in: {schema_dir}\n"
|
|
188
|
-
f"Supported formats: .yaml, .yml, .json"
|
|
189
|
-
)
|
|
157
|
+
from ...utils.schema_loader import load_agent_schema
|
|
158
|
+
|
|
159
|
+
# Use centralized schema loader (searches evaluator paths too)
|
|
160
|
+
return load_agent_schema(evaluator_name)
|
|
190
161
|
|
|
191
162
|
|
|
192
163
|
# =============================================================================
|
|
@@ -338,6 +309,22 @@ def create_evaluator_from_schema(
|
|
|
338
309
|
# Already a dict
|
|
339
310
|
schema = evaluator_schema_path
|
|
340
311
|
|
|
312
|
+
# Extract model from schema's provider_configs if not explicitly provided
|
|
313
|
+
if model_name is None:
|
|
314
|
+
json_schema_extra = schema.get("json_schema_extra", {})
|
|
315
|
+
provider_configs = json_schema_extra.get("provider_configs", [])
|
|
316
|
+
if provider_configs:
|
|
317
|
+
# Use first provider config
|
|
318
|
+
first_provider = provider_configs[0]
|
|
319
|
+
provider_name = first_provider.get("provider_name", "openai")
|
|
320
|
+
schema_model_name = first_provider.get("model_name", "gpt-4o-mini")
|
|
321
|
+
# Format as "provider:model" if not OpenAI (OpenAI is default)
|
|
322
|
+
if provider_name == "openai":
|
|
323
|
+
model_name = schema_model_name
|
|
324
|
+
else:
|
|
325
|
+
model_name = f"{provider_name}:{schema_model_name}"
|
|
326
|
+
logger.debug(f"Using model from schema provider_configs: {model_name}")
|
|
327
|
+
|
|
341
328
|
# Create evaluator config
|
|
342
329
|
evaluator_config = create_phoenix_evaluator(
|
|
343
330
|
evaluator_schema=schema,
|
|
@@ -361,7 +348,8 @@ def create_evaluator_from_schema(
|
|
|
361
348
|
Returns:
|
|
362
349
|
Evaluation result with score, label, explanation
|
|
363
350
|
"""
|
|
364
|
-
|
|
351
|
+
input_preview = str(example.get('input', ''))[:100]
|
|
352
|
+
logger.debug(f"Evaluating example: {input_preview}...")
|
|
365
353
|
|
|
366
354
|
# Phoenix llm_classify() expects a flat dict with string values
|
|
367
355
|
# Build evaluation input by flattening nested dicts
|
|
@@ -393,6 +381,7 @@ def create_evaluator_from_schema(
|
|
|
393
381
|
|
|
394
382
|
try:
|
|
395
383
|
# Create single-row DataFrame for llm_classify
|
|
384
|
+
# Note: Phoenix's llm_classify requires pandas DataFrame (imported above)
|
|
396
385
|
df = pd.DataFrame([eval_input])
|
|
397
386
|
|
|
398
387
|
# Call Phoenix llm_classify
|
|
@@ -404,7 +393,7 @@ def create_evaluator_from_schema(
|
|
|
404
393
|
provide_explanation=True,
|
|
405
394
|
)
|
|
406
395
|
|
|
407
|
-
# Extract result
|
|
396
|
+
# Extract result (results_df is pandas DataFrame from Phoenix)
|
|
408
397
|
if not results_df.empty:
|
|
409
398
|
row = results_df.iloc[0]
|
|
410
399
|
label = row.get("label", "error")
|
|
@@ -303,6 +303,68 @@ def _prepare_schema_for_qwen(schema: dict[str, Any]) -> dict[str, Any]:
|
|
|
303
303
|
return schema_copy
|
|
304
304
|
|
|
305
305
|
|
|
306
|
+
def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
|
|
307
|
+
"""
|
|
308
|
+
Convert schema properties to prompt guidance text.
|
|
309
|
+
|
|
310
|
+
When structured_output is disabled, this converts the properties
|
|
311
|
+
definition into natural language guidance that informs the agent
|
|
312
|
+
about the expected response structure without forcing JSON output.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
properties: JSON Schema properties dict
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
Prompt text describing the expected response elements
|
|
319
|
+
|
|
320
|
+
Example:
|
|
321
|
+
properties = {
|
|
322
|
+
"answer": {"type": "string", "description": "The answer"},
|
|
323
|
+
"confidence": {"type": "number", "description": "Confidence 0-1"}
|
|
324
|
+
}
|
|
325
|
+
# Returns:
|
|
326
|
+
# "## Response Structure\n\nYour response should include:\n- **answer**: The answer\n..."
|
|
327
|
+
"""
|
|
328
|
+
if not properties:
|
|
329
|
+
return ""
|
|
330
|
+
|
|
331
|
+
lines = ["## Response Guidelines", "", "Your response should address the following elements:"]
|
|
332
|
+
|
|
333
|
+
for field_name, field_def in properties.items():
|
|
334
|
+
field_type = field_def.get("type", "any")
|
|
335
|
+
description = field_def.get("description", "")
|
|
336
|
+
|
|
337
|
+
# Format based on type
|
|
338
|
+
if field_type == "array":
|
|
339
|
+
type_hint = "list"
|
|
340
|
+
elif field_type == "number":
|
|
341
|
+
type_hint = "number"
|
|
342
|
+
# Include min/max if specified
|
|
343
|
+
if "minimum" in field_def or "maximum" in field_def:
|
|
344
|
+
min_val = field_def.get("minimum", "")
|
|
345
|
+
max_val = field_def.get("maximum", "")
|
|
346
|
+
if min_val != "" and max_val != "":
|
|
347
|
+
type_hint = f"number ({min_val}-{max_val})"
|
|
348
|
+
elif field_type == "boolean":
|
|
349
|
+
type_hint = "yes/no"
|
|
350
|
+
else:
|
|
351
|
+
type_hint = field_type
|
|
352
|
+
|
|
353
|
+
# Build field description
|
|
354
|
+
field_line = f"- **{field_name}**"
|
|
355
|
+
if type_hint and type_hint != "string":
|
|
356
|
+
field_line += f" ({type_hint})"
|
|
357
|
+
if description:
|
|
358
|
+
field_line += f": {description}"
|
|
359
|
+
|
|
360
|
+
lines.append(field_line)
|
|
361
|
+
|
|
362
|
+
lines.append("")
|
|
363
|
+
lines.append("Respond naturally in prose, addressing these elements where relevant.")
|
|
364
|
+
|
|
365
|
+
return "\n".join(lines)
|
|
366
|
+
|
|
367
|
+
|
|
306
368
|
def _create_schema_wrapper(
|
|
307
369
|
result_type: type[BaseModel], strip_description: bool = True
|
|
308
370
|
) -> type[BaseModel]:
|
|
@@ -462,10 +524,11 @@ async def create_agent(
|
|
|
462
524
|
# agent_schema = load_agent_schema(context.agent_schema_uri)
|
|
463
525
|
pass
|
|
464
526
|
|
|
465
|
-
# Determine model: override
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
527
|
+
# Determine model: validate override against allowed list, fallback to context or settings
|
|
528
|
+
from rem.agentic.llm_provider_models import get_valid_model_or_default
|
|
529
|
+
|
|
530
|
+
default_model = context.default_model if context else settings.llm.default_model
|
|
531
|
+
model = get_valid_model_or_default(model_override, default_model)
|
|
469
532
|
|
|
470
533
|
# Extract schema fields
|
|
471
534
|
system_prompt = agent_schema.get("description", "") if agent_schema else ""
|
|
@@ -526,14 +589,25 @@ async def create_agent(
|
|
|
526
589
|
# TODO: Convert resources to tools (MCP convenience syntax)
|
|
527
590
|
pass
|
|
528
591
|
|
|
592
|
+
# Check if structured output is disabled for this schema
|
|
593
|
+
# When structured_output: false, properties become part of prompt instead of output_type
|
|
594
|
+
use_structured_output = metadata.get("structured_output", True)
|
|
595
|
+
|
|
529
596
|
# Create dynamic result_type from schema if not provided
|
|
530
597
|
if result_type is None and agent_schema and "properties" in agent_schema:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
598
|
+
if use_structured_output:
|
|
599
|
+
# Pre-process schema for Qwen compatibility (strips min/max, sets additionalProperties=False)
|
|
600
|
+
# This ensures the generated Pydantic model doesn't have incompatible constraints
|
|
601
|
+
sanitized_schema = _prepare_schema_for_qwen(agent_schema)
|
|
602
|
+
result_type = _create_model_from_schema(sanitized_schema)
|
|
603
|
+
logger.debug(f"Created dynamic Pydantic model: {result_type.__name__}")
|
|
604
|
+
else:
|
|
605
|
+
# Convert properties to prompt guidance instead of structured output
|
|
606
|
+
# This informs the agent about expected response structure without forcing it
|
|
607
|
+
properties_prompt = _convert_properties_to_prompt(agent_schema.get("properties", {}))
|
|
608
|
+
if properties_prompt:
|
|
609
|
+
system_prompt = system_prompt + "\n\n" + properties_prompt
|
|
610
|
+
logger.debug("Structured output disabled - properties converted to prompt guidance")
|
|
537
611
|
|
|
538
612
|
# Create agent with optional output_type for structured output and tools
|
|
539
613
|
if result_type:
|