remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (82) hide show
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,301 @@
1
+ """
2
+ LLM Provider Model Registry.
3
+
4
+ Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
5
+ Used by the models API endpoint and for validating model requests.
6
+
7
+ Future: Models will be stored in database for dynamic management.
8
+ """
9
+
10
+ from pydantic import BaseModel, Field
11
+ from typing import Literal
12
+
13
+
14
class ModelInfo(BaseModel):
    """Metadata record describing one LLM exposed by this registry."""

    # Identity fields: every entry must supply id, created and owned_by.
    id: str = Field(..., description="Model ID in provider:model format")
    # Fixed discriminator; always the literal string "model".
    object: Literal["model"] = "model"
    created: int = Field(..., description="Unix timestamp of model availability")
    owned_by: str = Field(..., description="Provider name")

    # Optional descriptive fields; None when the value is not recorded.
    description: str | None = Field(None, description="Model description")
    context_window: int | None = Field(None, description="Max context tokens")
    max_output_tokens: int | None = Field(None, description="Max output tokens")
24
+
25
+
26
+ # Model definitions with 2025 releases
27
+ # Using Unix timestamps for created dates (approximate release dates)
28
# Static catalogue of selectable models, grouped by provider.
# `created` is a Unix timestamp (seconds, UTC) for the approximate release date
# named in the adjacent comment; entries that share a date share the stamp.
AVAILABLE_MODELS: list[ModelInfo] = [
    # ==========================================================================
    # OpenAI Models (2025)
    # ==========================================================================
    # GPT-4.1 series (Released April 14, 2025)
    ModelInfo(
        id="openai:gpt-4.1",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-mini",
        created=1744588800,
        owned_by="openai",
        description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-nano",
        created=1744588800,
        owned_by="openai",
        description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    # GPT-4o (legacy but still supported)
    ModelInfo(
        id="openai:gpt-4o",
        created=1715644800,  # May 13, 2024
        owned_by="openai",
        description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    ModelInfo(
        id="openai:gpt-4o-mini",
        created=1721347200,  # July 18, 2024
        owned_by="openai",
        description="Cost-efficient smaller GPT-4o variant.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    # o1 reasoning models
    ModelInfo(
        id="openai:o1",
        created=1733961600,  # December 12, 2024
        owned_by="openai",
        description="Advanced reasoning model for complex problems. Extended thinking.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    ModelInfo(
        id="openai:o1-mini",
        created=1726099200,  # September 12, 2024
        owned_by="openai",
        description="Smaller reasoning model, fast for coding and math.",
        context_window=128000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="openai:o3-mini",
        created=1738195200,  # January 30, 2025
        owned_by="openai",
        description="Latest mini reasoning model with improved performance.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    # ==========================================================================
    # Anthropic Models (2025)
    # ==========================================================================
    # Claude 4.5 series (Latest - November 2025)
    ModelInfo(
        id="anthropic:claude-opus-4-5-20251124",
        created=1763942400,  # November 24, 2025
        owned_by="anthropic",
        description="Most capable Claude model. World-class coding with 'effort' parameter control.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5-20250929",
        created=1759104000,  # September 29, 2025
        owned_by="anthropic",
        description="Best balance of intelligence and speed. Excellent for coding and agents.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5-20251101",
        created=1761955200,  # November 1, 2025
        owned_by="anthropic",
        description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Claude 4 series
    ModelInfo(
        id="anthropic:claude-opus-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="World's best coding model. Sustained performance on complex agent workflows.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-20250514",
        created=1747180800,  # May 14, 2025
        owned_by="anthropic",
        description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-opus-4-1-20250805",
        created=1754352000,  # August 5, 2025
        owned_by="anthropic",
        description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Aliases for convenience (same stamps as the dated IDs they point at)
    ModelInfo(
        id="anthropic:claude-opus-4-5",
        created=1763942400,
        owned_by="anthropic",
        description="Alias for latest Claude Opus 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5",
        created=1759104000,
        owned_by="anthropic",
        description="Alias for latest Claude Sonnet 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5",
        created=1761955200,
        owned_by="anthropic",
        description="Alias for latest Claude Haiku 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # ==========================================================================
    # Google Models (2025)
    # ==========================================================================
    # Gemini 3 (Latest)
    ModelInfo(
        id="google:gemini-3-pro",
        created=1761955200,  # November 2025
        owned_by="google",
        description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
        context_window=2000000,
        max_output_tokens=65536,
    ),
    # Gemini 2.5 series
    ModelInfo(
        id="google:gemini-2.5-pro",
        created=1759104000,  # September 2025
        owned_by="google",
        description="High-capability model with adaptive thinking. 1M context window.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash",
        created=1759104000,
        owned_by="google",
        description="Fast and capable. Best for large-scale processing and agentic tasks.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash-lite",
        created=1759104000,
        owned_by="google",
        description="Optimized for massive scale. Balances cost and performance.",
        context_window=1000000,
        max_output_tokens=32768,
    ),
    # Gemini 2.0
    ModelInfo(
        id="google:gemini-2.0-flash",
        created=1733875200,  # December 2024
        owned_by="google",
        description="Fast multimodal model with native tool use.",
        context_window=1000000,
        max_output_tokens=8192,
    ),
    # Gemma open models
    # NOTE(review): the two Gemma stamps below decode to late 2024 and carry no
    # stated release date - confirm the intended dates before relying on them.
    ModelInfo(
        id="google:gemma-3",
        created=1727568000,
        owned_by="google",
        description="Open model with text/image input, 140+ languages, 128K context.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="google:gemma-3n",
        created=1730419200,
        owned_by="google",
        description="Efficient open model for low-resource devices. Multimodal input.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    # ==========================================================================
    # Cerebras Models (Ultra-fast inference)
    # ==========================================================================
    ModelInfo(
        id="cerebras:llama-3.3-70b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="cerebras:qwen-3-32b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
        context_window=32000,
        max_output_tokens=8192,
    ),
]

# Set of valid model IDs for fast O(1) lookup
ALLOWED_MODEL_IDS: set[str] = {model.id for model in AVAILABLE_MODELS}
263
+
264
+
265
+ def is_valid_model(model_id: str | None) -> bool:
266
+ """Check if a model ID is in the allowed list."""
267
+ if model_id is None:
268
+ return False
269
+ return model_id in ALLOWED_MODEL_IDS
270
+
271
+
272
def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
    """
    Pick the requested model when it is allowed, else the fallback.

    Args:
        model_id: The requested model ID (may be None or not in the allowed list)
        default_model: Model ID returned when the request is rejected

    Returns:
        model_id if is_valid_model accepts it, otherwise default_model
    """
    # The explicit None guard narrows the type, so the return needs no cast.
    if model_id is not None and is_valid_model(model_id):
        return model_id
    return default_model
286
+
287
+
288
def get_model_by_id(model_id: str) -> ModelInfo | None:
    """
    Look up a registry entry by its identifier.

    Args:
        model_id: Model identifier in provider:model format

    Returns:
        The first entry in AVAILABLE_MODELS whose id equals model_id,
        or None when no entry matches
    """
    matches = (entry for entry in AVAILABLE_MODELS if entry.id == model_id)
    return next(matches, None)
@@ -128,15 +128,16 @@ def sanitize_tool_name(tool_name: str) -> str:
128
128
 
129
129
 
130
130
  def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
131
- """Load evaluator schema from schemas/evaluators/ directory.
131
+ """Load evaluator schema using centralized schema loader.
132
132
 
133
- Searches for evaluator schema in rem/schemas/evaluators/
134
- Supports .json, .yaml, and .yml files.
133
+ Uses the same unified search logic as agent schemas:
134
+ - "hello-world/default" → schemas/evaluators/hello-world/default.yaml
135
+ - "lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
136
+ - "rem-lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
135
137
 
136
138
  Args:
137
- evaluator_name: Evaluator name (with or without extension)
138
- e.g., "rem-lookup-correctness" or
139
- "rem-lookup-correctness.yaml"
139
+ evaluator_name: Evaluator name or path
140
+ e.g., "hello-world/default", "lookup-correctness"
140
141
 
141
142
  Returns:
142
143
  Evaluator schema dictionary with keys:
@@ -150,43 +151,13 @@ def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
150
151
  FileNotFoundError: If evaluator schema not found
151
152
 
152
153
  Example:
153
- >>> schema = load_evaluator_schema("rem-lookup-correctness")
154
+ >>> schema = load_evaluator_schema("hello-world/default")
154
155
  >>> print(schema["description"])
155
156
  """
156
- # Get schemas directory (rem/schemas/evaluators/)
157
- # rem.__file__ = rem/src/rem/__init__.py
158
- # We need rem/schemas/evaluators/
159
- import rem
160
- rem_module_dir = Path(rem.__file__).parent # rem/src/rem
161
- rem_package_root = rem_module_dir.parent.parent # rem/src/rem -> rem/src -> rem
162
- schema_dir = rem_package_root / "schemas" / "evaluators"
163
-
164
- # Try .yaml first (preferred format)
165
- yaml_path = schema_dir / f"{evaluator_name}.yaml"
166
- if yaml_path.exists():
167
- logger.debug(f"Loading evaluator schema from {yaml_path}")
168
- with open(yaml_path) as f:
169
- return yaml.safe_load(f)
170
-
171
- # Try .yml
172
- yml_path = schema_dir / f"{evaluator_name}.yml"
173
- if yml_path.exists():
174
- logger.debug(f"Loading evaluator schema from {yml_path}")
175
- with open(yml_path) as f:
176
- return yaml.safe_load(f)
177
-
178
- # Try .json
179
- json_path = schema_dir / f"{evaluator_name}.json"
180
- if json_path.exists():
181
- logger.debug(f"Loading evaluator schema from {json_path}")
182
- with open(json_path) as f:
183
- return json.load(f)
184
-
185
- raise FileNotFoundError(
186
- f"Evaluator schema not found: {evaluator_name}\n"
187
- f"Searched in: {schema_dir}\n"
188
- f"Supported formats: .yaml, .yml, .json"
189
- )
157
+ from ...utils.schema_loader import load_agent_schema
158
+
159
+ # Use centralized schema loader (searches evaluator paths too)
160
+ return load_agent_schema(evaluator_name)
190
161
 
191
162
 
192
163
  # =============================================================================
@@ -338,6 +309,22 @@ def create_evaluator_from_schema(
338
309
  # Already a dict
339
310
  schema = evaluator_schema_path
340
311
 
312
+ # Extract model from schema's provider_configs if not explicitly provided
313
+ if model_name is None:
314
+ json_schema_extra = schema.get("json_schema_extra", {})
315
+ provider_configs = json_schema_extra.get("provider_configs", [])
316
+ if provider_configs:
317
+ # Use first provider config
318
+ first_provider = provider_configs[0]
319
+ provider_name = first_provider.get("provider_name", "openai")
320
+ schema_model_name = first_provider.get("model_name", "gpt-4o-mini")
321
+ # Format as "provider:model" if not OpenAI (OpenAI is default)
322
+ if provider_name == "openai":
323
+ model_name = schema_model_name
324
+ else:
325
+ model_name = f"{provider_name}:{schema_model_name}"
326
+ logger.debug(f"Using model from schema provider_configs: {model_name}")
327
+
341
328
  # Create evaluator config
342
329
  evaluator_config = create_phoenix_evaluator(
343
330
  evaluator_schema=schema,
@@ -361,7 +348,8 @@ def create_evaluator_from_schema(
361
348
  Returns:
362
349
  Evaluation result with score, label, explanation
363
350
  """
364
- logger.debug(f"Evaluating example: {example.get('input', '')[:100]}...")
351
+ input_preview = str(example.get('input', ''))[:100]
352
+ logger.debug(f"Evaluating example: {input_preview}...")
365
353
 
366
354
  # Phoenix llm_classify() expects a flat dict with string values
367
355
  # Build evaluation input by flattening nested dicts
@@ -393,6 +381,7 @@ def create_evaluator_from_schema(
393
381
 
394
382
  try:
395
383
  # Create single-row DataFrame for llm_classify
384
+ # Note: Phoenix's llm_classify requires pandas DataFrame (imported above)
396
385
  df = pd.DataFrame([eval_input])
397
386
 
398
387
  # Call Phoenix llm_classify
@@ -404,7 +393,7 @@ def create_evaluator_from_schema(
404
393
  provide_explanation=True,
405
394
  )
406
395
 
407
- # Extract result
396
+ # Extract result (results_df is pandas DataFrame from Phoenix)
408
397
  if not results_df.empty:
409
398
  row = results_df.iloc[0]
410
399
  label = row.get("label", "error")
@@ -303,6 +303,68 @@ def _prepare_schema_for_qwen(schema: dict[str, Any]) -> dict[str, Any]:
303
303
  return schema_copy
304
304
 
305
305
 
306
+ def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
307
+ """
308
+ Convert schema properties to prompt guidance text.
309
+
310
+ When structured_output is disabled, this converts the properties
311
+ definition into natural language guidance that informs the agent
312
+ about the expected response structure without forcing JSON output.
313
+
314
+ Args:
315
+ properties: JSON Schema properties dict
316
+
317
+ Returns:
318
+ Prompt text describing the expected response elements
319
+
320
+ Example:
321
+ properties = {
322
+ "answer": {"type": "string", "description": "The answer"},
323
+ "confidence": {"type": "number", "description": "Confidence 0-1"}
324
+ }
325
+ # Returns:
326
+ # "## Response Structure\n\nYour response should include:\n- **answer**: The answer\n..."
327
+ """
328
+ if not properties:
329
+ return ""
330
+
331
+ lines = ["## Response Guidelines", "", "Your response should address the following elements:"]
332
+
333
+ for field_name, field_def in properties.items():
334
+ field_type = field_def.get("type", "any")
335
+ description = field_def.get("description", "")
336
+
337
+ # Format based on type
338
+ if field_type == "array":
339
+ type_hint = "list"
340
+ elif field_type == "number":
341
+ type_hint = "number"
342
+ # Include min/max if specified
343
+ if "minimum" in field_def or "maximum" in field_def:
344
+ min_val = field_def.get("minimum", "")
345
+ max_val = field_def.get("maximum", "")
346
+ if min_val != "" and max_val != "":
347
+ type_hint = f"number ({min_val}-{max_val})"
348
+ elif field_type == "boolean":
349
+ type_hint = "yes/no"
350
+ else:
351
+ type_hint = field_type
352
+
353
+ # Build field description
354
+ field_line = f"- **{field_name}**"
355
+ if type_hint and type_hint != "string":
356
+ field_line += f" ({type_hint})"
357
+ if description:
358
+ field_line += f": {description}"
359
+
360
+ lines.append(field_line)
361
+
362
+ lines.append("")
363
+ lines.append("Respond naturally in prose, addressing these elements where relevant.")
364
+
365
+ return "\n".join(lines)
366
+
367
+
306
368
  def _create_schema_wrapper(
307
369
  result_type: type[BaseModel], strip_description: bool = True
308
370
  ) -> type[BaseModel]:
@@ -462,10 +524,11 @@ async def create_agent(
462
524
  # agent_schema = load_agent_schema(context.agent_schema_uri)
463
525
  pass
464
526
 
465
- # Determine model: override > context.default_model > settings
466
- model = (
467
- model_override or (context.default_model if context else settings.llm.default_model)
468
- )
527
+ # Determine model: validate override against allowed list, fallback to context or settings
528
+ from rem.agentic.llm_provider_models import get_valid_model_or_default
529
+
530
+ default_model = context.default_model if context else settings.llm.default_model
531
+ model = get_valid_model_or_default(model_override, default_model)
469
532
 
470
533
  # Extract schema fields
471
534
  system_prompt = agent_schema.get("description", "") if agent_schema else ""
@@ -526,14 +589,25 @@ async def create_agent(
526
589
  # TODO: Convert resources to tools (MCP convenience syntax)
527
590
  pass
528
591
 
592
+ # Check if structured output is disabled for this schema
593
+ # When structured_output: false, properties become part of prompt instead of output_type
594
+ use_structured_output = metadata.get("structured_output", True)
595
+
529
596
  # Create dynamic result_type from schema if not provided
530
597
  if result_type is None and agent_schema and "properties" in agent_schema:
531
- # Pre-process schema for Qwen compatibility (strips min/max, sets additionalProperties=False)
532
- # This ensures the generated Pydantic model doesn't have incompatible constraints
533
- sanitized_schema = _prepare_schema_for_qwen(agent_schema)
534
- result_type = _create_model_from_schema(sanitized_schema)
535
- logger.debug(f"Created dynamic Pydantic model: {result_type.__name__}")
536
- logger.debug(f"Created dynamic Pydantic model: {result_type.__name__}")
598
+ if use_structured_output:
599
+ # Pre-process schema for Qwen compatibility (strips min/max, sets additionalProperties=False)
600
+ # This ensures the generated Pydantic model doesn't have incompatible constraints
601
+ sanitized_schema = _prepare_schema_for_qwen(agent_schema)
602
+ result_type = _create_model_from_schema(sanitized_schema)
603
+ logger.debug(f"Created dynamic Pydantic model: {result_type.__name__}")
604
+ else:
605
+ # Convert properties to prompt guidance instead of structured output
606
+ # This informs the agent about expected response structure without forcing it
607
+ properties_prompt = _convert_properties_to_prompt(agent_schema.get("properties", {}))
608
+ if properties_prompt:
609
+ system_prompt = system_prompt + "\n\n" + properties_prompt
610
+ logger.debug("Structured output disabled - properties converted to prompt guidance")
537
611
 
538
612
  # Create agent with optional output_type for structured output and tools
539
613
  if result_type: