remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

Files changed (98)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/otel/setup.py +92 -4
  9. rem/agentic/providers/phoenix.py +32 -43
  10. rem/agentic/providers/pydantic_ai.py +142 -22
  11. rem/agentic/schema.py +358 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +238 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +151 -37
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +17 -2
  18. rem/api/mcp_router/tools.py +143 -7
  19. rem/api/middleware/tracking.py +172 -0
  20. rem/api/routers/admin.py +277 -0
  21. rem/api/routers/auth.py +124 -0
  22. rem/api/routers/chat/completions.py +152 -16
  23. rem/api/routers/chat/models.py +7 -3
  24. rem/api/routers/chat/sse_events.py +526 -0
  25. rem/api/routers/chat/streaming.py +608 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +148 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +357 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +201 -70
  34. rem/cli/commands/ask.py +13 -10
  35. rem/cli/commands/cluster.py +1359 -0
  36. rem/cli/commands/configure.py +4 -3
  37. rem/cli/commands/db.py +350 -137
  38. rem/cli/commands/experiments.py +76 -72
  39. rem/cli/commands/process.py +22 -15
  40. rem/cli/commands/scaffold.py +47 -0
  41. rem/cli/commands/schema.py +95 -49
  42. rem/cli/main.py +29 -6
  43. rem/config.py +2 -2
  44. rem/models/core/core_model.py +7 -1
  45. rem/models/core/rem_query.py +5 -2
  46. rem/models/entities/__init__.py +21 -0
  47. rem/models/entities/domain_resource.py +38 -0
  48. rem/models/entities/feedback.py +123 -0
  49. rem/models/entities/message.py +30 -1
  50. rem/models/entities/session.py +83 -0
  51. rem/models/entities/shared_session.py +180 -0
  52. rem/models/entities/user.py +10 -3
  53. rem/registry.py +373 -0
  54. rem/schemas/agents/rem.yaml +7 -3
  55. rem/services/content/providers.py +94 -140
  56. rem/services/content/service.py +92 -20
  57. rem/services/dreaming/affinity_service.py +2 -16
  58. rem/services/dreaming/moment_service.py +2 -15
  59. rem/services/embeddings/api.py +24 -17
  60. rem/services/embeddings/worker.py +16 -16
  61. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  62. rem/services/phoenix/client.py +252 -19
  63. rem/services/postgres/README.md +159 -15
  64. rem/services/postgres/__init__.py +2 -1
  65. rem/services/postgres/diff_service.py +426 -0
  66. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  67. rem/services/postgres/repository.py +132 -0
  68. rem/services/postgres/schema_generator.py +86 -5
  69. rem/services/postgres/service.py +6 -6
  70. rem/services/rate_limit.py +113 -0
  71. rem/services/rem/README.md +14 -0
  72. rem/services/rem/parser.py +44 -9
  73. rem/services/rem/service.py +36 -2
  74. rem/services/session/compression.py +17 -1
  75. rem/services/session/reload.py +1 -1
  76. rem/services/user_service.py +98 -0
  77. rem/settings.py +169 -17
  78. rem/sql/background_indexes.sql +21 -16
  79. rem/sql/migrations/001_install.sql +231 -54
  80. rem/sql/migrations/002_install_models.sql +457 -393
  81. rem/sql/migrations/003_optional_extensions.sql +326 -0
  82. rem/utils/constants.py +97 -0
  83. rem/utils/date_utils.py +228 -0
  84. rem/utils/embeddings.py +17 -4
  85. rem/utils/files.py +167 -0
  86. rem/utils/mime_types.py +158 -0
  87. rem/utils/model_helpers.py +156 -1
  88. rem/utils/schema_loader.py +191 -35
  89. rem/utils/sql_types.py +3 -1
  90. rem/utils/vision.py +9 -14
  91. rem/workers/README.md +14 -14
  92. rem/workers/db_maintainer.py +74 -0
  93. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
  94. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
  95. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
  96. rem/sql/002_install_models.sql +0 -1068
  97. rem/sql/install_models.sql +0 -1038
  98. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
rem/agentic/llm_provider_models.py ADDED
@@ -0,0 +1,301 @@
+ """
+ LLM Provider Model Registry.
+
+ Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
+ Used by the models API endpoint and for validating model requests.
+
+ Future: Models will be stored in database for dynamic management.
+ """
+
+ from pydantic import BaseModel, Field
+ from typing import Literal
+
+
+ class ModelInfo(BaseModel):
+     """Information about a single model."""
+
+     id: str = Field(description="Model ID in provider:model format")
+     object: Literal["model"] = "model"
+     created: int = Field(description="Unix timestamp of model availability")
+     owned_by: str = Field(description="Provider name")
+     description: str | None = Field(default=None, description="Model description")
+     context_window: int | None = Field(default=None, description="Max context tokens")
+     max_output_tokens: int | None = Field(default=None, description="Max output tokens")
+
+
+ # Model definitions with 2025 releases
+ # Using Unix timestamps for created dates (approximate release dates)
+ AVAILABLE_MODELS: list[ModelInfo] = [
+     # ==========================================================================
+     # OpenAI Models (2025)
+     # ==========================================================================
+     # GPT-4.1 series (Released April 14, 2025)
+     ModelInfo(
+         id="openai:gpt-4.1",
+         created=1744588800,  # April 14, 2025
+         owned_by="openai",
+         description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
+         context_window=1047576,
+         max_output_tokens=32768,
+     ),
+     ModelInfo(
+         id="openai:gpt-4.1-mini",
+         created=1744588800,
+         owned_by="openai",
+         description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
+         context_window=1047576,
+         max_output_tokens=32768,
+     ),
+     ModelInfo(
+         id="openai:gpt-4.1-nano",
+         created=1744588800,
+         owned_by="openai",
+         description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
+         context_window=1047576,
+         max_output_tokens=32768,
+     ),
+     # GPT-4o (legacy but still supported)
+     ModelInfo(
+         id="openai:gpt-4o",
+         created=1715644800,  # May 13, 2024
+         owned_by="openai",
+         description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
+         context_window=128000,
+         max_output_tokens=16384,
+     ),
+     ModelInfo(
+         id="openai:gpt-4o-mini",
+         created=1721347200,  # July 18, 2024
+         owned_by="openai",
+         description="Cost-efficient smaller GPT-4o variant.",
+         context_window=128000,
+         max_output_tokens=16384,
+     ),
+     # o1 reasoning models
+     ModelInfo(
+         id="openai:o1",
+         created=1733961600,  # December 12, 2024
+         owned_by="openai",
+         description="Advanced reasoning model for complex problems. Extended thinking.",
+         context_window=200000,
+         max_output_tokens=100000,
+     ),
+     ModelInfo(
+         id="openai:o1-mini",
+         created=1726099200,  # September 12, 2024
+         owned_by="openai",
+         description="Smaller reasoning model, fast for coding and math.",
+         context_window=128000,
+         max_output_tokens=65536,
+     ),
+     ModelInfo(
+         id="openai:o3-mini",
+         created=1738195200,  # January 30, 2025
+         owned_by="openai",
+         description="Latest mini reasoning model with improved performance.",
+         context_window=200000,
+         max_output_tokens=100000,
+     ),
+     # ==========================================================================
+     # Anthropic Models (2025)
+     # ==========================================================================
+     # Claude 4.5 series (Latest - November 2025)
+     ModelInfo(
+         id="anthropic:claude-opus-4-5-20251124",
+         created=1732406400,  # November 24, 2025
+         owned_by="anthropic",
+         description="Most capable Claude model. World-class coding with 'effort' parameter control.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-sonnet-4-5-20250929",
+         created=1727568000,  # September 29, 2025
+         owned_by="anthropic",
+         description="Best balance of intelligence and speed. Excellent for coding and agents.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-haiku-4-5-20251101",
+         created=1730419200,  # November 1, 2025
+         owned_by="anthropic",
+         description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     # Claude 4 series
+     ModelInfo(
+         id="anthropic:claude-opus-4-20250514",
+         created=1715644800,  # May 14, 2025
+         owned_by="anthropic",
+         description="World's best coding model. Sustained performance on complex agent workflows.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-sonnet-4-20250514",
+         created=1715644800,  # May 14, 2025
+         owned_by="anthropic",
+         description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-opus-4-1-20250805",
+         created=1722816000,  # August 5, 2025
+         owned_by="anthropic",
+         description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     # Aliases for convenience
+     ModelInfo(
+         id="anthropic:claude-opus-4-5",
+         created=1732406400,
+         owned_by="anthropic",
+         description="Alias for latest Claude Opus 4.5",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-sonnet-4-5",
+         created=1727568000,
+         owned_by="anthropic",
+         description="Alias for latest Claude Sonnet 4.5",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     ModelInfo(
+         id="anthropic:claude-haiku-4-5",
+         created=1730419200,
+         owned_by="anthropic",
+         description="Alias for latest Claude Haiku 4.5",
+         context_window=200000,
+         max_output_tokens=128000,
+     ),
+     # ==========================================================================
+     # Google Models (2025)
+     # ==========================================================================
+     # Gemini 3 (Latest)
+     ModelInfo(
+         id="google:gemini-3-pro",
+         created=1730419200,  # November 2025
+         owned_by="google",
+         description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
+         context_window=2000000,
+         max_output_tokens=65536,
+     ),
+     # Gemini 2.5 series
+     ModelInfo(
+         id="google:gemini-2.5-pro",
+         created=1727568000,  # September 2025
+         owned_by="google",
+         description="High-capability model with adaptive thinking. 1M context window.",
+         context_window=1000000,
+         max_output_tokens=65536,
+     ),
+     ModelInfo(
+         id="google:gemini-2.5-flash",
+         created=1727568000,
+         owned_by="google",
+         description="Fast and capable. Best for large-scale processing and agentic tasks.",
+         context_window=1000000,
+         max_output_tokens=65536,
+     ),
+     ModelInfo(
+         id="google:gemini-2.5-flash-lite",
+         created=1727568000,
+         owned_by="google",
+         description="Optimized for massive scale. Balances cost and performance.",
+         context_window=1000000,
+         max_output_tokens=32768,
+     ),
+     # Gemini 2.0
+     ModelInfo(
+         id="google:gemini-2.0-flash",
+         created=1733875200,  # December 2024
+         owned_by="google",
+         description="Fast multimodal model with native tool use.",
+         context_window=1000000,
+         max_output_tokens=8192,
+     ),
+     # Gemma open models
+     ModelInfo(
+         id="google:gemma-3",
+         created=1727568000,
+         owned_by="google",
+         description="Open model with text/image input, 140+ languages, 128K context.",
+         context_window=128000,
+         max_output_tokens=8192,
+     ),
+     ModelInfo(
+         id="google:gemma-3n",
+         created=1730419200,
+         owned_by="google",
+         description="Efficient open model for low-resource devices. Multimodal input.",
+         context_window=128000,
+         max_output_tokens=8192,
+     ),
+     # ==========================================================================
+     # Cerebras Models (Ultra-fast inference)
+     # ==========================================================================
+     ModelInfo(
+         id="cerebras:llama-3.3-70b",
+         created=1733875200,  # December 2024
+         owned_by="cerebras",
+         description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
+         context_window=128000,
+         max_output_tokens=8192,
+     ),
+     ModelInfo(
+         id="cerebras:qwen-3-32b",
+         created=1733875200,  # December 2024
+         owned_by="cerebras",
+         description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
+         context_window=32000,
+         max_output_tokens=8192,
+     ),
+ ]
+
+ # Set of valid model IDs for fast O(1) lookup
+ ALLOWED_MODEL_IDS: set[str] = {model.id for model in AVAILABLE_MODELS}
+
+
+ def is_valid_model(model_id: str | None) -> bool:
+     """Check if a model ID is in the allowed list."""
+     if model_id is None:
+         return False
+     return model_id in ALLOWED_MODEL_IDS
+
+
+ def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
+     """
+     Return the model_id if it's valid, otherwise return the default.
+
+     Args:
+         model_id: The requested model ID (may be None or invalid)
+         default_model: Fallback model from settings
+
+     Returns:
+         Valid model ID to use
+     """
+     if is_valid_model(model_id):
+         return model_id  # type: ignore[return-value]
+     return default_model
+
+
+ def get_model_by_id(model_id: str) -> ModelInfo | None:
+     """
+     Get model info by ID.
+
+     Args:
+         model_id: Model identifier in provider:model format
+
+     Returns:
+         ModelInfo if found, None otherwise
+     """
+     for model in AVAILABLE_MODELS:
+         if model.id == model_id:
+             return model
+     return None
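The registry's whole validation surface is `AVAILABLE_MODELS`, the `ALLOWED_MODEL_IDS` set, and the three helpers above. A minimal caller-side sketch against this new module (the import path mirrors the file's location in the wheel; the request and default values are illustrative):

```python
# Hypothetical usage sketch; import path follows rem/agentic/llm_provider_models.py.
from rem.agentic.llm_provider_models import (
    get_model_by_id,
    get_valid_model_or_default,
    is_valid_model,
)

# Membership checks fail closed on unknown or missing IDs.
assert is_valid_model("openai:gpt-4.1")
assert not is_valid_model("openai:gpt-5")  # not in AVAILABLE_MODELS
assert not is_valid_model(None)

# Request handlers can therefore fall back to a configured default.
model_id = get_valid_model_or_default("not:a-model", default_model="openai:gpt-4.1-mini")
print(model_id)  # -> openai:gpt-4.1-mini

# Full metadata lookup, e.g. for the models API endpoint.
info = get_model_by_id("anthropic:claude-sonnet-4-5")
if info is not None:
    print(info.context_window, info.max_output_tokens)  # -> 200000 128000
```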
rem/agentic/otel/setup.py CHANGED
@@ -14,6 +14,7 @@ from loguru import logger
 
 from ...settings import settings
 
+
 # Global flag to track if instrumentation is initialized
 _instrumentation_initialized = False
 
@@ -52,12 +53,94 @@ def setup_instrumentation() -> None:
 
     try:
         from opentelemetry import trace
-        from opentelemetry.sdk.trace import TracerProvider
-        from opentelemetry.sdk.trace.export import BatchSpanProcessor
+        from opentelemetry.sdk.trace import TracerProvider, ReadableSpan
+        from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult
         from opentelemetry.sdk.resources import Resource, SERVICE_NAME, DEPLOYMENT_ENVIRONMENT
         from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
         from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
 
+        class SanitizingSpanExporter(SpanExporter):
+            """
+            Wrapper exporter that sanitizes span attributes before export.
+
+            Removes None values that cause OTLP encoding failures like:
+            - llm.input_messages.3.message.content: None
+            """
+
+            def __init__(self, wrapped_exporter: SpanExporter):
+                self._wrapped = wrapped_exporter
+
+            def _sanitize_value(self, value):
+                """Recursively sanitize a value, replacing None with empty string."""
+                if value is None:
+                    return ""  # Replace None with empty string
+                if isinstance(value, dict):
+                    return {k: self._sanitize_value(v) for k, v in value.items()}
+                if isinstance(value, (list, tuple)):
+                    return [self._sanitize_value(v) for v in value]
+                return value
+
+            def export(self, spans: tuple[ReadableSpan, ...]) -> SpanExportResult:
+                # Create sanitized copies of spans
+                sanitized_spans = []
+                for span in spans:
+                    if span.attributes:
+                        # Sanitize all attribute values - replace None with empty string
+                        sanitized_attrs = {}
+                        for k, v in span.attributes.items():
+                            sanitized_attrs[k] = self._sanitize_value(v)
+                        sanitized_spans.append(_SanitizedSpan(span, sanitized_attrs))
+                    else:
+                        sanitized_spans.append(span)
+
+                return self._wrapped.export(tuple(sanitized_spans))
+
+            def shutdown(self) -> None:
+                self._wrapped.shutdown()
+
+            def force_flush(self, timeout_millis: int = 30000) -> bool:
+                return self._wrapped.force_flush(timeout_millis)
+
+        class _SanitizedSpan(ReadableSpan):
+            """ReadableSpan wrapper with sanitized attributes."""
+
+            def __init__(self, original: ReadableSpan, sanitized_attributes: dict):
+                self._original = original
+                self._sanitized_attributes = sanitized_attributes
+
+            @property
+            def name(self): return self._original.name
+            @property
+            def context(self): return self._original.context
+            @property
+            def parent(self): return self._original.parent
+            @property
+            def resource(self): return self._original.resource
+            @property
+            def instrumentation_scope(self): return self._original.instrumentation_scope
+            @property
+            def status(self): return self._original.status
+            @property
+            def start_time(self): return self._original.start_time
+            @property
+            def end_time(self): return self._original.end_time
+            @property
+            def links(self): return self._original.links
+            @property
+            def events(self): return self._original.events
+            @property
+            def kind(self): return self._original.kind
+            @property
+            def attributes(self): return self._sanitized_attributes
+            @property
+            def dropped_attributes(self): return self._original.dropped_attributes
+            @property
+            def dropped_events(self): return self._original.dropped_events
+            @property
+            def dropped_links(self): return self._original.dropped_links
+
+            def get_span_context(self): return self._original.get_span_context()
+
         # Create resource with service metadata
         resource = Resource(
             attributes={
@@ -72,16 +155,19 @@ def setup_instrumentation() -> None:
 
         # Configure OTLP exporter based on protocol
         if settings.otel.protocol == "grpc":
-            exporter = GRPCExporter(
+            base_exporter = GRPCExporter(
                 endpoint=settings.otel.collector_endpoint,
                 timeout=settings.otel.export_timeout,
             )
         else:  # http
-            exporter = HTTPExporter(
+            base_exporter = HTTPExporter(
                 endpoint=f"{settings.otel.collector_endpoint}/v1/traces",
                 timeout=settings.otel.export_timeout,
             )
 
+        # Wrap with sanitizing exporter to handle None values
+        exporter = SanitizingSpanExporter(base_exporter)
+
         # Add span processor
         tracer_provider.add_span_processor(BatchSpanProcessor(exporter))
 
@@ -95,6 +181,8 @@ def setup_instrumentation() -> None:
         # Add OpenInference span processor for Pydantic AI
         # This adds rich attributes (openinference.span.kind, input/output, etc.) to ALL traces
         # Phoenix receives these traces via the OTLP collector - no separate "Phoenix integration" needed
+        # Note: The OTEL exporter may log warnings about None values in tool call messages,
+        # but this is a known limitation in openinference-instrumentation-pydantic-ai
         try:
             from openinference.instrumentation.pydantic_ai import OpenInferenceSpanProcessor as PydanticAISpanProcessor
 
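`SanitizingSpanExporter` is defined inside `setup_instrumentation()`, so it is not importable on its own; the essential trick is the recursive None-scrub it applies to span attributes. A self-contained sketch of that same pattern, with made-up attribute data:

```python
# Standalone sketch of the sanitization rule used by SanitizingSpanExporter:
# recursively replace None (which the OTLP encoder rejects) with "".
from typing import Any


def sanitize_value(value: Any) -> Any:
    """Return value with every nested None replaced by an empty string."""
    if value is None:
        return ""
    if isinstance(value, dict):
        return {k: sanitize_value(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [sanitize_value(v) for v in value]
    return value


attrs = {
    "llm.input_messages.3.message.content": None,  # the failing case from the docstring
    "llm.output_messages": [{"content": None}, {"content": "ok"}],
}
print(sanitize_value(attrs))
# {'llm.input_messages.3.message.content': '', 'llm.output_messages': [{'content': ''}, {'content': 'ok'}]}
```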
@@ -128,15 +128,16 @@ def sanitize_tool_name(tool_name: str) -> str:
 
 
 def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
-    """Load evaluator schema from schemas/evaluators/ directory.
+    """Load evaluator schema using centralized schema loader.
 
-    Searches for evaluator schema in rem/schemas/evaluators/
-    Supports .json, .yaml, and .yml files.
+    Uses the same unified search logic as agent schemas:
+    - "hello-world/default" → schemas/evaluators/hello-world/default.yaml
+    - "lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
+    - "rem-lookup-correctness" → schemas/evaluators/rem/lookup-correctness.yaml
 
     Args:
-        evaluator_name: Evaluator name (with or without extension)
-            e.g., "rem-lookup-correctness" or
-            "rem-lookup-correctness.yaml"
+        evaluator_name: Evaluator name or path
+            e.g., "hello-world/default", "lookup-correctness"
 
     Returns:
         Evaluator schema dictionary with keys:
@@ -150,43 +151,13 @@ def load_evaluator_schema(evaluator_name: str) -> dict[str, Any]:
         FileNotFoundError: If evaluator schema not found
 
     Example:
-        >>> schema = load_evaluator_schema("rem-lookup-correctness")
+        >>> schema = load_evaluator_schema("hello-world/default")
         >>> print(schema["description"])
     """
-    # Get schemas directory (rem/schemas/evaluators/)
-    # rem.__file__ = rem/src/rem/__init__.py
-    # We need rem/schemas/evaluators/
-    import rem
-    rem_module_dir = Path(rem.__file__).parent  # rem/src/rem
-    rem_package_root = rem_module_dir.parent.parent  # rem/src/rem -> rem/src -> rem
-    schema_dir = rem_package_root / "schemas" / "evaluators"
-
-    # Try .yaml first (preferred format)
-    yaml_path = schema_dir / f"{evaluator_name}.yaml"
-    if yaml_path.exists():
-        logger.debug(f"Loading evaluator schema from {yaml_path}")
-        with open(yaml_path) as f:
-            return yaml.safe_load(f)
-
-    # Try .yml
-    yml_path = schema_dir / f"{evaluator_name}.yml"
-    if yml_path.exists():
-        logger.debug(f"Loading evaluator schema from {yml_path}")
-        with open(yml_path) as f:
-            return yaml.safe_load(f)
-
-    # Try .json
-    json_path = schema_dir / f"{evaluator_name}.json"
-    if json_path.exists():
-        logger.debug(f"Loading evaluator schema from {json_path}")
-        with open(json_path) as f:
-            return json.load(f)
-
-    raise FileNotFoundError(
-        f"Evaluator schema not found: {evaluator_name}\n"
-        f"Searched in: {schema_dir}\n"
-        f"Supported formats: .yaml, .yml, .json"
-    )
+    from ...utils.schema_loader import load_agent_schema
+
+    # Use centralized schema loader (searches evaluator paths too)
+    return load_agent_schema(evaluator_name)
 
 
 # =============================================================================
@@ -338,6 +309,22 @@ def create_evaluator_from_schema(
         # Already a dict
         schema = evaluator_schema_path
 
+    # Extract model from schema's provider_configs if not explicitly provided
+    if model_name is None:
+        json_schema_extra = schema.get("json_schema_extra", {})
+        provider_configs = json_schema_extra.get("provider_configs", [])
+        if provider_configs:
+            # Use first provider config
+            first_provider = provider_configs[0]
+            provider_name = first_provider.get("provider_name", "openai")
+            schema_model_name = first_provider.get("model_name", "gpt-4o-mini")
+            # Format as "provider:model" if not OpenAI (OpenAI is default)
+            if provider_name == "openai":
+                model_name = schema_model_name
+            else:
+                model_name = f"{provider_name}:{schema_model_name}"
+            logger.debug(f"Using model from schema provider_configs: {model_name}")
+
     # Create evaluator config
     evaluator_config = create_phoenix_evaluator(
         evaluator_schema=schema,
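The added `provider_configs` block derives a default model from the schema itself whenever the caller passes `model_name=None`. A small sketch of that resolution rule against an illustrative schema dict (field names follow the diff; the values are invented):

```python
# Resolution rule from the hunk above, applied to a made-up schema.
schema = {
    "json_schema_extra": {
        "provider_configs": [
            {"provider_name": "anthropic", "model_name": "claude-haiku-4-5"},
        ]
    }
}

provider_configs = schema.get("json_schema_extra", {}).get("provider_configs", [])
if provider_configs:
    first = provider_configs[0]
    provider = first.get("provider_name", "openai")
    model = first.get("model_name", "gpt-4o-mini")
    # OpenAI is the implicit default provider, so it keeps the bare model name.
    model_name = model if provider == "openai" else f"{provider}:{model}"
    print(model_name)  # -> anthropic:claude-haiku-4-5
```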
@@ -361,7 +348,8 @@
     Returns:
         Evaluation result with score, label, explanation
     """
-    logger.debug(f"Evaluating example: {example.get('input', '')[:100]}...")
+    input_preview = str(example.get('input', ''))[:100]
+    logger.debug(f"Evaluating example: {input_preview}...")
 
     # Phoenix llm_classify() expects a flat dict with string values
     # Build evaluation input by flattening nested dicts
@@ -393,6 +381,7 @@
 
     try:
         # Create single-row DataFrame for llm_classify
+        # Note: Phoenix's llm_classify requires pandas DataFrame (imported above)
        df = pd.DataFrame([eval_input])
 
         # Call Phoenix llm_classify
@@ -404,7 +393,7 @@
             provide_explanation=True,
         )
 
-        # Extract result
+        # Extract result (results_df is pandas DataFrame from Phoenix)
        if not results_df.empty:
            row = results_df.iloc[0]
            label = row.get("label", "error")
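The result-extraction change relies on `results_df` being a pandas DataFrame: `iloc[0]` yields a Series, and `Series.get()` supplies defaults for columns Phoenix did not return. A stand-in sketch (the row contents are fabricated; in practice they come from `llm_classify()`):

```python
# Fabricated stand-in for Phoenix's llm_classify() output.
import pandas as pd

results_df = pd.DataFrame([{"label": "correct", "explanation": "matches expected output"}])

if not results_df.empty:
    row = results_df.iloc[0]                  # first (only) row as a Series
    label = row.get("label", "error")         # Series.get with a default
    explanation = row.get("explanation", "")  # missing column -> default
    print(label, "-", explanation)
```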