remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1124 @@
1
+ """
2
+ Experiment management CLI commands.
3
+
4
+ Experiments use ExperimentConfig (rem/models/core/experiment.py) for configuration
5
+ and support Git+S3 hybrid storage. Includes dataset, prompt, and trace management.
6
+
7
+ Directory Structure:
8
+ .experiments/{experiment-name}/
9
+ ├── experiment.yaml # ExperimentConfig
10
+ ├── README.md # Auto-generated docs
11
+ ├── datasets/ # Optional: small datasets
12
+ └── results/ # Optional: metrics summaries
13
+
14
+ Commands:
15
+ # Experiment lifecycle
16
+ rem experiments create <name> --agent <agent> --evaluator <evaluator>
17
+ rem experiments list
18
+ rem experiments show <name>
19
+ rem experiments run <name> [--version <tag>]
20
+
21
+ # Dataset management
22
+ rem experiments dataset list
23
+ rem experiments dataset create <name> --from-csv data.csv
24
+ rem experiments dataset add <name> --from-csv data.csv
25
+
26
+ # Prompt management
27
+ rem experiments prompt list
28
+ rem experiments prompt create <name> --system-prompt "..."
29
+
30
+ # Trace retrieval
31
+ rem experiments trace list --project <name>
32
+ """
33
+
34
+ import asyncio
35
+ from pathlib import Path
36
+ from typing import Any, Optional, cast
37
+
38
+ import click
39
+ from loguru import logger
40
+
41
+
42
# Root Click group; the sub-commands in this module register themselves on it
# via ``@experiments.command(...)``. The docstring doubles as the group's
# ``--help`` text, so it is kept verbatim.
@click.group()
def experiments() -> None:
    """Experiment configuration and execution commands."""
46
+
47
+
48
+ # =============================================================================
49
+ # CREATE COMMAND
50
+ # =============================================================================
51
+
52
+
53
@experiments.command("create")
@click.argument("name")
@click.option("--agent", "-a", required=True, help="Agent schema name (e.g., 'cv-parser')")
@click.option("--evaluator", "-e", default="default", help="Evaluator schema name (default: 'default')")
@click.option("--description", "-d", help="Experiment description")
@click.option("--dataset-location", type=click.Choice(["git", "s3", "hybrid"]), default="git",
              help="Where to store datasets")
@click.option("--results-location", type=click.Choice(["git", "s3", "hybrid"]), default="git",
              help="Where to store results")
@click.option("--tags", help="Comma-separated tags (e.g., 'production,cv-parser')")
@click.option("--base-path", default=".experiments", help="Base directory for experiments")
def create(
    name: str,
    agent: str,
    evaluator: str,
    description: Optional[str],
    dataset_location: str,
    results_location: str,
    tags: Optional[str],
    base_path: str,
) -> None:
    """Create a new experiment configuration.

    Creates directory structure and generates experiment.yaml and README.md.

    Examples:
        # Small experiment (Git-only)
        rem experiments create hello-world-validation \\
            --agent hello-world \\
            --evaluator default \\
            --description "Smoke test for hello-world agent"

        # Large experiment (Hybrid storage)
        rem experiments create cv-parser-production \\
            --agent cv-parser \\
            --evaluator default \\
            --description "Production CV parser evaluation" \\
            --dataset-location s3 \\
            --results-location hybrid \\
            --tags "production,cv-parser,weekly"
    """
    # NOTE: the docstring above is what Click prints for ``--help``; parameter
    # notes therefore live in comments rather than in the docstring.
    #   name              experiment name; also used in the S3 paths below
    #   agent/evaluator   schema names stored as SchemaReference entries
    #   dataset_location  "git" | "s3" | "hybrid" (validated by click.Choice)
    #   results_location  "git" | "s3" | "hybrid" (validated by click.Choice)
    #   tags              comma-separated string, or None when not given
    #   base_path         directory passed to config.save()/save_readme()
    # Any failure is logged, echoed to stderr and converted to click.Abort.

    # Imported lazily so that loading the CLI module stays cheap.
    from rem.models.core.experiment import (
        ExperimentConfig,
        DatasetLocation,
        DatasetReference,
        SchemaReference,
        ResultsConfig,
        ExperimentStatus,
    )

    try:
        # Build dataset reference
        if dataset_location == "git":
            dataset_ref = DatasetReference(
                location=DatasetLocation.GIT,
                path="datasets/ground_truth.csv",
                format="csv",
                description="Ground truth dataset for evaluation",
            )
        else:  # s3 or hybrid
            dataset_ref = DatasetReference(
                location=DatasetLocation(dataset_location),
                path=f"s3://rem-experiments/{name}/datasets/ground_truth.parquet",
                format="parquet",
                # Only hybrid storage records a schema path alongside the S3 data.
                schema_path="datasets/schema.yaml" if dataset_location == "hybrid" else None,
                description="Ground truth dataset for evaluation",
            )

        # Build results config
        if results_location == "git":
            results_config = ResultsConfig(
                location=DatasetLocation.GIT,
                base_path="results/",
                save_traces=False,
                save_metrics_summary=True,
            )
        elif results_location == "s3":
            results_config = ResultsConfig(
                location=DatasetLocation.S3,
                base_path=f"s3://rem-experiments/{name}/results/",
                save_traces=True,
                save_metrics_summary=False,
            )
        else:  # hybrid
            results_config = ResultsConfig(
                location=DatasetLocation.HYBRID,
                base_path=f"s3://rem-experiments/{name}/results/",
                save_traces=True,
                save_metrics_summary=True,
                metrics_file="metrics.json",
            )

        # Parse tags, dropping blank entries produced by inputs such as
        # "a,,b" or a trailing comma.
        tag_list = [t.strip() for t in tags.split(",") if t.strip()] if tags else []

        # Create experiment config
        config = ExperimentConfig(
            name=name,
            description=description or f"Evaluation experiment for {agent} agent",
            agent_schema_ref=SchemaReference(
                name=agent,
                version=None,  # Use latest by default
                type="agent",
            ),
            evaluator_schema_ref=SchemaReference(
                name=evaluator,
                type="evaluator",
            ),
            datasets={"ground_truth": dataset_ref},
            results=results_config,
            status=ExperimentStatus.DRAFT,
            tags=tag_list,
        )

        # Save configuration
        config_path = config.save(base_path)
        readme_path = config.save_readme(base_path)

        # Resolve the experiment directory once and reuse it below.
        experiment_dir = config.get_experiment_dir(base_path)

        # Create datasets directory
        datasets_dir = experiment_dir / "datasets"
        datasets_dir.mkdir(parents=True, exist_ok=True)

        # Create results directory if Git-based; stays None otherwise so the
        # summary below never reads an unbound name.
        results_dir = None
        if results_location == "git":
            results_dir = experiment_dir / "results"
            results_dir.mkdir(parents=True, exist_ok=True)

        click.echo(f"\n✓ Created experiment: {name}")
        click.echo(f" Configuration: {config_path}")
        click.echo(f" Documentation: {readme_path}")
        click.echo(f" Datasets: {datasets_dir}")
        if results_dir is not None:
            click.echo(f" Results: {results_dir}")
        click.echo("\nNext steps:")
        click.echo(f" 1. Add dataset to {datasets_dir}/")
        click.echo(f" 2. Review configuration: {config_path}")
        click.echo(f" 3. Run experiment: rem experiments run {name}")
        # Point at the directory actually written, so the hint stays correct
        # when --base-path is not the default ".experiments".
        click.echo(f" 4. Commit to Git: git add {experiment_dir}/ && git commit")

    except Exception as e:
        logger.error(f"Failed to create experiment: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
196
+
197
+
198
+ # =============================================================================
199
+ # LIST COMMAND
200
+ # =============================================================================
201
+
202
+
203
@experiments.command("list")
@click.option("--base-path", default=".experiments", help="Base directory for experiments")
@click.option("--status", help="Filter by status (draft, ready, completed, etc.)")
@click.option("--tags", help="Filter by tags (comma-separated)")
def list_experiments(
    base_path: str,
    status: Optional[str],
    tags: Optional[str],
) -> None:
    """List all experiments.

    Examples:
        rem experiments list
        rem experiments list --status ready
        rem experiments list --tags production,cv-parser
    """
    # NOTE: the docstring above is what Click prints for ``--help``.
    #   base_path  directory scanned for ``<experiment>/experiment.yaml`` files
    #   status     optional ExperimentStatus value to filter on
    #   tags       optional comma-separated tags; an experiment matches when it
    #              carries at least one of them (case-insensitive)
    # Unexpected failures are logged, echoed to stderr and turned into
    # click.Abort.
    from rem.models.core.experiment import ExperimentConfig, ExperimentStatus

    try:
        experiments_dir = Path(base_path)
        if not experiments_dir.exists():
            click.echo(f"No experiments directory found at {base_path}")
            return

        # Find all experiment.yaml files
        configs = []
        for exp_dir in experiments_dir.iterdir():
            # Skip plain files and hidden directories.
            if not exp_dir.is_dir() or exp_dir.name.startswith("."):
                continue

            config_file = exp_dir / "experiment.yaml"
            if not config_file.exists():
                continue
            try:
                configs.append(ExperimentConfig.from_yaml(config_file))
            except Exception as e:
                # Best effort: one unreadable config must not hide the rest.
                logger.warning(f"Failed to load {config_file}: {e}")

        # Apply filters
        if status:
            try:
                status_enum = ExperimentStatus(status)
            except ValueError:
                # Assumes ExperimentStatus is an Enum (it is built from a value
                # and exposes ``.value`` elsewhere in this module).
                valid = ", ".join(s.value for s in ExperimentStatus)
                click.echo(
                    f"Error: Invalid status '{status}'. Valid values: {valid}",
                    err=True,
                )
                raise click.Abort() from None
            configs = [c for c in configs if c.status == status_enum]

        if tags:
            # Lower-case both sides so "--tags production" also matches a
            # stored "Production"; blank entries are ignored.
            wanted = {t.strip().lower() for t in tags.split(",") if t.strip()}
            configs = [
                c for c in configs
                if wanted & {t.lower() for t in c.tags}
            ]

        if not configs:
            click.echo("No experiments found")
            return

        # Sort by updated_at descending
        configs.sort(key=lambda c: c.updated_at, reverse=True)

        # Display table
        click.echo(f"\nExperiments ({len(configs)} total):\n")
        click.echo(f"{'Name':<30} {'Status':<12} {'Agent':<20} {'Updated':<12}")
        click.echo("-" * 75)

        for config in configs:
            # Truncate each cell to its column width so rows stay aligned.
            name = config.name[:30]
            status_str = config.status.value[:12]
            agent = config.agent_schema_ref.name[:20]
            updated = config.updated_at.strftime("%Y-%m-%d")
            click.echo(f"{name:<30} {status_str:<12} {agent:<20} {updated:<12}")

    except click.Abort:
        # click.Abort is a RuntimeError; re-raise it untouched so the generic
        # handler below does not report it a second time.
        raise
    except Exception as e:
        logger.error(f"Failed to list experiments: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
273
+
274
+
275
+ # =============================================================================
276
+ # SHOW COMMAND
277
+ # =============================================================================
278
+
279
+
280
@experiments.command("show")
@click.argument("name")
@click.option("--base-path", default=".experiments", help="Base directory for experiments")
def show(name: str, base_path: str) -> None:
    """Show experiment details.

    Examples:
        rem experiments show hello-world-validation
    """
    # NOTE: the docstring above is what Click prints for ``--help``.
    #   name       experiment directory name under ``base_path``
    #   base_path  directory that contains ``<name>/experiment.yaml``
    # Aborts (click.Abort) when the config file is missing or cannot be loaded.
    from rem.models.core.experiment import ExperimentConfig

    try:
        config_path = Path(base_path) / name / "experiment.yaml"
        if not config_path.exists():
            click.echo(f"Experiment not found: {name}")
            click.echo(f" Looked in: {config_path}")
            raise click.Abort()

        config = ExperimentConfig.from_yaml(config_path)

        click.echo(f"\nExperiment: {config.name}")
        click.echo(f"{'=' * 60}\n")
        click.echo(f"Description: {config.description}")
        click.echo(f"Status: {config.status.value}")
        if config.tags:
            click.echo(f"Tags: {', '.join(config.tags)}")

        click.echo("\nAgent Schema:")
        click.echo(f" Name: {config.agent_schema_ref.name}")
        click.echo(f" Version: {config.agent_schema_ref.version or 'latest'}")

        click.echo("\nEvaluator Schema:")
        click.echo(f" Name: {config.evaluator_schema_ref.name}")

        click.echo("\nDatasets:")
        for ds_name, ds_ref in config.datasets.items():
            click.echo(f" {ds_name}:")
            click.echo(f" Location: {ds_ref.location.value}")
            click.echo(f" Path: {ds_ref.path}")
            click.echo(f" Format: {ds_ref.format}")

        click.echo("\nResults:")
        click.echo(f" Location: {config.results.location.value}")
        click.echo(f" Base Path: {config.results.base_path}")
        click.echo(f" Save Traces: {config.results.save_traces}")
        click.echo(f" Metrics File: {config.results.metrics_file}")

        click.echo("\nTimestamps:")
        click.echo(f" Created: {config.created_at.isoformat()}")
        click.echo(f" Updated: {config.updated_at.isoformat()}")
        if config.last_run_at:
            click.echo(f" Last Run: {config.last_run_at.isoformat()}")

        if config.metadata:
            click.echo("\nMetadata:")
            for key, value in config.metadata.items():
                click.echo(f" {key}: {value}")

    except click.Abort:
        # click.Abort subclasses RuntimeError, so without this clause the
        # "not found" abort above would fall into the generic handler and emit
        # a spurious log entry plus an empty "Error: " line.
        raise
    except Exception as e:
        logger.error(f"Failed to show experiment: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
342
+
343
+
344
+ # =============================================================================
345
+ # RUN COMMAND
346
+ # =============================================================================
347
+
348
+
349
+ @experiments.command("run")
350
+ @click.argument("name")
351
+ @click.option("--base-path", default=".experiments", help="Base directory for experiments")
352
+ @click.option("--version", help="Git tag version to load (e.g., 'experiments/my-exp/v1.0.0')")
353
+ @click.option("--dry-run", is_flag=True, help="Test on small subset without saving")
354
+ @click.option("--update-prompts", is_flag=True, help="Update prompts in Phoenix before running")
355
+ @click.option("--phoenix-url", help="Phoenix server URL (overrides PHOENIX_BASE_URL env var)")
356
+ @click.option("--phoenix-api-key", help="Phoenix API key (overrides PHOENIX_API_KEY env var)")
357
+ def run(
358
+ name: str,
359
+ base_path: str,
360
+ version: Optional[str],
361
+ dry_run: bool,
362
+ update_prompts: bool,
363
+ phoenix_url: Optional[str],
364
+ phoenix_api_key: Optional[str],
365
+ ):
366
+ """Run an experiment using Phoenix provider.
367
+
368
+ Loads configuration, executes agent and evaluator, saves results.
369
+
370
+ Phoenix Connection:
371
+ Commands respect PHOENIX_BASE_URL and PHOENIX_API_KEY environment variables.
372
+ Defaults to localhost:6006 for local development.
373
+
374
+ Production (on cluster):
375
+ export PHOENIX_BASE_URL=http://phoenix-svc.observability.svc.cluster.local:6006
376
+ export PHOENIX_API_KEY=<your-key>
377
+ kubectl exec -it deployment/rem-api -- rem experiments run my-experiment
378
+
379
+ Development (port-forward):
380
+ kubectl port-forward -n observability svc/phoenix-svc 6006:6006
381
+ export PHOENIX_API_KEY=<your-key>
382
+ rem experiments run my-experiment
383
+
384
+ Local (local Phoenix):
385
+ python -m phoenix.server.main serve
386
+ rem experiments run my-experiment
387
+
388
+ Examples:
389
+ # Run experiment with latest schemas
390
+ rem experiments run hello-world-validation
391
+
392
+ # Run specific version
393
+ rem experiments run hello-world-validation \\
394
+ --version experiments/hello-world-validation/v1.0.0
395
+
396
+ # Dry run (test without saving)
397
+ rem experiments run cv-parser-production --dry-run
398
+
399
+ # Override Phoenix connection
400
+ rem experiments run my-experiment \\
401
+ --phoenix-url http://phoenix.example.com:6006 \\
402
+ --phoenix-api-key <key>
403
+ """
404
+ from rem.models.core.experiment import ExperimentConfig, ExperimentStatus
405
+ from rem.services.git import GitService
406
+ from rem.services.phoenix import PhoenixClient
407
+ from rem.agentic.providers.phoenix import create_evaluator_from_schema
408
+ from datetime import datetime
409
+ import pandas as pd
410
+
411
+ try:
412
+ # Load experiment configuration
413
+ if version:
414
+ # Load from Git at specific version
415
+ git_svc = GitService()
416
+ config_yaml = git_svc.fs.read(
417
+ f"git://rem/.experiments/{name}/experiment.yaml?ref={version}"
418
+ )
419
+ config = ExperimentConfig(**config_yaml)
420
+ click.echo(f"✓ Loaded experiment from Git: {version}")
421
+ else:
422
+ # Load from local filesystem
423
+ config_path = Path(base_path) / name / "experiment.yaml"
424
+ if not config_path.exists():
425
+ click.echo(f"Experiment not found: {name}")
426
+ click.echo(f" Looked in: {config_path}")
427
+ raise click.Abort()
428
+ config = ExperimentConfig.from_yaml(config_path)
429
+ click.echo(f"✓ Loaded experiment: {name}")
430
+
431
+ # Display experiment info
432
+ click.echo(f"\nExperiment: {config.name}")
433
+ click.echo(f" Agent: {config.agent_schema_ref.name} (version: {config.agent_schema_ref.version or 'latest'})")
434
+ click.echo(f" Evaluator: {config.evaluator_schema_ref.name}")
435
+ click.echo(f" Status: {config.status.value}")
436
+ if dry_run:
437
+ click.echo(f" Mode: DRY RUN (no data will be saved)")
438
+ click.echo()
439
+
440
+ # Load agent schema from Git or filesystem
441
+ agent_name = config.agent_schema_ref.name
442
+ agent_version = config.agent_schema_ref.version
443
+
444
+ click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")
445
+
446
+ # Try Git first, fallback to filesystem
447
+ agent_schema = None
448
+ try:
449
+ git_svc = GitService()
450
+ agent_schema = git_svc.load_schema(agent_name, version=agent_version)
451
+ click.echo(f"✓ Loaded agent schema from Git")
452
+ except Exception as e:
453
+ logger.debug(f"Git not available, trying filesystem: {e}")
454
+
455
+ # Fallback to local filesystem
456
+ from rem.services.fs import FS
457
+ fs = FS()
458
+
459
+ schema_path = f"schemas/agents/{agent_name}.yaml"
460
+ try:
461
+ agent_schema = fs.read(schema_path)
462
+ click.echo(f"✓ Loaded agent schema from filesystem")
463
+ except Exception as fs_error:
464
+ logger.error(f"Failed to load agent schema: Git: {e}, FS: {fs_error}")
465
+ click.echo(f"Error: Could not load agent schema '{agent_name}'")
466
+ click.echo(f" Tried Git: {e}")
467
+ click.echo(f" Tried filesystem: {schema_path}")
468
+ click.echo(f" Make sure the schema exists")
469
+ raise click.Abort()
470
+
471
+ # Create agent function from schema
472
+ from rem.agentic.providers.pydantic_ai import create_agent
473
+ from rem.agentic.context import AgentContext
474
+
475
+ # Create agent context
476
+ context = AgentContext(
477
+ user_id="experiment-runner",
478
+ tenant_id="experiments",
479
+ session_id=f"experiment-{config.name}",
480
+ )
481
+
482
+ agent_runtime = asyncio.run(create_agent(
483
+ context=context,
484
+ agent_schema_override=agent_schema
485
+ ))
486
+
487
+ def task_fn(example: dict[str, Any]) -> dict[str, Any]:
488
+ """Run agent on example."""
489
+ input_data = example.get("input", {})
490
+
491
+ # Extract query from input
492
+ query = input_data.get("query", "")
493
+ if not query:
494
+ # Try other common input keys
495
+ query = input_data.get("text", input_data.get("prompt", str(input_data)))
496
+
497
+ # Run agent
498
+ result = asyncio.run(agent_runtime.run(query))
499
+
500
+ # Serialize result (critical for Pydantic models!)
501
+ from rem.agentic.serialization import serialize_agent_result
502
+ serialized = serialize_agent_result(result)
503
+ # Ensure we return a dict (Phoenix expects dict output)
504
+ if isinstance(serialized, str):
505
+ return {"output": serialized}
506
+ return serialized if isinstance(serialized, dict) else {"output": str(serialized)}
507
+
508
+ # Load evaluator schema
509
+ evaluator_name = config.evaluator_schema_ref.name
510
+ evaluator_version = config.evaluator_schema_ref.version
511
+
512
+ # Resolve evaluator path (evaluators are organized by agent name)
513
+ evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
514
+
515
+ click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
516
+
517
+ try:
518
+ evaluator_fn = create_evaluator_from_schema(
519
+ evaluator_schema_path=evaluator_schema_path,
520
+ model_name=None, # Use default from schema
521
+ )
522
+ click.echo(f"✓ Loaded evaluator schema")
523
+ except Exception as e:
524
+ logger.warning(f"Failed to load evaluator: {e}")
525
+ click.echo(f"Error: Could not load evaluator schema")
526
+ click.echo(f" Path: {evaluator_schema_path}")
527
+ click.echo(f" Make sure the schema exists")
528
+ raise click.Abort()
529
+
530
+ # Load dataset
531
+ click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
532
+ dataset_ref = list(config.datasets.values())[0]
533
+
534
+ if dataset_ref.location.value == "git":
535
+ # Load from Git
536
+ dataset_path = Path(base_path) / name / dataset_ref.path
537
+ if not dataset_path.exists():
538
+ click.echo(f"Error: Dataset not found: {dataset_path}")
539
+ raise click.Abort()
540
+
541
+ if dataset_ref.format == "csv":
542
+ dataset_df = pd.read_csv(dataset_path)
543
+ elif dataset_ref.format == "parquet":
544
+ dataset_df = pd.read_parquet(dataset_path)
545
+ elif dataset_ref.format == "jsonl":
546
+ dataset_df = pd.read_json(dataset_path, lines=True)
547
+ else:
548
+ click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
549
+ raise click.Abort()
550
+ elif dataset_ref.location.value in ["s3", "hybrid"]:
551
+ # Load from S3 using FS provider
552
+ from rem.services.fs import FS
553
+
554
+ fs = FS()
555
+
556
+ try:
557
+ if dataset_ref.format == "csv":
558
+ content = fs.read(dataset_ref.path)
559
+ from io import StringIO
560
+ dataset_df = pd.read_csv(StringIO(content))
561
+ elif dataset_ref.format == "parquet":
562
+ # For parquet, we need binary read
563
+ import tempfile
564
+ with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
565
+ tmp_path = tmp.name
566
+ # Download via FS
567
+ content_bytes = fs.read(dataset_ref.path)
568
+ tmp.write(content_bytes)
569
+ dataset_df = pd.read_parquet(tmp_path)
570
+ Path(tmp_path).unlink() # Clean up temp file
571
+ elif dataset_ref.format == "jsonl":
572
+ content = fs.read(dataset_ref.path)
573
+ from io import StringIO
574
+ dataset_df = pd.read_json(StringIO(content), lines=True)
575
+ else:
576
+ click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
577
+ raise click.Abort()
578
+
579
+ click.echo(f"✓ Loaded dataset from S3")
580
+ except Exception as e:
581
+ logger.error(f"Failed to load dataset from S3: {e}")
582
+ click.echo(f"Error: Could not load dataset from S3")
583
+ click.echo(f" Path: {dataset_ref.path}")
584
+ click.echo(f" Format: {dataset_ref.format}")
585
+ raise click.Abort()
586
+ else:
587
+ click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
588
+ raise click.Abort()
589
+
590
+ click.echo(f"✓ Loaded dataset: {len(dataset_df)} examples")
591
+
592
+ # Update prompts in Phoenix if requested
593
+ if update_prompts:
594
+ # TODO: Implement prompt updating
595
+ click.echo("⚠ --update-prompts not yet implemented")
596
+
597
+ # Run experiment via Phoenix
598
+ if not dry_run:
599
+ # Create Phoenix client with optional overrides
600
+ from rem.services.phoenix.config import PhoenixConfig
601
+ import os
602
+
603
+ phoenix_config = PhoenixConfig(
604
+ base_url=phoenix_url or os.getenv("PHOENIX_BASE_URL"),
605
+ api_key=phoenix_api_key or os.getenv("PHOENIX_API_KEY")
606
+ )
607
+
608
+ # Display Phoenix connection info
609
+ phoenix_display_url = phoenix_config.base_url
610
+ phoenix_has_key = "Yes" if phoenix_config.api_key else "No"
611
+ click.echo(f"\nPhoenix Connection:")
612
+ click.echo(f" URL: {phoenix_display_url}")
613
+ click.echo(f" API Key: {phoenix_has_key}")
614
+ click.echo()
615
+
616
+ client = PhoenixClient(config=phoenix_config)
617
+
618
+ experiment_name = f"{config.name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
619
+
620
+ click.echo(f"\n⏳ Running experiment: {experiment_name}")
621
+ click.echo(f" This may take several minutes...")
622
+
623
+ experiment = client.run_experiment(
624
+ dataset=dataset_df, # type: ignore[arg-type]
625
+ task=task_fn,
626
+ evaluators=[evaluator_fn],
627
+ experiment_name=experiment_name,
628
+ experiment_description=config.description,
629
+ experiment_metadata={
630
+ "agent": config.agent_schema_ref.name,
631
+ "evaluator": config.evaluator_schema_ref.name,
632
+ "experiment_config": config.name,
633
+ **config.metadata
634
+ }
635
+ )
636
+
637
+ # Update experiment status
638
+ config.status = ExperimentStatus.COMPLETED
639
+ config.last_run_at = datetime.now()
640
+ if not version: # Only save if not loading from Git
641
+ config.save(base_path)
642
+
643
+ click.echo(f"\n✓ Experiment complete!")
644
+ if hasattr(experiment, "url"):
645
+ click.echo(f" View results: {experiment.url}") # type: ignore[attr-defined]
646
+
647
+ # Save results according to config.results settings
648
+ if config.results.save_metrics_summary:
649
+ # Get experiment data
650
+ try:
651
+ exp_data = client.get_experiment(experiment.id) # type: ignore[attr-defined]
652
+
653
+ # Build metrics summary
654
+ metrics = {
655
+ "experiment_id": experiment.id, # type: ignore[attr-defined]
656
+ "experiment_name": experiment_name,
657
+ "agent": config.agent_schema_ref.name,
658
+ "evaluator": config.evaluator_schema_ref.name,
659
+ "dataset_size": len(dataset_df),
660
+ "completed_at": datetime.now().isoformat(),
661
+ "phoenix_url": getattr(experiment, "url", None),
662
+ "task_runs": len(exp_data.get("task_runs", [])),
663
+ }
664
+
665
+ # Save metrics
666
+ if config.results.location.value == "git" or config.results.location.value == "hybrid":
667
+ # Save to Git
668
+ metrics_path = Path(base_path) / name / "results" / (config.results.metrics_file or "metrics.json")
669
+ metrics_path.parent.mkdir(parents=True, exist_ok=True)
670
+
671
+ import json
672
+ with open(metrics_path, "w") as f:
673
+ json.dump(metrics, f, indent=2)
674
+
675
+ click.echo(f"\n✓ Saved metrics summary: {metrics_path}")
676
+
677
+ if config.results.location.value == "s3" or config.results.location.value == "hybrid":
678
+ # Save to S3
679
+ from rem.services.fs import FS
680
+ fs = FS()
681
+
682
+ s3_metrics_path = config.results.base_path.rstrip("/") + "/" + (config.results.metrics_file or "metrics.json")
683
+
684
+ import json
685
+ fs.write(s3_metrics_path, json.dumps(metrics, indent=2))
686
+
687
+ click.echo(f"✓ Saved metrics summary to S3: {s3_metrics_path}")
688
+
689
+ except Exception as e:
690
+ logger.warning(f"Failed to save metrics: {e}")
691
+ click.echo(f"⚠ Could not save metrics summary: {e}")
692
+ else:
693
+ click.echo("\n✓ Dry run complete (no data saved)")
694
+
695
+ except Exception as e:
696
+ logger.error(f"Failed to run experiment: {e}")
697
+ click.echo(f"Error: {e}", err=True)
698
+ raise click.Abort()
699
+
700
+
701
+ # =============================================================================
702
+ # DATASET COMMANDS
703
+ # =============================================================================
704
+
705
+
706
@experiments.group()
def dataset():
    """Dataset management commands."""
    # Pure container group: it does no work itself. The list/create/add
    # subcommands register on it through @dataset.command(...).
710
+
711
+
712
@dataset.command("list")
def dataset_list():
    """List all datasets.

    Example:
        rem experiments dataset list
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # maintainer-facing notes live in comments instead.
    #
    # Prints one row per dataset (name, example count, creation date).
    # Any failure is logged, echoed to stderr and converted to click.Abort.
    from rem.services.phoenix import PhoenixClient

    try:
        client = PhoenixClient()
        datasets = client.list_datasets()

        if not datasets:
            click.echo("No datasets found")
            return

        click.echo(f"\nDatasets ({len(datasets)} total):\n")
        click.echo(f"{'Name':<40} {'Examples':>10} {'Created':<12}")
        click.echo("-" * 65)

        for ds in datasets:
            # Coerce before slicing/formatting: a key can be present with a
            # None value (dict.get's default does not apply then), and a
            # timestamp may not be a plain string. Slicing None or a
            # non-string raises TypeError and would abort the whole listing.
            name = str(ds.get("name") or "")[:40]
            count = ds.get("example_count") or 0
            created = str(ds.get("created_at") or "")[:10]
            click.echo(f"{name:<40} {count:>10} {created:<12}")

    except Exception as e:
        logger.error(f"Failed to list datasets: {e}")
        click.echo(f"Error: {e}", err=True)
        # Chain the cause so the original traceback survives in debug output.
        raise click.Abort() from e
743
+
744
+
745
@dataset.command("create")
@click.argument("name")
@click.option("--from-csv", type=click.Path(exists=True, path_type=Path), help="Create from CSV file")
@click.option("--input-keys", help="Comma-separated input column names")
@click.option("--output-keys", help="Comma-separated output column names (reference/ground truth)")
@click.option("--metadata-keys", help="Comma-separated metadata column names (difficulty, type, etc.)")
@click.option("--description", help="Dataset description")
def dataset_create(
    name: str,
    from_csv: Optional[Path],
    input_keys: Optional[str],
    output_keys: Optional[str],
    metadata_keys: Optional[str],
    description: Optional[str],
):
    """Create a dataset (golden set).

    Two modes:
    1. From CSV: --from-csv golden.csv --input-keys query --output-keys expected
    2. Manual (empty): Will create empty dataset to populate later

    Examples:
        # From CSV (SME golden set)
        rem experiments dataset create rem-lookup-golden \\
            --from-csv golden-lookup.csv \\
            --input-keys query \\
            --output-keys expected_label,expected_type \\
            --metadata-keys difficulty,query_type

        # Empty dataset (populate later)
        rem experiments dataset create rem-test --description "Test dataset"
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # maintainer-facing notes live in comments instead.
    #
    # With --from-csv, both --input-keys and --output-keys are mandatory and
    # the dataset is built from the CSV columns they name. Without it, an
    # empty dataset is created. Every failure ends in click.Abort.
    from rem.services.phoenix import PhoenixClient

    try:
        client = PhoenixClient()

        if from_csv:
            # Create from CSV
            if not input_keys or not output_keys:
                click.echo("Error: --input-keys and --output-keys required for CSV", err=True)
                raise click.Abort()

            # Named `created` rather than `dataset` so the local does not
            # shadow the module-level `dataset` click group.
            created = client.create_dataset_from_csv(
                name=name,
                csv_file_path=from_csv,
                input_keys=input_keys.split(","),
                output_keys=output_keys.split(","),
                metadata_keys=metadata_keys.split(",") if metadata_keys else None,
                description=description,
            )

            click.echo(f"✓ Created dataset '{created.name}' from CSV with {len(created)} examples")

        else:
            # Create empty dataset
            created = client.create_dataset_from_data(
                name=name,
                inputs=[],
                outputs=[],
                description=description,
            )

            click.echo(f"✓ Created empty dataset '{created.name}'")
            click.echo("  Use 'rem experiments dataset add' to add examples")

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch the validation abort, log a bogus
        # "Failed to create dataset:" and print an empty "Error:" line.
        # The validation branch has already reported its own message.
        raise
    except Exception as e:
        logger.error(f"Failed to create dataset: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
815
+
816
+
817
@dataset.command("add")
@click.argument("dataset_name")
@click.option("--from-csv", type=click.Path(exists=True, path_type=Path), required=True,
              help="CSV file with examples")
@click.option("--input-keys", required=True, help="Comma-separated input column names")
@click.option("--output-keys", required=True, help="Comma-separated output column names")
@click.option("--metadata-keys", help="Comma-separated metadata column names")
def dataset_add(
    dataset_name: str,
    from_csv: Path,
    input_keys: str,
    output_keys: str,
    metadata_keys: Optional[str],
):
    """Add examples to an existing dataset.

    Example:
        rem experiments dataset add rem-lookup-golden \\
            --from-csv new-examples.csv \\
            --input-keys query \\
            --output-keys expected_label,expected_type
    """
    # The docstring doubles as click's --help text; implementation notes are
    # kept in comments.
    from rem.services.phoenix import PhoenixClient
    import pandas as pd

    try:
        client = PhoenixClient()
        frame = pd.read_csv(from_csv)

        def _records(keys: str) -> list[dict[str, Any]]:
            # Select the comma-separated columns and emit one dict per CSV row.
            return cast(list[dict[str, Any]], frame[keys.split(",")].to_dict("records"))

        inputs = _records(input_keys)
        outputs = _records(output_keys)
        # Metadata columns are optional; None means "no metadata supplied".
        metadata = _records(metadata_keys) if metadata_keys else None

        updated = client.add_examples_to_dataset(
            dataset=dataset_name,
            inputs=inputs,
            outputs=outputs,
            metadata=metadata,
        )

        click.echo(f"✓ Added {len(inputs)} examples to dataset '{updated.name}'")
        click.echo(f"  Total examples: {len(updated)}")

    except Exception as exc:
        # Top-level CLI boundary: log, tell the user, then abort the command.
        logger.error(f"Failed to add examples: {exc}")
        click.echo(f"Error: {exc}", err=True)
        raise click.Abort()
870
+
871
+
872
+ # =============================================================================
873
+ # PROMPT COMMANDS
874
+ # =============================================================================
875
+
876
+
877
@experiments.group()
def prompt():
    """Prompt management commands."""
    # Pure container group: the create/list subcommands register on it
    # through @prompt.command(...).
881
+
882
+
883
@prompt.command("create")
@click.argument("name")
@click.option("--system-prompt", "-s", required=True, help="System prompt text")
@click.option("--description", "-d", help="Prompt description")
@click.option("--model-provider", default="OPENAI", help="Model provider (OPENAI, ANTHROPIC)")
@click.option("--model-name", "-m", help="Model name (e.g., gpt-4o, claude-sonnet-4-5)")
@click.option("--type", "-t", "prompt_type", default="Agent", help="Prompt type (Agent or Evaluator)")
def prompt_create(
    name: str,
    system_prompt: str,
    description: Optional[str],
    model_provider: str,
    model_name: Optional[str],
    prompt_type: str,
):
    """Create a prompt.

    Examples:
        # Create agent prompt
        rem experiments prompt create hello-world \\
            --system-prompt "You are a helpful assistant." \\
            --model-name gpt-4o

        # Create evaluator prompt
        rem experiments prompt create correctness-evaluator \\
            --system-prompt "Evaluate the correctness of responses." \\
            --type Evaluator \\
            --model-provider ANTHROPIC \\
            --model-name claude-sonnet-4-5
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # maintainer-facing notes live in comments instead.
    #
    # Flow: create the prompt through the Phoenix client, then (best effort)
    # look the prompt up over GraphQL and tag it with "REM" plus the prompt
    # type. A labelling failure only prints a warning; a creation failure
    # aborts the command with click.Abort.
    import json

    from rem.services.phoenix import PhoenixClient
    from rem.services.phoenix.prompt_labels import PhoenixPromptLabels
    from phoenix.client import Client
    from phoenix.client.types.prompts import PromptVersion
    from phoenix.client.__generated__ import v1

    try:
        # Set default model if not specified
        if not model_name:
            model_name = "gpt-4o" if model_provider == "OPENAI" else "claude-sonnet-4-5-20250929"

        # Get config
        phoenix_client = PhoenixClient()
        config = phoenix_client.config

        # Create client
        client = Client(
            base_url=config.base_url,
            api_key=config.api_key,
        )

        # Create prompt messages
        messages = [
            v1.PromptMessage(
                role="system",
                content=system_prompt,
            )
        ]

        # Create PromptVersion
        version = PromptVersion(
            messages,
            model_name=model_name,
            description="v1.0",
            model_provider=model_provider,  # type: ignore[arg-type]
        )

        # Create the prompt
        result = client.prompts.create(
            name=name,
            version=version,
            prompt_description=description or f"{prompt_type} prompt: {name}",
        )

        click.echo(f"✓ Created prompt '{name}' (ID: {result.id})")

        # Try to get the prompt ID for label assignment (best effort).
        try:
            import httpx

            # Check the URL before it is used to build the request; checking
            # afterwards would first attempt a request to "None/graphql".
            if not config.base_url:
                raise ValueError("Phoenix base_url is required")

            # json.dumps yields a quoted, escaped literal that is also a valid
            # GraphQL string, so quotes or backslashes in the user-supplied
            # name can no longer break out of the literal or alter the query.
            query = """
            query {
              prompts(first: 1, filterBy: {name: {equals: %s}}) {
                edges {
                  node {
                    id
                    name
                  }
                }
              }
            }
            """ % json.dumps(name, ensure_ascii=False)

            # Only send credentials when a key exists; otherwise the header
            # would literally read "Bearer None".
            headers = {"authorization": f"Bearer {config.api_key}"} if config.api_key else {}

            response = httpx.post(
                f"{config.base_url}/graphql",
                json={"query": query},
                headers=headers,
                timeout=10,
            )
            # Surface HTTP failures as such instead of as a JSON decode error.
            response.raise_for_status()
            graphql_result = response.json()

            # GraphQL reports failures with `"data": null`; `or {}` keeps the
            # lookup chain from raising AttributeError on None.
            data = graphql_result.get("data") or {}
            prompts = (data.get("prompts") or {}).get("edges") or []

            if prompts:
                prompt_id = prompts[0]["node"]["id"]

                # Assign labels
                labels_helper = PhoenixPromptLabels(
                    base_url=config.base_url, api_key=config.api_key
                )

                # Assign REM + type label
                label_names = ["REM", prompt_type]
                labels_helper.assign_prompt_labels(prompt_id, label_names)
                click.echo(f"✓ Assigned labels: {', '.join(label_names)}")
        except Exception as label_err:
            # The prompt itself was created; labelling is optional.
            click.echo(f"⚠ Warning: Could not assign labels: {label_err}")

        click.echo(f"\nView in UI: {config.base_url}")

    except Exception as e:
        logger.error(f"Failed to create prompt: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
1007
+
1008
+
1009
@prompt.command("list")
def prompt_list():
    """List all prompts.

    Example:
        rem experiments prompt list
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # maintainer-facing notes live in comments instead.
    #
    # Fetches up to 100 prompts from the Phoenix GraphQL endpoint and prints
    # name + creation time. Any failure is logged, echoed to stderr and
    # converted to click.Abort.
    import httpx
    from rem.services.phoenix import PhoenixClient

    try:
        phoenix_client = PhoenixClient()
        config = phoenix_client.config

        query = """
        query {
          prompts(first: 100) {
            edges {
              node {
                id
                name
                description
                createdAt
              }
            }
          }
        }
        """

        # Only send credentials when a key exists; otherwise the header would
        # literally read "Bearer None".
        headers = {"authorization": f"Bearer {config.api_key}"} if config.api_key else {}

        response = httpx.post(
            f"{config.base_url}/graphql",
            json={"query": query},
            headers=headers,
            timeout=10,
        )
        # Report HTTP-level failures (401, 500, ...) as such rather than as
        # a JSON decode error or a misleading "No prompts found".
        response.raise_for_status()

        result = response.json()

        # GraphQL signals failure with an "errors" list and `"data": null`.
        # Raise with the server's message instead of tripping over None.
        data = result.get("data")
        if result.get("errors") and not data:
            raise RuntimeError(f"Phoenix GraphQL query failed: {result['errors']}")

        prompts = ((data or {}).get("prompts") or {}).get("edges") or []

        if not prompts:
            click.echo("No prompts found")
            return

        click.echo(f"\nPrompts ({len(prompts)} total):\n")
        click.echo(f"{'Name':<40} {'Created':<20}")
        click.echo("-" * 65)

        for edge in prompts:
            node = edge["node"]
            # A field can be present with a null value; coerce before slicing
            # so one odd record does not abort the whole listing.
            name = str(node.get("name") or "")[:40]
            created = str(node.get("createdAt") or "")[:19]
            click.echo(f"{name:<40} {created:<20}")

    except Exception as e:
        logger.error(f"Failed to list prompts: {e}")
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
1066
+
1067
+
1068
+ # =============================================================================
1069
+ # TRACE COMMANDS
1070
+ # =============================================================================
1071
+
1072
+
1073
@experiments.group()
def trace():
    """Trace retrieval commands."""
    # Pure container group: subcommands register on it through
    # @trace.command(...).
1077
+
1078
+
1079
@trace.command("list")
@click.option("--project", "-p", help="Filter by project name")
@click.option("--days", "-d", default=7, help="Number of days to look back")
@click.option("--limit", "-l", default=20, help="Maximum traces to return")
def trace_list(
    project: Optional[str],
    days: int,
    limit: int,
):
    """List recent traces.

    Example:
        rem experiments trace list --project rem-agents --days 7 --limit 50
    """
    # The docstring doubles as click's --help text; implementation notes are
    # kept in comments.
    from rem.services.phoenix import PhoenixClient
    from datetime import datetime, timedelta

    # (column in the traces frame, max characters shown)
    shown_columns = (("context.span_id", 12), ("name", 30), ("start_time", 19))

    try:
        phoenix = PhoenixClient()

        # Look-back window: everything that started within the last `days` days.
        window_start = datetime.now() - timedelta(days=days)
        traces_df = phoenix.get_traces(
            project_name=project,
            start_time=window_start,
            limit=limit,
        )

        if len(traces_df) == 0:
            click.echo("No traces found")
            return

        click.echo(f"\nRecent Traces ({len(traces_df)} results):\n")
        click.echo(f"{'Span ID':<15} {'Name':<30} {'Start Time':<20}")
        click.echo("-" * 70)

        # head(limit) caps the printed rows even if more rows came back.
        for _, span in traces_df.head(limit).iterrows():
            span_id, name, started = (
                str(span.get(column, ""))[:width] for column, width in shown_columns
            )
            click.echo(f"{span_id:<15} {name:<30} {started:<20}")

    except Exception as exc:
        # Top-level CLI boundary: log, tell the user, then abort the command.
        logger.error(f"Failed to list traces: {exc}")
        click.echo(f"Error: {exc}", err=True)
        raise click.Abort()