remdb-0.2.6-py3-none-any.whl → remdb-0.3.103-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (82):
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
@@ -5,11 +5,21 @@ Experiments use ExperimentConfig (rem/models/core/experiment.py) for configurati
5
5
  and support Git+S3 hybrid storage. Includes dataset, prompt, and trace management.
6
6
 
7
7
  Directory Structure:
8
- .experiments/{experiment-name}/
9
- ├── experiment.yaml # ExperimentConfig
10
- ├── README.md # Auto-generated docs
11
- ├── datasets/ # Optional: small datasets
12
- └── results/ # Optional: metrics summaries
8
+ experiments/{experiment-name}/
9
+ ├── experiment.yaml # ExperimentConfig (metadata, agent ref, evaluator ref)
10
+ ├── README.md # Auto-generated documentation
11
+ ├── ground-truth/ # Evaluation datasets (Q&A pairs)
12
+ │ ├── dataset.csv # Input/output pairs for evaluation
13
+ │ └── dataset.yaml # Alternative YAML format
14
+ ├── seed-data/ # Data to seed REM before running experiments
15
+ │ └── data.yaml # Users, resources, moments in REM format
16
+ └── results/ # Experiment results and metrics
17
+ └── {run-timestamp}/ # Each run gets its own timestamped folder
18
+ ├── metrics.json # Summary metrics
19
+ └── run_info.json # Run metadata (eval framework URLs, etc)
20
+
21
+ Environment Variables:
22
+ EXPERIMENTS_HOME: Override default experiment directory (default: "experiments")
13
23
 
14
24
  Commands:
15
25
  # Experiment lifecycle
@@ -60,7 +70,7 @@ def experiments():
60
70
  @click.option("--results-location", type=click.Choice(["git", "s3", "hybrid"]), default="git",
61
71
  help="Where to store results")
62
72
  @click.option("--tags", help="Comma-separated tags (e.g., 'production,cv-parser')")
63
- @click.option("--base-path", default=".experiments", help="Base directory for experiments")
73
+ @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
64
74
  def create(
65
75
  name: str,
66
76
  agent: str,
@@ -69,12 +79,17 @@ def create(
69
79
  dataset_location: str,
70
80
  results_location: str,
71
81
  tags: Optional[str],
72
- base_path: str,
82
+ base_path: Optional[str],
73
83
  ):
74
84
  """Create a new experiment configuration.
75
85
 
76
86
  Creates directory structure and generates experiment.yaml and README.md.
77
87
 
88
+ The experiment directory will contain:
89
+ - ground-truth/: Q&A pairs for evaluation
90
+ - seed-data/: REM data (users, resources, moments) to load before running
91
+ - results/: Timestamped run results
92
+
78
93
  Examples:
79
94
  # Small experiment (Git-only)
80
95
  rem experiments create hello-world-validation \\
@@ -90,6 +105,9 @@ def create(
90
105
  --dataset-location s3 \\
91
106
  --results-location hybrid \\
92
107
  --tags "production,cv-parser,weekly"
108
+
109
+ # Custom location
110
+ EXPERIMENTS_HOME=/path/to/experiments rem experiments create my-test --agent my-agent
93
111
  """
94
112
  from rem.models.core.experiment import (
95
113
  ExperimentConfig,
@@ -99,15 +117,19 @@ def create(
99
117
  ResultsConfig,
100
118
  ExperimentStatus,
101
119
  )
120
+ import os
102
121
 
103
122
  try:
123
+ # Resolve base path: CLI arg > EXPERIMENTS_HOME env var > default "experiments"
124
+ if base_path is None:
125
+ base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
104
126
  # Build dataset reference
105
127
  if dataset_location == "git":
106
128
  dataset_ref = DatasetReference(
107
129
  location=DatasetLocation.GIT,
108
- path="datasets/ground_truth.csv",
130
+ path="ground-truth/dataset.csv",
109
131
  format="csv",
110
- description="Ground truth dataset for evaluation"
132
+ description="Ground truth Q&A dataset for evaluation"
111
133
  )
112
134
  else: # s3 or hybrid
113
135
  dataset_ref = DatasetReference(
@@ -168,26 +190,167 @@ def create(
168
190
  config_path = config.save(base_path)
169
191
  readme_path = config.save_readme(base_path)
170
192
 
171
- # Create datasets directory
172
- datasets_dir = config.get_experiment_dir(base_path) / "datasets"
173
- datasets_dir.mkdir(parents=True, exist_ok=True)
193
+ # Create new directory structure
194
+ exp_dir = config.get_experiment_dir(base_path)
195
+
196
+ # Create ground-truth directory
197
+ ground_truth_dir = exp_dir / "ground-truth"
198
+ ground_truth_dir.mkdir(parents=True, exist_ok=True)
199
+
200
+ # Create seed-data directory
201
+ seed_data_dir = exp_dir / "seed-data"
202
+ seed_data_dir.mkdir(parents=True, exist_ok=True)
174
203
 
175
204
  # Create results directory if Git-based
176
205
  if results_location == "git":
177
- results_dir = config.get_experiment_dir(base_path) / "results"
206
+ results_dir = exp_dir / "results"
178
207
  results_dir.mkdir(parents=True, exist_ok=True)
179
208
 
209
+ # Create placeholder files with documentation
210
+ ground_truth_readme = ground_truth_dir / "README.md"
211
+ ground_truth_readme.write_text("""# Ground Truth Dataset
212
+
213
+ This directory contains Q&A pairs for evaluating the agent.
214
+
215
+ ## Format
216
+
217
+ **CSV format** (`dataset.csv`):
218
+ ```csv
219
+ input,expected_output,metadata
220
+ "What is the capital of France?","Paris","{\"difficulty\": \"easy\"}"
221
+ ```
222
+
223
+ **YAML format** (`dataset.yaml`):
224
+ ```yaml
225
+ - input: "What is the capital of France?"
226
+ expected_output: "Paris"
227
+ metadata:
228
+ difficulty: easy
229
+ ```
230
+
231
+ ## Generating Ground Truth
232
+
233
+ ### Using AI Assistants
234
+
235
+ AI coding assistants (like Claude, GPT-4, etc.) can help generate comprehensive ground-truth datasets:
236
+
237
+ 1. **Generate from existing examples**: Show the assistant examples from your domain and ask it to create similar Q&A pairs
238
+ 2. **Create challenging questions**: Ask the assistant to act as a judge and generate HARD questions that test edge cases
239
+ 3. **Vary difficulty levels**: Request a mix of easy, medium, and hard questions with appropriate metadata tags
240
+
241
+ Example prompt:
242
+ ```
243
+ Based on these example documents about [your domain], generate 20 Q&A pairs
244
+ for evaluating an agent. Include:
245
+ - 5 easy factual questions
246
+ - 10 medium questions requiring reasoning
247
+ - 5 hard questions with edge cases
248
+ Format as CSV with difficulty and category metadata.
249
+ ```
250
+
251
+ ### Ground Truth as Judge
252
+
253
+ **Important**: Keep ground-truth data **separate** from the agent being tested:
254
+ - Ground truth should be hidden from the agent during evaluation
255
+ - The agent should only see the `input` field
256
+ - The evaluator compares agent output against `expected_output`
257
+ - This ensures unbiased evaluation
258
+
259
+ ### Quality Guidelines
260
+
261
+ 1. **Diverse Coverage**: Include various question types and difficulty levels
262
+ 2. **Domain-Specific**: Use terminology and scenarios from your actual use case
263
+ 3. **Metadata Tags**: Add difficulty, category, priority for analysis
264
+ 4. **SME Review**: Have domain experts validate expected outputs
265
+
266
+ ## Usage
267
+
268
+ These datasets can be:
269
+ - Loaded into evaluation frameworks (Arize Phoenix, etc.)
270
+ - Used for regression testing
271
+ - Converted to different formats as needed
272
+
273
+ The experiment runner will automatically use this data for evaluation.
274
+ """)
275
+
276
+ seed_data_readme = seed_data_dir / "README.md"
277
+ seed_data_readme.write_text("""# Seed Data
278
+
279
+ This directory contains REM data to load before running the experiment.
280
+
281
+ ## Format
282
+
283
+ Use standard REM YAML format:
284
+
285
+ ```yaml
286
+ users:
287
+ - id: test-user-001
288
+ user_id: experiment-test
289
+ email: test@example.com
290
+
291
+ resources:
292
+ - id: resource-001
293
+ user_id: experiment-test
294
+ label: example-document
295
+ content: "Document content here..."
296
+
297
+ moments:
298
+ - id: moment-001
299
+ user_id: experiment-test
300
+ label: example-meeting
301
+ starts_timestamp: "2024-01-15T14:00:00"
302
+ ```
303
+
304
+ ## Generating Seed Data
305
+
306
+ ### Using AI Assistants
307
+
308
+ AI coding assistants can help generate realistic seed data for your experiments:
309
+
310
+ 1. **From existing datasets**: Reference examples from the `datasets/` directory
311
+ 2. **Domain-specific scenarios**: Describe your use case and ask for appropriate test data
312
+ 3. **Anonymized versions**: Ask to create fictional data based on real patterns
313
+
314
+ Example prompt:
315
+ ```
316
+ Based on the recruitment dataset examples in datasets/domains/recruitment/,
317
+ generate seed data for testing a CV parser agent. Include:
318
+ - 3 test users
319
+ - 5 CV documents (resources) with varied experience levels
320
+ - 2 interview moment entries
321
+ Use fictional names and anonymize all content.
322
+ ```
323
+
324
+ ### Best Practices
325
+
326
+ 1. **Minimal**: Only include data necessary for the ground-truth questions to be answerable
327
+ 2. **Anonymized**: Always use fictional names, companies, and content
328
+ 3. **Relevant**: Seed data should provide context for evaluation questions
329
+ 4. **Versioned**: Track changes to seed data in Git for reproducibility
330
+
331
+ ## Usage
332
+
333
+ Load this data before running experiments:
334
+ ```bash
335
+ rem db load --file seed-data/data.yaml --user-id experiment-test
336
+ ```
337
+
338
+ This ensures your agent has the necessary context for evaluation.
339
+ """)
340
+
180
341
  click.echo(f"\n✓ Created experiment: {name}")
181
342
  click.echo(f" Configuration: {config_path}")
182
343
  click.echo(f" Documentation: {readme_path}")
183
- click.echo(f" Datasets: {datasets_dir}")
344
+ click.echo(f" Ground Truth: {ground_truth_dir}")
345
+ click.echo(f" Seed Data: {seed_data_dir}")
184
346
  if results_location == "git":
185
347
  click.echo(f" Results: {results_dir}")
186
348
  click.echo(f"\nNext steps:")
187
- click.echo(f" 1. Add dataset to {datasets_dir}/")
188
- click.echo(f" 2. Review configuration: {config_path}")
189
- click.echo(f" 3. Run experiment: rem experiments run {name}")
190
- click.echo(f" 4. Commit to Git: git add .experiments/{name}/ && git commit")
349
+ click.echo(f" 1. Add ground truth Q&A to {ground_truth_dir}/dataset.csv")
350
+ click.echo(f" 2. Add seed data to {seed_data_dir}/data.yaml (optional)")
351
+ click.echo(f" 3. Review configuration: {config_path}")
352
+ click.echo(f" 4. Run experiment: rem experiments run {name}")
353
+ click.echo(f" 5. Commit to Git: git add {base_path}/{name}/ && git commit")
191
354
 
192
355
  except Exception as e:
193
356
  logger.error(f"Failed to create experiment: {e}")
@@ -201,11 +364,11 @@ def create(
201
364
 
202
365
 
203
366
  @experiments.command("list")
204
- @click.option("--base-path", default=".experiments", help="Base directory for experiments")
367
+ @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
205
368
  @click.option("--status", help="Filter by status (draft, ready, completed, etc.)")
206
369
  @click.option("--tags", help="Filter by tags (comma-separated)")
207
370
  def list_experiments(
208
- base_path: str,
371
+ base_path: Optional[str],
209
372
  status: Optional[str],
210
373
  tags: Optional[str],
211
374
  ):
@@ -217,8 +380,13 @@ def list_experiments(
217
380
  rem experiments list --tags production,cv-parser
218
381
  """
219
382
  from rem.models.core.experiment import ExperimentConfig, ExperimentStatus
383
+ import os
220
384
 
221
385
  try:
386
+ # Resolve base path
387
+ if base_path is None:
388
+ base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
389
+
222
390
  experiments_dir = Path(base_path)
223
391
  if not experiments_dir.exists():
224
392
  click.echo(f"No experiments directory found at {base_path}")
@@ -279,16 +447,21 @@ def list_experiments(
279
447
 
280
448
  @experiments.command("show")
281
449
  @click.argument("name")
282
- @click.option("--base-path", default=".experiments", help="Base directory for experiments")
283
- def show(name: str, base_path: str):
450
+ @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
451
+ def show(name: str, base_path: Optional[str]):
284
452
  """Show experiment details.
285
453
 
286
454
  Examples:
287
455
  rem experiments show hello-world-validation
288
456
  """
289
457
  from rem.models.core.experiment import ExperimentConfig
458
+ import os
290
459
 
291
460
  try:
461
+ # Resolve base path
462
+ if base_path is None:
463
+ base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
464
+
292
465
  config_path = Path(base_path) / name / "experiment.yaml"
293
466
  if not config_path.exists():
294
467
  click.echo(f"Experiment not found: {name}")
@@ -348,7 +521,7 @@ def show(name: str, base_path: str):
348
521
 
349
522
  @experiments.command("run")
350
523
  @click.argument("name")
351
- @click.option("--base-path", default=".experiments", help="Base directory for experiments")
524
+ @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
352
525
  @click.option("--version", help="Git tag version to load (e.g., 'experiments/my-exp/v1.0.0')")
353
526
  @click.option("--dry-run", is_flag=True, help="Test on small subset without saving")
354
527
  @click.option("--update-prompts", is_flag=True, help="Update prompts in Phoenix before running")
@@ -356,7 +529,7 @@ def show(name: str, base_path: str):
356
529
  @click.option("--phoenix-api-key", help="Phoenix API key (overrides PHOENIX_API_KEY env var)")
357
530
  def run(
358
531
  name: str,
359
- base_path: str,
532
+ base_path: Optional[str],
360
533
  version: Optional[str],
361
534
  dry_run: bool,
362
535
  update_prompts: bool,
@@ -405,10 +578,14 @@ def run(
405
578
  from rem.services.git import GitService
406
579
  from rem.services.phoenix import PhoenixClient
407
580
  from rem.agentic.providers.phoenix import create_evaluator_from_schema
408
- from datetime import datetime
409
- import pandas as pd
581
+ from rem.utils.date_utils import utc_now, to_iso, format_timestamp_for_experiment
582
+ import os
410
583
 
411
584
  try:
585
+ # Resolve base path
586
+ if base_path is None:
587
+ base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
588
+
412
589
  # Load experiment configuration
413
590
  if version:
414
591
  # Load from Git at specific version
@@ -437,36 +614,22 @@ def run(
437
614
  click.echo(f" Mode: DRY RUN (no data will be saved)")
438
615
  click.echo()
439
616
 
440
- # Load agent schema from Git or filesystem
617
+ # Load agent schema using centralized schema loader
441
618
  agent_name = config.agent_schema_ref.name
442
619
  agent_version = config.agent_schema_ref.version
443
620
 
444
621
  click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")
445
622
 
446
- # Try Git first, fallback to filesystem
447
- agent_schema = None
448
- try:
449
- git_svc = GitService()
450
- agent_schema = git_svc.load_schema(agent_name, version=agent_version)
451
- click.echo(f"✓ Loaded agent schema from Git")
452
- except Exception as e:
453
- logger.debug(f"Git not available, trying filesystem: {e}")
454
-
455
- # Fallback to local filesystem
456
- from rem.services.fs import FS
457
- fs = FS()
623
+ from rem.utils.schema_loader import load_agent_schema
458
624
 
459
- schema_path = f"schemas/agents/{agent_name}.yaml"
460
- try:
461
- agent_schema = fs.read(schema_path)
462
- click.echo(f"✓ Loaded agent schema from filesystem")
463
- except Exception as fs_error:
464
- logger.error(f"Failed to load agent schema: Git: {e}, FS: {fs_error}")
465
- click.echo(f"Error: Could not load agent schema '{agent_name}'")
466
- click.echo(f" Tried Git: {e}")
467
- click.echo(f" Tried filesystem: {schema_path}")
468
- click.echo(f" Make sure the schema exists")
469
- raise click.Abort()
625
+ try:
626
+ agent_schema = load_agent_schema(agent_name)
627
+ click.echo(f"✓ Loaded agent schema: {agent_name}")
628
+ except FileNotFoundError as e:
629
+ logger.error(f"Failed to load agent schema: {e}")
630
+ click.echo(f"Error: Could not load agent schema '{agent_name}'")
631
+ click.echo(f" {e}")
632
+ raise click.Abort()
470
633
 
471
634
  # Create agent function from schema
472
635
  from rem.agentic.providers.pydantic_ai import create_agent
@@ -505,73 +668,85 @@ def run(
505
668
  return {"output": serialized}
506
669
  return serialized if isinstance(serialized, dict) else {"output": str(serialized)}
507
670
 
508
- # Load evaluator schema
671
+ # Load evaluator schema using centralized schema loader
509
672
  evaluator_name = config.evaluator_schema_ref.name
510
673
  evaluator_version = config.evaluator_schema_ref.version
511
674
 
512
- # Resolve evaluator path (evaluators are organized by agent name)
513
- evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
514
-
515
675
  click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
516
676
 
517
- try:
518
- evaluator_fn = create_evaluator_from_schema(
519
- evaluator_schema_path=evaluator_schema_path,
520
- model_name=None, # Use default from schema
521
- )
522
- click.echo(f"✓ Loaded evaluator schema")
523
- except Exception as e:
524
- logger.warning(f"Failed to load evaluator: {e}")
525
- click.echo(f"Error: Could not load evaluator schema")
526
- click.echo(f" Path: {evaluator_schema_path}")
527
- click.echo(f" Make sure the schema exists")
677
+ # Try multiple evaluator path patterns (agent-specific, then generic)
678
+ evaluator_paths_to_try = [
679
+ f"{agent_name}/{evaluator_name}", # e.g., hello-world/default
680
+ f"{agent_name}-{evaluator_name}", # e.g., hello-world-default
681
+ evaluator_name, # e.g., default (generic)
682
+ ]
683
+
684
+ evaluator_fn = None
685
+ evaluator_load_error = None
686
+
687
+ for evaluator_path in evaluator_paths_to_try:
688
+ try:
689
+ evaluator_fn = create_evaluator_from_schema(
690
+ evaluator_schema_path=evaluator_path,
691
+ model_name=None, # Use default from schema
692
+ )
693
+ click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
694
+ break
695
+ except FileNotFoundError as e:
696
+ evaluator_load_error = e
697
+ logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
698
+ continue
699
+ except Exception as e:
700
+ evaluator_load_error = e
701
+ logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
702
+ continue
703
+
704
+ if evaluator_fn is None:
705
+ click.echo(f"Error: Could not load evaluator schema '{evaluator_name}'")
706
+ click.echo(f" Tried paths: {evaluator_paths_to_try}")
707
+ if evaluator_load_error:
708
+ click.echo(f" Last error: {evaluator_load_error}")
528
709
  raise click.Abort()
529
710
 
530
- # Load dataset
711
+ # Load dataset using Polars
712
+ import polars as pl
713
+
531
714
  click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
532
715
  dataset_ref = list(config.datasets.values())[0]
533
716
 
534
717
  if dataset_ref.location.value == "git":
535
- # Load from Git
718
+ # Load from Git (local filesystem)
536
719
  dataset_path = Path(base_path) / name / dataset_ref.path
537
720
  if not dataset_path.exists():
538
721
  click.echo(f"Error: Dataset not found: {dataset_path}")
539
722
  raise click.Abort()
540
723
 
541
724
  if dataset_ref.format == "csv":
542
- dataset_df = pd.read_csv(dataset_path)
725
+ dataset_df = pl.read_csv(dataset_path)
543
726
  elif dataset_ref.format == "parquet":
544
- dataset_df = pd.read_parquet(dataset_path)
727
+ dataset_df = pl.read_parquet(dataset_path)
545
728
  elif dataset_ref.format == "jsonl":
546
- dataset_df = pd.read_json(dataset_path, lines=True)
729
+ dataset_df = pl.read_ndjson(dataset_path)
547
730
  else:
548
731
  click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
549
732
  raise click.Abort()
550
733
  elif dataset_ref.location.value in ["s3", "hybrid"]:
551
734
  # Load from S3 using FS provider
552
735
  from rem.services.fs import FS
736
+ from io import BytesIO
553
737
 
554
738
  fs = FS()
555
739
 
556
740
  try:
557
741
  if dataset_ref.format == "csv":
558
742
  content = fs.read(dataset_ref.path)
559
- from io import StringIO
560
- dataset_df = pd.read_csv(StringIO(content))
743
+ dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
561
744
  elif dataset_ref.format == "parquet":
562
- # For parquet, we need binary read
563
- import tempfile
564
- with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
565
- tmp_path = tmp.name
566
- # Download via FS
567
- content_bytes = fs.read(dataset_ref.path)
568
- tmp.write(content_bytes)
569
- dataset_df = pd.read_parquet(tmp_path)
570
- Path(tmp_path).unlink() # Clean up temp file
745
+ content_bytes = fs.read(dataset_ref.path)
746
+ dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
571
747
  elif dataset_ref.format == "jsonl":
572
748
  content = fs.read(dataset_ref.path)
573
- from io import StringIO
574
- dataset_df = pd.read_json(StringIO(content), lines=True)
749
+ dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
575
750
  else:
576
751
  click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
577
752
  raise click.Abort()
@@ -615,13 +790,13 @@ def run(
615
790
 
616
791
  client = PhoenixClient(config=phoenix_config)
617
792
 
618
- experiment_name = f"{config.name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
793
+ experiment_name = f"{config.name}-{format_timestamp_for_experiment()}"
619
794
 
620
795
  click.echo(f"\n⏳ Running experiment: {experiment_name}")
621
796
  click.echo(f" This may take several minutes...")
622
797
 
623
798
  experiment = client.run_experiment(
624
- dataset=dataset_df, # type: ignore[arg-type]
799
+ dataset=dataset_df,
625
800
  task=task_fn,
626
801
  evaluators=[evaluator_fn],
627
802
  experiment_name=experiment_name,
@@ -631,12 +806,15 @@ def run(
631
806
  "evaluator": config.evaluator_schema_ref.name,
632
807
  "experiment_config": config.name,
633
808
  **config.metadata
634
- }
809
+ },
810
+ # Smart column detection for DataFrame -> Phoenix Dataset conversion
811
+ input_keys=["input"] if "input" in dataset_df.columns else None,
812
+ output_keys=["expected_output"] if "expected_output" in dataset_df.columns else None,
635
813
  )
636
814
 
637
815
  # Update experiment status
638
816
  config.status = ExperimentStatus.COMPLETED
639
- config.last_run_at = datetime.now()
817
+ config.last_run_at = utc_now()
640
818
  if not version: # Only save if not loading from Git
641
819
  config.save(base_path)
642
820
 
@@ -657,7 +835,7 @@ def run(
657
835
  "agent": config.agent_schema_ref.name,
658
836
  "evaluator": config.evaluator_schema_ref.name,
659
837
  "dataset_size": len(dataset_df),
660
- "completed_at": datetime.now().isoformat(),
838
+ "completed_at": to_iso(utc_now()),
661
839
  "phoenix_url": getattr(experiment, "url", None),
662
840
  "task_runs": len(exp_data.get("task_runs", [])),
663
841
  }
@@ -837,20 +1015,24 @@ def dataset_add(
837
1015
  --output-keys expected_label,expected_type
838
1016
  """
839
1017
  from rem.services.phoenix import PhoenixClient
840
- import pandas as pd
1018
+ import polars as pl
841
1019
 
842
1020
  try:
843
1021
  client = PhoenixClient()
844
1022
 
845
- # Load CSV
846
- df = pd.read_csv(from_csv)
1023
+ # Load CSV with Polars
1024
+ df = pl.read_csv(from_csv)
1025
+ records = df.to_dicts()
847
1026
 
848
1027
  # Extract data
849
- inputs = cast(list[dict[str, Any]], df[input_keys.split(",")].to_dict("records"))
850
- outputs = cast(list[dict[str, Any]], df[output_keys.split(",")].to_dict("records"))
1028
+ input_cols = input_keys.split(",")
1029
+ output_cols = output_keys.split(",")
1030
+ inputs = [{k: row.get(k) for k in input_cols} for row in records]
1031
+ outputs = [{k: row.get(k) for k in output_cols} for row in records]
851
1032
  metadata = None
852
1033
  if metadata_keys:
853
- metadata = cast(list[dict[str, Any]], df[metadata_keys.split(",")].to_dict("records"))
1034
+ meta_cols = metadata_keys.split(",")
1035
+ metadata = [{k: row.get(k) for k in meta_cols} for row in records]
854
1036
 
855
1037
  # Add to dataset
856
1038
  dataset = client.add_examples_to_dataset(
@@ -1091,12 +1273,12 @@ def trace_list(
1091
1273
  rem experiments trace list --project rem-agents --days 7 --limit 50
1092
1274
  """
1093
1275
  from rem.services.phoenix import PhoenixClient
1094
- from datetime import datetime, timedelta
1276
+ from rem.utils.date_utils import days_ago
1095
1277
 
1096
1278
  try:
1097
1279
  client = PhoenixClient()
1098
1280
 
1099
- start_time = datetime.now() - timedelta(days=days)
1281
+ start_time = days_ago(days)
1100
1282
 
1101
1283
  traces_df = client.get_traces(
1102
1284
  project_name=project,
@@ -192,15 +192,13 @@ def process_uri(uri: str, output: str, save: str | None):
192
192
 
193
193
 
194
194
  @click.command(name="files")
195
- @click.option("--tenant-id", required=True, help="Tenant ID")
196
- @click.option("--user-id", help="Filter by user ID")
195
+ @click.option("--user-id", default=None, help="User ID (default: from settings)")
197
196
  @click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
198
197
  @click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
199
198
  @click.option("--limit", type=int, help="Max files to process")
200
199
  @click.option("--provider", help="Optional LLM provider override")
201
200
  @click.option("--model", help="Optional model override")
202
201
  def process_files(
203
- tenant_id: str,
204
202
  user_id: Optional[str],
205
203
  status: Optional[str],
206
204
  extractor: Optional[str],
@@ -217,19 +215,22 @@ def process_files(
217
215
 
218
216
  \b
219
217
  # List completed files
220
- rem process files --tenant-id acme-corp --status completed
218
+ rem process files --status completed
221
219
 
222
220
  \b
223
221
  # Extract from CV files
224
- rem process files --tenant-id acme-corp --extractor cv-parser-v1 --limit 10
222
+ rem process files --extractor cv-parser-v1 --limit 10
225
223
 
226
224
  \b
227
225
  # Extract with provider override
228
- rem process files --tenant-id acme-corp --extractor contract-analyzer-v1 \\
226
+ rem process files --extractor contract-analyzer-v1 \\
229
227
  --provider anthropic --model claude-sonnet-4-5
230
228
  """
229
+ from ...settings import settings
230
+ effective_user_id = user_id or settings.test.effective_user_id
231
+
231
232
  logger.warning("Not implemented yet")
232
- logger.info(f"Would process files for tenant: {tenant_id}")
233
+ logger.info(f"Would process files for user: {effective_user_id}")
233
234
 
234
235
  if user_id:
235
236
  logger.info(f"Filter: user_id={user_id}")