remdb 0.3.114__py3-none-any.whl → 0.3.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +23 -3
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/pydantic_ai.py +26 -2
- rem/api/main.py +4 -1
- rem/api/mcp_router/server.py +9 -3
- rem/api/mcp_router/tools.py +324 -2
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +250 -4
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +35 -1
- rem/api/routers/feedback.py +134 -14
- rem/api/routers/query.py +6 -3
- rem/cli/commands/README.md +42 -0
- rem/cli/commands/cluster.py +617 -168
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +66 -22
- rem/cli/commands/experiments.py +242 -26
- rem/cli/commands/schema.py +6 -5
- rem/config.py +8 -1
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/diff_service.py +108 -3
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/session/compression.py +7 -0
- rem/settings.py +150 -18
- rem/sql/migrations/001_install.sql +156 -0
- rem/sql/migrations/002_install_models.sql +1864 -1
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/schema_loader.py +94 -3
- rem/utils/sql_paths.py +146 -0
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/METADATA +213 -177
- {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/RECORD +41 -36
- {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/WHEEL +0 -0
- {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
rem/cli/commands/configure.py
CHANGED
|
@@ -405,9 +405,7 @@ def configure_command(install: bool, claude_desktop: bool, show: bool, edit: boo
|
|
|
405
405
|
|
|
406
406
|
# Create a context for the command and invoke it
|
|
407
407
|
ctx = click.Context(migrate)
|
|
408
|
-
ctx.invoke(migrate,
|
|
409
|
-
background_indexes=False, connection=None,
|
|
410
|
-
sql_dir=Path("rem/sql"))
|
|
408
|
+
ctx.invoke(migrate, background_indexes=False)
|
|
411
409
|
|
|
412
410
|
click.echo("✅ Database installation complete")
|
|
413
411
|
|
rem/cli/commands/db.py
CHANGED
|
@@ -126,29 +126,51 @@ def migrate(background_indexes: bool):
|
|
|
126
126
|
async def _migrate_async(background_indexes: bool):
|
|
127
127
|
"""Async implementation of migrate command."""
|
|
128
128
|
from ...settings import settings
|
|
129
|
+
from ...utils.sql_paths import (
|
|
130
|
+
get_package_sql_dir,
|
|
131
|
+
get_user_sql_dir,
|
|
132
|
+
list_all_migrations,
|
|
133
|
+
)
|
|
129
134
|
|
|
130
135
|
click.echo()
|
|
131
136
|
click.echo("REM Database Migration")
|
|
132
137
|
click.echo("=" * 60)
|
|
133
138
|
|
|
134
|
-
# Find SQL directory
|
|
135
|
-
|
|
136
|
-
|
|
139
|
+
# Find package SQL directory
|
|
140
|
+
try:
|
|
141
|
+
package_sql_dir = get_package_sql_dir()
|
|
142
|
+
click.echo(f"Package SQL: {package_sql_dir}")
|
|
143
|
+
except FileNotFoundError as e:
|
|
144
|
+
click.secho(f"✗ {e}", fg="red")
|
|
145
|
+
raise click.Abort()
|
|
146
|
+
|
|
147
|
+
# Check for user migrations
|
|
148
|
+
user_sql_dir = get_user_sql_dir()
|
|
149
|
+
if user_sql_dir:
|
|
150
|
+
click.echo(f"User SQL: {user_sql_dir}")
|
|
151
|
+
|
|
152
|
+
# Get all migrations (package + user)
|
|
153
|
+
all_migrations = list_all_migrations()
|
|
154
|
+
|
|
155
|
+
if not all_migrations:
|
|
156
|
+
click.secho("✗ No migration files found", fg="red")
|
|
157
|
+
raise click.Abort()
|
|
137
158
|
|
|
138
|
-
click.echo(f"
|
|
159
|
+
click.echo(f"Found {len(all_migrations)} migration(s)")
|
|
139
160
|
click.echo()
|
|
140
161
|
|
|
141
|
-
#
|
|
142
|
-
|
|
143
|
-
(migrations_dir / "001_install.sql", "Core Infrastructure"),
|
|
144
|
-
(migrations_dir / "002_install_models.sql", "Entity Tables"),
|
|
145
|
-
]
|
|
162
|
+
# Add background indexes if requested
|
|
163
|
+
migrations_to_apply = [(f, f.stem) for f in all_migrations]
|
|
146
164
|
|
|
147
165
|
if background_indexes:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
166
|
+
bg_indexes = package_sql_dir / "background_indexes.sql"
|
|
167
|
+
if bg_indexes.exists():
|
|
168
|
+
migrations_to_apply.append((bg_indexes, "Background Indexes"))
|
|
169
|
+
else:
|
|
170
|
+
click.secho("⚠ background_indexes.sql not found, skipping", fg="yellow")
|
|
171
|
+
|
|
172
|
+
# Check all files exist (they should, but verify)
|
|
173
|
+
for file_path, description in migrations_to_apply:
|
|
152
174
|
if not file_path.exists():
|
|
153
175
|
click.secho(f"✗ {file_path.name} not found", fg="red")
|
|
154
176
|
if "002" in file_path.name:
|
|
@@ -162,8 +184,8 @@ async def _migrate_async(background_indexes: bool):
|
|
|
162
184
|
conn_str = settings.postgres.connection_string
|
|
163
185
|
total_time = 0.0
|
|
164
186
|
|
|
165
|
-
for file_path, description in
|
|
166
|
-
click.echo(f"Applying: {
|
|
187
|
+
for file_path, description in migrations_to_apply:
|
|
188
|
+
click.echo(f"Applying: {file_path.name}")
|
|
167
189
|
|
|
168
190
|
sql_content = file_path.read_text(encoding="utf-8")
|
|
169
191
|
start_time = time.time()
|
|
@@ -469,6 +491,13 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
|
|
|
469
491
|
is_flag=True,
|
|
470
492
|
help="Generate incremental migration file from diff",
|
|
471
493
|
)
|
|
494
|
+
@click.option(
|
|
495
|
+
"--strategy",
|
|
496
|
+
"-s",
|
|
497
|
+
type=click.Choice(["additive", "full", "safe"]),
|
|
498
|
+
default="additive",
|
|
499
|
+
help="Migration strategy: additive (no drops, default), full (all changes), safe (additive + type widenings)",
|
|
500
|
+
)
|
|
472
501
|
@click.option(
|
|
473
502
|
"--models",
|
|
474
503
|
"-m",
|
|
@@ -491,6 +520,7 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
|
|
|
491
520
|
def diff(
|
|
492
521
|
check: bool,
|
|
493
522
|
generate: bool,
|
|
523
|
+
strategy: str,
|
|
494
524
|
models: Path | None,
|
|
495
525
|
output_dir: Path | None,
|
|
496
526
|
message: str,
|
|
@@ -502,23 +532,30 @@ def diff(
|
|
|
502
532
|
- Your Pydantic models (the target schema)
|
|
503
533
|
- The current database (what's actually deployed)
|
|
504
534
|
|
|
535
|
+
Strategies:
|
|
536
|
+
additive Only ADD columns/tables/indexes (safe, no data loss) [default]
|
|
537
|
+
full All changes including DROPs (use with caution)
|
|
538
|
+
safe Additive + safe column type changes (widenings only)
|
|
539
|
+
|
|
505
540
|
Examples:
|
|
506
|
-
rem db diff
|
|
507
|
-
rem db diff --
|
|
508
|
-
rem db diff --generate
|
|
541
|
+
rem db diff # Show additive changes only
|
|
542
|
+
rem db diff --strategy full # Show all changes including drops
|
|
543
|
+
rem db diff --generate # Create migration file
|
|
544
|
+
rem db diff --check # CI mode: exit 1 if drift
|
|
509
545
|
|
|
510
546
|
Workflow:
|
|
511
547
|
1. Develop locally, modify Pydantic models
|
|
512
548
|
2. Run 'rem db diff' to see changes
|
|
513
549
|
3. Run 'rem db diff --generate' to create migration
|
|
514
|
-
4. Review generated SQL, then 'rem db
|
|
550
|
+
4. Review generated SQL, then 'rem db apply <file>'
|
|
515
551
|
"""
|
|
516
|
-
asyncio.run(_diff_async(check, generate, models, output_dir, message))
|
|
552
|
+
asyncio.run(_diff_async(check, generate, strategy, models, output_dir, message))
|
|
517
553
|
|
|
518
554
|
|
|
519
555
|
async def _diff_async(
|
|
520
556
|
check: bool,
|
|
521
557
|
generate: bool,
|
|
558
|
+
strategy: str,
|
|
522
559
|
models: Path | None,
|
|
523
560
|
output_dir: Path | None,
|
|
524
561
|
message: str,
|
|
@@ -529,9 +566,10 @@ async def _diff_async(
|
|
|
529
566
|
click.echo()
|
|
530
567
|
click.echo("REM Schema Diff")
|
|
531
568
|
click.echo("=" * 60)
|
|
569
|
+
click.echo(f"Strategy: {strategy}")
|
|
532
570
|
|
|
533
571
|
# Initialize diff service
|
|
534
|
-
diff_service = DiffService(models_dir=models)
|
|
572
|
+
diff_service = DiffService(models_dir=models, strategy=strategy)
|
|
535
573
|
|
|
536
574
|
try:
|
|
537
575
|
# Compute diff
|
|
@@ -543,10 +581,16 @@ async def _diff_async(
|
|
|
543
581
|
if not result.has_changes:
|
|
544
582
|
click.secho("✓ No schema drift detected", fg="green")
|
|
545
583
|
click.echo(" Database matches Pydantic models")
|
|
584
|
+
if result.filtered_count > 0:
|
|
585
|
+
click.echo()
|
|
586
|
+
click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
|
|
587
|
+
click.echo(" Use --strategy full to see all changes")
|
|
546
588
|
return
|
|
547
589
|
|
|
548
590
|
# Show changes
|
|
549
591
|
click.secho(f"⚠ Schema drift detected: {result.change_count} change(s)", fg="yellow")
|
|
592
|
+
if result.filtered_count > 0:
|
|
593
|
+
click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
|
|
550
594
|
click.echo()
|
|
551
595
|
click.echo("Changes:")
|
|
552
596
|
for line in result.summary:
|
|
@@ -581,7 +625,7 @@ async def _diff_async(
|
|
|
581
625
|
click.echo()
|
|
582
626
|
click.echo("Next steps:")
|
|
583
627
|
click.echo(" 1. Review the generated SQL file")
|
|
584
|
-
click.echo(" 2. Run: rem db
|
|
628
|
+
click.echo(" 2. Run: rem db apply <file>")
|
|
585
629
|
else:
|
|
586
630
|
click.echo("No migration file generated (no changes)")
|
|
587
631
|
|
rem/cli/commands/experiments.py
CHANGED
|
@@ -514,6 +514,159 @@ def show(name: str, base_path: Optional[str]):
|
|
|
514
514
|
raise click.Abort()
|
|
515
515
|
|
|
516
516
|
|
|
517
|
+
# =============================================================================
|
|
518
|
+
# VIBES MODE HELPER
|
|
519
|
+
# =============================================================================
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _run_vibes_mode(
|
|
523
|
+
config: Any,
|
|
524
|
+
dataset_df: Any,
|
|
525
|
+
task_fn: Any,
|
|
526
|
+
base_path: str,
|
|
527
|
+
limit: Optional[int],
|
|
528
|
+
evaluator_schema_path: Path,
|
|
529
|
+
) -> None:
|
|
530
|
+
"""Run experiment in vibes mode - execute agent and export for AI evaluation.
|
|
531
|
+
|
|
532
|
+
Vibes mode runs the agent on each example and saves results to a JSONL file.
|
|
533
|
+
The AI assistant (e.g., Claude Code) then acts as the judge using the
|
|
534
|
+
evaluator schema to evaluate results.
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
config: ExperimentConfig object
|
|
538
|
+
dataset_df: Polars DataFrame with ground truth examples
|
|
539
|
+
task_fn: Function to run agent on each example
|
|
540
|
+
base_path: Base directory for experiments
|
|
541
|
+
limit: Optional limit on number of examples to process
|
|
542
|
+
evaluator_schema_path: Path to the evaluator schema YAML file
|
|
543
|
+
"""
|
|
544
|
+
from rem.utils.date_utils import format_timestamp_for_experiment, utc_now, to_iso
|
|
545
|
+
import json
|
|
546
|
+
|
|
547
|
+
# Apply limit if specified
|
|
548
|
+
if limit:
|
|
549
|
+
dataset_df = dataset_df.head(limit)
|
|
550
|
+
click.echo(f" (Limited to {limit} examples)")
|
|
551
|
+
|
|
552
|
+
# Create results directory
|
|
553
|
+
timestamp = format_timestamp_for_experiment()
|
|
554
|
+
results_dir = Path(base_path) / config.name / "results" / timestamp
|
|
555
|
+
results_dir.mkdir(parents=True, exist_ok=True)
|
|
556
|
+
|
|
557
|
+
click.echo(f"\n⏳ Running agent on {len(dataset_df)} examples...")
|
|
558
|
+
click.echo(f" Results will be saved to: {results_dir}")
|
|
559
|
+
click.echo()
|
|
560
|
+
|
|
561
|
+
# Run agent on each example and collect results
|
|
562
|
+
results = []
|
|
563
|
+
records = dataset_df.to_dicts()
|
|
564
|
+
|
|
565
|
+
for i, record in enumerate(records, 1):
|
|
566
|
+
example_id = record.get("id", i)
|
|
567
|
+
click.echo(f" [{i}/{len(records)}] Processing example {example_id}...", nl=False)
|
|
568
|
+
|
|
569
|
+
try:
|
|
570
|
+
# Prepare input for agent
|
|
571
|
+
input_text = record.get("text", record.get("input", record.get("query", "")))
|
|
572
|
+
example_input = {"query": input_text} if isinstance(input_text, str) else input_text
|
|
573
|
+
|
|
574
|
+
# Run agent
|
|
575
|
+
output = task_fn({"input": example_input})
|
|
576
|
+
|
|
577
|
+
result = {
|
|
578
|
+
"id": example_id,
|
|
579
|
+
"input": input_text,
|
|
580
|
+
"ground_truth": record.get("ground_truth", record.get("expected_output", "")),
|
|
581
|
+
"category": record.get("category", ""),
|
|
582
|
+
"agent_output": output,
|
|
583
|
+
"status": "success",
|
|
584
|
+
}
|
|
585
|
+
click.echo(" ✓")
|
|
586
|
+
|
|
587
|
+
except Exception as e:
|
|
588
|
+
result = {
|
|
589
|
+
"id": example_id,
|
|
590
|
+
"input": record.get("text", record.get("input", "")),
|
|
591
|
+
"ground_truth": record.get("ground_truth", record.get("expected_output", "")),
|
|
592
|
+
"category": record.get("category", ""),
|
|
593
|
+
"agent_output": None,
|
|
594
|
+
"status": "error",
|
|
595
|
+
"error": str(e),
|
|
596
|
+
}
|
|
597
|
+
click.echo(f" ✗ ({e})")
|
|
598
|
+
|
|
599
|
+
results.append(result)
|
|
600
|
+
|
|
601
|
+
# Save results to JSONL
|
|
602
|
+
results_file = results_dir / "vibes-results.jsonl"
|
|
603
|
+
with open(results_file, "w") as f:
|
|
604
|
+
for result in results:
|
|
605
|
+
f.write(json.dumps(result) + "\n")
|
|
606
|
+
|
|
607
|
+
# Copy evaluator schema to results dir for easy reference
|
|
608
|
+
import shutil
|
|
609
|
+
evaluator_copy = results_dir / "evaluator-schema.yaml"
|
|
610
|
+
shutil.copy(evaluator_schema_path, evaluator_copy)
|
|
611
|
+
|
|
612
|
+
# Save run metadata
|
|
613
|
+
run_info = {
|
|
614
|
+
"experiment": config.name,
|
|
615
|
+
"agent": config.agent_schema_ref.name,
|
|
616
|
+
"evaluator": config.evaluator_schema_ref.name,
|
|
617
|
+
"mode": "vibes",
|
|
618
|
+
"timestamp": timestamp,
|
|
619
|
+
"total_examples": len(records),
|
|
620
|
+
"successful": len([r for r in results if r["status"] == "success"]),
|
|
621
|
+
"failed": len([r for r in results if r["status"] == "error"]),
|
|
622
|
+
"completed_at": to_iso(utc_now()),
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
run_info_file = results_dir / "run-info.json"
|
|
626
|
+
with open(run_info_file, "w") as f:
|
|
627
|
+
json.dump(run_info, f, indent=2)
|
|
628
|
+
|
|
629
|
+
# Print summary and instructions
|
|
630
|
+
success_count = run_info["successful"]
|
|
631
|
+
fail_count = run_info["failed"]
|
|
632
|
+
|
|
633
|
+
click.echo(f"\n{'=' * 60}")
|
|
634
|
+
click.echo(f"VIBES MODE COMPLETE")
|
|
635
|
+
click.echo(f"{'=' * 60}")
|
|
636
|
+
click.echo(f"\nResults: {success_count} successful, {fail_count} failed")
|
|
637
|
+
click.echo(f"\nFiles saved to: {results_dir}/")
|
|
638
|
+
click.echo(f" - vibes-results.jsonl (agent outputs)")
|
|
639
|
+
click.echo(f" - evaluator-schema.yaml (evaluation criteria)")
|
|
640
|
+
click.echo(f" - run-info.json (run metadata)")
|
|
641
|
+
|
|
642
|
+
click.echo(f"\n{'=' * 60}")
|
|
643
|
+
click.echo(f"NEXT STEP: Ask your AI assistant to evaluate")
|
|
644
|
+
click.echo(f"{'=' * 60}")
|
|
645
|
+
click.echo(f"""
|
|
646
|
+
Copy this prompt to Claude Code or your AI assistant:
|
|
647
|
+
|
|
648
|
+
Please evaluate the experiment results in:
|
|
649
|
+
{results_dir}/
|
|
650
|
+
|
|
651
|
+
Read the vibes-results.jsonl file and evaluate each example
|
|
652
|
+
using the evaluator schema in evaluator-schema.yaml.
|
|
653
|
+
|
|
654
|
+
For each example, provide:
|
|
655
|
+
1. extracted_classification
|
|
656
|
+
2. exact_match (vs ground_truth)
|
|
657
|
+
3. semantic_match
|
|
658
|
+
4. reasoning_quality_score
|
|
659
|
+
5. overall_score
|
|
660
|
+
6. pass/fail
|
|
661
|
+
|
|
662
|
+
Then provide summary metrics:
|
|
663
|
+
- Exact match accuracy
|
|
664
|
+
- Semantic match accuracy
|
|
665
|
+
- Average overall score
|
|
666
|
+
- Pass rate
|
|
667
|
+
""")
|
|
668
|
+
|
|
669
|
+
|
|
517
670
|
# =============================================================================
|
|
518
671
|
# RUN COMMAND
|
|
519
672
|
# =============================================================================
|
|
@@ -524,6 +677,8 @@ def show(name: str, base_path: Optional[str]):
|
|
|
524
677
|
@click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
|
|
525
678
|
@click.option("--version", help="Git tag version to load (e.g., 'experiments/my-exp/v1.0.0')")
|
|
526
679
|
@click.option("--dry-run", is_flag=True, help="Test on small subset without saving")
|
|
680
|
+
@click.option("--only-vibes", is_flag=True, help="Run agent locally, export results for AI evaluation (no Phoenix)")
|
|
681
|
+
@click.option("--limit", "-n", type=int, help="Limit number of examples to evaluate (useful with --only-vibes)")
|
|
527
682
|
@click.option("--update-prompts", is_flag=True, help="Update prompts in Phoenix before running")
|
|
528
683
|
@click.option("--phoenix-url", help="Phoenix server URL (overrides PHOENIX_BASE_URL env var)")
|
|
529
684
|
@click.option("--phoenix-api-key", help="Phoenix API key (overrides PHOENIX_API_KEY env var)")
|
|
@@ -532,14 +687,45 @@ def run(
|
|
|
532
687
|
base_path: Optional[str],
|
|
533
688
|
version: Optional[str],
|
|
534
689
|
dry_run: bool,
|
|
690
|
+
only_vibes: bool,
|
|
691
|
+
limit: Optional[int],
|
|
535
692
|
update_prompts: bool,
|
|
536
693
|
phoenix_url: Optional[str],
|
|
537
694
|
phoenix_api_key: Optional[str],
|
|
538
695
|
):
|
|
539
|
-
"""Run an experiment using Phoenix provider.
|
|
696
|
+
"""Run an experiment using Phoenix provider or local vibes mode.
|
|
540
697
|
|
|
541
698
|
Loads configuration, executes agent and evaluator, saves results.
|
|
542
699
|
|
|
700
|
+
Vibes Mode (--only-vibes):
|
|
701
|
+
Run agent locally without Phoenix infrastructure. Agent outputs are saved
|
|
702
|
+
to a JSONL file along with the evaluator schema. Your AI assistant (e.g.,
|
|
703
|
+
Claude Code) then acts as the judge to evaluate results.
|
|
704
|
+
|
|
705
|
+
This enables seamless switching between:
|
|
706
|
+
- Local evaluation: Quick iteration with AI-as-judge
|
|
707
|
+
- Phoenix evaluation: Production metrics and dashboards
|
|
708
|
+
|
|
709
|
+
Usage:
|
|
710
|
+
rem experiments run my-experiment --only-vibes
|
|
711
|
+
rem experiments run my-experiment --only-vibes --limit 5
|
|
712
|
+
|
|
713
|
+
The command will:
|
|
714
|
+
1. Run the agent on each ground-truth example
|
|
715
|
+
2. Save results to results/{timestamp}/vibes-results.jsonl
|
|
716
|
+
3. Print the evaluator prompt and schema
|
|
717
|
+
4. Instruct you to ask your AI assistant to evaluate
|
|
718
|
+
|
|
719
|
+
Example workflow with Claude Code:
|
|
720
|
+
$ rem experiments run mental-health-classifier --only-vibes --limit 3
|
|
721
|
+
# ... agent runs ...
|
|
722
|
+
# Results saved to: .experiments/mental-health-classifier/results/20241203-143022/
|
|
723
|
+
|
|
724
|
+
# Then ask Claude Code:
|
|
725
|
+
"Please evaluate the experiment results in
|
|
726
|
+
.experiments/mental-health-classifier/results/20241203-143022/
|
|
727
|
+
using the evaluator schema provided"
|
|
728
|
+
|
|
543
729
|
Phoenix Connection:
|
|
544
730
|
Commands respect PHOENIX_BASE_URL and PHOENIX_API_KEY environment variables.
|
|
545
731
|
Defaults to localhost:6006 for local development.
|
|
@@ -562,6 +748,12 @@ def run(
|
|
|
562
748
|
# Run experiment with latest schemas
|
|
563
749
|
rem experiments run hello-world-validation
|
|
564
750
|
|
|
751
|
+
# Quick local evaluation (vibes mode)
|
|
752
|
+
rem experiments run hello-world-validation --only-vibes
|
|
753
|
+
|
|
754
|
+
# Vibes mode with limited examples
|
|
755
|
+
rem experiments run hello-world-validation --only-vibes --limit 5
|
|
756
|
+
|
|
565
757
|
# Run specific version
|
|
566
758
|
rem experiments run hello-world-validation \\
|
|
567
759
|
--version experiments/hello-world-validation/v1.0.0
|
|
@@ -674,35 +866,47 @@ def run(
|
|
|
674
866
|
|
|
675
867
|
click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
|
|
676
868
|
|
|
677
|
-
#
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
869
|
+
# Find evaluator schema file path
|
|
870
|
+
from rem.utils.schema_loader import get_evaluator_schema_path
|
|
871
|
+
|
|
872
|
+
evaluator_schema_path = get_evaluator_schema_path(evaluator_name)
|
|
873
|
+
if not evaluator_schema_path or not evaluator_schema_path.exists():
|
|
874
|
+
click.echo(f"Error: Could not find evaluator schema '{evaluator_name}'")
|
|
875
|
+
raise click.Abort()
|
|
876
|
+
|
|
877
|
+
click.echo(f"✓ Found evaluator schema: {evaluator_schema_path}")
|
|
683
878
|
|
|
879
|
+
# For Phoenix mode, also load evaluator function
|
|
684
880
|
evaluator_fn = None
|
|
685
|
-
|
|
881
|
+
if not only_vibes:
|
|
882
|
+
# Try multiple evaluator path patterns (agent-specific, then generic)
|
|
883
|
+
evaluator_paths_to_try = [
|
|
884
|
+
f"{agent_name}/{evaluator_name}", # e.g., hello-world/default
|
|
885
|
+
f"{agent_name}-{evaluator_name}", # e.g., hello-world-default
|
|
886
|
+
evaluator_name, # e.g., default (generic)
|
|
887
|
+
]
|
|
686
888
|
|
|
687
|
-
|
|
688
|
-
try:
|
|
689
|
-
evaluator_fn = create_evaluator_from_schema(
|
|
690
|
-
evaluator_schema_path=evaluator_path,
|
|
691
|
-
model_name=None, # Use default from schema
|
|
692
|
-
)
|
|
693
|
-
click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
|
|
694
|
-
break
|
|
695
|
-
except FileNotFoundError as e:
|
|
696
|
-
evaluator_load_error = e
|
|
697
|
-
logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
|
|
698
|
-
continue
|
|
699
|
-
except Exception as e:
|
|
700
|
-
evaluator_load_error = e
|
|
701
|
-
logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
|
|
702
|
-
continue
|
|
889
|
+
evaluator_load_error = None
|
|
703
890
|
|
|
704
|
-
|
|
705
|
-
|
|
891
|
+
for evaluator_path in evaluator_paths_to_try:
|
|
892
|
+
try:
|
|
893
|
+
evaluator_fn = create_evaluator_from_schema(
|
|
894
|
+
evaluator_schema_path=evaluator_path,
|
|
895
|
+
model_name=None, # Use default from schema
|
|
896
|
+
)
|
|
897
|
+
click.echo(f"✓ Loaded evaluator function: {evaluator_path}")
|
|
898
|
+
break
|
|
899
|
+
except FileNotFoundError as e:
|
|
900
|
+
evaluator_load_error = e
|
|
901
|
+
logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
|
|
902
|
+
continue
|
|
903
|
+
except Exception as e:
|
|
904
|
+
evaluator_load_error = e
|
|
905
|
+
logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
|
|
906
|
+
continue
|
|
907
|
+
|
|
908
|
+
if evaluator_fn is None and not only_vibes:
|
|
909
|
+
click.echo(f"Error: Could not load evaluator function '{evaluator_name}'")
|
|
706
910
|
click.echo(f" Tried paths: {evaluator_paths_to_try}")
|
|
707
911
|
if evaluator_load_error:
|
|
708
912
|
click.echo(f" Last error: {evaluator_load_error}")
|
|
@@ -769,6 +973,18 @@ def run(
|
|
|
769
973
|
# TODO: Implement prompt updating
|
|
770
974
|
click.echo("⚠ --update-prompts not yet implemented")
|
|
771
975
|
|
|
976
|
+
# Vibes mode: run agent and export for AI evaluation
|
|
977
|
+
if only_vibes:
|
|
978
|
+
_run_vibes_mode(
|
|
979
|
+
config=config,
|
|
980
|
+
dataset_df=dataset_df,
|
|
981
|
+
task_fn=task_fn,
|
|
982
|
+
base_path=base_path,
|
|
983
|
+
limit=limit,
|
|
984
|
+
evaluator_schema_path=evaluator_schema_path,
|
|
985
|
+
)
|
|
986
|
+
return
|
|
987
|
+
|
|
772
988
|
# Run experiment via Phoenix
|
|
773
989
|
if not dry_run:
|
|
774
990
|
# Create Phoenix client with optional overrides
|
rem/cli/commands/schema.py
CHANGED
|
@@ -16,6 +16,7 @@ from loguru import logger
|
|
|
16
16
|
|
|
17
17
|
from ...settings import settings
|
|
18
18
|
from ...services.postgres.schema_generator import SchemaGenerator
|
|
19
|
+
from ...utils.sql_paths import get_package_sql_dir, get_package_migrations_dir
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
def _import_model_modules() -> list[str]:
|
|
@@ -55,7 +56,7 @@ def _import_model_modules() -> list[str]:
|
|
|
55
56
|
"--output-dir",
|
|
56
57
|
type=click.Path(path_type=Path),
|
|
57
58
|
default=None,
|
|
58
|
-
help=
|
|
59
|
+
help="Base output directory (default: package sql/migrations)",
|
|
59
60
|
)
|
|
60
61
|
def generate(output: Path, output_dir: Path | None):
|
|
61
62
|
"""
|
|
@@ -110,8 +111,8 @@ def generate(output: Path, output_dir: Path | None):
|
|
|
110
111
|
models = registry.get_models(include_core=True)
|
|
111
112
|
click.echo(f"Generating schema from {len(models)} registered models")
|
|
112
113
|
|
|
113
|
-
# Default to migrations directory
|
|
114
|
-
actual_output_dir = output_dir or
|
|
114
|
+
# Default to package migrations directory
|
|
115
|
+
actual_output_dir = output_dir or get_package_migrations_dir()
|
|
115
116
|
generator = SchemaGenerator(output_dir=actual_output_dir)
|
|
116
117
|
|
|
117
118
|
# Generate schema from registry
|
|
@@ -124,7 +125,7 @@ def generate(output: Path, output_dir: Path | None):
|
|
|
124
125
|
# Generate background indexes in parent sql dir
|
|
125
126
|
background_indexes = generator.generate_background_indexes()
|
|
126
127
|
if background_indexes:
|
|
127
|
-
bg_file =
|
|
128
|
+
bg_file = get_package_sql_dir() / "background_indexes.sql"
|
|
128
129
|
bg_file.write_text(background_indexes)
|
|
129
130
|
click.echo(f"✓ Background indexes: {bg_file}")
|
|
130
131
|
|
|
@@ -204,7 +205,7 @@ def validate():
|
|
|
204
205
|
"-o",
|
|
205
206
|
type=click.Path(path_type=Path),
|
|
206
207
|
default=None,
|
|
207
|
-
help=
|
|
208
|
+
help="Output file for background indexes (default: package sql/background_indexes.sql)",
|
|
208
209
|
)
|
|
209
210
|
def indexes(output: Path):
|
|
210
211
|
"""
|
rem/config.py
CHANGED
|
@@ -95,9 +95,16 @@ def load_config() -> dict[str, Any]:
|
|
|
95
95
|
"""
|
|
96
96
|
Load configuration from ~/.rem/config.yaml.
|
|
97
97
|
|
|
98
|
+
Set REM_SKIP_CONFIG=1 to skip loading the config file (useful when using .env files).
|
|
99
|
+
|
|
98
100
|
Returns:
|
|
99
|
-
Configuration dictionary (empty if file doesn't exist)
|
|
101
|
+
Configuration dictionary (empty if file doesn't exist or skipped)
|
|
100
102
|
"""
|
|
103
|
+
# Allow skipping config file via environment variable
|
|
104
|
+
if os.environ.get("REM_SKIP_CONFIG", "").lower() in ("1", "true", "yes"):
|
|
105
|
+
logger.debug("Skipping config file (REM_SKIP_CONFIG is set)")
|
|
106
|
+
return {}
|
|
107
|
+
|
|
101
108
|
config_path = get_config_path()
|
|
102
109
|
|
|
103
110
|
if not config_path.exists():
|