remdb 0.3.0__py3-none-any.whl → 0.3.127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +293 -73
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +313 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/METADATA +464 -289
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/RECORD +104 -73
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
rem/cli/commands/experiments.py
CHANGED

@@ -514,6 +514,159 @@ def show(name: str, base_path: Optional[str]):
         raise click.Abort()
 
 
+# =============================================================================
+# VIBES MODE HELPER
+# =============================================================================
+
+
+def _run_vibes_mode(
+    config: Any,
+    dataset_df: Any,
+    task_fn: Any,
+    base_path: str,
+    limit: Optional[int],
+    evaluator_schema_path: Path,
+) -> None:
+    """Run experiment in vibes mode - execute agent and export for AI evaluation.
+
+    Vibes mode runs the agent on each example and saves results to a JSONL file.
+    The AI assistant (e.g., Claude Code) then acts as the judge, using the
+    evaluator schema to evaluate results.
+
+    Args:
+        config: ExperimentConfig object
+        dataset_df: Polars DataFrame with ground truth examples
+        task_fn: Function to run agent on each example
+        base_path: Base directory for experiments
+        limit: Optional limit on number of examples to process
+        evaluator_schema_path: Path to the evaluator schema YAML file
+    """
+    from rem.utils.date_utils import format_timestamp_for_experiment, utc_now, to_iso
+    import json
+
+    # Apply limit if specified
+    if limit:
+        dataset_df = dataset_df.head(limit)
+        click.echo(f"   (Limited to {limit} examples)")
+
+    # Create results directory
+    timestamp = format_timestamp_for_experiment()
+    results_dir = Path(base_path) / config.name / "results" / timestamp
+    results_dir.mkdir(parents=True, exist_ok=True)
+
+    click.echo(f"\n⏳ Running agent on {len(dataset_df)} examples...")
+    click.echo(f"   Results will be saved to: {results_dir}")
+    click.echo()
+
+    # Run agent on each example and collect results
+    results = []
+    records = dataset_df.to_dicts()
+
+    for i, record in enumerate(records, 1):
+        example_id = record.get("id", i)
+        click.echo(f"   [{i}/{len(records)}] Processing example {example_id}...", nl=False)
+
+        try:
+            # Prepare input for agent
+            input_text = record.get("text", record.get("input", record.get("query", "")))
+            example_input = {"query": input_text} if isinstance(input_text, str) else input_text
+
+            # Run agent
+            output = task_fn({"input": example_input})
+
+            result = {
+                "id": example_id,
+                "input": input_text,
+                "ground_truth": record.get("ground_truth", record.get("expected_output", "")),
+                "category": record.get("category", ""),
+                "agent_output": output,
+                "status": "success",
+            }
+            click.echo(" ✓")
+
+        except Exception as e:
+            result = {
+                "id": example_id,
+                "input": record.get("text", record.get("input", "")),
+                "ground_truth": record.get("ground_truth", record.get("expected_output", "")),
+                "category": record.get("category", ""),
+                "agent_output": None,
+                "status": "error",
+                "error": str(e),
+            }
+            click.echo(f" ✗ ({e})")
+
+        results.append(result)
+
+    # Save results to JSONL
+    results_file = results_dir / "vibes-results.jsonl"
+    with open(results_file, "w") as f:
+        for result in results:
+            f.write(json.dumps(result) + "\n")
+
+    # Copy evaluator schema to results dir for easy reference
+    import shutil
+    evaluator_copy = results_dir / "evaluator-schema.yaml"
+    shutil.copy(evaluator_schema_path, evaluator_copy)
+
+    # Save run metadata
+    run_info = {
+        "experiment": config.name,
+        "agent": config.agent_schema_ref.name,
+        "evaluator": config.evaluator_schema_ref.name,
+        "mode": "vibes",
+        "timestamp": timestamp,
+        "total_examples": len(records),
+        "successful": len([r for r in results if r["status"] == "success"]),
+        "failed": len([r for r in results if r["status"] == "error"]),
+        "completed_at": to_iso(utc_now()),
+    }
+
+    run_info_file = results_dir / "run-info.json"
+    with open(run_info_file, "w") as f:
+        json.dump(run_info, f, indent=2)
+
+    # Print summary and instructions
+    success_count = run_info["successful"]
+    fail_count = run_info["failed"]
+
+    click.echo(f"\n{'=' * 60}")
+    click.echo(f"VIBES MODE COMPLETE")
+    click.echo(f"{'=' * 60}")
+    click.echo(f"\nResults: {success_count} successful, {fail_count} failed")
+    click.echo(f"\nFiles saved to: {results_dir}/")
+    click.echo(f"  - vibes-results.jsonl (agent outputs)")
+    click.echo(f"  - evaluator-schema.yaml (evaluation criteria)")
+    click.echo(f"  - run-info.json (run metadata)")
+
+    click.echo(f"\n{'=' * 60}")
+    click.echo(f"NEXT STEP: Ask your AI assistant to evaluate")
+    click.echo(f"{'=' * 60}")
+    click.echo(f"""
+Copy this prompt to Claude Code or your AI assistant:
+
+Please evaluate the experiment results in:
+{results_dir}/
+
+Read the vibes-results.jsonl file and evaluate each example
+using the evaluator schema in evaluator-schema.yaml.
+
+For each example, provide:
+1. extracted_classification
+2. exact_match (vs ground_truth)
+3. semantic_match
+4. reasoning_quality_score
+5. overall_score
+6. pass/fail
+
+Then provide summary metrics:
+- Exact match accuracy
+- Semantic match accuracy
+- Average overall score
+- Pass rate
+""")
+
+
 # =============================================================================
 # RUN COMMAND
 # =============================================================================

@@ -524,6 +677,8 @@ def show(name: str, base_path: Optional[str]):
 @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
 @click.option("--version", help="Git tag version to load (e.g., 'experiments/my-exp/v1.0.0')")
 @click.option("--dry-run", is_flag=True, help="Test on small subset without saving")
+@click.option("--only-vibes", is_flag=True, help="Run agent locally, export results for AI evaluation (no Phoenix)")
+@click.option("--limit", "-n", type=int, help="Limit number of examples to evaluate (useful with --only-vibes)")
 @click.option("--update-prompts", is_flag=True, help="Update prompts in Phoenix before running")
 @click.option("--phoenix-url", help="Phoenix server URL (overrides PHOENIX_BASE_URL env var)")
 @click.option("--phoenix-api-key", help="Phoenix API key (overrides PHOENIX_API_KEY env var)")

@@ -532,14 +687,45 @@ def run(
     base_path: Optional[str],
     version: Optional[str],
     dry_run: bool,
+    only_vibes: bool,
+    limit: Optional[int],
     update_prompts: bool,
     phoenix_url: Optional[str],
     phoenix_api_key: Optional[str],
 ):
-    """Run an experiment using Phoenix provider.
+    """Run an experiment using Phoenix provider or local vibes mode.
 
     Loads configuration, executes agent and evaluator, saves results.
 
+    Vibes Mode (--only-vibes):
+    Run agent locally without Phoenix infrastructure. Agent outputs are saved
+    to a JSONL file along with the evaluator schema. Your AI assistant (e.g.,
+    Claude Code) then acts as the judge to evaluate results.
+
+    This enables seamless switching between:
+    - Local evaluation: Quick iteration with AI-as-judge
+    - Phoenix evaluation: Production metrics and dashboards
+
+    Usage:
+        rem experiments run my-experiment --only-vibes
+        rem experiments run my-experiment --only-vibes --limit 5
+
+    The command will:
+    1. Run the agent on each ground-truth example
+    2. Save results to results/{timestamp}/vibes-results.jsonl
+    3. Print the evaluator prompt and schema
+    4. Instruct you to ask your AI assistant to evaluate
+
+    Example workflow with Claude Code:
+        $ rem experiments run mental-health-classifier --only-vibes --limit 3
+        # ... agent runs ...
+        # Results saved to: .experiments/mental-health-classifier/results/20241203-143022/
+
+        # Then ask Claude Code:
+        "Please evaluate the experiment results in
+        .experiments/mental-health-classifier/results/20241203-143022/
+        using the evaluator schema provided"
+
     Phoenix Connection:
     Commands respect PHOENIX_BASE_URL and PHOENIX_API_KEY environment variables.
     Defaults to localhost:6006 for local development.

@@ -562,6 +748,12 @@ def run(
         # Run experiment with latest schemas
         rem experiments run hello-world-validation
 
+        # Quick local evaluation (vibes mode)
+        rem experiments run hello-world-validation --only-vibes
+
+        # Vibes mode with limited examples
+        rem experiments run hello-world-validation --only-vibes --limit 5
+
         # Run specific version
         rem experiments run hello-world-validation \\
             --version experiments/hello-world-validation/v1.0.0

@@ -578,8 +770,7 @@ def run(
     from rem.services.git import GitService
     from rem.services.phoenix import PhoenixClient
     from rem.agentic.providers.phoenix import create_evaluator_from_schema
-    from
-    import pandas as pd
+    from rem.utils.date_utils import utc_now, to_iso, format_timestamp_for_experiment
     import os
 
     try:

@@ -615,36 +806,22 @@ def run(
         click.echo(f"   Mode: DRY RUN (no data will be saved)")
         click.echo()
 
-        # Load agent schema
+        # Load agent schema using centralized schema loader
         agent_name = config.agent_schema_ref.name
         agent_version = config.agent_schema_ref.version
 
         click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")
 
-
-        agent_schema = None
-        try:
-            git_svc = GitService()
-            agent_schema = git_svc.load_schema(agent_name, version=agent_version)
-            click.echo(f"✓ Loaded agent schema from Git")
-        except Exception as e:
-            logger.debug(f"Git not available, trying filesystem: {e}")
-
-            # Fallback to local filesystem
-            from rem.services.fs import FS
-            fs = FS()
+        from rem.utils.schema_loader import load_agent_schema
 
-
-
-
-
-
-
-
-
-            click.echo(f"   Tried filesystem: {schema_path}")
-            click.echo(f"   Make sure the schema exists")
-            raise click.Abort()
+        try:
+            agent_schema = load_agent_schema(agent_name)
+            click.echo(f"✓ Loaded agent schema: {agent_name}")
+        except FileNotFoundError as e:
+            logger.error(f"Failed to load agent schema: {e}")
+            click.echo(f"Error: Could not load agent schema '{agent_name}'")
+            click.echo(f"   {e}")
+            raise click.Abort()
 
         # Create agent function from schema
         from rem.agentic.providers.pydantic_ai import create_agent

@@ -683,73 +860,97 @@ def run(
                 return {"output": serialized}
             return serialized if isinstance(serialized, dict) else {"output": str(serialized)}
 
-        # Load evaluator schema
+        # Load evaluator schema using centralized schema loader
         evaluator_name = config.evaluator_schema_ref.name
         evaluator_version = config.evaluator_schema_ref.version
 
-        # Resolve evaluator path (evaluators are organized by agent name)
-        evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
-
         click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
 
-
-
-
-
-
-        click.echo(f"
-
-
-
-
-
+        # Find evaluator schema file path
+        from rem.utils.schema_loader import get_evaluator_schema_path
+
+        evaluator_schema_path = get_evaluator_schema_path(evaluator_name)
+        if not evaluator_schema_path or not evaluator_schema_path.exists():
+            click.echo(f"Error: Could not find evaluator schema '{evaluator_name}'")
+            raise click.Abort()
+
+        click.echo(f"✓ Found evaluator schema: {evaluator_schema_path}")
+
+        # For Phoenix mode, also load evaluator function
+        evaluator_fn = None
+        if not only_vibes:
+            # Try multiple evaluator path patterns (agent-specific, then generic)
+            evaluator_paths_to_try = [
+                f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
+                f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
+                evaluator_name,  # e.g., default (generic)
+            ]
+
+            evaluator_load_error = None
+
+            for evaluator_path in evaluator_paths_to_try:
+                try:
+                    evaluator_fn = create_evaluator_from_schema(
+                        evaluator_schema_path=evaluator_path,
+                        model_name=None,  # Use default from schema
+                    )
+                    click.echo(f"✓ Loaded evaluator function: {evaluator_path}")
+                    break
+                except FileNotFoundError as e:
+                    evaluator_load_error = e
+                    logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
+                    continue
+                except Exception as e:
+                    evaluator_load_error = e
+                    logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
+                    continue
+
+        if evaluator_fn is None and not only_vibes:
+            click.echo(f"Error: Could not load evaluator function '{evaluator_name}'")
+            click.echo(f"   Tried paths: {evaluator_paths_to_try}")
+            if evaluator_load_error:
+                click.echo(f"   Last error: {evaluator_load_error}")
             raise click.Abort()
 
-        # Load dataset
+        # Load dataset using Polars
+        import polars as pl
+
         click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
         dataset_ref = list(config.datasets.values())[0]
 
         if dataset_ref.location.value == "git":
-            # Load from Git
+            # Load from Git (local filesystem)
             dataset_path = Path(base_path) / name / dataset_ref.path
             if not dataset_path.exists():
                 click.echo(f"Error: Dataset not found: {dataset_path}")
                 raise click.Abort()
 
             if dataset_ref.format == "csv":
-                dataset_df =
+                dataset_df = pl.read_csv(dataset_path)
             elif dataset_ref.format == "parquet":
-                dataset_df =
+                dataset_df = pl.read_parquet(dataset_path)
             elif dataset_ref.format == "jsonl":
-                dataset_df =
+                dataset_df = pl.read_ndjson(dataset_path)
             else:
                 click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                 raise click.Abort()
         elif dataset_ref.location.value in ["s3", "hybrid"]:
             # Load from S3 using FS provider
             from rem.services.fs import FS
+            from io import BytesIO
 
             fs = FS()
 
             try:
                 if dataset_ref.format == "csv":
                     content = fs.read(dataset_ref.path)
-
-                    dataset_df = pd.read_csv(StringIO(content))
+                    dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
                 elif dataset_ref.format == "parquet":
-
-
-                    with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-                        tmp_path = tmp.name
-                        # Download via FS
-                        content_bytes = fs.read(dataset_ref.path)
-                        tmp.write(content_bytes)
-                    dataset_df = pd.read_parquet(tmp_path)
-                    Path(tmp_path).unlink()  # Clean up temp file
+                    content_bytes = fs.read(dataset_ref.path)
+                    dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
                 elif dataset_ref.format == "jsonl":
                     content = fs.read(dataset_ref.path)
-
-                    dataset_df = pd.read_json(StringIO(content), lines=True)
+                    dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
                 else:
                     click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                     raise click.Abort()

@@ -772,6 +973,18 @@ def run(
             # TODO: Implement prompt updating
             click.echo("⚠ --update-prompts not yet implemented")
 
+        # Vibes mode: run agent and export for AI evaluation
+        if only_vibes:
+            _run_vibes_mode(
+                config=config,
+                dataset_df=dataset_df,
+                task_fn=task_fn,
+                base_path=base_path,
+                limit=limit,
+                evaluator_schema_path=evaluator_schema_path,
+            )
+            return
+
         # Run experiment via Phoenix
         if not dry_run:
             # Create Phoenix client with optional overrides

@@ -793,13 +1006,13 @@ def run(
 
             client = PhoenixClient(config=phoenix_config)
 
-            experiment_name = f"{config.name}-{
+            experiment_name = f"{config.name}-{format_timestamp_for_experiment()}"
 
             click.echo(f"\n⏳ Running experiment: {experiment_name}")
             click.echo(f"   This may take several minutes...")
 
             experiment = client.run_experiment(
-                dataset=dataset_df,
+                dataset=dataset_df,
                 task=task_fn,
                 evaluators=[evaluator_fn],
                 experiment_name=experiment_name,

@@ -809,12 +1022,15 @@ def run(
                     "evaluator": config.evaluator_schema_ref.name,
                     "experiment_config": config.name,
                     **config.metadata
-                }
+                },
+                # Smart column detection for DataFrame -> Phoenix Dataset conversion
+                input_keys=["input"] if "input" in dataset_df.columns else None,
+                output_keys=["expected_output"] if "expected_output" in dataset_df.columns else None,
             )
 
             # Update experiment status
             config.status = ExperimentStatus.COMPLETED
-            config.last_run_at =
+            config.last_run_at = utc_now()
             if not version:  # Only save if not loading from Git
                 config.save(base_path)
 

@@ -835,7 +1051,7 @@ def run(
                 "agent": config.agent_schema_ref.name,
                 "evaluator": config.evaluator_schema_ref.name,
                 "dataset_size": len(dataset_df),
-                "completed_at":
+                "completed_at": to_iso(utc_now()),
                 "phoenix_url": getattr(experiment, "url", None),
                 "task_runs": len(exp_data.get("task_runs", [])),
             }

@@ -1015,20 +1231,24 @@ def dataset_add(
         --output-keys expected_label,expected_type
     """
     from rem.services.phoenix import PhoenixClient
-    import
+    import polars as pl
 
     try:
         client = PhoenixClient()
 
-        # Load CSV
-        df =
+        # Load CSV with Polars
+        df = pl.read_csv(from_csv)
+        records = df.to_dicts()
 
         # Extract data
-
-
+        input_cols = input_keys.split(",")
+        output_cols = output_keys.split(",")
+        inputs = [{k: row.get(k) for k in input_cols} for row in records]
+        outputs = [{k: row.get(k) for k in output_cols} for row in records]
         metadata = None
         if metadata_keys:
-
+            meta_cols = metadata_keys.split(",")
+            metadata = [{k: row.get(k) for k in meta_cols} for row in records]
 
         # Add to dataset
         dataset = client.add_examples_to_dataset(

@@ -1269,12 +1489,12 @@ def trace_list(
         rem experiments trace list --project rem-agents --days 7 --limit 50
     """
     from rem.services.phoenix import PhoenixClient
-    from
+    from rem.utils.date_utils import days_ago
 
     try:
         client = PhoenixClient()
 
-        start_time =
+        start_time = days_ago(days)
 
         traces_df = client.get_traces(
             project_name=project,
rem/cli/commands/process.py
CHANGED

@@ -12,12 +12,12 @@ from rem.services.content import ContentService
 
 @click.command(name="ingest")
 @click.argument("file_path", type=click.Path(exists=True))
-@click.option("--user-id",
+@click.option("--user-id", default=None, help="User ID to scope file privately (default: public/shared)")
 @click.option("--category", help="Optional file category")
 @click.option("--tags", help="Optional comma-separated tags")
 def process_ingest(
     file_path: str,
-    user_id: str,
+    user_id: str | None,
     category: str | None,
     tags: str | None,
 ):

@@ -32,8 +32,9 @@ def process_ingest(
     5. Creates a File entity record.
 
     Examples:
-        rem process ingest sample.pdf
-        rem process ingest contract.docx --
+        rem process ingest sample.pdf
+        rem process ingest contract.docx --category legal --tags contract,2023
+        rem process ingest agent.yaml  # Auto-detects kind=agent, saves to schemas table
     """
     import asyncio
     from ...services.content import ContentService

@@ -56,7 +57,8 @@ def process_ingest(
 
         tag_list = tags.split(",") if tags else None
 
-
+        scope_msg = f"user: {user_id}" if user_id else "public"
+        logger.info(f"Ingesting file: {file_path} ({scope_msg})")
         result = await service.ingest_file(
             file_uri=file_path,
             user_id=user_id,

@@ -65,11 +67,15 @@ def process_ingest(
             is_local_server=True,  # CLI is local
         )
 
-
-
+        # Handle schema ingestion (agents/evaluators)
+        if result.get("schema_name"):
+            logger.success(f"Schema ingested: {result['schema_name']} (kind={result.get('kind', 'agent')})")
+            logger.info(f"Version: {result.get('version', '1.0.0')}")
+        # Handle file ingestion
+        elif result.get("processing_status") == "completed":
+            logger.success(f"File ingested: {result['file_name']}")
             logger.info(f"File ID: {result['file_id']}")
             logger.info(f"Resources created: {result['resources_created']}")
-            logger.info(f"Status: {result['processing_status']}")
         else:
             logger.error(f"Ingestion failed: {result.get('message', 'Unknown error')}")
             sys.exit(1)

@@ -192,15 +198,13 @@ def process_uri(uri: str, output: str, save: str | None):
 
 
 @click.command(name="files")
-@click.option("--
-@click.option("--user-id", help="Filter by user ID")
+@click.option("--user-id", default=None, help="User ID (default: from settings)")
 @click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
 @click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
 @click.option("--limit", type=int, help="Max files to process")
 @click.option("--provider", help="Optional LLM provider override")
 @click.option("--model", help="Optional model override")
 def process_files(
-    tenant_id: str,
     user_id: Optional[str],
     status: Optional[str],
     extractor: Optional[str],

@@ -217,19 +221,22 @@ def process_files(
 
     \b
     # List completed files
-    rem process files --
+    rem process files --status completed
 
     \b
     # Extract from CV files
-    rem process files --
+    rem process files --extractor cv-parser-v1 --limit 10
 
     \b
     # Extract with provider override
-    rem process files --
+    rem process files --extractor contract-analyzer-v1 \\
         --provider anthropic --model claude-sonnet-4-5
     """
+    from ...settings import settings
+    effective_user_id = user_id or settings.test.effective_user_id
+
     logger.warning("Not implemented yet")
-    logger.info(f"Would process files for
+    logger.info(f"Would process files for user: {effective_user_id}")
 
     if user_id:
         logger.info(f"Filter: user_id={user_id}")
rem/cli/commands/scaffold.py
ADDED

@@ -0,0 +1,47 @@
+"""
+Scaffold command - generate project structure for REM-based applications.
+
+TODO: Implement this command to generate:
+- my_app/main.py (entry point with create_app)
+- my_app/models.py (example CoreModel subclass)
+- my_app/routers/ (example FastAPI router)
+- schemas/agents/ (example agent schema)
+- schemas/evaluators/ (example evaluator)
+- sql/migrations/ (empty migrations directory)
+- pyproject.toml (with remdb dependency)
+- README.md (basic usage instructions)
+
+Usage:
+    rem scaffold my-app
+    rem scaffold my-app --with-examples  # Include example models/routers/tools
+"""
+
+import click
+
+
+@click.command()
+@click.argument("name")
+@click.option("--with-examples", is_flag=True, help="Include example code")
+def scaffold(name: str, with_examples: bool) -> None:
+    """
+    Generate a new REM-based project structure.
+
+    NAME is the project directory name to create.
+    """
+    click.echo(f"TODO: Scaffold command not yet implemented")
+    click.echo(f"Would create project: {name}")
+    click.echo(f"With examples: {with_examples}")
+    click.echo()
+    click.echo("For now, manually create this structure:")
+    click.echo(f"""
+{name}/
+├── {name.replace('-', '_')}/
+│   ├── main.py        # Entry point (create_app + extensions)
+│   ├── models.py      # Custom models (inherit CoreModel)
+│   └── routers/       # Custom FastAPI routers
+├── schemas/
+│   ├── agents/        # Custom agent YAML schemas
+│   └── evaluators/    # Custom evaluator schemas
+├── sql/migrations/    # Custom SQL migrations
+└── pyproject.toml
+""")