PyPI - remdb - Versions diffs - 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl - Mend

remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (44) hide show

rem/agentic/agents/sse_simulator.py +2 -0
rem/agentic/context.py +23 -3
rem/agentic/mcp/tool_wrapper.py +126 -15
rem/agentic/otel/setup.py +1 -0
rem/agentic/providers/phoenix.py +371 -108
rem/agentic/providers/pydantic_ai.py +122 -43
rem/agentic/schema.py +4 -1
rem/api/mcp_router/tools.py +13 -2
rem/api/routers/chat/completions.py +250 -4
rem/api/routers/chat/models.py +81 -7
rem/api/routers/chat/otel_utils.py +33 -0
rem/api/routers/chat/sse_events.py +17 -1
rem/api/routers/chat/streaming.py +35 -1
rem/api/routers/feedback.py +134 -14
rem/auth/middleware.py +66 -1
rem/cli/commands/cluster.py +590 -82
rem/cli/commands/configure.py +3 -4
rem/cli/commands/experiments.py +468 -76
rem/cli/commands/session.py +336 -0
rem/cli/dreaming.py +2 -2
rem/cli/main.py +2 -0
rem/config.py +8 -1
rem/models/core/experiment.py +58 -14
rem/models/entities/ontology.py +1 -1
rem/models/entities/ontology_config.py +1 -1
rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
rem/schemas/agents/examples/contract-extractor.yaml +1 -1
rem/schemas/agents/examples/cv-parser.yaml +1 -1
rem/services/phoenix/client.py +59 -18
rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
rem/services/session/compression.py +7 -0
rem/settings.py +260 -17
rem/sql/migrations/002_install_models.sql +91 -91
rem/sql/migrations/004_cache_system.sql +1 -1
rem/utils/README.md +45 -0
rem/utils/files.py +157 -1
rem/utils/schema_loader.py +94 -3
rem/utils/vision.py +1 -1
rem/workers/__init__.py +2 -1
rem/workers/db_listener.py +579 -0
{remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
{remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
{remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
{remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0

rem/services/phoenix/client.py CHANGED Viewed

@@ -793,40 +793,72 @@ class PhoenixClient:
         score: float | None = None,
         explanation: str | None = None,
         metadata: dict[str, Any] | None = None,
+        trace_id: str | None = None,
     ) -> str | None:
-        """Add feedback annotation to a span.
+        """Add feedback annotation to a span via Phoenix REST API.
+        Uses direct HTTP POST to /v1/span_annotations for reliability
+        (Phoenix Python client API changes frequently).
         Args:
-            span_id: Span ID to annotate
+            span_id: Span ID to annotate (hex string)
             annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
             annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
             label: Optional label (e.g., "correct", "incorrect", "helpful")
             score: Optional numeric score (0.0-1.0)
             explanation: Optional explanation text
             metadata: Optional additional metadata dict
+            trace_id: Optional trace ID (used if span lookup needed)
         Returns:
             Annotation ID if successful, None otherwise
         """
+        import httpx
         try:
-            result = self._client.add_span_annotation(  # type: ignore[attr-defined]
-                span_id=span_id,
-                name=annotation_name,
-                annotator_kind=annotator_kind,
-                label=label,
-                score=score,
-                explanation=explanation,
-                metadata=metadata,
-            )
+            # Build annotation payload for Phoenix REST API
+            annotation_data = {
+                "span_id": span_id,
+                "name": annotation_name,
+                "annotator_kind": annotator_kind,
+                "result": {
+                    "label": label,
+                    "score": score,
+                    "explanation": explanation,
+                },
+                "metadata": metadata or {},
+            }
-            annotation_id = getattr(result, "id", None) if result else None
-            logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
+            # Add trace_id if provided
+            if trace_id:
+                annotation_data["trace_id"] = trace_id
+            # POST to Phoenix REST API
+            annotations_endpoint = f"{self.config.base_url}/v1/span_annotations"
+            headers = {}
+            if self.config.api_key:
+                headers["Authorization"] = f"Bearer {self.config.api_key}"
+            with httpx.Client(timeout=5.0) as client:
+                response = client.post(
+                    annotations_endpoint,
+                    json={"data": [annotation_data]},
+                    headers=headers,
+                )
+                response.raise_for_status()
-            return annotation_id
+            logger.info(f"Added {annotator_kind} feedback to span {span_id}")
+            return span_id  # Return span_id as annotation reference
+        except httpx.HTTPStatusError as e:
+            logger.error(
+                f"Failed to add span feedback (HTTP {e.response.status_code}): "
+                f"{e.response.text if hasattr(e, 'response') else 'N/A'}"
+            )
+            return None
         except Exception as e:
             logger.error(f"Failed to add span feedback: {e}")
-            raise
+            return None
     def sync_user_feedback(
         self,
@@ -835,6 +867,7 @@ class PhoenixClient:
         categories: list[str] | None = None,
         comment: str | None = None,
         feedback_id: str | None = None,
+        trace_id: str | None = None,
     ) -> str | None:
         """Sync user feedback to Phoenix as a span annotation.
@@ -847,6 +880,7 @@ class PhoenixClient:
             categories: List of feedback categories
             comment: Free-text comment
             feedback_id: Optional REM feedback ID for reference
+            trace_id: Optional trace ID for the span
         Returns:
             Phoenix annotation ID if successful
@@ -860,12 +894,18 @@ class PhoenixClient:
             ... )
         """
         # Convert rating to 0-1 score
+        # Rating scheme:
+        #   -1 = thumbs down → score 0.0
+        #    1 = thumbs up   → score 1.0
+        #  2-5 = star rating → normalized to 0-1 range
         score = None
         if rating is not None:
             if rating == -1:
                 score = 0.0
-            elif 1 <= rating <= 5:
-                score = rating / 5.0
+            elif rating == 1:
+                score = 1.0  # Thumbs up
+            elif 2 <= rating <= 5:
+                score = (rating - 1) / 4.0  # 2→0.25, 3→0.5, 4→0.75, 5→1.0
         # Use primary category as label
         label = categories[0] if categories else None
@@ -880,7 +920,7 @@ class PhoenixClient:
                 explanation = f"Categories: {cats_str}"
         # Build metadata
-        metadata = {
+        metadata: dict[str, Any] = {
             "rating": rating,
             "categories": categories or [],
         }
@@ -895,6 +935,7 @@ class PhoenixClient:
             score=score,
             explanation=explanation,
             metadata=metadata,
+            trace_id=trace_id,
         )
     def get_span_annotations(

rem/services/postgres/pydantic_to_sqlalchemy.py CHANGED Viewed

@@ -513,18 +513,15 @@ def get_target_metadata() -> MetaData:
     """
     Get SQLAlchemy metadata for Alembic autogenerate.
-    This is the main entry point used by alembic/env.py.
+    This is the main entry point used by alembic/env.py and rem db diff.
+    Uses the model registry as the source of truth, which includes:
+    - Core REM models (Resource, Message, User, etc.)
+    - User-registered models via @rem.register_model decorator
     Returns:
-        SQLAlchemy MetaData object representing current Pydantic models
+        SQLAlchemy MetaData object representing all registered Pydantic models
     """
-    import rem
-    package_root = Path(rem.__file__).parent.parent.parent
-    models_dir = package_root / "src" / "rem" / "models" / "entities"
-    if not models_dir.exists():
-        logger.error(f"Models directory not found: {models_dir}")
-        return MetaData()
-    return build_sqlalchemy_metadata_from_pydantic(models_dir)
+    # build_sqlalchemy_metadata_from_pydantic uses the registry internally,
+    # so no directory path is needed (the parameter is kept for backwards compat)
+    return build_sqlalchemy_metadata_from_pydantic()

rem/services/session/compression.py CHANGED Viewed

@@ -170,12 +170,16 @@ class SessionMessageStore:
         entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
         # Create Message entity for assistant response
+        # Use pre-generated id from message dict if available (for frontend feedback)
         msg = Message(
+            id=message.get("id"),  # Use pre-generated ID if provided
             content=message.get("content", ""),
             message_type=message.get("role", "assistant"),
             session_id=session_id,
             tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
             user_id=user_id or self.user_id,
+            trace_id=message.get("trace_id"),
+            span_id=message.get("span_id"),
             metadata={
                 "message_index": message_index,
                 "entity_key": entity_key,  # Store entity key for LOOKUP
@@ -284,11 +288,14 @@ class SessionMessageStore:
                 # Short assistant messages, user messages, and system messages stored as-is
                 # Store ALL messages in database for full audit trail
                 msg = Message(
+                    id=message.get("id"),  # Use pre-generated ID if provided
                     content=content,
                     message_type=message.get("role", "user"),
                     session_id=session_id,
                     tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
                     user_id=user_id or self.user_id,
+                    trace_id=message.get("trace_id"),
+                    span_id=message.get("span_id"),
                     metadata={
                         "message_index": idx,
                         "timestamp": message.get("timestamp"),

rem/settings.py CHANGED Viewed

@@ -21,8 +21,8 @@ Example .env file:
     LLM__OPENAI_API_KEY=sk-...
     LLM__ANTHROPIC_API_KEY=sk-ant-...
-    # Database (port 5050 for Docker Compose)
-    POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
+    # Database (port 5051 for Docker Compose prebuilt, 5050 for local dev)
+    POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5051/rem
     POSTGRES__POOL_MIN_SIZE=5
     POSTGRES__POOL_MAX_SIZE=20
     POSTGRES__STATEMENT_TIMEOUT=30000
@@ -33,14 +33,15 @@ Example .env file:
     AUTH__OIDC_CLIENT_ID=your-client-id
     AUTH__SESSION_SECRET=your-secret-key
-    # OpenTelemetry (disabled by default)
+    # OpenTelemetry (disabled by default - enable via env var when collector available)
+    # Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
     OTEL__ENABLED=false
     OTEL__SERVICE_NAME=rem-api
-    OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
-    OTEL__PROTOCOL=http
+    OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
+    OTEL__PROTOCOL=grpc
-    # Arize Phoenix (disabled by default)
-    PHOENIX__ENABLED=false
+    # Arize Phoenix (enabled by default - can be disabled via env var)
+    PHOENIX__ENABLED=true
     PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
     PHOENIX__PROJECT_NAME=rem
@@ -241,6 +242,11 @@ class OTELSettings(BaseSettings):
         description="Export timeout in milliseconds",
     )
+    insecure: bool = Field(
+        default=True,
+        description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
+    )
 class PhoenixSettings(BaseSettings):
     """
@@ -267,8 +273,8 @@ class PhoenixSettings(BaseSettings):
     )
     enabled: bool = Field(
-        default=False,
-        description="Enable Phoenix integration (disabled by default for local dev)",
+        default=True,
+        description="Enable Phoenix integration (enabled by default)",
     )
     base_url: str = Field(
@@ -458,10 +464,11 @@ class PostgresSettings(BaseSettings):
     )
     connection_string: str = Field(
-        default="postgresql://rem:rem@localhost:5050/rem",
-        description="PostgreSQL connection string (default uses Docker Compose port 5050)",
+        default="postgresql://rem:rem@localhost:5051/rem",
+        description="PostgreSQL connection string (default uses Docker Compose prebuilt port 5051)",
     )
     pool_size: int = Field(
         default=10,
         description="Connection pool size (deprecated, use pool_min_size/pool_max_size)",
@@ -686,6 +693,91 @@ class S3Settings(BaseSettings):
     )
+class DataLakeSettings(BaseSettings):
+    """
+    Data lake settings for experiment and dataset storage.
+    Data Lake Convention:
+        The data lake provides a standardized structure for storing datasets,
+        experiments, and calibration data in S3. Users bring their own bucket
+        and the version is pinned by default to v0 in the path.
+    S3 Path Structure:
+        s3://{bucket}/{version}/datasets/
+        ├── raw/                        # Raw source data + transformers
+        │   └── {dataset_name}/         # e.g., cns_drugs, codes, care
+        ├── tables/                     # Database table data (JSONL)
+        │   ├── resources/              # → resources table
+        │   │   ├── drugs/{category}/   # Psychotropic drugs
+        │   │   ├── care/stages/        # Treatment stages
+        │   │   └── crisis/             # Crisis resources
+        │   └── codes/                  # → codes table
+        │       ├── icd10/{category}/   # ICD-10 codes
+        │       └── cpt/                # CPT codes
+        └── calibration/                # Agent calibration
+            ├── experiments/            # Experiment configs + results
+            │   └── {agent}/{task}/     # e.g., siggy/risk-assessment
+            └── datasets/               # Shared evaluation datasets
+    Experiment Storage:
+        - Local: experiments/{agent}/{task}/experiment.yaml
+        - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
+    Environment variables:
+        DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
+        DATA_LAKE__VERSION - Path version prefix (default: v0)
+        DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
+        DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="DATA_LAKE__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    bucket_name: str | None = Field(
+        default=None,
+        description="S3 bucket for data lake storage (user-provided)",
+    )
+    version: str = Field(
+        default="v0",
+        description="API version for data lake paths",
+    )
+    datasets_prefix: str = Field(
+        default="datasets",
+        description="Root directory for datasets in the bucket",
+    )
+    experiments_prefix: str = Field(
+        default="experiments",
+        description="Subdirectory within calibration for experiments",
+    )
+    def get_base_uri(self) -> str | None:
+        """Get the base S3 URI for the data lake."""
+        if not self.bucket_name:
+            return None
+        return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
+    def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
+        """Get the S3 URI for an experiment."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
+    def get_tables_uri(self, table: str = "resources") -> str | None:
+        """Get the S3 URI for a table directory."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/tables/{table}"
 class ChunkingSettings(BaseSettings):
     """
     Document chunking settings for semantic text splitting.
@@ -969,6 +1061,8 @@ class APISettings(BaseSettings):
         API__RELOAD - Enable auto-reload for development
         API__WORKERS - Number of worker processes (production)
         API__LOG_LEVEL - Logging level (debug, info, warning, error)
+        API__API_KEY_ENABLED - Enable X-API-Key header authentication
+        API__API_KEY - API key for X-API-Key authentication
     """
     model_config = SettingsConfigDict(
@@ -1003,6 +1097,23 @@ class APISettings(BaseSettings):
         description="Logging level (debug, info, warning, error, critical)",
     )
+    api_key_enabled: bool = Field(
+        default=False,
+        description=(
+            "Enable X-API-Key header authentication for API endpoints. "
+            "When enabled, requests must include X-API-Key header with valid key. "
+            "This provides simple API key auth independent of OAuth."
+        ),
+    )
+    api_key: str | None = Field(
+        default=None,
+        description=(
+            "API key for X-API-Key authentication. Required when api_key_enabled=true. "
+            "Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
+        ),
+    )
 class ModelsSettings(BaseSettings):
     """
@@ -1051,10 +1162,26 @@ class ModelsSettings(BaseSettings):
     @property
     def module_list(self) -> list[str]:
-        """Get modules as a list, filtering empty strings."""
-        if not self.import_modules:
-            return []
-        return [m.strip() for m in self.import_modules.split(";") if m.strip()]
+        """
+        Get modules as a list, filtering empty strings.
+        Auto-detects ./models folder if it exists and is importable.
+        """
+        modules = []
+        if self.import_modules:
+            modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
+        # Auto-detect ./models if it exists and is a Python package (convention over configuration)
+        from pathlib import Path
+        models_path = Path("./models")
+        if models_path.exists() and models_path.is_dir():
+            # Check if it's a Python package (has __init__.py)
+            if (models_path / "__init__.py").exists():
+                if "models" not in modules:
+                    modules.insert(0, "models")
+        return modules
 class SchemaSettings(BaseSettings):
@@ -1240,6 +1367,110 @@ class GitSettings(BaseSettings):
     )
+class DBListenerSettings(BaseSettings):
+    """
+    PostgreSQL LISTEN/NOTIFY database listener settings.
+    The DB Listener is a lightweight worker that subscribes to PostgreSQL
+    NOTIFY events and dispatches them to external systems (SQS, REST, custom).
+    Architecture:
+        - Single-replica deployment (to avoid duplicate processing)
+        - Dedicated connection for LISTEN (not from connection pool)
+        - Automatic reconnection with exponential backoff
+        - Graceful shutdown on SIGTERM
+    Use Cases:
+        - Sync data changes to external systems (Phoenix, webhooks)
+        - Trigger async jobs without polling
+        - Event-driven architectures with PostgreSQL as event source
+    Example PostgreSQL trigger:
+        CREATE OR REPLACE FUNCTION notify_feedback_insert()
+        RETURNS TRIGGER AS $$
+        BEGIN
+            PERFORM pg_notify('feedback_sync', json_build_object(
+                'id', NEW.id,
+                'table', 'feedbacks',
+                'action', 'insert'
+            )::text);
+            RETURN NEW;
+        END;
+        $$ LANGUAGE plpgsql;
+    Environment variables:
+        DB_LISTENER__ENABLED - Enable the listener worker (default: false)
+        DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
+        DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
+        DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
+        DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
+        DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
+        DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
+    References:
+        - PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
+        - Brandur's Notifier: https://brandur.org/notifier
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="DB_LISTENER__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    enabled: bool = Field(
+        default=False,
+        description="Enable the DB Listener worker (disabled by default)",
+    )
+    channels: str = Field(
+        default="",
+        description=(
+            "Comma-separated list of PostgreSQL channels to LISTEN on. "
+            "Example: 'feedback_sync,entity_update,user_events'"
+        ),
+    )
+    handler_type: str = Field(
+        default="rest",
+        description=(
+            "Handler type for dispatching notifications. Options: "
+            "'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
+        ),
+    )
+    sqs_queue_url: str = Field(
+        default="",
+        description="SQS queue URL for handler_type='sqs'",
+    )
+    rest_endpoint: str = Field(
+        default="http://localhost:8000/api/v1/internal/events",
+        description=(
+            "REST endpoint URL for handler_type='rest'. "
+            "Receives POST with {channel, payload, source} JSON body."
+        ),
+    )
+    reconnect_delay: float = Field(
+        default=1.0,
+        description="Initial delay (seconds) between reconnection attempts",
+    )
+    max_reconnect_delay: float = Field(
+        default=60.0,
+        description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
+    )
+    @property
+    def channel_list(self) -> list[str]:
+        """Get channels as a list, filtering empty strings."""
+        if not self.channels:
+            return []
+        return [c.strip() for c in self.channels.split(",") if c.strip()]
 class TestSettings(BaseSettings):
     """
     Test environment settings.
@@ -1347,18 +1578,30 @@ class Settings(BaseSettings):
     migration: MigrationSettings = Field(default_factory=MigrationSettings)
     storage: StorageSettings = Field(default_factory=StorageSettings)
     s3: S3Settings = Field(default_factory=S3Settings)
+    data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
     git: GitSettings = Field(default_factory=GitSettings)
     sqs: SQSSettings = Field(default_factory=SQSSettings)
+    db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
     chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
     content: ContentSettings = Field(default_factory=ContentSettings)
     schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
     test: TestSettings = Field(default_factory=TestSettings)
+# Auto-load .env file from current directory if it exists
+# This happens BEFORE config file loading, so .env takes precedence
+from pathlib import Path
+from dotenv import load_dotenv
+_dotenv_path = Path(".env")
+if _dotenv_path.exists():
+    load_dotenv(_dotenv_path, override=False)  # Don't override existing env vars
+    logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
 # Load configuration from ~/.rem/config.yaml before initializing settings
 # This allows user configuration to be merged with environment variables
-# Set REM_SKIP_CONFIG_FILE=true to disable (useful for development with .env)
-if not os.getenv("REM_SKIP_CONFIG_FILE", "").lower() in ("true", "1", "yes"):
+# Set REM_SKIP_CONFIG=1 to disable (useful for development with .env)
+if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
     try:
         from rem.config import load_config, merge_config_to_env

remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl

Potentially problematic release.

remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl