PyPI - remdb - Versions diffs - 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl - Mend

remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (74) hide show

rem/agentic/agents/sse_simulator.py +2 -0
rem/agentic/context.py +51 -27
rem/agentic/mcp/tool_wrapper.py +155 -18
rem/agentic/otel/setup.py +93 -4
rem/agentic/providers/phoenix.py +371 -108
rem/agentic/providers/pydantic_ai.py +195 -46
rem/agentic/schema.py +361 -21
rem/agentic/tools/rem_tools.py +3 -3
rem/api/main.py +85 -16
rem/api/mcp_router/resources.py +1 -1
rem/api/mcp_router/server.py +18 -4
rem/api/mcp_router/tools.py +394 -16
rem/api/routers/admin.py +218 -1
rem/api/routers/chat/completions.py +280 -7
rem/api/routers/chat/models.py +81 -7
rem/api/routers/chat/otel_utils.py +33 -0
rem/api/routers/chat/sse_events.py +17 -1
rem/api/routers/chat/streaming.py +177 -3
rem/api/routers/feedback.py +142 -329
rem/api/routers/query.py +360 -0
rem/api/routers/shared_sessions.py +13 -13
rem/cli/commands/README.md +237 -64
rem/cli/commands/cluster.py +1808 -0
rem/cli/commands/configure.py +4 -7
rem/cli/commands/db.py +354 -143
rem/cli/commands/experiments.py +436 -30
rem/cli/commands/process.py +14 -8
rem/cli/commands/schema.py +92 -45
rem/cli/commands/session.py +336 -0
rem/cli/dreaming.py +2 -2
rem/cli/main.py +29 -6
rem/config.py +8 -1
rem/models/core/experiment.py +54 -0
rem/models/core/rem_query.py +5 -2
rem/models/entities/ontology.py +1 -1
rem/models/entities/ontology_config.py +1 -1
rem/models/entities/shared_session.py +2 -28
rem/registry.py +10 -4
rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
rem/schemas/agents/examples/contract-extractor.yaml +1 -1
rem/schemas/agents/examples/cv-parser.yaml +1 -1
rem/services/content/service.py +30 -8
rem/services/embeddings/api.py +4 -4
rem/services/embeddings/worker.py +16 -16
rem/services/phoenix/client.py +59 -18
rem/services/postgres/README.md +151 -26
rem/services/postgres/__init__.py +2 -1
rem/services/postgres/diff_service.py +531 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
rem/services/postgres/schema_generator.py +205 -4
rem/services/postgres/service.py +6 -6
rem/services/rem/parser.py +44 -9
rem/services/rem/service.py +36 -2
rem/services/session/compression.py +7 -0
rem/services/session/reload.py +1 -1
rem/settings.py +288 -16
rem/sql/background_indexes.sql +19 -24
rem/sql/migrations/001_install.sql +252 -69
rem/sql/migrations/002_install_models.sql +2197 -619
rem/sql/migrations/003_optional_extensions.sql +326 -0
rem/sql/migrations/004_cache_system.sql +548 -0
rem/utils/__init__.py +18 -0
rem/utils/date_utils.py +2 -2
rem/utils/schema_loader.py +110 -15
rem/utils/sql_paths.py +146 -0
rem/utils/vision.py +1 -1
rem/workers/__init__.py +3 -1
rem/workers/db_listener.py +579 -0
rem/workers/unlogged_maintainer.py +463 -0
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
rem/sql/migrations/003_seed_default_user.sql +0 -48
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0

rem/settings.py CHANGED Viewed

@@ -33,14 +33,15 @@ Example .env file:
     AUTH__OIDC_CLIENT_ID=your-client-id
     AUTH__SESSION_SECRET=your-secret-key
-    # OpenTelemetry (disabled by default)
+    # OpenTelemetry (disabled by default - enable via env var when collector available)
+    # Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
     OTEL__ENABLED=false
     OTEL__SERVICE_NAME=rem-api
-    OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
-    OTEL__PROTOCOL=http
+    OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
+    OTEL__PROTOCOL=grpc
-    # Arize Phoenix (disabled by default)
-    PHOENIX__ENABLED=false
+    # Arize Phoenix (enabled by default - can be disabled via env var)
+    PHOENIX__ENABLED=true
     PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
     PHOENIX__PROJECT_NAME=rem
@@ -58,7 +59,7 @@ Example .env file:
 import os
 import hashlib
-from pydantic import Field, field_validator, FieldValidationInfo
+from pydantic import Field, field_validator, ValidationInfo
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from loguru import logger
@@ -241,6 +242,11 @@ class OTELSettings(BaseSettings):
         description="Export timeout in milliseconds",
     )
+    insecure: bool = Field(
+        default=True,
+        description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
+    )
 class PhoenixSettings(BaseSettings):
     """
@@ -267,8 +273,8 @@ class PhoenixSettings(BaseSettings):
     )
     enabled: bool = Field(
-        default=False,
-        description="Enable Phoenix integration (disabled by default for local dev)",
+        default=True,
+        description="Enable Phoenix integration (enabled by default)",
     )
     base_url: str = Field(
@@ -414,7 +420,7 @@ class AuthSettings(BaseSettings):
     @field_validator("session_secret", mode="before")
     @classmethod
-    def generate_dev_secret(cls, v: str | None, info: FieldValidationInfo) -> str:
+    def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
         # Only generate if not already set and not in production
         if not v and info.data.get("environment") != "production":
             # Deterministic secret for development
@@ -686,6 +692,91 @@ class S3Settings(BaseSettings):
     )
+class DataLakeSettings(BaseSettings):
+    """
+    Data lake settings for experiment and dataset storage.
+    Data Lake Convention:
+        The data lake provides a standardized structure for storing datasets,
+        experiments, and calibration data in S3. Users bring their own bucket
+        and the version is pinned by default to v0 in the path.
+    S3 Path Structure:
+        s3://{bucket}/{version}/datasets/
+        ├── raw/                        # Raw source data + transformers
+        │   └── {dataset_name}/         # e.g., cns_drugs, codes, care
+        ├── tables/                     # Database table data (JSONL)
+        │   ├── resources/              # → resources table
+        │   │   ├── drugs/{category}/   # Psychotropic drugs
+        │   │   ├── care/stages/        # Treatment stages
+        │   │   └── crisis/             # Crisis resources
+        │   └── codes/                  # → codes table
+        │       ├── icd10/{category}/   # ICD-10 codes
+        │       └── cpt/                # CPT codes
+        └── calibration/                # Agent calibration
+            ├── experiments/            # Experiment configs + results
+            │   └── {agent}/{task}/     # e.g., siggy/risk-assessment
+            └── datasets/               # Shared evaluation datasets
+    Experiment Storage:
+        - Local: experiments/{agent}/{task}/experiment.yaml
+        - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
+    Environment variables:
+        DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
+        DATA_LAKE__VERSION - Path version prefix (default: v0)
+        DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
+        DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="DATA_LAKE__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    bucket_name: str | None = Field(
+        default=None,
+        description="S3 bucket for data lake storage (user-provided)",
+    )
+    version: str = Field(
+        default="v0",
+        description="API version for data lake paths",
+    )
+    datasets_prefix: str = Field(
+        default="datasets",
+        description="Root directory for datasets in the bucket",
+    )
+    experiments_prefix: str = Field(
+        default="experiments",
+        description="Subdirectory within calibration for experiments",
+    )
+    def get_base_uri(self) -> str | None:
+        """Get the base S3 URI for the data lake."""
+        if not self.bucket_name:
+            return None
+        return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
+    def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
+        """Get the S3 URI for an experiment."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
+    def get_tables_uri(self, table: str = "resources") -> str | None:
+        """Get the S3 URI for a table directory."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/tables/{table}"
 class ChunkingSettings(BaseSettings):
     """
     Document chunking settings for semantic text splitting.
@@ -1004,6 +1095,75 @@ class APISettings(BaseSettings):
     )
+class ModelsSettings(BaseSettings):
+    """
+    Custom model registration settings for downstream applications.
+    Allows downstream apps to specify Python modules containing custom models
+    that should be imported (and thus registered) before schema generation.
+    This enables `rem db schema generate` to discover models registered with
+    `@rem.register_model` in downstream applications.
+    Environment variables:
+        MODELS__IMPORT_MODULES - Semicolon-separated list of Python modules to import
+                                 Example: "models;myapp.entities;myapp.custom_models"
+    Example:
+        # In downstream app's .env
+        MODELS__IMPORT_MODULES=models
+        # In downstream app's models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+        @rem.register_model
+        class MyCustomEntity(CoreModel):
+            name: str
+        # Then run schema generation
+        rem db schema generate  # Includes MyCustomEntity
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="MODELS__",
+        extra="ignore",
+    )
+    import_modules: str = Field(
+        default="",
+        description=(
+            "Semicolon-separated list of Python modules to import for model registration. "
+            "These modules are imported before schema generation to ensure custom models "
+            "decorated with @rem.register_model are discovered. "
+            "Example: 'models;myapp.entities'"
+        ),
+    )
+    @property
+    def module_list(self) -> list[str]:
+        """
+        Get modules as a list, filtering empty strings.
+        Auto-detects ./models folder if it exists and is importable.
+        """
+        modules = []
+        if self.import_modules:
+            modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
+        # Auto-detect ./models if it exists and is a Python package (convention over configuration)
+        from pathlib import Path
+        models_path = Path("./models")
+        if models_path.exists() and models_path.is_dir():
+            # Check if it's a Python package (has __init__.py)
+            if (models_path / "__init__.py").exists():
+                if "models" not in modules:
+                    modules.insert(0, "models")
+        return modules
 class SchemaSettings(BaseSettings):
     """
     Schema search path settings for agent and evaluator schemas.
@@ -1187,6 +1347,110 @@ class GitSettings(BaseSettings):
     )
+class DBListenerSettings(BaseSettings):
+    """
+    PostgreSQL LISTEN/NOTIFY database listener settings.
+    The DB Listener is a lightweight worker that subscribes to PostgreSQL
+    NOTIFY events and dispatches them to external systems (SQS, REST, custom).
+    Architecture:
+        - Single-replica deployment (to avoid duplicate processing)
+        - Dedicated connection for LISTEN (not from connection pool)
+        - Automatic reconnection with exponential backoff
+        - Graceful shutdown on SIGTERM
+    Use Cases:
+        - Sync data changes to external systems (Phoenix, webhooks)
+        - Trigger async jobs without polling
+        - Event-driven architectures with PostgreSQL as event source
+    Example PostgreSQL trigger:
+        CREATE OR REPLACE FUNCTION notify_feedback_insert()
+        RETURNS TRIGGER AS $$
+        BEGIN
+            PERFORM pg_notify('feedback_sync', json_build_object(
+                'id', NEW.id,
+                'table', 'feedbacks',
+                'action', 'insert'
+            )::text);
+            RETURN NEW;
+        END;
+        $$ LANGUAGE plpgsql;
+    Environment variables:
+        DB_LISTENER__ENABLED - Enable the listener worker (default: false)
+        DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
+        DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
+        DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
+        DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
+        DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
+        DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
+    References:
+        - PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
+        - Brandur's Notifier: https://brandur.org/notifier
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="DB_LISTENER__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+    enabled: bool = Field(
+        default=False,
+        description="Enable the DB Listener worker (disabled by default)",
+    )
+    channels: str = Field(
+        default="",
+        description=(
+            "Comma-separated list of PostgreSQL channels to LISTEN on. "
+            "Example: 'feedback_sync,entity_update,user_events'"
+        ),
+    )
+    handler_type: str = Field(
+        default="rest",
+        description=(
+            "Handler type for dispatching notifications. Options: "
+            "'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
+        ),
+    )
+    sqs_queue_url: str = Field(
+        default="",
+        description="SQS queue URL for handler_type='sqs'",
+    )
+    rest_endpoint: str = Field(
+        default="http://localhost:8000/api/v1/internal/events",
+        description=(
+            "REST endpoint URL for handler_type='rest'. "
+            "Receives POST with {channel, payload, source} JSON body."
+        ),
+    )
+    reconnect_delay: float = Field(
+        default=1.0,
+        description="Initial delay (seconds) between reconnection attempts",
+    )
+    max_reconnect_delay: float = Field(
+        default=60.0,
+        description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
+    )
+    @property
+    def channel_list(self) -> list[str]:
+        """Get channels as a list, filtering empty strings."""
+        if not self.channels:
+            return []
+        return [c.strip() for c in self.channels.split(",") if c.strip()]
 class TestSettings(BaseSettings):
     """
     Test environment settings.
@@ -1281,16 +1545,12 @@ class Settings(BaseSettings):
         description="Root path for reverse proxy (e.g., /rem for ALB routing)",
     )
-    sql_dir: str = Field(
-        default="src/rem/sql",
-        description="Directory for SQL files and migrations",
-    )
     # Nested settings groups
     api: APISettings = Field(default_factory=APISettings)
     chat: ChatSettings = Field(default_factory=ChatSettings)
     llm: LLMSettings = Field(default_factory=LLMSettings)
     mcp: MCPSettings = Field(default_factory=MCPSettings)
+    models: ModelsSettings = Field(default_factory=ModelsSettings)
     otel: OTELSettings = Field(default_factory=OTELSettings)
     phoenix: PhoenixSettings = Field(default_factory=PhoenixSettings)
     auth: AuthSettings = Field(default_factory=AuthSettings)
@@ -1298,18 +1558,30 @@ class Settings(BaseSettings):
     migration: MigrationSettings = Field(default_factory=MigrationSettings)
     storage: StorageSettings = Field(default_factory=StorageSettings)
     s3: S3Settings = Field(default_factory=S3Settings)
+    data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
     git: GitSettings = Field(default_factory=GitSettings)
     sqs: SQSSettings = Field(default_factory=SQSSettings)
+    db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
     chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
     content: ContentSettings = Field(default_factory=ContentSettings)
     schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
     test: TestSettings = Field(default_factory=TestSettings)
+# Auto-load .env file from current directory if it exists
+# This happens BEFORE config file loading, so .env takes precedence
+from pathlib import Path
+from dotenv import load_dotenv
+_dotenv_path = Path(".env")
+if _dotenv_path.exists():
+    load_dotenv(_dotenv_path, override=False)  # Don't override existing env vars
+    logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
 # Load configuration from ~/.rem/config.yaml before initializing settings
 # This allows user configuration to be merged with environment variables
-# Set REM_SKIP_CONFIG_FILE=true to disable (useful for development with .env)
-if not os.getenv("REM_SKIP_CONFIG_FILE", "").lower() in ("true", "1", "yes"):
+# Set REM_SKIP_CONFIG=1 to disable (useful for development with .env)
+if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
     try:
         from rem.config import load_config, merge_config_to_env

rem/sql/background_indexes.sql CHANGED Viewed

@@ -1,9 +1,9 @@
 -- Background index creation
 -- Run AFTER initial data load to avoid blocking writes
--- HNSW vector index for embeddings_users
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
-ON embeddings_users
+-- HNSW vector index for embeddings_files
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
+ON embeddings_files
 USING hnsw (embedding vector_cosine_ops);
 -- HNSW vector index for embeddings_image_resources
@@ -11,29 +11,14 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_image_resources_vector_hn
 ON embeddings_image_resources
 USING hnsw (embedding vector_cosine_ops);
--- HNSW vector index for embeddings_moments
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
-ON embeddings_moments
-USING hnsw (embedding vector_cosine_ops);
--- HNSW vector index for embeddings_sessions
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
-ON embeddings_sessions
-USING hnsw (embedding vector_cosine_ops);
--- HNSW vector index for embeddings_resources
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
-ON embeddings_resources
-USING hnsw (embedding vector_cosine_ops);
 -- HNSW vector index for embeddings_messages
 CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_messages_vector_hnsw
 ON embeddings_messages
 USING hnsw (embedding vector_cosine_ops);
--- HNSW vector index for embeddings_files
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
-ON embeddings_files
+-- HNSW vector index for embeddings_moments
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
+ON embeddings_moments
 USING hnsw (embedding vector_cosine_ops);
 -- HNSW vector index for embeddings_ontology_configs
@@ -41,12 +26,22 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_ontology_configs_vector_h
 ON embeddings_ontology_configs
 USING hnsw (embedding vector_cosine_ops);
--- HNSW vector index for embeddings_domain_resources
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_domain_resources_vector_hnsw
-ON embeddings_domain_resources
+-- HNSW vector index for embeddings_resources
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
+ON embeddings_resources
 USING hnsw (embedding vector_cosine_ops);
 -- HNSW vector index for embeddings_schemas
 CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_schemas_vector_hnsw
 ON embeddings_schemas
 USING hnsw (embedding vector_cosine_ops);
+-- HNSW vector index for embeddings_sessions
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
+ON embeddings_sessions
+USING hnsw (embedding vector_cosine_ops);
+-- HNSW vector index for embeddings_users
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
+ON embeddings_users
+USING hnsw (embedding vector_cosine_ops);

remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl

Potentially problematic release.

remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl