PyPI - remdb - Versions diffs - 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl - Mend

remdb 0.3.103-py3-none-any.whl → 0.3.141-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (74)

rem/agentic/agents/sse_simulator.py +2 -0
rem/agentic/context.py +51 -27
rem/agentic/mcp/tool_wrapper.py +155 -18
rem/agentic/otel/setup.py +93 -4
rem/agentic/providers/phoenix.py +371 -108
rem/agentic/providers/pydantic_ai.py +195 -46
rem/agentic/schema.py +361 -21
rem/agentic/tools/rem_tools.py +3 -3
rem/api/main.py +85 -16
rem/api/mcp_router/resources.py +1 -1
rem/api/mcp_router/server.py +18 -4
rem/api/mcp_router/tools.py +394 -16
rem/api/routers/admin.py +218 -1
rem/api/routers/chat/completions.py +280 -7
rem/api/routers/chat/models.py +81 -7
rem/api/routers/chat/otel_utils.py +33 -0
rem/api/routers/chat/sse_events.py +17 -1
rem/api/routers/chat/streaming.py +177 -3
rem/api/routers/feedback.py +142 -329
rem/api/routers/query.py +360 -0
rem/api/routers/shared_sessions.py +13 -13
rem/cli/commands/README.md +237 -64
rem/cli/commands/cluster.py +1808 -0
rem/cli/commands/configure.py +4 -7
rem/cli/commands/db.py +354 -143
rem/cli/commands/experiments.py +436 -30
rem/cli/commands/process.py +14 -8
rem/cli/commands/schema.py +92 -45
rem/cli/commands/session.py +336 -0
rem/cli/dreaming.py +2 -2
rem/cli/main.py +29 -6
rem/config.py +8 -1
rem/models/core/experiment.py +54 -0
rem/models/core/rem_query.py +5 -2
rem/models/entities/ontology.py +1 -1
rem/models/entities/ontology_config.py +1 -1
rem/models/entities/shared_session.py +2 -28
rem/registry.py +10 -4
rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
rem/schemas/agents/examples/contract-extractor.yaml +1 -1
rem/schemas/agents/examples/cv-parser.yaml +1 -1
rem/services/content/service.py +30 -8
rem/services/embeddings/api.py +4 -4
rem/services/embeddings/worker.py +16 -16
rem/services/phoenix/client.py +59 -18
rem/services/postgres/README.md +151 -26
rem/services/postgres/__init__.py +2 -1
rem/services/postgres/diff_service.py +531 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
rem/services/postgres/schema_generator.py +205 -4
rem/services/postgres/service.py +6 -6
rem/services/rem/parser.py +44 -9
rem/services/rem/service.py +36 -2
rem/services/session/compression.py +7 -0
rem/services/session/reload.py +1 -1
rem/settings.py +288 -16
rem/sql/background_indexes.sql +19 -24
rem/sql/migrations/001_install.sql +252 -69
rem/sql/migrations/002_install_models.sql +2197 -619
rem/sql/migrations/003_optional_extensions.sql +326 -0
rem/sql/migrations/004_cache_system.sql +548 -0
rem/utils/__init__.py +18 -0
rem/utils/date_utils.py +2 -2
rem/utils/schema_loader.py +110 -15
rem/utils/sql_paths.py +146 -0
rem/utils/vision.py +1 -1
rem/workers/__init__.py +3 -1
rem/workers/db_listener.py +579 -0
rem/workers/unlogged_maintainer.py +463 -0
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
rem/sql/migrations/003_seed_default_user.sql +0 -48
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
{remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0

rem/services/postgres/schema_generator.py CHANGED Viewed

@@ -12,6 +12,7 @@ Output includes:
 - KV_STORE triggers
 - Indexes (foreground and background)
 - Migrations
+- Schema table entries (for agent-like table access)
 Usage:
     from rem.services.postgres.schema_generator import SchemaGenerator
@@ -30,14 +31,192 @@ Usage:
 import importlib.util
 import inspect
+import json
+import uuid
 from pathlib import Path
-from typing import Type
+from typing import Any, Type
 from loguru import logger
 from pydantic import BaseModel
 from ...settings import settings
-from .register_type import register_type
+from ...utils.sql_paths import get_package_sql_dir
+from .register_type import register_type, should_embed_field
+# Namespace UUID for generating deterministic UUIDs from model names
+# Using UUID5 with this namespace ensures same model always gets same UUID
+REM_SCHEMA_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")  # DNS namespace
+def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
+    """
+    Generate deterministic UUID from fully qualified model name.
+    Uses UUID5 (SHA-1 hash) with REM namespace for reproducibility.
+    Same fully qualified name always produces same UUID.
+    Args:
+        fully_qualified_name: Full module path, e.g., "rem.models.entities.Resource"
+    Returns:
+        Deterministic UUID for this model
+    """
+    return uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
+def extract_model_schema_metadata(
+    model: Type[BaseModel],
+    table_name: str,
+    entity_key_field: str,
+    include_search_tool: bool = True,
+) -> dict[str, Any]:
+    """
+    Extract schema metadata from a Pydantic model for schemas table.
+    Args:
+        model: Pydantic model class
+        table_name: Database table name
+        entity_key_field: Field used as entity key in kv_store
+        include_search_tool: If True, add search_rem tool for querying this table
+    Returns:
+        Dict with schema metadata ready for schemas table insert
+    """
+    # Get fully qualified name
+    fqn = f"{model.__module__}.{model.__name__}"
+    # Generate deterministic UUID
+    schema_id = generate_model_uuid(fqn)
+    # Get JSON schema from Pydantic
+    json_schema = model.model_json_schema()
+    # Find embedding fields
+    embedding_fields = []
+    for field_name, field_info in model.model_fields.items():
+        if should_embed_field(field_name, field_info):
+            embedding_fields.append(field_name)
+    # Build description with search capability note
+    base_description = model.__doc__ or f"Schema for {model.__name__}"
+    search_note = (
+        f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
+        f"Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
+        f"SEARCH for semantic similarity, or SQL for complex queries."
+    ) if include_search_tool else ""
+    # Build spec with table metadata and tools
+    # Note: default_search_table is used by create_agent to append a description
+    # suffix to the search_rem tool when loading it dynamically
+    has_embeddings = bool(embedding_fields)
+    spec = {
+        "type": "object",
+        "description": base_description + search_note,
+        "properties": json_schema.get("properties", {}),
+        "required": json_schema.get("required", []),
+        "json_schema_extra": {
+            "table_name": table_name,
+            "entity_key_field": entity_key_field,
+            "embedding_fields": embedding_fields,
+            "fully_qualified_name": fqn,
+            "tools": ["search_rem"] if include_search_tool else [],
+            "default_search_table": table_name,
+            "has_embeddings": has_embeddings,
+        },
+    }
+    # Build content (documentation)
+    content = f"""# {model.__name__}
+{base_description}
+## Overview
+The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely
+identified by its `{entity_key_field}` field for lookups and graph traversal.
+## Search Capabilities
+This schema includes the `search_rem` tool which supports:
+- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)
+- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
+- **SEARCH**: Semantic vector search on {', '.join(embedding_fields) if embedding_fields else 'content'} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)
+- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)
+## Table Info
+| Property | Value |
+|----------|-------|
+| Table | `{table_name}` |
+| Entity Key | `{entity_key_field}` |
+| Embedding Fields | {', '.join(f'`{f}`' for f in embedding_fields) if embedding_fields else 'None'} |
+| Tools | {', '.join(['`search_rem`'] if include_search_tool else ['None'])} |
+## Fields
+"""
+    for field_name, field_info in model.model_fields.items():
+        field_type = str(field_info.annotation) if field_info.annotation else "Any"
+        field_desc = field_info.description or ""
+        required = "Required" if field_info.is_required() else "Optional"
+        content += f"### `{field_name}`\n"
+        content += f"- **Type**: `{field_type}`\n"
+        content += f"- **{required}**\n"
+        if field_desc:
+            content += f"- {field_desc}\n"
+        content += "\n"
+    return {
+        "id": str(schema_id),
+        "name": model.__name__,
+        "table_name": table_name,
+        "entity_key_field": entity_key_field,
+        "embedding_fields": embedding_fields,
+        "fqn": fqn,
+        "spec": spec,
+        "content": content,
+        "category": "entity",
+    }
+def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
+    """
+    Generate SQL UPSERT statement for schemas table.
+    Uses ON CONFLICT DO UPDATE for idempotency.
+    Args:
+        schema_metadata: Dict from extract_model_schema_metadata()
+    Returns:
+        SQL INSERT ... ON CONFLICT statement
+    """
+    # Escape single quotes in content and spec
+    content_escaped = schema_metadata["content"].replace("'", "''")
+    spec_json = json.dumps(schema_metadata["spec"]).replace("'", "''")
+    sql = f"""
+-- Schema entry for {schema_metadata['name']} ({schema_metadata['table_name']})
+INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
+VALUES (
+    '{schema_metadata['id']}'::uuid,
+    'system',
+    '{schema_metadata['name']}',
+    '{content_escaped}',
+    '{spec_json}'::jsonb,
+    'entity',
+    '{{"table_name": "{schema_metadata['table_name']}", "entity_key_field": "{schema_metadata['entity_key_field']}", "embedding_fields": {json.dumps(schema_metadata['embedding_fields'])}, "fqn": "{schema_metadata['fqn']}"}}'::jsonb
+)
+ON CONFLICT (id) DO UPDATE SET
+    name = EXCLUDED.name,
+    content = EXCLUDED.content,
+    spec = EXCLUDED.spec,
+    category = EXCLUDED.category,
+    metadata = EXCLUDED.metadata,
+    updated_at = CURRENT_TIMESTAMP;
+"""
+    return sql.strip()
 class SchemaGenerator:
@@ -56,9 +235,9 @@ class SchemaGenerator:
         Initialize schema generator.
         Args:
-            output_dir: Optional directory for output files (defaults to settings.sql_dir)
+            output_dir: Optional directory for output files (defaults to package sql dir)
         """
-        self.output_dir = output_dir or Path(settings.sql_dir)
+        self.output_dir = output_dir or get_package_sql_dir()
         self.schemas: dict[str, dict] = {}
     def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
@@ -234,6 +413,14 @@ class SchemaGenerator:
             create_kv_trigger=True,
         )
+        # Extract schema metadata for schemas table entry
+        schema_metadata = extract_model_schema_metadata(
+            model=model,
+            table_name=table_name,
+            entity_key_field=entity_key_field,
+        )
+        schema["schema_metadata"] = schema_metadata
         self.schemas[table_name] = schema
         return schema
@@ -343,6 +530,7 @@ class SchemaGenerator:
             "-- 2. Embeddings tables (embeddings_<table>)",
             "-- 3. KV_STORE triggers for cache maintenance",
             "-- 4. Indexes (foreground only, background indexes separate)",
+            "-- 5. Schema table entries (for agent-like table access)",
             "",
             "-- ============================================================================",
             "-- PREREQUISITES CHECK",
@@ -388,6 +576,19 @@ class SchemaGenerator:
                 sql_parts.append(schema["sql"]["kv_trigger"])
                 sql_parts.append("")
+        # Add schema table entries (every entity table is also an "agent")
+        sql_parts.append("-- ============================================================================")
+        sql_parts.append("-- SCHEMA TABLE ENTRIES")
+        sql_parts.append("-- Every entity table gets a schemas entry for agent-like access")
+        sql_parts.append("-- ============================================================================")
+        sql_parts.append("")
+        for table_name, schema in self.schemas.items():
+            if "schema_metadata" in schema:
+                schema_upsert = generate_schema_upsert_sql(schema["schema_metadata"])
+                sql_parts.append(schema_upsert)
+                sql_parts.append("")
         # Add migration record
         sql_parts.append("-- ============================================================================")
         sql_parts.append("-- RECORD MIGRATION")

rem/services/postgres/service.py CHANGED Viewed

@@ -190,19 +190,19 @@ class PostgresService:
     async def connect(self) -> None:
         """Establish database connection pool."""
-        logger.info(f"Connecting to PostgreSQL with pool size {self.pool_size}")
+        logger.debug(f"Connecting to PostgreSQL with pool size {self.pool_size}")
         self.pool = await asyncpg.create_pool(
             self.connection_string,
             min_size=1,
             max_size=self.pool_size,
             init=self._init_connection,  # Configure JSONB codec on each connection
         )
-        logger.info("PostgreSQL connection pool established")
+        logger.debug("PostgreSQL connection pool established")
         # Start embedding worker if available
         if self.embedding_worker and hasattr(self.embedding_worker, "start"):
             await self.embedding_worker.start()
-            logger.info("Embedding worker started")
+            logger.debug("Embedding worker started")
     async def disconnect(self) -> None:
         """Close database connection pool."""
@@ -211,10 +211,10 @@ class PostgresService:
         # The worker will be stopped explicitly when the application shuts down
         if self.pool:
-            logger.info("Closing PostgreSQL connection pool")
+            logger.debug("Closing PostgreSQL connection pool")
             await self.pool.close()
             self.pool = None
-            logger.info("PostgreSQL connection pool closed")
+            logger.debug("PostgreSQL connection pool closed")
     async def execute(
         self,
@@ -631,7 +631,7 @@ class PostgresService:
         table_name: str,
         embedding: list[float],
         limit: int = 10,
-        min_similarity: float = 0.7,
+        min_similarity: float = 0.3,
         tenant_id: Optional[str] = None,
     ) -> list[dict[str, Any]]:
         """

rem/services/rem/parser.py CHANGED Viewed

@@ -50,9 +50,36 @@ class RemQueryParser:
         params: Dict[str, Any] = {}
         positional_args: List[str] = []
-        # Process remaining tokens
-        for token in tokens[1:]:
-            if "=" in token:
+        # For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
+        if query_type == QueryType.SQL:
+            # Everything after "SQL" is the raw SQL query
+            raw_sql = query_string[3:].strip()  # Skip "SQL" prefix
+            params["raw_query"] = raw_sql
+            return query_type, params
+        # Process remaining tokens, handling REM keywords
+        i = 1
+        while i < len(tokens):
+            token = tokens[i]
+            token_upper = token.upper()
+            # Handle REM keywords that take a value
+            if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
+                if i + 1 < len(tokens):
+                    keyword_map = {
+                        "LIMIT": "limit",
+                        "DEPTH": "max_depth",
+                        "THRESHOLD": "threshold",
+                        "TYPE": "edge_types",
+                        "FROM": "initial_query",
+                        "WITH": "initial_query",
+                    }
+                    key = keyword_map[token_upper]
+                    value = tokens[i + 1]
+                    params[key] = self._convert_value(key, value)
+                    i += 2
+                    continue
+            elif "=" in token:
                 # It's a keyword argument
                 key, value = token.split("=", 1)
                 # Handle parameter aliases
@@ -61,6 +88,7 @@ class RemQueryParser:
             else:
                 # It's a positional argument part
                 positional_args.append(token)
+            i += 1
         # Map positional arguments to specific fields based on QueryType
         self._map_positional_args(query_type, positional_args, params)
@@ -133,13 +161,20 @@ class RemQueryParser:
             params["query_text"] = combined_value
         elif query_type == QueryType.SEARCH:
-            params["query_text"] = combined_value
+            # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
+            # First positional arg is table name, rest is query text
+            if len(positional_args) >= 2:
+                params["table_name"] = positional_args[0]
+                params["query_text"] = " ".join(positional_args[1:])
+            elif len(positional_args) == 1:
+                # Could be table name or query text - assume query text if no table
+                params["query_text"] = positional_args[0]
+            # If no positional args, params stays empty
         elif query_type == QueryType.TRAVERSE:
             params["initial_query"] = combined_value
-        # SQL typically requires named arguments (table=...), but if we supported
-        # SQL SELECT * FROM ..., we might handle it differently.
-        # For now, RemService expects table=...
-        # If there are positional args for SQL, we might ignore or raise,
-        # but current service doesn't use them.
+        elif query_type == QueryType.SQL:
+            # SQL with positional args means "SQL SELECT * FROM ..." form
+            # Treat the combined positional args as the raw SQL query
+            params["raw_query"] = combined_value

rem/services/rem/service.py CHANGED Viewed

@@ -13,6 +13,31 @@ Design:
 - All queries pushed down to Postgres for performance
 - Model schema inspection for validation only
 - Exceptions for missing fields/embeddings
+TODO: Staged Plan Execution
+- Implement execute_staged_plan() method for multi-stage query execution
+- Each stage can be:
+  1. Static query (query field): Execute REM dialect directly
+  2. Dynamic query (intent field): LLM interprets intent + previous results to build query
+- Flow for dynamic stages:
+  1. Gather results from depends_on stages (from previous_results or current execution)
+  2. Pass intent + previous results to LLM (like ask_rem but with context)
+  3. LLM generates REM query based on what it learned from previous stages
+  4. Execute generated query
+  5. Store results in stage_results for client to use in continuation
+- Multi-turn continuation:
+  - Client passes previous_results back from response's stage_results
+  - Client sets resume_from_stage to skip already-executed stages
+  - Server uses previous_results as context for depends_on lookups
+- Use cases:
+  - LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
+  - SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
+  - Complex graph exploration with LLM-driven navigation
+- API: POST /api/v1/query with:
+  - mode="staged-plan"
+  - plan=[{stage, query|intent, name, depends_on}]
+  - previous_results=[{stage, name, query_executed, results, count}] (for continuation)
+  - resume_from_stage=N (to skip completed stages)
 """
 from typing import Any
@@ -309,17 +334,26 @@ class RemService:
         )
         # Execute vector search via rem_search() PostgreSQL function
+        min_sim = params.min_similarity if params.min_similarity is not None else 0.3
+        limit = params.limit or 10
         query_params = get_search_params(
             query_embedding,
             table_name,
             field_name,
             tenant_id,
             provider,
-            params.min_similarity or 0.7,
-            params.limit or 10,
+            min_sim,
+            limit,
             tenant_id, # Use tenant_id (query.user_id) as user_id
         )
+        logger.debug(
+            f"SEARCH params: table={table_name}, field={field_name}, "
+            f"tenant_id={tenant_id}, provider={provider}, "
+            f"min_similarity={min_sim}, limit={limit}, "
+            f"embedding_dims={len(query_embedding)}"
+        )
         results = await self.db.execute(SEARCH_QUERY, query_params)
+        logger.debug(f"SEARCH results: {len(results)} rows")
         return {
             "query_type": "SEARCH",

rem/services/session/compression.py CHANGED Viewed

@@ -170,12 +170,16 @@ class SessionMessageStore:
         entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
         # Create Message entity for assistant response
+        # Use pre-generated id from message dict if available (for frontend feedback)
         msg = Message(
+            id=message.get("id"),  # Use pre-generated ID if provided
             content=message.get("content", ""),
             message_type=message.get("role", "assistant"),
             session_id=session_id,
             tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
             user_id=user_id or self.user_id,
+            trace_id=message.get("trace_id"),
+            span_id=message.get("span_id"),
             metadata={
                 "message_index": message_index,
                 "entity_key": entity_key,  # Store entity key for LOOKUP
@@ -284,11 +288,14 @@ class SessionMessageStore:
                 # Short assistant messages, user messages, and system messages stored as-is
                 # Store ALL messages in database for full audit trail
                 msg = Message(
+                    id=message.get("id"),  # Use pre-generated ID if provided
                     content=content,
                     message_type=message.get("role", "user"),
                     session_id=session_id,
                     tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
                     user_id=user_id or self.user_id,
+                    trace_id=message.get("trace_id"),
+                    span_id=message.get("span_id"),
                     metadata={
                         "message_index": idx,
                         "timestamp": message.get("timestamp"),

rem/services/session/reload.py CHANGED Viewed

@@ -65,7 +65,7 @@ async def reload_session(
             session_id=session_id, user_id=user_id, decompress=decompress_messages
         )
-        logger.info(
+        logger.debug(
             f"Reloaded {len(messages)} messages for session {session_id} "
             f"(decompressed={decompress_messages})"
         )

remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl

Potentially problematic release.

remdb 0.3.103-py3-none-any.whl → 0.3.141-py3-none-any.whl