PyPI - remdb - Versions diffs - 0.2.6__py3-none-any.whl - Mend

remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show

rem/__init__.py +2 -0
rem/agentic/README.md +650 -0
rem/agentic/__init__.py +39 -0
rem/agentic/agents/README.md +155 -0
rem/agentic/agents/__init__.py +8 -0
rem/agentic/context.py +148 -0
rem/agentic/context_builder.py +329 -0
rem/agentic/mcp/__init__.py +0 -0
rem/agentic/mcp/tool_wrapper.py +107 -0
rem/agentic/otel/__init__.py +5 -0
rem/agentic/otel/setup.py +151 -0
rem/agentic/providers/phoenix.py +674 -0
rem/agentic/providers/pydantic_ai.py +572 -0
rem/agentic/query.py +117 -0
rem/agentic/query_helper.py +89 -0
rem/agentic/schema.py +396 -0
rem/agentic/serialization.py +245 -0
rem/agentic/tools/__init__.py +5 -0
rem/agentic/tools/rem_tools.py +231 -0
rem/api/README.md +420 -0
rem/api/main.py +324 -0
rem/api/mcp_router/prompts.py +182 -0
rem/api/mcp_router/resources.py +536 -0
rem/api/mcp_router/server.py +213 -0
rem/api/mcp_router/tools.py +584 -0
rem/api/routers/auth.py +229 -0
rem/api/routers/chat/__init__.py +5 -0
rem/api/routers/chat/completions.py +281 -0
rem/api/routers/chat/json_utils.py +76 -0
rem/api/routers/chat/models.py +124 -0
rem/api/routers/chat/streaming.py +185 -0
rem/auth/README.md +258 -0
rem/auth/__init__.py +26 -0
rem/auth/middleware.py +100 -0
rem/auth/providers/__init__.py +13 -0
rem/auth/providers/base.py +376 -0
rem/auth/providers/google.py +163 -0
rem/auth/providers/microsoft.py +237 -0
rem/cli/README.md +455 -0
rem/cli/__init__.py +8 -0
rem/cli/commands/README.md +126 -0
rem/cli/commands/__init__.py +3 -0
rem/cli/commands/ask.py +565 -0
rem/cli/commands/configure.py +423 -0
rem/cli/commands/db.py +493 -0
rem/cli/commands/dreaming.py +324 -0
rem/cli/commands/experiments.py +1124 -0
rem/cli/commands/mcp.py +66 -0
rem/cli/commands/process.py +245 -0
rem/cli/commands/schema.py +183 -0
rem/cli/commands/serve.py +106 -0
rem/cli/dreaming.py +363 -0
rem/cli/main.py +88 -0
rem/config.py +237 -0
rem/mcp_server.py +41 -0
rem/models/core/__init__.py +49 -0
rem/models/core/core_model.py +64 -0
rem/models/core/engram.py +333 -0
rem/models/core/experiment.py +628 -0
rem/models/core/inline_edge.py +132 -0
rem/models/core/rem_query.py +243 -0
rem/models/entities/__init__.py +43 -0
rem/models/entities/file.py +57 -0
rem/models/entities/image_resource.py +88 -0
rem/models/entities/message.py +35 -0
rem/models/entities/moment.py +123 -0
rem/models/entities/ontology.py +191 -0
rem/models/entities/ontology_config.py +131 -0
rem/models/entities/resource.py +95 -0
rem/models/entities/schema.py +87 -0
rem/models/entities/user.py +85 -0
rem/py.typed +0 -0
rem/schemas/README.md +507 -0
rem/schemas/__init__.py +6 -0
rem/schemas/agents/README.md +92 -0
rem/schemas/agents/core/moment-builder.yaml +178 -0
rem/schemas/agents/core/rem-query-agent.yaml +226 -0
rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
rem/schemas/agents/core/simple-assistant.yaml +19 -0
rem/schemas/agents/core/user-profile-builder.yaml +163 -0
rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
rem/schemas/agents/examples/contract-extractor.yaml +134 -0
rem/schemas/agents/examples/cv-parser.yaml +263 -0
rem/schemas/agents/examples/hello-world.yaml +37 -0
rem/schemas/agents/examples/query.yaml +54 -0
rem/schemas/agents/examples/simple.yaml +21 -0
rem/schemas/agents/examples/test.yaml +29 -0
rem/schemas/agents/rem.yaml +128 -0
rem/schemas/evaluators/hello-world/default.yaml +77 -0
rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
rem/services/__init__.py +16 -0
rem/services/audio/INTEGRATION.md +308 -0
rem/services/audio/README.md +376 -0
rem/services/audio/__init__.py +15 -0
rem/services/audio/chunker.py +354 -0
rem/services/audio/transcriber.py +259 -0
rem/services/content/README.md +1269 -0
rem/services/content/__init__.py +5 -0
rem/services/content/providers.py +806 -0
rem/services/content/service.py +657 -0
rem/services/dreaming/README.md +230 -0
rem/services/dreaming/__init__.py +53 -0
rem/services/dreaming/affinity_service.py +336 -0
rem/services/dreaming/moment_service.py +264 -0
rem/services/dreaming/ontology_service.py +54 -0
rem/services/dreaming/user_model_service.py +297 -0
rem/services/dreaming/utils.py +39 -0
rem/services/embeddings/__init__.py +11 -0
rem/services/embeddings/api.py +120 -0
rem/services/embeddings/worker.py +421 -0
rem/services/fs/README.md +662 -0
rem/services/fs/__init__.py +62 -0
rem/services/fs/examples.py +206 -0
rem/services/fs/examples_paths.py +204 -0
rem/services/fs/git_provider.py +935 -0
rem/services/fs/local_provider.py +760 -0
rem/services/fs/parsing-hooks-examples.md +172 -0
rem/services/fs/paths.py +276 -0
rem/services/fs/provider.py +460 -0
rem/services/fs/s3_provider.py +1042 -0
rem/services/fs/service.py +186 -0
rem/services/git/README.md +1075 -0
rem/services/git/__init__.py +17 -0
rem/services/git/service.py +469 -0
rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
rem/services/phoenix/README.md +453 -0
rem/services/phoenix/__init__.py +46 -0
rem/services/phoenix/client.py +686 -0
rem/services/phoenix/config.py +88 -0
rem/services/phoenix/prompt_labels.py +477 -0
rem/services/postgres/README.md +575 -0
rem/services/postgres/__init__.py +23 -0
rem/services/postgres/migration_service.py +427 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
rem/services/postgres/register_type.py +352 -0
rem/services/postgres/repository.py +337 -0
rem/services/postgres/schema_generator.py +379 -0
rem/services/postgres/service.py +802 -0
rem/services/postgres/sql_builder.py +354 -0
rem/services/rem/README.md +304 -0
rem/services/rem/__init__.py +23 -0
rem/services/rem/exceptions.py +71 -0
rem/services/rem/executor.py +293 -0
rem/services/rem/parser.py +145 -0
rem/services/rem/queries.py +196 -0
rem/services/rem/query.py +371 -0
rem/services/rem/service.py +527 -0
rem/services/session/README.md +374 -0
rem/services/session/__init__.py +6 -0
rem/services/session/compression.py +360 -0
rem/services/session/reload.py +77 -0
rem/settings.py +1235 -0
rem/sql/002_install_models.sql +1068 -0
rem/sql/background_indexes.sql +42 -0
rem/sql/install_models.sql +1038 -0
rem/sql/migrations/001_install.sql +503 -0
rem/sql/migrations/002_install_models.sql +1202 -0
rem/utils/AGENTIC_CHUNKING.md +597 -0
rem/utils/README.md +583 -0
rem/utils/__init__.py +43 -0
rem/utils/agentic_chunking.py +622 -0
rem/utils/batch_ops.py +343 -0
rem/utils/chunking.py +108 -0
rem/utils/clip_embeddings.py +276 -0
rem/utils/dict_utils.py +98 -0
rem/utils/embeddings.py +423 -0
rem/utils/examples/embeddings_example.py +305 -0
rem/utils/examples/sql_types_example.py +202 -0
rem/utils/markdown.py +16 -0
rem/utils/model_helpers.py +236 -0
rem/utils/schema_loader.py +229 -0
rem/utils/sql_types.py +348 -0
rem/utils/user_id.py +81 -0
rem/utils/vision.py +330 -0
rem/workers/README.md +506 -0
rem/workers/__init__.py +5 -0
rem/workers/dreaming.py +502 -0
rem/workers/engram_processor.py +312 -0
rem/workers/sqs_file_processor.py +193 -0
remdb-0.2.6.dist-info/METADATA +1191 -0
remdb-0.2.6.dist-info/RECORD +187 -0
remdb-0.2.6.dist-info/WHEEL +4 -0
remdb-0.2.6.dist-info/entry_points.txt +2 -0

rem/agentic/serialization.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""
+Pydantic Serialization Utilities for Agent Results.
+Critical Pattern:
+When returning Pydantic model instances from agent results (especially in MCP tools,
+API responses, or any serialization context), ALWAYS serialize them explicitly using
+.model_dump() or .model_dump_json() before returning.
+Why This Matters:
+- FastMCP, FastAPI, and other frameworks may use their own serialization logic
+- Pydantic models returned directly may not include all fields during serialization
+- Newly added fields might be silently dropped if not explicitly serialized
+- result.output or result.data may be a Pydantic model instance, not a dict
+Common Anti-Patterns to Avoid:
+```python
+# ❌ BAD: Returns Pydantic model directly
+return {
+    "status": "success",
+    "response": result.output,  # Pydantic model instance!
+}
+# ✅ GOOD: Explicitly serialize first
+return {
+    "status": "success",
+    "response": result.output.model_dump(),  # Serialized dict
+}
+```
+Design Rules:
+1. Always check if object has .model_dump() or .model_dump_json()
+2. Use serialize_agent_result() for consistent handling
+3. In streaming contexts, use .model_dump_json() for SSE
+4. Document when functions return Pydantic models vs dicts
+"""
+from typing import Any, cast
+from pydantic import BaseModel
+def serialize_agent_result(result: Any) -> dict[str, Any] | str:
+    """
+    Safely serialize an agent result, handling Pydantic models correctly.
+    This function ensures that Pydantic model instances are properly serialized
+    before being returned from API endpoints, MCP tools, or any other context
+    where serialization is critical.
+    Args:
+        result: Agent result which may be:
+            - Pydantic model instance (has .model_dump())
+            - Dict (already serialized)
+            - Primitive type (str, int, bool, None)
+            - List or other collection
+    Returns:
+        Serialized result as dict or primitive type
+    Examples:
+        >>> # With Pydantic model result
+        >>> agent_result = await agent.run(query)
+        >>> serialized = serialize_agent_result(agent_result.output)
+        >>> return {"response": serialized}  # Safe to serialize
+        >>> # With already-serialized result
+        >>> data = {"key": "value"}
+        >>> serialized = serialize_agent_result(data)
+        >>> assert serialized == data  # No-op for dicts
+        >>> # With primitive result
+        >>> result = "Hello world"
+        >>> serialized = serialize_agent_result(result)
+        >>> assert serialized == result  # No-op for primitives
+    """
+    # Check if this is a Pydantic model instance
+    if isinstance(result, BaseModel):
+        return result.model_dump()
+    # Check if this has a model_dump method (duck typing)
+    if hasattr(result, "model_dump") and callable(getattr(result, "model_dump")):
+        return cast(dict[str, Any] | str, result.model_dump())
+    # Already a dict or primitive - return as-is
+    return cast(dict[str, Any] | str, result)
+def serialize_agent_result_json(result: Any) -> str:
+    """
+    Safely serialize an agent result to JSON string, handling Pydantic models correctly.
+    Use this variant when you need a JSON string output (e.g., for SSE streaming,
+    JSON responses, or storage).
+    Args:
+        result: Agent result which may be:
+            - Pydantic model instance (has .model_dump_json())
+            - Dict or other JSON-serializable object
+            - Primitive type
+    Returns:
+        JSON string representation
+    Examples:
+        >>> # With Pydantic model result
+        >>> agent_result = await agent.run(query)
+        >>> json_str = serialize_agent_result_json(agent_result.output)
+        >>> return Response(content=json_str, media_type="application/json")
+        >>> # For SSE streaming
+        >>> chunk = serialize_agent_result_json(result.output)
+        >>> yield f"data: {chunk}\\n\\n"
+    """
+    import json
+    # Check if this is a Pydantic model instance with model_dump_json
+    if isinstance(result, BaseModel):
+        return result.model_dump_json()
+    # Check if this has a model_dump_json method (duck typing)
+    if hasattr(result, "model_dump_json") and callable(
+        getattr(result, "model_dump_json")
+    ):
+        return cast(str, result.model_dump_json())
+    # Fall back to standard json.dumps
+    return json.dumps(result)
+def is_pydantic_model(obj: Any) -> bool:
+    """
+    Check if an object is a Pydantic model instance.
+    Args:
+        obj: Object to check
+    Returns:
+        True if object is a Pydantic model instance
+    Examples:
+        >>> from pydantic import BaseModel
+        >>> class MyModel(BaseModel):
+        ...     value: str
+        >>> instance = MyModel(value="test")
+        >>> assert is_pydantic_model(instance) == True
+        >>> assert is_pydantic_model({"value": "test"}) == False
+    """
+    return isinstance(obj, BaseModel) or (
+        hasattr(obj, "model_dump") and hasattr(obj, "model_fields")
+    )
+def safe_serialize_dict(data: dict[str, Any]) -> dict[str, Any]:
+    """
+    Recursively serialize a dict that may contain Pydantic models.
+    Use this when you have a dict that may contain Pydantic model instances
+    nested within it (e.g., as values).
+    Args:
+        data: Dict that may contain Pydantic models
+    Returns:
+        Dict with all Pydantic models serialized to dicts
+    Examples:
+        >>> # Dict with nested Pydantic model
+        >>> data = {
+        ...     "status": "success",
+        ...     "result": some_pydantic_model,  # Will be serialized
+        ...     "metadata": {"count": 5}
+        ... }
+        >>> serialized = safe_serialize_dict(data)
+        >>> # All Pydantic models are now dicts
+    """
+    result = {}
+    for key, value in data.items():
+        if is_pydantic_model(value):
+            result[key] = serialize_agent_result(value)
+        elif isinstance(value, dict):
+            result[key] = safe_serialize_dict(value)
+        elif isinstance(value, list):
+            result[key] = [
+                serialize_agent_result(item) if is_pydantic_model(item) else item
+                for item in value
+            ]
+        else:
+            result[key] = value
+    return result
+# Example usage patterns for documentation.
+# The snippets live in a plain string constant: they are reference text only
+# and are not executed when this module is imported.
+# NOTE(review): Example 4 reads `result.data` while Examples 1-2 read
+# `result.output` - confirm which attribute the agent result exposes.
+USAGE_EXAMPLES = """
+# Example 1: MCP Tool returning agent result
+async def ask_rem_tool(query: str) -> dict[str, Any]:
+    from rem.agentic.serialization import serialize_agent_result
+    agent = await create_agent()
+    result = await agent.run(query)
+    # ✅ GOOD: Serialize before returning
+    return {
+        "status": "success",
+        "response": serialize_agent_result(result.output),
+        "model": result.model,
+    }
+# Example 2: API endpoint with Pydantic result
+@app.post("/query")
+async def query_endpoint(body: QueryRequest):
+    from rem.agentic.serialization import serialize_agent_result
+    agent = await create_agent()
+    result = await agent.run(body.query)
+    # ✅ GOOD: Serialize for FastAPI response
+    return {
+        "data": serialize_agent_result(result.output),
+        "usage": result.usage().model_dump() if result.usage() else None,
+    }
+# Example 3: Streaming with SSE
+async def stream_results(agent, query):
+    from rem.agentic.serialization import serialize_agent_result_json
+    async with agent.iter(query) as run:
+        async for event in run:
+            if isinstance(event, SomeEvent):
+                # ✅ GOOD: Serialize to JSON string for SSE
+                json_str = serialize_agent_result_json(event.data)
+                yield f"data: {json_str}\\n\\n"
+# Example 4: Service layer returning to MCP tool
+async def ask_rem(query: str, tenant_id: str) -> dict[str, Any]:
+    from rem.agentic.serialization import serialize_agent_result
+    agent = await create_agent()
+    result = await agent.run(query)
+    # ✅ GOOD: Serialize in service layer
+    return {
+        "query_output": serialize_agent_result(result.data),
+        "natural_query": query,
+    }
+"""

rem/agentic/tools/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Agent tools for REM operations."""
+from .rem_tools import search_rem_tool, ingest_file_tool
+__all__ = ["search_rem_tool", "ingest_file_tool"]

rem/agentic/tools/rem_tools.py ADDED Viewed

@@ -0,0 +1,231 @@
+"""
+REM tools for agent execution (CLI and API compatible).
+These tools work in both CLI and API contexts by initializing services on-demand.
+They wrap the service layer directly, not MCP tools.
+"""
+from typing import Any, Literal, cast
+from loguru import logger
+from ...models.core import (
+    FuzzyParameters,
+    LookupParameters,
+    QueryType,
+    RemQuery,
+    SearchParameters,
+    SQLParameters,
+    TraverseParameters,
+)
+from ...services.content import ContentService
+from ...services.postgres import get_postgres_service
+from ...services.rem import RemService
+# Service cache for reuse within agent execution.
+# _get_rem_service() fills it lazily: the connected Postgres service is stored
+# under "postgres" and the RemService built on top of it under "rem_service".
+_service_cache: dict[str, Any] = {}
+async def _get_rem_service() -> RemService:
+    """Get or create RemService instance."""
+    if "rem_service" not in _service_cache:
+        db = get_postgres_service()
+        if not db:
+            raise RuntimeError("PostgreSQL is disabled. Cannot use REM service.")
+        await db.connect()
+        _service_cache["postgres"] = db
+        _service_cache["rem_service"] = RemService(postgres_service=db)
+        logger.debug("Initialized RemService for agent tools")
+    return cast(RemService, _service_cache["rem_service"])
+async def search_rem_tool(
+    query_type: Literal["lookup", "fuzzy", "search", "sql", "traverse"],
+    user_id: str,
+    # LOOKUP parameters
+    entity_key: str | None = None,
+    # FUZZY parameters
+    query_text: str | None = None,
+    threshold: float = 0.7,
+    # SEARCH parameters
+    table: str | None = None,
+    limit: int = 20,
+    # SQL parameters
+    sql_query: str | None = None,
+    # TRAVERSE parameters
+    initial_query: str | None = None,
+    edge_types: list[str] | None = None,
+    depth: int = 1,
+) -> dict[str, Any]:
+    """
+    Execute REM queries for entity lookup, semantic search, and graph traversal.
+    This tool works in both CLI and API contexts by initializing services on-demand.
+    Args:
+        query_type: Type of query (lookup, fuzzy, search, sql, traverse)
+        user_id: User identifier for data scoping
+        entity_key: Entity key for LOOKUP (e.g., "Sarah Chen")
+        query_text: Search text for FUZZY or SEARCH
+        threshold: Similarity threshold for FUZZY (0.0-1.0)
+        table: Target table for SEARCH (resources, moments, users, etc.)
+        limit: Max results for SEARCH
+        sql_query: SQL query string for SQL type
+        initial_query: Starting entity for TRAVERSE
+        edge_types: Edge types to follow for TRAVERSE
+        depth: Traversal depth for TRAVERSE
+    Returns:
+        Dict with query results and metadata
+    """
+    try:
+        rem_service = await _get_rem_service()
+        # Build RemQuery based on query_type
+        if query_type == "lookup":
+            if not entity_key:
+                return {"status": "error", "error": "entity_key required for LOOKUP"}
+            query = RemQuery(
+                query_type=QueryType.LOOKUP,
+                parameters=LookupParameters(
+                    key=entity_key,
+                    user_id=user_id,
+                ),
+                user_id=user_id,
+            )
+        elif query_type == "fuzzy":
+            if not query_text:
+                return {"status": "error", "error": "query_text required for FUZZY"}
+            query = RemQuery(
+                query_type=QueryType.FUZZY,
+                parameters=FuzzyParameters(
+                    query_text=query_text,
+                    threshold=threshold,
+                    limit=limit, # Implied parameter
+                ),
+                user_id=user_id,
+            )
+        elif query_type == "search":
+            if not query_text:
+                return {"status": "error", "error": "query_text required for SEARCH"}
+            if not table:
+                return {"status": "error", "error": "table required for SEARCH"}
+            query = RemQuery(
+                query_type=QueryType.SEARCH,
+                parameters=SearchParameters(
+                    query_text=query_text,
+                    table_name=table,
+                    limit=limit,
+                ),
+                user_id=user_id,
+            )
+        elif query_type == "sql":
+            if not sql_query:
+                return {"status": "error", "error": "sql_query required for SQL"}
+            if not table:
+                 return {"status": "error", "error": "table required for SQL queries"}
+            query = RemQuery(
+                query_type=QueryType.SQL,
+                parameters=SQLParameters(
+                    table_name=table,
+                    where_clause=sql_query,
+                    limit=limit, # SQLParams accepts limit
+                ),
+                user_id=user_id,
+            )
+        elif query_type == "traverse":
+            if not initial_query:
+                return {"status": "error", "error": "initial_query required for TRAVERSE"}
+            query = RemQuery(
+                query_type=QueryType.TRAVERSE,
+                parameters=TraverseParameters(
+                    initial_query=initial_query,
+                    edge_types=edge_types or [],
+                    max_depth=depth,
+                ),
+                user_id=user_id,
+            )
+        else:
+            return {"status": "error", "error": f"Unknown query_type: {query_type}"}
+        # Execute query
+        logger.info(f"Executing REM query: {query_type} for user {user_id}")
+        result = await rem_service.execute_query(query)
+        logger.info(f"Query completed: {query_type}")
+        return {
+            "status": "success",
+            "query_type": query_type,
+            "results": result,
+        }
+    except Exception as e:
+        logger.error(f"search_rem_tool failed: {e}", exc_info=True)
+        return {
+            "status": "error",
+            "error": str(e),
+        }
+async def ingest_file_tool(
+    file_uri: str,
+    user_id: str,
+    category: str | None = None,
+    tags: list[str] | None = None,
+    is_local_server: bool = True,  # CLI is always local
+) -> dict[str, Any]:
+    """
+    Ingest file into REM (read + store + parse + chunk + embed).
+    This tool works in both CLI and API contexts.
+    Args:
+        file_uri: File location (local path, s3:// URI, or http(s):// URL)
+        user_id: User identifier for data scoping
+        category: Optional category (document, code, audio, etc.)
+        tags: Optional tags for file
+        is_local_server: True if running as local CLI (default)
+    Returns:
+        Dict with file_id, processing_status, resources_created, etc.
+    """
+    try:
+        content_service = ContentService()
+        result = await content_service.ingest_file(
+            file_uri=file_uri,
+            user_id=user_id,
+            category=category,
+            tags=tags,
+            is_local_server=is_local_server,
+        )
+        logger.info(
+            f"File ingestion complete: {result['file_name']} "
+            f"(status: {result['processing_status']}, "
+            f"resources: {result['resources_created']})"
+        )
+        return {
+            "status": "success",
+            **result,
+        }
+    except Exception as e:
+        logger.error(f"ingest_file_tool failed: {e}", exc_info=True)
+        return {
+            "status": "error",
+            "error": str(e),
+        }