PyPI - hindsight-api - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

hindsight_api/admin/__init__.py +1 -0
hindsight_api/admin/cli.py +252 -0
hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
hindsight_api/api/http.py +282 -20
hindsight_api/api/mcp.py +47 -52
hindsight_api/config.py +238 -6
hindsight_api/engine/cross_encoder.py +599 -86
hindsight_api/engine/db_budget.py +284 -0
hindsight_api/engine/db_utils.py +11 -0
hindsight_api/engine/embeddings.py +453 -26
hindsight_api/engine/entity_resolver.py +8 -5
hindsight_api/engine/interface.py +8 -4
hindsight_api/engine/llm_wrapper.py +241 -27
hindsight_api/engine/memory_engine.py +609 -122
hindsight_api/engine/query_analyzer.py +4 -3
hindsight_api/engine/response_models.py +38 -0
hindsight_api/engine/retain/fact_extraction.py +388 -192
hindsight_api/engine/retain/fact_storage.py +34 -8
hindsight_api/engine/retain/link_utils.py +24 -16
hindsight_api/engine/retain/orchestrator.py +52 -17
hindsight_api/engine/retain/types.py +9 -0
hindsight_api/engine/search/graph_retrieval.py +42 -13
hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
hindsight_api/engine/search/mpfp_retrieval.py +362 -117
hindsight_api/engine/search/reranking.py +2 -2
hindsight_api/engine/search/retrieval.py +847 -200
hindsight_api/engine/search/tags.py +172 -0
hindsight_api/engine/search/think_utils.py +1 -1
hindsight_api/engine/search/trace.py +12 -0
hindsight_api/engine/search/tracer.py +24 -1
hindsight_api/engine/search/types.py +21 -0
hindsight_api/engine/task_backend.py +109 -18
hindsight_api/engine/utils.py +1 -1
hindsight_api/extensions/context.py +10 -1
hindsight_api/main.py +56 -4
hindsight_api/metrics.py +433 -48
hindsight_api/migrations.py +141 -1
hindsight_api/models.py +3 -1
hindsight_api/pg0.py +53 -0
hindsight_api/server.py +39 -2
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
hindsight_api-0.3.0.dist-info/RECORD +82 -0
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
hindsight_api-0.2.0.dist-info/RECORD +0 -75
{hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0

hindsight_api/api/http.py CHANGED Viewed

@@ -36,7 +36,8 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
 from hindsight_api import MemoryEngine
 from hindsight_api.engine.db_utils import acquire_with_retry
 from hindsight_api.engine.memory_engine import Budget, fq_table
-from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
+from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES, TokenUsage
+from hindsight_api.engine.search.tags import TagsMatch
 from hindsight_api.extensions import HttpExtension, OperationValidationError, load_extension
 from hindsight_api.metrics import create_metrics_collector, get_metrics_collector, initialize_metrics
 from hindsight_api.models import RequestContext
@@ -81,6 +82,8 @@ class RecallRequest(BaseModel):
                 "trace": True,
                 "query_timestamp": "2023-05-30T23:40:00",
                 "include": {"entities": {"max_tokens": 500}},
+                "tags": ["user_a"],
+                "tags_match": "any",
             }
         }
     )
@@ -99,6 +102,15 @@ class RecallRequest(BaseModel):
         default_factory=IncludeOptions,
         description="Options for including additional data (entities are included by default)",
     )
+    tags: list[str] | None = Field(
+        default=None,
+        description="Filter memories by tags. If not specified, all memories are returned.",
+    )
+    tags_match: TagsMatch = Field(
+        default="any",
+        description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
+        "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
+    )
 class RecallResult(BaseModel):
@@ -119,6 +131,7 @@ class RecallResult(BaseModel):
                 "document_id": "session_abc123",
                 "metadata": {"source": "slack"},
                 "chunk_id": "456e7890-e12b-34d5-a678-901234567890",
+                "tags": ["user_a", "user_b"],
             }
         },
     }
@@ -134,6 +147,7 @@ class RecallResult(BaseModel):
     document_id: str | None = None  # Document this memory belongs to
     metadata: dict[str, str] | None = None  # User-defined metadata
     chunk_id: str | None = None  # Chunk this fact was extracted from
+    tags: list[str] | None = None  # Visibility scope tags
 class EntityObservationResponse(BaseModel):
@@ -188,12 +202,18 @@ class EntityListResponse(BaseModel):
                         "first_seen": "2024-01-15T10:30:00Z",
                         "last_seen": "2024-02-01T14:00:00Z",
                     }
-                ]
+                ],
+                "total": 150,
+                "limit": 100,
+                "offset": 0,
             }
         }
     )
     items: list[EntityListItem]
+    total: int
+    limit: int
+    offset: int
 class EntityDetailResponse(BaseModel):
@@ -300,6 +320,7 @@ class MemoryItem(BaseModel):
                 "metadata": {"source": "slack", "channel": "engineering"},
                 "document_id": "meeting_notes_2024_01_15",
                 "entities": [{"text": "Alice"}, {"text": "ML model", "type": "CONCEPT"}],
+                "tags": ["user_a", "user_b"],
             }
         },
     )
@@ -313,6 +334,10 @@ class MemoryItem(BaseModel):
         default=None,
         description="Optional entities to combine with auto-extracted entities.",
     )
+    tags: list[str] | None = Field(
+        default=None,
+        description="Optional tags for visibility scoping. Memories with tags can be filtered during recall.",
+    )
     @field_validator("timestamp", mode="before")
     @classmethod
@@ -347,6 +372,7 @@ class RetainRequest(BaseModel):
                     },
                 ],
                 "async": False,
+                "document_tags": ["user_a", "user_b"],
             }
         }
     )
@@ -357,6 +383,10 @@ class RetainRequest(BaseModel):
         alias="async",
         description="If true, process asynchronously in background. If false, wait for completion (default: false)",
     )
+    document_tags: list[str] | None = Field(
+        default=None,
+        description="Tags applied to all items in this request. These are merged with any item-level tags.",
+    )
 class RetainResponse(BaseModel):
@@ -364,7 +394,15 @@ class RetainResponse(BaseModel):
     model_config = ConfigDict(
         populate_by_name=True,
-        json_schema_extra={"example": {"success": True, "bank_id": "user123", "items_count": 2, "async": False}},
+        json_schema_extra={
+            "example": {
+                "success": True,
+                "bank_id": "user123",
+                "items_count": 2,
+                "async": False,
+                "usage": {"input_tokens": 500, "output_tokens": 100, "total_tokens": 600},
+            }
+        },
     )
     success: bool
@@ -373,6 +411,14 @@ class RetainResponse(BaseModel):
     is_async: bool = Field(
         alias="async", serialization_alias="async", description="Whether the operation was processed asynchronously"
     )
+    operation_id: str | None = Field(
+        default=None,
+        description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations and find this ID. Only present when async=true.",
+    )
+    usage: TokenUsage | None = Field(
+        default=None,
+        description="Token usage metrics for LLM calls during fact extraction (only present for synchronous operations)",
+    )
 class FactsIncludeOptions(BaseModel):
@@ -409,6 +455,8 @@ class ReflectRequest(BaseModel):
                     },
                     "required": ["summary", "key_points"],
                 },
+                "tags": ["user_a"],
+                "tags_match": "any",
             }
         }
     )
@@ -424,6 +472,15 @@ class ReflectRequest(BaseModel):
         default=None,
         description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
     )
+    tags: list[str] | None = Field(
+        default=None,
+        description="Filter memories by tags during reflection. If not specified, all memories are considered.",
+    )
+    tags_match: TagsMatch = Field(
+        default="any",
+        description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
+        "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
+    )
 class OpinionItem(BaseModel):
@@ -472,6 +529,7 @@ class ReflectResponse(BaseModel):
                     "summary": "AI is transformative",
                     "key_points": ["Used in healthcare", "Discussed recently"],
                 },
+                "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
             }
         }
     )
@@ -482,6 +540,10 @@ class ReflectResponse(BaseModel):
         default=None,
         description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
     )
+    usage: TokenUsage | None = Field(
+        default=None,
+        description="Token usage metrics for LLM calls during reflection.",
+    )
 class BanksResponse(BaseModel):
@@ -630,6 +692,7 @@ class GraphDataResponse(BaseModel):
                     }
                 ],
                 "total_units": 2,
+                "limit": 1000,
             }
         }
     )
@@ -638,6 +701,7 @@ class GraphDataResponse(BaseModel):
     edges: list[dict[str, Any]]
     table_rows: list[dict[str, Any]]
     total_units: int
+    limit: int
 class ListMemoryUnitsResponse(BaseModel):
@@ -699,6 +763,37 @@ class ListDocumentsResponse(BaseModel):
     offset: int
+class TagItem(BaseModel):
+    """Single tag with usage count."""
+    tag: str = Field(description="The tag value")
+    count: int = Field(description="Number of memories with this tag")
+class ListTagsResponse(BaseModel):
+    """Response model for list tags endpoint."""
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "items": [
+                    {"tag": "user:alice", "count": 42},
+                    {"tag": "user:bob", "count": 15},
+                    {"tag": "session:abc123", "count": 8},
+                ],
+                "total": 25,
+                "limit": 100,
+                "offset": 0,
+            }
+        }
+    )
+    items: list[TagItem]
+    total: int
+    limit: int
+    offset: int
 class DocumentResponse(BaseModel):
     """Response model for get document endpoint."""
@@ -712,6 +807,7 @@ class DocumentResponse(BaseModel):
                 "created_at": "2024-01-15T10:30:00Z",
                 "updated_at": "2024-01-15T10:30:00Z",
                 "memory_unit_count": 15,
+                "tags": ["user_a", "session_123"],
             }
         }
     )
@@ -723,6 +819,7 @@ class DocumentResponse(BaseModel):
     created_at: str
     updated_at: str
     memory_unit_count: int
+    tags: list[str] = Field(default_factory=list, description="Tags associated with this document")
 class DeleteDocumentResponse(BaseModel):
@@ -934,6 +1031,12 @@ def create_app(
             await memory.initialize()
             logging.info("Memory system initialized")
+            # Set up DB pool metrics after memory initialization
+            metrics_collector = get_metrics_collector()
+            if memory._pool is not None and hasattr(metrics_collector, "set_db_pool"):
+                metrics_collector.set_db_pool(memory._pool)
+                logging.info("DB pool metrics configured")
         # Call HTTP extension startup hook
         if http_extension:
             await http_extension.on_startup()
@@ -970,6 +1073,30 @@ def create_app(
     # This is required for mounted sub-applications where lifespan may not fire
     app.state.memory = memory
+    # Add HTTP metrics middleware
+    @app.middleware("http")
+    async def http_metrics_middleware(request, call_next):
+        """Record HTTP request metrics."""
+        # Normalize endpoint path to reduce cardinality
+        # Replace UUIDs and numeric IDs with placeholders
+        import re
+        from starlette.requests import Request
+        path = request.url.path
+        # Replace UUIDs
+        path = re.sub(r"/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}", path)
+        # Replace numeric IDs
+        path = re.sub(r"/\d+(?=/|$)", "/{id}", path)
+        status_code = [500]  # Default to 500, will be updated
+        metrics_collector = get_metrics_collector()
+        with metrics_collector.record_http_request(request.method, path, lambda: status_code[0]):
+            response = await call_next(request)
+            status_code[0] = response.status_code
+            return response
     # Register all routes
     _register_routes(app)
@@ -1049,16 +1176,19 @@ def _register_routes(app: FastAPI):
         "/v1/default/banks/{bank_id}/graph",
         response_model=GraphDataResponse,
         summary="Get memory graph data",
-        description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion). Limited to 1000 most recent items.",
+        description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion).",
         operation_id="get_graph",
         tags=["Memory"],
     )
     async def api_graph(
-        bank_id: str, type: str | None = None, request_context: RequestContext = Depends(get_request_context)
+        bank_id: str,
+        type: str | None = None,
+        limit: int = 1000,
+        request_context: RequestContext = Depends(get_request_context),
     ):
         """Get graph data from database, filtered by bank_id and optionally by type."""
         try:
-            data = await app.state.memory.get_graph_data(bank_id, type, request_context=request_context)
+            data = await app.state.memory.get_graph_data(bank_id, type, limit=limit, request_context=request_context)
             return data
         except (AuthenticationError, HTTPException):
             raise
@@ -1117,6 +1247,37 @@ def _register_routes(app: FastAPI):
             logger.error(f"Error in /v1/default/banks/{bank_id}/memories/list: {error_detail}")
             raise HTTPException(status_code=500, detail=str(e))
+    @app.get(
+        "/v1/default/banks/{bank_id}/memories/{memory_id}",
+        summary="Get memory unit",
+        description="Get a single memory unit by ID with all its metadata including entities and tags.",
+        operation_id="get_memory",
+        tags=["Memory"],
+    )
+    async def api_get_memory(
+        bank_id: str,
+        memory_id: str,
+        request_context: RequestContext = Depends(get_request_context),
+    ):
+        """Get a single memory unit by ID."""
+        try:
+            data = await app.state.memory.get_memory_unit(
+                bank_id=bank_id,
+                memory_id=memory_id,
+                request_context=request_context,
+            )
+            if data is None:
+                raise HTTPException(status_code=404, detail=f"Memory unit '{memory_id}' not found")
+            return data
+        except (AuthenticationError, HTTPException):
+            raise
+        except Exception as e:
+            import traceback
+            error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+            logger.error(f"Error in /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
+            raise HTTPException(status_code=500, detail=str(e))
     @app.post(
         "/v1/default/banks/{bank_id}/memories/recall",
         response_model=RecallResponse,
@@ -1134,6 +1295,9 @@ def _register_routes(app: FastAPI):
         bank_id: str, request: RecallRequest, request_context: RequestContext = Depends(get_request_context)
     ):
         """Run a recall and return results with trace."""
+        import time
+        handler_start = time.time()
         metrics = get_metrics_collector()
         try:
@@ -1159,10 +1323,12 @@ def _register_routes(app: FastAPI):
             include_chunks = request.include.chunks is not None
             max_chunk_tokens = request.include.chunks.max_tokens if include_chunks else 8192
+            pre_recall = time.time() - handler_start
             # Run recall with tracing (record metrics)
             with metrics.record_operation(
-                "recall", bank_id=bank_id, budget=request.budget.value, max_tokens=request.max_tokens
+                "recall", bank_id=bank_id, source="api", budget=request.budget.value, max_tokens=request.max_tokens
             ):
+                recall_start = time.time()
                 core_result = await app.state.memory.recall_async(
                     bank_id=bank_id,
                     query=request.query,
@@ -1176,6 +1342,8 @@ def _register_routes(app: FastAPI):
                     include_chunks=include_chunks,
                     max_chunk_tokens=max_chunk_tokens,
                     request_context=request_context,
+                    tags=request.tags,
+                    tags_match=request.tags_match,
                 )
             # Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)
@@ -1191,6 +1359,7 @@ def _register_routes(app: FastAPI):
                     mentioned_at=fact.mentioned_at,
                     document_id=fact.document_id,
                     chunk_id=fact.chunk_id,
+                    tags=fact.tags,
                 )
                 for fact in core_result.results
             ]
@@ -1221,9 +1390,21 @@ def _register_routes(app: FastAPI):
                         ],
                     )
-            return RecallResponse(
+            response = RecallResponse(
                 results=recall_results, trace=core_result.trace, entities=entities_response, chunks=chunks_response
             )
+            handler_duration = time.time() - handler_start
+            recall_duration = time.time() - recall_start
+            post_recall = handler_duration - pre_recall - recall_duration
+            if handler_duration > 1.0:
+                logging.info(
+                    f"[RECALL HTTP] bank={bank_id} handler_total={handler_duration:.3f}s "
+                    f"pre={pre_recall:.3f}s recall={recall_duration:.3f}s post={post_recall:.3f}s "
+                    f"results={len(recall_results)} entities={len(entities_response) if entities_response else 0}"
+                )
+            return response
         except HTTPException:
             raise
         except OperationValidationError as e:
@@ -1233,8 +1414,11 @@ def _register_routes(app: FastAPI):
         except Exception as e:
             import traceback
+            handler_duration = time.time() - handler_start
             error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
-            logger.error(f"Error in /v1/default/banks/{bank_id}/memories/recall: {error_detail}")
+            logger.error(
+                f"[RECALL ERROR] bank={bank_id} handler_duration={handler_duration:.3f}s error={str(e)}\n{error_detail}"
+            )
             raise HTTPException(status_code=500, detail=str(e))
     @app.post(
@@ -1259,7 +1443,7 @@ def _register_routes(app: FastAPI):
         try:
             # Use the memory system's reflect_async method (record metrics)
-            with metrics.record_operation("reflect", bank_id=bank_id, budget=request.budget.value):
+            with metrics.record_operation("reflect", bank_id=bank_id, source="api", budget=request.budget.value):
                 core_result = await app.state.memory.reflect_async(
                     bank_id=bank_id,
                     query=request.query,
@@ -1268,6 +1452,8 @@ def _register_routes(app: FastAPI):
                     max_tokens=request.max_tokens,
                     response_schema=request.response_schema,
                     request_context=request_context,
+                    tags=request.tags,
+                    tags_match=request.tags_match,
                 )
             # Convert core MemoryFact objects to API ReflectFact objects if facts are requested
@@ -1290,6 +1476,7 @@ def _register_routes(app: FastAPI):
                 text=core_result.text,
                 based_on=based_on_facts,
                 structured_output=core_result.structured_output,
+                usage=core_result.usage,
             )
         except OperationValidationError as e:
@@ -1333,9 +1520,14 @@ def _register_routes(app: FastAPI):
         operation_id="get_agent_stats",
         tags=["Banks"],
     )
-    async def api_stats(bank_id: str):
+    async def api_stats(
+        bank_id: str,
+        request_context: RequestContext = Depends(get_request_context),
+    ):
         """Get statistics about memory nodes and links for a memory bank."""
         try:
+            # Authenticate and set tenant schema
+            await app.state.memory._authenticate_tenant(request_context)
             pool = await app.state.memory._get_pool()
             async with acquire_with_retry(pool) as conn:
                 # Get node counts by fact_type
@@ -1454,19 +1646,27 @@ def _register_routes(app: FastAPI):
         "/v1/default/banks/{bank_id}/entities",
         response_model=EntityListResponse,
         summary="List entities",
-        description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count.",
+        description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count. Supports pagination.",
         operation_id="list_entities",
         tags=["Entities"],
     )
     async def api_list_entities(
         bank_id: str,
         limit: int = Query(default=100, description="Maximum number of entities to return"),
+        offset: int = Query(default=0, description="Offset for pagination"),
         request_context: RequestContext = Depends(get_request_context),
     ):
-        """List entities for a memory bank."""
+        """List entities for a memory bank with pagination."""
         try:
-            entities = await app.state.memory.list_entities(bank_id, limit=limit, request_context=request_context)
-            return EntityListResponse(items=[EntityListItem(**e) for e in entities])
+            data = await app.state.memory.list_entities(
+                bank_id, limit=limit, offset=offset, request_context=request_context
+            )
+            return EntityListResponse(
+                items=[EntityListItem(**e) for e in data["items"]],
+                total=data["total"],
+                limit=data["limit"],
+                offset=data["offset"],
+            )
         except (AuthenticationError, HTTPException):
             raise
         except Exception as e:
@@ -1638,6 +1838,59 @@ def _register_routes(app: FastAPI):
             logger.error(f"Error in /v1/default/banks/{bank_id}/documents/{document_id}: {error_detail}")
             raise HTTPException(status_code=500, detail=str(e))
+    @app.get(
+        "/v1/default/banks/{bank_id}/tags",
+        response_model=ListTagsResponse,
+        summary="List tags",
+        description="List all unique tags in a memory bank with usage counts. "
+        "Supports wildcard search using '*' (e.g., 'user:*', '*-fred', 'tag*-2'). Case-insensitive.",
+        operation_id="list_tags",
+        tags=["Memory"],
+    )
+    async def api_list_tags(
+        bank_id: str,
+        q: str | None = Query(
+            default=None,
+            description="Wildcard pattern to filter tags (e.g., 'user:*' for user:alice, '*-admin' for role-admin). "
+            "Use '*' as wildcard. Case-insensitive.",
+        ),
+        limit: int = Query(default=100, description="Maximum number of tags to return"),
+        offset: int = Query(default=0, description="Offset for pagination"),
+        request_context: RequestContext = Depends(get_request_context),
+    ):
+        """
+        List all unique tags in a memory bank.
+        Use this endpoint to discover available tags or expand wildcard patterns.
+        Supports '*' wildcards for flexible matching (case-insensitive):
+        - 'user:*' matches user:alice, user:bob
+        - '*-admin' matches role-admin, super-admin
+        - 'env*-prod' matches env-prod, environment-prod
+        Args:
+            bank_id: Memory Bank ID (from path)
+            q: Wildcard pattern to filter tags (use '*' as wildcard)
+            limit: Maximum number of tags to return (default: 100)
+            offset: Offset for pagination (default: 0)
+        """
+        try:
+            data = await app.state.memory.list_tags(
+                bank_id=bank_id,
+                pattern=q,
+                limit=limit,
+                offset=offset,
+                request_context=request_context,
+            )
+            return data
+        except (AuthenticationError, HTTPException):
+            raise
+        except Exception as e:
+            import traceback
+            error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+            logger.error(f"Error in /v1/default/banks/{bank_id}/tags: {error_detail}")
+            raise HTTPException(status_code=500, detail=str(e))
     @app.get(
         "/v1/default/chunks/{chunk_id:path}",
         response_model=ChunkResponse,
@@ -2000,28 +2253,37 @@ def _register_routes(app: FastAPI):
                     content_dict["document_id"] = item.document_id
                 if item.entities:
                     content_dict["entities"] = [{"text": e.text, "type": e.type or "CONCEPT"} for e in item.entities]
+                if item.tags:
+                    content_dict["tags"] = item.tags
                 contents.append(content_dict)
             if request.async_:
                 # Async processing: queue task and return immediately
-                result = await app.state.memory.submit_async_retain(bank_id, contents, request_context=request_context)
+                result = await app.state.memory.submit_async_retain(
+                    bank_id, contents, document_tags=request.document_tags, request_context=request_context
+                )
                 return RetainResponse.model_validate(
                     {
                         "success": True,
                         "bank_id": bank_id,
                         "items_count": result["items_count"],
                         "async": True,
+                        "operation_id": result["operation_id"],
                     }
                 )
             else:
                 # Synchronous processing: wait for completion (record metrics)
-                with metrics.record_operation("retain", bank_id=bank_id):
-                    result = await app.state.memory.retain_batch_async(
-                        bank_id=bank_id, contents=contents, request_context=request_context
+                with metrics.record_operation("retain", bank_id=bank_id, source="api"):
+                    result, usage = await app.state.memory.retain_batch_async(
+                        bank_id=bank_id,
+                        contents=contents,
+                        document_tags=request.document_tags,
+                        request_context=request_context,
+                        return_usage=True,
                     )
                 return RetainResponse.model_validate(
-                    {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False}
+                    {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False, "usage": usage}
                 )
         except OperationValidationError as e:
             raise HTTPException(status_code=e.status_code, detail=e.reason)

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl