hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +252 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/api/http.py +282 -20
  6. hindsight_api/api/mcp.py +47 -52
  7. hindsight_api/config.py +238 -6
  8. hindsight_api/engine/cross_encoder.py +599 -86
  9. hindsight_api/engine/db_budget.py +284 -0
  10. hindsight_api/engine/db_utils.py +11 -0
  11. hindsight_api/engine/embeddings.py +453 -26
  12. hindsight_api/engine/entity_resolver.py +8 -5
  13. hindsight_api/engine/interface.py +8 -4
  14. hindsight_api/engine/llm_wrapper.py +241 -27
  15. hindsight_api/engine/memory_engine.py +609 -122
  16. hindsight_api/engine/query_analyzer.py +4 -3
  17. hindsight_api/engine/response_models.py +38 -0
  18. hindsight_api/engine/retain/fact_extraction.py +388 -192
  19. hindsight_api/engine/retain/fact_storage.py +34 -8
  20. hindsight_api/engine/retain/link_utils.py +24 -16
  21. hindsight_api/engine/retain/orchestrator.py +52 -17
  22. hindsight_api/engine/retain/types.py +9 -0
  23. hindsight_api/engine/search/graph_retrieval.py +42 -13
  24. hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
  25. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  26. hindsight_api/engine/search/reranking.py +2 -2
  27. hindsight_api/engine/search/retrieval.py +847 -200
  28. hindsight_api/engine/search/tags.py +172 -0
  29. hindsight_api/engine/search/think_utils.py +1 -1
  30. hindsight_api/engine/search/trace.py +12 -0
  31. hindsight_api/engine/search/tracer.py +24 -1
  32. hindsight_api/engine/search/types.py +21 -0
  33. hindsight_api/engine/task_backend.py +109 -18
  34. hindsight_api/engine/utils.py +1 -1
  35. hindsight_api/extensions/context.py +10 -1
  36. hindsight_api/main.py +56 -4
  37. hindsight_api/metrics.py +433 -48
  38. hindsight_api/migrations.py +141 -1
  39. hindsight_api/models.py +3 -1
  40. hindsight_api/pg0.py +53 -0
  41. hindsight_api/server.py +39 -2
  42. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
  43. hindsight_api-0.3.0.dist-info/RECORD +82 -0
  44. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
  45. hindsight_api-0.2.0.dist-info/RECORD +0 -75
  46. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/api/http.py CHANGED
@@ -36,7 +36,8 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
36
36
  from hindsight_api import MemoryEngine
37
37
  from hindsight_api.engine.db_utils import acquire_with_retry
38
38
  from hindsight_api.engine.memory_engine import Budget, fq_table
39
- from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
39
+ from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES, TokenUsage
40
+ from hindsight_api.engine.search.tags import TagsMatch
40
41
  from hindsight_api.extensions import HttpExtension, OperationValidationError, load_extension
41
42
  from hindsight_api.metrics import create_metrics_collector, get_metrics_collector, initialize_metrics
42
43
  from hindsight_api.models import RequestContext
@@ -81,6 +82,8 @@ class RecallRequest(BaseModel):
81
82
  "trace": True,
82
83
  "query_timestamp": "2023-05-30T23:40:00",
83
84
  "include": {"entities": {"max_tokens": 500}},
85
+ "tags": ["user_a"],
86
+ "tags_match": "any",
84
87
  }
85
88
  }
86
89
  )
@@ -99,6 +102,15 @@ class RecallRequest(BaseModel):
99
102
  default_factory=IncludeOptions,
100
103
  description="Options for including additional data (entities are included by default)",
101
104
  )
105
+ tags: list[str] | None = Field(
106
+ default=None,
107
+ description="Filter memories by tags. If not specified, all memories are returned.",
108
+ )
109
+ tags_match: TagsMatch = Field(
110
+ default="any",
111
+ description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
112
+ "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
113
+ )
102
114
 
103
115
 
104
116
  class RecallResult(BaseModel):
@@ -119,6 +131,7 @@ class RecallResult(BaseModel):
119
131
  "document_id": "session_abc123",
120
132
  "metadata": {"source": "slack"},
121
133
  "chunk_id": "456e7890-e12b-34d5-a678-901234567890",
134
+ "tags": ["user_a", "user_b"],
122
135
  }
123
136
  },
124
137
  }
@@ -134,6 +147,7 @@ class RecallResult(BaseModel):
134
147
  document_id: str | None = None # Document this memory belongs to
135
148
  metadata: dict[str, str] | None = None # User-defined metadata
136
149
  chunk_id: str | None = None # Chunk this fact was extracted from
150
+ tags: list[str] | None = None # Visibility scope tags
137
151
 
138
152
 
139
153
  class EntityObservationResponse(BaseModel):
@@ -188,12 +202,18 @@ class EntityListResponse(BaseModel):
188
202
  "first_seen": "2024-01-15T10:30:00Z",
189
203
  "last_seen": "2024-02-01T14:00:00Z",
190
204
  }
191
- ]
205
+ ],
206
+ "total": 150,
207
+ "limit": 100,
208
+ "offset": 0,
192
209
  }
193
210
  }
194
211
  )
195
212
 
196
213
  items: list[EntityListItem]
214
+ total: int
215
+ limit: int
216
+ offset: int
197
217
 
198
218
 
199
219
  class EntityDetailResponse(BaseModel):
@@ -300,6 +320,7 @@ class MemoryItem(BaseModel):
300
320
  "metadata": {"source": "slack", "channel": "engineering"},
301
321
  "document_id": "meeting_notes_2024_01_15",
302
322
  "entities": [{"text": "Alice"}, {"text": "ML model", "type": "CONCEPT"}],
323
+ "tags": ["user_a", "user_b"],
303
324
  }
304
325
  },
305
326
  )
@@ -313,6 +334,10 @@ class MemoryItem(BaseModel):
313
334
  default=None,
314
335
  description="Optional entities to combine with auto-extracted entities.",
315
336
  )
337
+ tags: list[str] | None = Field(
338
+ default=None,
339
+ description="Optional tags for visibility scoping. Memories with tags can be filtered during recall.",
340
+ )
316
341
 
317
342
  @field_validator("timestamp", mode="before")
318
343
  @classmethod
@@ -347,6 +372,7 @@ class RetainRequest(BaseModel):
347
372
  },
348
373
  ],
349
374
  "async": False,
375
+ "document_tags": ["user_a", "user_b"],
350
376
  }
351
377
  }
352
378
  )
@@ -357,6 +383,10 @@ class RetainRequest(BaseModel):
357
383
  alias="async",
358
384
  description="If true, process asynchronously in background. If false, wait for completion (default: false)",
359
385
  )
386
+ document_tags: list[str] | None = Field(
387
+ default=None,
388
+ description="Tags applied to all items in this request. These are merged with any item-level tags.",
389
+ )
360
390
 
361
391
 
362
392
  class RetainResponse(BaseModel):
@@ -364,7 +394,15 @@ class RetainResponse(BaseModel):
364
394
 
365
395
  model_config = ConfigDict(
366
396
  populate_by_name=True,
367
- json_schema_extra={"example": {"success": True, "bank_id": "user123", "items_count": 2, "async": False}},
397
+ json_schema_extra={
398
+ "example": {
399
+ "success": True,
400
+ "bank_id": "user123",
401
+ "items_count": 2,
402
+ "async": False,
403
+ "usage": {"input_tokens": 500, "output_tokens": 100, "total_tokens": 600},
404
+ }
405
+ },
368
406
  )
369
407
 
370
408
  success: bool
@@ -373,6 +411,14 @@ class RetainResponse(BaseModel):
373
411
  is_async: bool = Field(
374
412
  alias="async", serialization_alias="async", description="Whether the operation was processed asynchronously"
375
413
  )
414
+ operation_id: str | None = Field(
415
+ default=None,
416
+ description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations and find this ID. Only present when async=true.",
417
+ )
418
+ usage: TokenUsage | None = Field(
419
+ default=None,
420
+ description="Token usage metrics for LLM calls during fact extraction (only present for synchronous operations)",
421
+ )
376
422
 
377
423
 
378
424
  class FactsIncludeOptions(BaseModel):
@@ -409,6 +455,8 @@ class ReflectRequest(BaseModel):
409
455
  },
410
456
  "required": ["summary", "key_points"],
411
457
  },
458
+ "tags": ["user_a"],
459
+ "tags_match": "any",
412
460
  }
413
461
  }
414
462
  )
@@ -424,6 +472,15 @@ class ReflectRequest(BaseModel):
424
472
  default=None,
425
473
  description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
426
474
  )
475
+ tags: list[str] | None = Field(
476
+ default=None,
477
+ description="Filter memories by tags during reflection. If not specified, all memories are considered.",
478
+ )
479
+ tags_match: TagsMatch = Field(
480
+ default="any",
481
+ description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
482
+ "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
483
+ )
427
484
 
428
485
 
429
486
  class OpinionItem(BaseModel):
@@ -472,6 +529,7 @@ class ReflectResponse(BaseModel):
472
529
  "summary": "AI is transformative",
473
530
  "key_points": ["Used in healthcare", "Discussed recently"],
474
531
  },
532
+ "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
475
533
  }
476
534
  }
477
535
  )
@@ -482,6 +540,10 @@ class ReflectResponse(BaseModel):
482
540
  default=None,
483
541
  description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
484
542
  )
543
+ usage: TokenUsage | None = Field(
544
+ default=None,
545
+ description="Token usage metrics for LLM calls during reflection.",
546
+ )
485
547
 
486
548
 
487
549
  class BanksResponse(BaseModel):
@@ -630,6 +692,7 @@ class GraphDataResponse(BaseModel):
630
692
  }
631
693
  ],
632
694
  "total_units": 2,
695
+ "limit": 1000,
633
696
  }
634
697
  }
635
698
  )
@@ -638,6 +701,7 @@ class GraphDataResponse(BaseModel):
638
701
  edges: list[dict[str, Any]]
639
702
  table_rows: list[dict[str, Any]]
640
703
  total_units: int
704
+ limit: int
641
705
 
642
706
 
643
707
  class ListMemoryUnitsResponse(BaseModel):
@@ -699,6 +763,37 @@ class ListDocumentsResponse(BaseModel):
699
763
  offset: int
700
764
 
701
765
 
766
+ class TagItem(BaseModel):
767
+ """Single tag with usage count."""
768
+
769
+ tag: str = Field(description="The tag value")
770
+ count: int = Field(description="Number of memories with this tag")
771
+
772
+
773
+ class ListTagsResponse(BaseModel):
774
+ """Response model for list tags endpoint."""
775
+
776
+ model_config = ConfigDict(
777
+ json_schema_extra={
778
+ "example": {
779
+ "items": [
780
+ {"tag": "user:alice", "count": 42},
781
+ {"tag": "user:bob", "count": 15},
782
+ {"tag": "session:abc123", "count": 8},
783
+ ],
784
+ "total": 25,
785
+ "limit": 100,
786
+ "offset": 0,
787
+ }
788
+ }
789
+ )
790
+
791
+ items: list[TagItem]
792
+ total: int
793
+ limit: int
794
+ offset: int
795
+
796
+
702
797
  class DocumentResponse(BaseModel):
703
798
  """Response model for get document endpoint."""
704
799
 
@@ -712,6 +807,7 @@ class DocumentResponse(BaseModel):
712
807
  "created_at": "2024-01-15T10:30:00Z",
713
808
  "updated_at": "2024-01-15T10:30:00Z",
714
809
  "memory_unit_count": 15,
810
+ "tags": ["user_a", "session_123"],
715
811
  }
716
812
  }
717
813
  )
@@ -723,6 +819,7 @@ class DocumentResponse(BaseModel):
723
819
  created_at: str
724
820
  updated_at: str
725
821
  memory_unit_count: int
822
+ tags: list[str] = Field(default_factory=list, description="Tags associated with this document")
726
823
 
727
824
 
728
825
  class DeleteDocumentResponse(BaseModel):
@@ -934,6 +1031,12 @@ def create_app(
934
1031
  await memory.initialize()
935
1032
  logging.info("Memory system initialized")
936
1033
 
1034
+ # Set up DB pool metrics after memory initialization
1035
+ metrics_collector = get_metrics_collector()
1036
+ if memory._pool is not None and hasattr(metrics_collector, "set_db_pool"):
1037
+ metrics_collector.set_db_pool(memory._pool)
1038
+ logging.info("DB pool metrics configured")
1039
+
937
1040
  # Call HTTP extension startup hook
938
1041
  if http_extension:
939
1042
  await http_extension.on_startup()
@@ -970,6 +1073,30 @@ def create_app(
970
1073
  # This is required for mounted sub-applications where lifespan may not fire
971
1074
  app.state.memory = memory
972
1075
 
1076
+ # Add HTTP metrics middleware
1077
+ @app.middleware("http")
1078
+ async def http_metrics_middleware(request, call_next):
1079
+ """Record HTTP request metrics."""
1080
+ # Normalize endpoint path to reduce cardinality
1081
+ # Replace UUIDs and numeric IDs with placeholders
1082
+ import re
1083
+
1084
+ from starlette.requests import Request
1085
+
1086
+ path = request.url.path
1087
+ # Replace UUIDs
1088
+ path = re.sub(r"/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}", path)
1089
+ # Replace numeric IDs
1090
+ path = re.sub(r"/\d+(?=/|$)", "/{id}", path)
1091
+
1092
+ status_code = [500] # Default to 500, will be updated
1093
+ metrics_collector = get_metrics_collector()
1094
+
1095
+ with metrics_collector.record_http_request(request.method, path, lambda: status_code[0]):
1096
+ response = await call_next(request)
1097
+ status_code[0] = response.status_code
1098
+ return response
1099
+
973
1100
  # Register all routes
974
1101
  _register_routes(app)
975
1102
 
@@ -1049,16 +1176,19 @@ def _register_routes(app: FastAPI):
1049
1176
  "/v1/default/banks/{bank_id}/graph",
1050
1177
  response_model=GraphDataResponse,
1051
1178
  summary="Get memory graph data",
1052
- description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion). Limited to 1000 most recent items.",
1179
+ description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion).",
1053
1180
  operation_id="get_graph",
1054
1181
  tags=["Memory"],
1055
1182
  )
1056
1183
  async def api_graph(
1057
- bank_id: str, type: str | None = None, request_context: RequestContext = Depends(get_request_context)
1184
+ bank_id: str,
1185
+ type: str | None = None,
1186
+ limit: int = 1000,
1187
+ request_context: RequestContext = Depends(get_request_context),
1058
1188
  ):
1059
1189
  """Get graph data from database, filtered by bank_id and optionally by type."""
1060
1190
  try:
1061
- data = await app.state.memory.get_graph_data(bank_id, type, request_context=request_context)
1191
+ data = await app.state.memory.get_graph_data(bank_id, type, limit=limit, request_context=request_context)
1062
1192
  return data
1063
1193
  except (AuthenticationError, HTTPException):
1064
1194
  raise
@@ -1117,6 +1247,37 @@ def _register_routes(app: FastAPI):
1117
1247
  logger.error(f"Error in /v1/default/banks/{bank_id}/memories/list: {error_detail}")
1118
1248
  raise HTTPException(status_code=500, detail=str(e))
1119
1249
 
1250
+ @app.get(
1251
+ "/v1/default/banks/{bank_id}/memories/{memory_id}",
1252
+ summary="Get memory unit",
1253
+ description="Get a single memory unit by ID with all its metadata including entities and tags.",
1254
+ operation_id="get_memory",
1255
+ tags=["Memory"],
1256
+ )
1257
+ async def api_get_memory(
1258
+ bank_id: str,
1259
+ memory_id: str,
1260
+ request_context: RequestContext = Depends(get_request_context),
1261
+ ):
1262
+ """Get a single memory unit by ID."""
1263
+ try:
1264
+ data = await app.state.memory.get_memory_unit(
1265
+ bank_id=bank_id,
1266
+ memory_id=memory_id,
1267
+ request_context=request_context,
1268
+ )
1269
+ if data is None:
1270
+ raise HTTPException(status_code=404, detail=f"Memory unit '{memory_id}' not found")
1271
+ return data
1272
+ except (AuthenticationError, HTTPException):
1273
+ raise
1274
+ except Exception as e:
1275
+ import traceback
1276
+
1277
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
1278
+ logger.error(f"Error in /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
1279
+ raise HTTPException(status_code=500, detail=str(e))
1280
+
1120
1281
  @app.post(
1121
1282
  "/v1/default/banks/{bank_id}/memories/recall",
1122
1283
  response_model=RecallResponse,
@@ -1134,6 +1295,9 @@ def _register_routes(app: FastAPI):
1134
1295
  bank_id: str, request: RecallRequest, request_context: RequestContext = Depends(get_request_context)
1135
1296
  ):
1136
1297
  """Run a recall and return results with trace."""
1298
+ import time
1299
+
1300
+ handler_start = time.time()
1137
1301
  metrics = get_metrics_collector()
1138
1302
 
1139
1303
  try:
@@ -1159,10 +1323,12 @@ def _register_routes(app: FastAPI):
1159
1323
  include_chunks = request.include.chunks is not None
1160
1324
  max_chunk_tokens = request.include.chunks.max_tokens if include_chunks else 8192
1161
1325
 
1326
+ pre_recall = time.time() - handler_start
1162
1327
  # Run recall with tracing (record metrics)
1163
1328
  with metrics.record_operation(
1164
- "recall", bank_id=bank_id, budget=request.budget.value, max_tokens=request.max_tokens
1329
+ "recall", bank_id=bank_id, source="api", budget=request.budget.value, max_tokens=request.max_tokens
1165
1330
  ):
1331
+ recall_start = time.time()
1166
1332
  core_result = await app.state.memory.recall_async(
1167
1333
  bank_id=bank_id,
1168
1334
  query=request.query,
@@ -1176,6 +1342,8 @@ def _register_routes(app: FastAPI):
1176
1342
  include_chunks=include_chunks,
1177
1343
  max_chunk_tokens=max_chunk_tokens,
1178
1344
  request_context=request_context,
1345
+ tags=request.tags,
1346
+ tags_match=request.tags_match,
1179
1347
  )
1180
1348
 
1181
1349
  # Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)
@@ -1191,6 +1359,7 @@ def _register_routes(app: FastAPI):
1191
1359
  mentioned_at=fact.mentioned_at,
1192
1360
  document_id=fact.document_id,
1193
1361
  chunk_id=fact.chunk_id,
1362
+ tags=fact.tags,
1194
1363
  )
1195
1364
  for fact in core_result.results
1196
1365
  ]
@@ -1221,9 +1390,21 @@ def _register_routes(app: FastAPI):
1221
1390
  ],
1222
1391
  )
1223
1392
 
1224
- return RecallResponse(
1393
+ response = RecallResponse(
1225
1394
  results=recall_results, trace=core_result.trace, entities=entities_response, chunks=chunks_response
1226
1395
  )
1396
+
1397
+ handler_duration = time.time() - handler_start
1398
+ recall_duration = time.time() - recall_start
1399
+ post_recall = handler_duration - pre_recall - recall_duration
1400
+ if handler_duration > 1.0:
1401
+ logging.info(
1402
+ f"[RECALL HTTP] bank={bank_id} handler_total={handler_duration:.3f}s "
1403
+ f"pre={pre_recall:.3f}s recall={recall_duration:.3f}s post={post_recall:.3f}s "
1404
+ f"results={len(recall_results)} entities={len(entities_response) if entities_response else 0}"
1405
+ )
1406
+
1407
+ return response
1227
1408
  except HTTPException:
1228
1409
  raise
1229
1410
  except OperationValidationError as e:
@@ -1233,8 +1414,11 @@ def _register_routes(app: FastAPI):
1233
1414
  except Exception as e:
1234
1415
  import traceback
1235
1416
 
1417
+ handler_duration = time.time() - handler_start
1236
1418
  error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
1237
- logger.error(f"Error in /v1/default/banks/{bank_id}/memories/recall: {error_detail}")
1419
+ logger.error(
1420
+ f"[RECALL ERROR] bank={bank_id} handler_duration={handler_duration:.3f}s error={str(e)}\n{error_detail}"
1421
+ )
1238
1422
  raise HTTPException(status_code=500, detail=str(e))
1239
1423
 
1240
1424
  @app.post(
@@ -1259,7 +1443,7 @@ def _register_routes(app: FastAPI):
1259
1443
 
1260
1444
  try:
1261
1445
  # Use the memory system's reflect_async method (record metrics)
1262
- with metrics.record_operation("reflect", bank_id=bank_id, budget=request.budget.value):
1446
+ with metrics.record_operation("reflect", bank_id=bank_id, source="api", budget=request.budget.value):
1263
1447
  core_result = await app.state.memory.reflect_async(
1264
1448
  bank_id=bank_id,
1265
1449
  query=request.query,
@@ -1268,6 +1452,8 @@ def _register_routes(app: FastAPI):
1268
1452
  max_tokens=request.max_tokens,
1269
1453
  response_schema=request.response_schema,
1270
1454
  request_context=request_context,
1455
+ tags=request.tags,
1456
+ tags_match=request.tags_match,
1271
1457
  )
1272
1458
 
1273
1459
  # Convert core MemoryFact objects to API ReflectFact objects if facts are requested
@@ -1290,6 +1476,7 @@ def _register_routes(app: FastAPI):
1290
1476
  text=core_result.text,
1291
1477
  based_on=based_on_facts,
1292
1478
  structured_output=core_result.structured_output,
1479
+ usage=core_result.usage,
1293
1480
  )
1294
1481
 
1295
1482
  except OperationValidationError as e:
@@ -1333,9 +1520,14 @@ def _register_routes(app: FastAPI):
1333
1520
  operation_id="get_agent_stats",
1334
1521
  tags=["Banks"],
1335
1522
  )
1336
- async def api_stats(bank_id: str):
1523
+ async def api_stats(
1524
+ bank_id: str,
1525
+ request_context: RequestContext = Depends(get_request_context),
1526
+ ):
1337
1527
  """Get statistics about memory nodes and links for a memory bank."""
1338
1528
  try:
1529
+ # Authenticate and set tenant schema
1530
+ await app.state.memory._authenticate_tenant(request_context)
1339
1531
  pool = await app.state.memory._get_pool()
1340
1532
  async with acquire_with_retry(pool) as conn:
1341
1533
  # Get node counts by fact_type
@@ -1454,19 +1646,27 @@ def _register_routes(app: FastAPI):
1454
1646
  "/v1/default/banks/{bank_id}/entities",
1455
1647
  response_model=EntityListResponse,
1456
1648
  summary="List entities",
1457
- description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count.",
1649
+ description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count. Supports pagination.",
1458
1650
  operation_id="list_entities",
1459
1651
  tags=["Entities"],
1460
1652
  )
1461
1653
  async def api_list_entities(
1462
1654
  bank_id: str,
1463
1655
  limit: int = Query(default=100, description="Maximum number of entities to return"),
1656
+ offset: int = Query(default=0, description="Offset for pagination"),
1464
1657
  request_context: RequestContext = Depends(get_request_context),
1465
1658
  ):
1466
- """List entities for a memory bank."""
1659
+ """List entities for a memory bank with pagination."""
1467
1660
  try:
1468
- entities = await app.state.memory.list_entities(bank_id, limit=limit, request_context=request_context)
1469
- return EntityListResponse(items=[EntityListItem(**e) for e in entities])
1661
+ data = await app.state.memory.list_entities(
1662
+ bank_id, limit=limit, offset=offset, request_context=request_context
1663
+ )
1664
+ return EntityListResponse(
1665
+ items=[EntityListItem(**e) for e in data["items"]],
1666
+ total=data["total"],
1667
+ limit=data["limit"],
1668
+ offset=data["offset"],
1669
+ )
1470
1670
  except (AuthenticationError, HTTPException):
1471
1671
  raise
1472
1672
  except Exception as e:
@@ -1638,6 +1838,59 @@ def _register_routes(app: FastAPI):
1638
1838
  logger.error(f"Error in /v1/default/banks/{bank_id}/documents/{document_id}: {error_detail}")
1639
1839
  raise HTTPException(status_code=500, detail=str(e))
1640
1840
 
1841
+ @app.get(
1842
+ "/v1/default/banks/{bank_id}/tags",
1843
+ response_model=ListTagsResponse,
1844
+ summary="List tags",
1845
+ description="List all unique tags in a memory bank with usage counts. "
1846
+ "Supports wildcard search using '*' (e.g., 'user:*', '*-fred', 'tag*-2'). Case-insensitive.",
1847
+ operation_id="list_tags",
1848
+ tags=["Memory"],
1849
+ )
1850
+ async def api_list_tags(
1851
+ bank_id: str,
1852
+ q: str | None = Query(
1853
+ default=None,
1854
+ description="Wildcard pattern to filter tags (e.g., 'user:*' for user:alice, '*-admin' for role-admin). "
1855
+ "Use '*' as wildcard. Case-insensitive.",
1856
+ ),
1857
+ limit: int = Query(default=100, description="Maximum number of tags to return"),
1858
+ offset: int = Query(default=0, description="Offset for pagination"),
1859
+ request_context: RequestContext = Depends(get_request_context),
1860
+ ):
1861
+ """
1862
+ List all unique tags in a memory bank.
1863
+
1864
+ Use this endpoint to discover available tags or expand wildcard patterns.
1865
+ Supports '*' wildcards for flexible matching (case-insensitive):
1866
+ - 'user:*' matches user:alice, user:bob
1867
+ - '*-admin' matches role-admin, super-admin
1868
+ - 'env*-prod' matches env-prod, environment-prod
1869
+
1870
+ Args:
1871
+ bank_id: Memory Bank ID (from path)
1872
+ q: Wildcard pattern to filter tags (use '*' as wildcard)
1873
+ limit: Maximum number of tags to return (default: 100)
1874
+ offset: Offset for pagination (default: 0)
1875
+ """
1876
+ try:
1877
+ data = await app.state.memory.list_tags(
1878
+ bank_id=bank_id,
1879
+ pattern=q,
1880
+ limit=limit,
1881
+ offset=offset,
1882
+ request_context=request_context,
1883
+ )
1884
+ return data
1885
+ except (AuthenticationError, HTTPException):
1886
+ raise
1887
+ except Exception as e:
1888
+ import traceback
1889
+
1890
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
1891
+ logger.error(f"Error in /v1/default/banks/{bank_id}/tags: {error_detail}")
1892
+ raise HTTPException(status_code=500, detail=str(e))
1893
+
1641
1894
  @app.get(
1642
1895
  "/v1/default/chunks/{chunk_id:path}",
1643
1896
  response_model=ChunkResponse,
@@ -2000,28 +2253,37 @@ def _register_routes(app: FastAPI):
2000
2253
  content_dict["document_id"] = item.document_id
2001
2254
  if item.entities:
2002
2255
  content_dict["entities"] = [{"text": e.text, "type": e.type or "CONCEPT"} for e in item.entities]
2256
+ if item.tags:
2257
+ content_dict["tags"] = item.tags
2003
2258
  contents.append(content_dict)
2004
2259
 
2005
2260
  if request.async_:
2006
2261
  # Async processing: queue task and return immediately
2007
- result = await app.state.memory.submit_async_retain(bank_id, contents, request_context=request_context)
2262
+ result = await app.state.memory.submit_async_retain(
2263
+ bank_id, contents, document_tags=request.document_tags, request_context=request_context
2264
+ )
2008
2265
  return RetainResponse.model_validate(
2009
2266
  {
2010
2267
  "success": True,
2011
2268
  "bank_id": bank_id,
2012
2269
  "items_count": result["items_count"],
2013
2270
  "async": True,
2271
+ "operation_id": result["operation_id"],
2014
2272
  }
2015
2273
  )
2016
2274
  else:
2017
2275
  # Synchronous processing: wait for completion (record metrics)
2018
- with metrics.record_operation("retain", bank_id=bank_id):
2019
- result = await app.state.memory.retain_batch_async(
2020
- bank_id=bank_id, contents=contents, request_context=request_context
2276
+ with metrics.record_operation("retain", bank_id=bank_id, source="api"):
2277
+ result, usage = await app.state.memory.retain_batch_async(
2278
+ bank_id=bank_id,
2279
+ contents=contents,
2280
+ document_tags=request.document_tags,
2281
+ request_context=request_context,
2282
+ return_usage=True,
2021
2283
  )
2022
2284
 
2023
2285
  return RetainResponse.model_validate(
2024
- {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False}
2286
+ {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False, "usage": usage}
2025
2287
  )
2026
2288
  except OperationValidationError as e:
2027
2289
  raise HTTPException(status_code=e.status_code, detail=e.reason)