hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +252 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/api/http.py +282 -20
- hindsight_api/api/mcp.py +47 -52
- hindsight_api/config.py +238 -6
- hindsight_api/engine/cross_encoder.py +599 -86
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/embeddings.py +453 -26
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +8 -4
- hindsight_api/engine/llm_wrapper.py +241 -27
- hindsight_api/engine/memory_engine.py +609 -122
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/response_models.py +38 -0
- hindsight_api/engine/retain/fact_extraction.py +388 -192
- hindsight_api/engine/retain/fact_storage.py +34 -8
- hindsight_api/engine/retain/link_utils.py +24 -16
- hindsight_api/engine/retain/orchestrator.py +52 -17
- hindsight_api/engine/retain/types.py +9 -0
- hindsight_api/engine/search/graph_retrieval.py +42 -13
- hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +847 -200
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/trace.py +12 -0
- hindsight_api/engine/search/tracer.py +24 -1
- hindsight_api/engine/search/types.py +21 -0
- hindsight_api/engine/task_backend.py +109 -18
- hindsight_api/engine/utils.py +1 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/main.py +56 -4
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -1
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
- hindsight_api-0.3.0.dist-info/RECORD +82 -0
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.2.0.dist-info/RECORD +0 -75
- {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
hindsight_api/api/http.py
CHANGED
|
@@ -36,7 +36,8 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
|
36
36
|
from hindsight_api import MemoryEngine
|
|
37
37
|
from hindsight_api.engine.db_utils import acquire_with_retry
|
|
38
38
|
from hindsight_api.engine.memory_engine import Budget, fq_table
|
|
39
|
-
from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
|
|
39
|
+
from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES, TokenUsage
|
|
40
|
+
from hindsight_api.engine.search.tags import TagsMatch
|
|
40
41
|
from hindsight_api.extensions import HttpExtension, OperationValidationError, load_extension
|
|
41
42
|
from hindsight_api.metrics import create_metrics_collector, get_metrics_collector, initialize_metrics
|
|
42
43
|
from hindsight_api.models import RequestContext
|
|
@@ -81,6 +82,8 @@ class RecallRequest(BaseModel):
|
|
|
81
82
|
"trace": True,
|
|
82
83
|
"query_timestamp": "2023-05-30T23:40:00",
|
|
83
84
|
"include": {"entities": {"max_tokens": 500}},
|
|
85
|
+
"tags": ["user_a"],
|
|
86
|
+
"tags_match": "any",
|
|
84
87
|
}
|
|
85
88
|
}
|
|
86
89
|
)
|
|
@@ -99,6 +102,15 @@ class RecallRequest(BaseModel):
|
|
|
99
102
|
default_factory=IncludeOptions,
|
|
100
103
|
description="Options for including additional data (entities are included by default)",
|
|
101
104
|
)
|
|
105
|
+
tags: list[str] | None = Field(
|
|
106
|
+
default=None,
|
|
107
|
+
description="Filter memories by tags. If not specified, all memories are returned.",
|
|
108
|
+
)
|
|
109
|
+
tags_match: TagsMatch = Field(
|
|
110
|
+
default="any",
|
|
111
|
+
description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
|
|
112
|
+
"'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
|
|
113
|
+
)
|
|
102
114
|
|
|
103
115
|
|
|
104
116
|
class RecallResult(BaseModel):
|
|
@@ -119,6 +131,7 @@ class RecallResult(BaseModel):
|
|
|
119
131
|
"document_id": "session_abc123",
|
|
120
132
|
"metadata": {"source": "slack"},
|
|
121
133
|
"chunk_id": "456e7890-e12b-34d5-a678-901234567890",
|
|
134
|
+
"tags": ["user_a", "user_b"],
|
|
122
135
|
}
|
|
123
136
|
},
|
|
124
137
|
}
|
|
@@ -134,6 +147,7 @@ class RecallResult(BaseModel):
|
|
|
134
147
|
document_id: str | None = None # Document this memory belongs to
|
|
135
148
|
metadata: dict[str, str] | None = None # User-defined metadata
|
|
136
149
|
chunk_id: str | None = None # Chunk this fact was extracted from
|
|
150
|
+
tags: list[str] | None = None # Visibility scope tags
|
|
137
151
|
|
|
138
152
|
|
|
139
153
|
class EntityObservationResponse(BaseModel):
|
|
@@ -188,12 +202,18 @@ class EntityListResponse(BaseModel):
|
|
|
188
202
|
"first_seen": "2024-01-15T10:30:00Z",
|
|
189
203
|
"last_seen": "2024-02-01T14:00:00Z",
|
|
190
204
|
}
|
|
191
|
-
]
|
|
205
|
+
],
|
|
206
|
+
"total": 150,
|
|
207
|
+
"limit": 100,
|
|
208
|
+
"offset": 0,
|
|
192
209
|
}
|
|
193
210
|
}
|
|
194
211
|
)
|
|
195
212
|
|
|
196
213
|
items: list[EntityListItem]
|
|
214
|
+
total: int
|
|
215
|
+
limit: int
|
|
216
|
+
offset: int
|
|
197
217
|
|
|
198
218
|
|
|
199
219
|
class EntityDetailResponse(BaseModel):
|
|
@@ -300,6 +320,7 @@ class MemoryItem(BaseModel):
|
|
|
300
320
|
"metadata": {"source": "slack", "channel": "engineering"},
|
|
301
321
|
"document_id": "meeting_notes_2024_01_15",
|
|
302
322
|
"entities": [{"text": "Alice"}, {"text": "ML model", "type": "CONCEPT"}],
|
|
323
|
+
"tags": ["user_a", "user_b"],
|
|
303
324
|
}
|
|
304
325
|
},
|
|
305
326
|
)
|
|
@@ -313,6 +334,10 @@ class MemoryItem(BaseModel):
|
|
|
313
334
|
default=None,
|
|
314
335
|
description="Optional entities to combine with auto-extracted entities.",
|
|
315
336
|
)
|
|
337
|
+
tags: list[str] | None = Field(
|
|
338
|
+
default=None,
|
|
339
|
+
description="Optional tags for visibility scoping. Memories with tags can be filtered during recall.",
|
|
340
|
+
)
|
|
316
341
|
|
|
317
342
|
@field_validator("timestamp", mode="before")
|
|
318
343
|
@classmethod
|
|
@@ -347,6 +372,7 @@ class RetainRequest(BaseModel):
|
|
|
347
372
|
},
|
|
348
373
|
],
|
|
349
374
|
"async": False,
|
|
375
|
+
"document_tags": ["user_a", "user_b"],
|
|
350
376
|
}
|
|
351
377
|
}
|
|
352
378
|
)
|
|
@@ -357,6 +383,10 @@ class RetainRequest(BaseModel):
|
|
|
357
383
|
alias="async",
|
|
358
384
|
description="If true, process asynchronously in background. If false, wait for completion (default: false)",
|
|
359
385
|
)
|
|
386
|
+
document_tags: list[str] | None = Field(
|
|
387
|
+
default=None,
|
|
388
|
+
description="Tags applied to all items in this request. These are merged with any item-level tags.",
|
|
389
|
+
)
|
|
360
390
|
|
|
361
391
|
|
|
362
392
|
class RetainResponse(BaseModel):
|
|
@@ -364,7 +394,15 @@ class RetainResponse(BaseModel):
|
|
|
364
394
|
|
|
365
395
|
model_config = ConfigDict(
|
|
366
396
|
populate_by_name=True,
|
|
367
|
-
json_schema_extra={
|
|
397
|
+
json_schema_extra={
|
|
398
|
+
"example": {
|
|
399
|
+
"success": True,
|
|
400
|
+
"bank_id": "user123",
|
|
401
|
+
"items_count": 2,
|
|
402
|
+
"async": False,
|
|
403
|
+
"usage": {"input_tokens": 500, "output_tokens": 100, "total_tokens": 600},
|
|
404
|
+
}
|
|
405
|
+
},
|
|
368
406
|
)
|
|
369
407
|
|
|
370
408
|
success: bool
|
|
@@ -373,6 +411,14 @@ class RetainResponse(BaseModel):
|
|
|
373
411
|
is_async: bool = Field(
|
|
374
412
|
alias="async", serialization_alias="async", description="Whether the operation was processed asynchronously"
|
|
375
413
|
)
|
|
414
|
+
operation_id: str | None = Field(
|
|
415
|
+
default=None,
|
|
416
|
+
description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations and find this ID. Only present when async=true.",
|
|
417
|
+
)
|
|
418
|
+
usage: TokenUsage | None = Field(
|
|
419
|
+
default=None,
|
|
420
|
+
description="Token usage metrics for LLM calls during fact extraction (only present for synchronous operations)",
|
|
421
|
+
)
|
|
376
422
|
|
|
377
423
|
|
|
378
424
|
class FactsIncludeOptions(BaseModel):
|
|
@@ -409,6 +455,8 @@ class ReflectRequest(BaseModel):
|
|
|
409
455
|
},
|
|
410
456
|
"required": ["summary", "key_points"],
|
|
411
457
|
},
|
|
458
|
+
"tags": ["user_a"],
|
|
459
|
+
"tags_match": "any",
|
|
412
460
|
}
|
|
413
461
|
}
|
|
414
462
|
)
|
|
@@ -424,6 +472,15 @@ class ReflectRequest(BaseModel):
|
|
|
424
472
|
default=None,
|
|
425
473
|
description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
|
|
426
474
|
)
|
|
475
|
+
tags: list[str] | None = Field(
|
|
476
|
+
default=None,
|
|
477
|
+
description="Filter memories by tags during reflection. If not specified, all memories are considered.",
|
|
478
|
+
)
|
|
479
|
+
tags_match: TagsMatch = Field(
|
|
480
|
+
default="any",
|
|
481
|
+
description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
|
|
482
|
+
"'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
|
|
483
|
+
)
|
|
427
484
|
|
|
428
485
|
|
|
429
486
|
class OpinionItem(BaseModel):
|
|
@@ -472,6 +529,7 @@ class ReflectResponse(BaseModel):
|
|
|
472
529
|
"summary": "AI is transformative",
|
|
473
530
|
"key_points": ["Used in healthcare", "Discussed recently"],
|
|
474
531
|
},
|
|
532
|
+
"usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
|
|
475
533
|
}
|
|
476
534
|
}
|
|
477
535
|
)
|
|
@@ -482,6 +540,10 @@ class ReflectResponse(BaseModel):
|
|
|
482
540
|
default=None,
|
|
483
541
|
description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
|
|
484
542
|
)
|
|
543
|
+
usage: TokenUsage | None = Field(
|
|
544
|
+
default=None,
|
|
545
|
+
description="Token usage metrics for LLM calls during reflection.",
|
|
546
|
+
)
|
|
485
547
|
|
|
486
548
|
|
|
487
549
|
class BanksResponse(BaseModel):
|
|
@@ -630,6 +692,7 @@ class GraphDataResponse(BaseModel):
|
|
|
630
692
|
}
|
|
631
693
|
],
|
|
632
694
|
"total_units": 2,
|
|
695
|
+
"limit": 1000,
|
|
633
696
|
}
|
|
634
697
|
}
|
|
635
698
|
)
|
|
@@ -638,6 +701,7 @@ class GraphDataResponse(BaseModel):
|
|
|
638
701
|
edges: list[dict[str, Any]]
|
|
639
702
|
table_rows: list[dict[str, Any]]
|
|
640
703
|
total_units: int
|
|
704
|
+
limit: int
|
|
641
705
|
|
|
642
706
|
|
|
643
707
|
class ListMemoryUnitsResponse(BaseModel):
|
|
@@ -699,6 +763,37 @@ class ListDocumentsResponse(BaseModel):
|
|
|
699
763
|
offset: int
|
|
700
764
|
|
|
701
765
|
|
|
766
|
+
class TagItem(BaseModel):
|
|
767
|
+
"""Single tag with usage count."""
|
|
768
|
+
|
|
769
|
+
tag: str = Field(description="The tag value")
|
|
770
|
+
count: int = Field(description="Number of memories with this tag")
|
|
771
|
+
|
|
772
|
+
|
|
773
|
+
class ListTagsResponse(BaseModel):
|
|
774
|
+
"""Response model for list tags endpoint."""
|
|
775
|
+
|
|
776
|
+
model_config = ConfigDict(
|
|
777
|
+
json_schema_extra={
|
|
778
|
+
"example": {
|
|
779
|
+
"items": [
|
|
780
|
+
{"tag": "user:alice", "count": 42},
|
|
781
|
+
{"tag": "user:bob", "count": 15},
|
|
782
|
+
{"tag": "session:abc123", "count": 8},
|
|
783
|
+
],
|
|
784
|
+
"total": 25,
|
|
785
|
+
"limit": 100,
|
|
786
|
+
"offset": 0,
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
)
|
|
790
|
+
|
|
791
|
+
items: list[TagItem]
|
|
792
|
+
total: int
|
|
793
|
+
limit: int
|
|
794
|
+
offset: int
|
|
795
|
+
|
|
796
|
+
|
|
702
797
|
class DocumentResponse(BaseModel):
|
|
703
798
|
"""Response model for get document endpoint."""
|
|
704
799
|
|
|
@@ -712,6 +807,7 @@ class DocumentResponse(BaseModel):
|
|
|
712
807
|
"created_at": "2024-01-15T10:30:00Z",
|
|
713
808
|
"updated_at": "2024-01-15T10:30:00Z",
|
|
714
809
|
"memory_unit_count": 15,
|
|
810
|
+
"tags": ["user_a", "session_123"],
|
|
715
811
|
}
|
|
716
812
|
}
|
|
717
813
|
)
|
|
@@ -723,6 +819,7 @@ class DocumentResponse(BaseModel):
|
|
|
723
819
|
created_at: str
|
|
724
820
|
updated_at: str
|
|
725
821
|
memory_unit_count: int
|
|
822
|
+
tags: list[str] = Field(default_factory=list, description="Tags associated with this document")
|
|
726
823
|
|
|
727
824
|
|
|
728
825
|
class DeleteDocumentResponse(BaseModel):
|
|
@@ -934,6 +1031,12 @@ def create_app(
|
|
|
934
1031
|
await memory.initialize()
|
|
935
1032
|
logging.info("Memory system initialized")
|
|
936
1033
|
|
|
1034
|
+
# Set up DB pool metrics after memory initialization
|
|
1035
|
+
metrics_collector = get_metrics_collector()
|
|
1036
|
+
if memory._pool is not None and hasattr(metrics_collector, "set_db_pool"):
|
|
1037
|
+
metrics_collector.set_db_pool(memory._pool)
|
|
1038
|
+
logging.info("DB pool metrics configured")
|
|
1039
|
+
|
|
937
1040
|
# Call HTTP extension startup hook
|
|
938
1041
|
if http_extension:
|
|
939
1042
|
await http_extension.on_startup()
|
|
@@ -970,6 +1073,30 @@ def create_app(
|
|
|
970
1073
|
# This is required for mounted sub-applications where lifespan may not fire
|
|
971
1074
|
app.state.memory = memory
|
|
972
1075
|
|
|
1076
|
+
# Add HTTP metrics middleware
|
|
1077
|
+
@app.middleware("http")
|
|
1078
|
+
async def http_metrics_middleware(request, call_next):
|
|
1079
|
+
"""Record HTTP request metrics."""
|
|
1080
|
+
# Normalize endpoint path to reduce cardinality
|
|
1081
|
+
# Replace UUIDs and numeric IDs with placeholders
|
|
1082
|
+
import re
|
|
1083
|
+
|
|
1084
|
+
from starlette.requests import Request
|
|
1085
|
+
|
|
1086
|
+
path = request.url.path
|
|
1087
|
+
# Replace UUIDs
|
|
1088
|
+
path = re.sub(r"/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}", path)
|
|
1089
|
+
# Replace numeric IDs
|
|
1090
|
+
path = re.sub(r"/\d+(?=/|$)", "/{id}", path)
|
|
1091
|
+
|
|
1092
|
+
status_code = [500] # Default to 500, will be updated
|
|
1093
|
+
metrics_collector = get_metrics_collector()
|
|
1094
|
+
|
|
1095
|
+
with metrics_collector.record_http_request(request.method, path, lambda: status_code[0]):
|
|
1096
|
+
response = await call_next(request)
|
|
1097
|
+
status_code[0] = response.status_code
|
|
1098
|
+
return response
|
|
1099
|
+
|
|
973
1100
|
# Register all routes
|
|
974
1101
|
_register_routes(app)
|
|
975
1102
|
|
|
@@ -1049,16 +1176,19 @@ def _register_routes(app: FastAPI):
|
|
|
1049
1176
|
"/v1/default/banks/{bank_id}/graph",
|
|
1050
1177
|
response_model=GraphDataResponse,
|
|
1051
1178
|
summary="Get memory graph data",
|
|
1052
|
-
description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion).
|
|
1179
|
+
description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion).",
|
|
1053
1180
|
operation_id="get_graph",
|
|
1054
1181
|
tags=["Memory"],
|
|
1055
1182
|
)
|
|
1056
1183
|
async def api_graph(
|
|
1057
|
-
bank_id: str,
|
|
1184
|
+
bank_id: str,
|
|
1185
|
+
type: str | None = None,
|
|
1186
|
+
limit: int = 1000,
|
|
1187
|
+
request_context: RequestContext = Depends(get_request_context),
|
|
1058
1188
|
):
|
|
1059
1189
|
"""Get graph data from database, filtered by bank_id and optionally by type."""
|
|
1060
1190
|
try:
|
|
1061
|
-
data = await app.state.memory.get_graph_data(bank_id, type, request_context=request_context)
|
|
1191
|
+
data = await app.state.memory.get_graph_data(bank_id, type, limit=limit, request_context=request_context)
|
|
1062
1192
|
return data
|
|
1063
1193
|
except (AuthenticationError, HTTPException):
|
|
1064
1194
|
raise
|
|
@@ -1117,6 +1247,37 @@ def _register_routes(app: FastAPI):
|
|
|
1117
1247
|
logger.error(f"Error in /v1/default/banks/{bank_id}/memories/list: {error_detail}")
|
|
1118
1248
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1119
1249
|
|
|
1250
|
+
@app.get(
|
|
1251
|
+
"/v1/default/banks/{bank_id}/memories/{memory_id}",
|
|
1252
|
+
summary="Get memory unit",
|
|
1253
|
+
description="Get a single memory unit by ID with all its metadata including entities and tags.",
|
|
1254
|
+
operation_id="get_memory",
|
|
1255
|
+
tags=["Memory"],
|
|
1256
|
+
)
|
|
1257
|
+
async def api_get_memory(
|
|
1258
|
+
bank_id: str,
|
|
1259
|
+
memory_id: str,
|
|
1260
|
+
request_context: RequestContext = Depends(get_request_context),
|
|
1261
|
+
):
|
|
1262
|
+
"""Get a single memory unit by ID."""
|
|
1263
|
+
try:
|
|
1264
|
+
data = await app.state.memory.get_memory_unit(
|
|
1265
|
+
bank_id=bank_id,
|
|
1266
|
+
memory_id=memory_id,
|
|
1267
|
+
request_context=request_context,
|
|
1268
|
+
)
|
|
1269
|
+
if data is None:
|
|
1270
|
+
raise HTTPException(status_code=404, detail=f"Memory unit '{memory_id}' not found")
|
|
1271
|
+
return data
|
|
1272
|
+
except (AuthenticationError, HTTPException):
|
|
1273
|
+
raise
|
|
1274
|
+
except Exception as e:
|
|
1275
|
+
import traceback
|
|
1276
|
+
|
|
1277
|
+
error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
|
1278
|
+
logger.error(f"Error in /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
|
|
1279
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1280
|
+
|
|
1120
1281
|
@app.post(
|
|
1121
1282
|
"/v1/default/banks/{bank_id}/memories/recall",
|
|
1122
1283
|
response_model=RecallResponse,
|
|
@@ -1134,6 +1295,9 @@ def _register_routes(app: FastAPI):
|
|
|
1134
1295
|
bank_id: str, request: RecallRequest, request_context: RequestContext = Depends(get_request_context)
|
|
1135
1296
|
):
|
|
1136
1297
|
"""Run a recall and return results with trace."""
|
|
1298
|
+
import time
|
|
1299
|
+
|
|
1300
|
+
handler_start = time.time()
|
|
1137
1301
|
metrics = get_metrics_collector()
|
|
1138
1302
|
|
|
1139
1303
|
try:
|
|
@@ -1159,10 +1323,12 @@ def _register_routes(app: FastAPI):
|
|
|
1159
1323
|
include_chunks = request.include.chunks is not None
|
|
1160
1324
|
max_chunk_tokens = request.include.chunks.max_tokens if include_chunks else 8192
|
|
1161
1325
|
|
|
1326
|
+
pre_recall = time.time() - handler_start
|
|
1162
1327
|
# Run recall with tracing (record metrics)
|
|
1163
1328
|
with metrics.record_operation(
|
|
1164
|
-
"recall", bank_id=bank_id, budget=request.budget.value, max_tokens=request.max_tokens
|
|
1329
|
+
"recall", bank_id=bank_id, source="api", budget=request.budget.value, max_tokens=request.max_tokens
|
|
1165
1330
|
):
|
|
1331
|
+
recall_start = time.time()
|
|
1166
1332
|
core_result = await app.state.memory.recall_async(
|
|
1167
1333
|
bank_id=bank_id,
|
|
1168
1334
|
query=request.query,
|
|
@@ -1176,6 +1342,8 @@ def _register_routes(app: FastAPI):
|
|
|
1176
1342
|
include_chunks=include_chunks,
|
|
1177
1343
|
max_chunk_tokens=max_chunk_tokens,
|
|
1178
1344
|
request_context=request_context,
|
|
1345
|
+
tags=request.tags,
|
|
1346
|
+
tags_match=request.tags_match,
|
|
1179
1347
|
)
|
|
1180
1348
|
|
|
1181
1349
|
# Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)
|
|
@@ -1191,6 +1359,7 @@ def _register_routes(app: FastAPI):
|
|
|
1191
1359
|
mentioned_at=fact.mentioned_at,
|
|
1192
1360
|
document_id=fact.document_id,
|
|
1193
1361
|
chunk_id=fact.chunk_id,
|
|
1362
|
+
tags=fact.tags,
|
|
1194
1363
|
)
|
|
1195
1364
|
for fact in core_result.results
|
|
1196
1365
|
]
|
|
@@ -1221,9 +1390,21 @@ def _register_routes(app: FastAPI):
|
|
|
1221
1390
|
],
|
|
1222
1391
|
)
|
|
1223
1392
|
|
|
1224
|
-
|
|
1393
|
+
response = RecallResponse(
|
|
1225
1394
|
results=recall_results, trace=core_result.trace, entities=entities_response, chunks=chunks_response
|
|
1226
1395
|
)
|
|
1396
|
+
|
|
1397
|
+
handler_duration = time.time() - handler_start
|
|
1398
|
+
recall_duration = time.time() - recall_start
|
|
1399
|
+
post_recall = handler_duration - pre_recall - recall_duration
|
|
1400
|
+
if handler_duration > 1.0:
|
|
1401
|
+
logging.info(
|
|
1402
|
+
f"[RECALL HTTP] bank={bank_id} handler_total={handler_duration:.3f}s "
|
|
1403
|
+
f"pre={pre_recall:.3f}s recall={recall_duration:.3f}s post={post_recall:.3f}s "
|
|
1404
|
+
f"results={len(recall_results)} entities={len(entities_response) if entities_response else 0}"
|
|
1405
|
+
)
|
|
1406
|
+
|
|
1407
|
+
return response
|
|
1227
1408
|
except HTTPException:
|
|
1228
1409
|
raise
|
|
1229
1410
|
except OperationValidationError as e:
|
|
@@ -1233,8 +1414,11 @@ def _register_routes(app: FastAPI):
|
|
|
1233
1414
|
except Exception as e:
|
|
1234
1415
|
import traceback
|
|
1235
1416
|
|
|
1417
|
+
handler_duration = time.time() - handler_start
|
|
1236
1418
|
error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
|
1237
|
-
logger.error(
|
|
1419
|
+
logger.error(
|
|
1420
|
+
f"[RECALL ERROR] bank={bank_id} handler_duration={handler_duration:.3f}s error={str(e)}\n{error_detail}"
|
|
1421
|
+
)
|
|
1238
1422
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1239
1423
|
|
|
1240
1424
|
@app.post(
|
|
@@ -1259,7 +1443,7 @@ def _register_routes(app: FastAPI):
|
|
|
1259
1443
|
|
|
1260
1444
|
try:
|
|
1261
1445
|
# Use the memory system's reflect_async method (record metrics)
|
|
1262
|
-
with metrics.record_operation("reflect", bank_id=bank_id, budget=request.budget.value):
|
|
1446
|
+
with metrics.record_operation("reflect", bank_id=bank_id, source="api", budget=request.budget.value):
|
|
1263
1447
|
core_result = await app.state.memory.reflect_async(
|
|
1264
1448
|
bank_id=bank_id,
|
|
1265
1449
|
query=request.query,
|
|
@@ -1268,6 +1452,8 @@ def _register_routes(app: FastAPI):
|
|
|
1268
1452
|
max_tokens=request.max_tokens,
|
|
1269
1453
|
response_schema=request.response_schema,
|
|
1270
1454
|
request_context=request_context,
|
|
1455
|
+
tags=request.tags,
|
|
1456
|
+
tags_match=request.tags_match,
|
|
1271
1457
|
)
|
|
1272
1458
|
|
|
1273
1459
|
# Convert core MemoryFact objects to API ReflectFact objects if facts are requested
|
|
@@ -1290,6 +1476,7 @@ def _register_routes(app: FastAPI):
|
|
|
1290
1476
|
text=core_result.text,
|
|
1291
1477
|
based_on=based_on_facts,
|
|
1292
1478
|
structured_output=core_result.structured_output,
|
|
1479
|
+
usage=core_result.usage,
|
|
1293
1480
|
)
|
|
1294
1481
|
|
|
1295
1482
|
except OperationValidationError as e:
|
|
@@ -1333,9 +1520,14 @@ def _register_routes(app: FastAPI):
|
|
|
1333
1520
|
operation_id="get_agent_stats",
|
|
1334
1521
|
tags=["Banks"],
|
|
1335
1522
|
)
|
|
1336
|
-
async def api_stats(
|
|
1523
|
+
async def api_stats(
|
|
1524
|
+
bank_id: str,
|
|
1525
|
+
request_context: RequestContext = Depends(get_request_context),
|
|
1526
|
+
):
|
|
1337
1527
|
"""Get statistics about memory nodes and links for a memory bank."""
|
|
1338
1528
|
try:
|
|
1529
|
+
# Authenticate and set tenant schema
|
|
1530
|
+
await app.state.memory._authenticate_tenant(request_context)
|
|
1339
1531
|
pool = await app.state.memory._get_pool()
|
|
1340
1532
|
async with acquire_with_retry(pool) as conn:
|
|
1341
1533
|
# Get node counts by fact_type
|
|
@@ -1454,19 +1646,27 @@ def _register_routes(app: FastAPI):
|
|
|
1454
1646
|
"/v1/default/banks/{bank_id}/entities",
|
|
1455
1647
|
response_model=EntityListResponse,
|
|
1456
1648
|
summary="List entities",
|
|
1457
|
-
description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count.",
|
|
1649
|
+
description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count. Supports pagination.",
|
|
1458
1650
|
operation_id="list_entities",
|
|
1459
1651
|
tags=["Entities"],
|
|
1460
1652
|
)
|
|
1461
1653
|
async def api_list_entities(
|
|
1462
1654
|
bank_id: str,
|
|
1463
1655
|
limit: int = Query(default=100, description="Maximum number of entities to return"),
|
|
1656
|
+
offset: int = Query(default=0, description="Offset for pagination"),
|
|
1464
1657
|
request_context: RequestContext = Depends(get_request_context),
|
|
1465
1658
|
):
|
|
1466
|
-
"""List entities for a memory bank."""
|
|
1659
|
+
"""List entities for a memory bank with pagination."""
|
|
1467
1660
|
try:
|
|
1468
|
-
|
|
1469
|
-
|
|
1661
|
+
data = await app.state.memory.list_entities(
|
|
1662
|
+
bank_id, limit=limit, offset=offset, request_context=request_context
|
|
1663
|
+
)
|
|
1664
|
+
return EntityListResponse(
|
|
1665
|
+
items=[EntityListItem(**e) for e in data["items"]],
|
|
1666
|
+
total=data["total"],
|
|
1667
|
+
limit=data["limit"],
|
|
1668
|
+
offset=data["offset"],
|
|
1669
|
+
)
|
|
1470
1670
|
except (AuthenticationError, HTTPException):
|
|
1471
1671
|
raise
|
|
1472
1672
|
except Exception as e:
|
|
@@ -1638,6 +1838,59 @@ def _register_routes(app: FastAPI):
|
|
|
1638
1838
|
logger.error(f"Error in /v1/default/banks/{bank_id}/documents/{document_id}: {error_detail}")
|
|
1639
1839
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1640
1840
|
|
|
1841
|
+
@app.get(
|
|
1842
|
+
"/v1/default/banks/{bank_id}/tags",
|
|
1843
|
+
response_model=ListTagsResponse,
|
|
1844
|
+
summary="List tags",
|
|
1845
|
+
description="List all unique tags in a memory bank with usage counts. "
|
|
1846
|
+
"Supports wildcard search using '*' (e.g., 'user:*', '*-fred', 'tag*-2'). Case-insensitive.",
|
|
1847
|
+
operation_id="list_tags",
|
|
1848
|
+
tags=["Memory"],
|
|
1849
|
+
)
|
|
1850
|
+
async def api_list_tags(
|
|
1851
|
+
bank_id: str,
|
|
1852
|
+
q: str | None = Query(
|
|
1853
|
+
default=None,
|
|
1854
|
+
description="Wildcard pattern to filter tags (e.g., 'user:*' for user:alice, '*-admin' for role-admin). "
|
|
1855
|
+
"Use '*' as wildcard. Case-insensitive.",
|
|
1856
|
+
),
|
|
1857
|
+
limit: int = Query(default=100, description="Maximum number of tags to return"),
|
|
1858
|
+
offset: int = Query(default=0, description="Offset for pagination"),
|
|
1859
|
+
request_context: RequestContext = Depends(get_request_context),
|
|
1860
|
+
):
|
|
1861
|
+
"""
|
|
1862
|
+
List all unique tags in a memory bank.
|
|
1863
|
+
|
|
1864
|
+
Use this endpoint to discover available tags or expand wildcard patterns.
|
|
1865
|
+
Supports '*' wildcards for flexible matching (case-insensitive):
|
|
1866
|
+
- 'user:*' matches user:alice, user:bob
|
|
1867
|
+
- '*-admin' matches role-admin, super-admin
|
|
1868
|
+
- 'env*-prod' matches env-prod, environment-prod
|
|
1869
|
+
|
|
1870
|
+
Args:
|
|
1871
|
+
bank_id: Memory Bank ID (from path)
|
|
1872
|
+
q: Wildcard pattern to filter tags (use '*' as wildcard)
|
|
1873
|
+
limit: Maximum number of tags to return (default: 100)
|
|
1874
|
+
offset: Offset for pagination (default: 0)
|
|
1875
|
+
"""
|
|
1876
|
+
try:
|
|
1877
|
+
data = await app.state.memory.list_tags(
|
|
1878
|
+
bank_id=bank_id,
|
|
1879
|
+
pattern=q,
|
|
1880
|
+
limit=limit,
|
|
1881
|
+
offset=offset,
|
|
1882
|
+
request_context=request_context,
|
|
1883
|
+
)
|
|
1884
|
+
return data
|
|
1885
|
+
except (AuthenticationError, HTTPException):
|
|
1886
|
+
raise
|
|
1887
|
+
except Exception as e:
|
|
1888
|
+
import traceback
|
|
1889
|
+
|
|
1890
|
+
error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
|
1891
|
+
logger.error(f"Error in /v1/default/banks/{bank_id}/tags: {error_detail}")
|
|
1892
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1893
|
+
|
|
1641
1894
|
@app.get(
|
|
1642
1895
|
"/v1/default/chunks/{chunk_id:path}",
|
|
1643
1896
|
response_model=ChunkResponse,
|
|
@@ -2000,28 +2253,37 @@ def _register_routes(app: FastAPI):
|
|
|
2000
2253
|
content_dict["document_id"] = item.document_id
|
|
2001
2254
|
if item.entities:
|
|
2002
2255
|
content_dict["entities"] = [{"text": e.text, "type": e.type or "CONCEPT"} for e in item.entities]
|
|
2256
|
+
if item.tags:
|
|
2257
|
+
content_dict["tags"] = item.tags
|
|
2003
2258
|
contents.append(content_dict)
|
|
2004
2259
|
|
|
2005
2260
|
if request.async_:
|
|
2006
2261
|
# Async processing: queue task and return immediately
|
|
2007
|
-
result = await app.state.memory.submit_async_retain(
|
|
2262
|
+
result = await app.state.memory.submit_async_retain(
|
|
2263
|
+
bank_id, contents, document_tags=request.document_tags, request_context=request_context
|
|
2264
|
+
)
|
|
2008
2265
|
return RetainResponse.model_validate(
|
|
2009
2266
|
{
|
|
2010
2267
|
"success": True,
|
|
2011
2268
|
"bank_id": bank_id,
|
|
2012
2269
|
"items_count": result["items_count"],
|
|
2013
2270
|
"async": True,
|
|
2271
|
+
"operation_id": result["operation_id"],
|
|
2014
2272
|
}
|
|
2015
2273
|
)
|
|
2016
2274
|
else:
|
|
2017
2275
|
# Synchronous processing: wait for completion (record metrics)
|
|
2018
|
-
with metrics.record_operation("retain", bank_id=bank_id):
|
|
2019
|
-
result = await app.state.memory.retain_batch_async(
|
|
2020
|
-
bank_id=bank_id,
|
|
2276
|
+
with metrics.record_operation("retain", bank_id=bank_id, source="api"):
|
|
2277
|
+
result, usage = await app.state.memory.retain_batch_async(
|
|
2278
|
+
bank_id=bank_id,
|
|
2279
|
+
contents=contents,
|
|
2280
|
+
document_tags=request.document_tags,
|
|
2281
|
+
request_context=request_context,
|
|
2282
|
+
return_usage=True,
|
|
2021
2283
|
)
|
|
2022
2284
|
|
|
2023
2285
|
return RetainResponse.model_validate(
|
|
2024
|
-
{"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False}
|
|
2286
|
+
{"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False, "usage": usage}
|
|
2025
2287
|
)
|
|
2026
2288
|
except OperationValidationError as e:
|
|
2027
2289
|
raise HTTPException(status_code=e.status_code, detail=e.reason)
|