hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/api/http.py CHANGED
@@ -10,7 +10,7 @@ import logging
10
10
  import uuid
11
11
  from contextlib import asynccontextmanager
12
12
  from datetime import datetime
13
- from typing import Any
13
+ from typing import Any, Literal
14
14
 
15
15
  from fastapi import Depends, FastAPI, Header, HTTPException, Query
16
16
 
@@ -36,7 +36,9 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
36
36
  from hindsight_api import MemoryEngine
37
37
  from hindsight_api.engine.db_utils import acquire_with_retry
38
38
  from hindsight_api.engine.memory_engine import Budget, fq_table
39
- from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
39
+ from hindsight_api.engine.reflect.observations import Observation
40
+ from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES, TokenUsage
41
+ from hindsight_api.engine.search.tags import TagsMatch
40
42
  from hindsight_api.extensions import HttpExtension, OperationValidationError, load_extension
41
43
  from hindsight_api.metrics import create_metrics_collector, get_metrics_collector, initialize_metrics
42
44
  from hindsight_api.models import RequestContext
@@ -81,13 +83,17 @@ class RecallRequest(BaseModel):
81
83
  "trace": True,
82
84
  "query_timestamp": "2023-05-30T23:40:00",
83
85
  "include": {"entities": {"max_tokens": 500}},
86
+ "tags": ["user_a"],
87
+ "tags_match": "any",
84
88
  }
85
89
  }
86
90
  )
87
91
 
88
92
  query: str
89
93
  types: list[str] | None = Field(
90
- default=None, description="List of fact types to recall (defaults to all if not specified)"
94
+ default=None,
95
+ description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified. "
96
+ "Note: 'opinion' is accepted but ignored (opinions are excluded from recall).",
91
97
  )
92
98
  budget: Budget = Budget.MID
93
99
  max_tokens: int = 4096
@@ -99,6 +105,15 @@ class RecallRequest(BaseModel):
99
105
  default_factory=IncludeOptions,
100
106
  description="Options for including additional data (entities are included by default)",
101
107
  )
108
+ tags: list[str] | None = Field(
109
+ default=None,
110
+ description="Filter memories by tags. If not specified, all memories are returned.",
111
+ )
112
+ tags_match: TagsMatch = Field(
113
+ default="any",
114
+ description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
115
+ "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
116
+ )
102
117
 
103
118
 
104
119
  class RecallResult(BaseModel):
@@ -119,6 +134,7 @@ class RecallResult(BaseModel):
119
134
  "document_id": "session_abc123",
120
135
  "metadata": {"source": "slack"},
121
136
  "chunk_id": "456e7890-e12b-34d5-a678-901234567890",
137
+ "tags": ["user_a", "user_b"],
122
138
  }
123
139
  },
124
140
  }
@@ -134,6 +150,7 @@ class RecallResult(BaseModel):
134
150
  document_id: str | None = None # Document this memory belongs to
135
151
  metadata: dict[str, str] | None = None # User-defined metadata
136
152
  chunk_id: str | None = None # Chunk this fact was extracted from
153
+ tags: list[str] | None = None # Visibility scope tags
137
154
 
138
155
 
139
156
  class EntityObservationResponse(BaseModel):
@@ -188,12 +205,18 @@ class EntityListResponse(BaseModel):
188
205
  "first_seen": "2024-01-15T10:30:00Z",
189
206
  "last_seen": "2024-02-01T14:00:00Z",
190
207
  }
191
- ]
208
+ ],
209
+ "total": 150,
210
+ "limit": 100,
211
+ "offset": 0,
192
212
  }
193
213
  }
194
214
  )
195
215
 
196
216
  items: list[EntityListItem]
217
+ total: int
218
+ limit: int
219
+ offset: int
197
220
 
198
221
 
199
222
  class EntityDetailResponse(BaseModel):
@@ -300,6 +323,7 @@ class MemoryItem(BaseModel):
300
323
  "metadata": {"source": "slack", "channel": "engineering"},
301
324
  "document_id": "meeting_notes_2024_01_15",
302
325
  "entities": [{"text": "Alice"}, {"text": "ML model", "type": "CONCEPT"}],
326
+ "tags": ["user_a", "user_b"],
303
327
  }
304
328
  },
305
329
  )
@@ -313,6 +337,10 @@ class MemoryItem(BaseModel):
313
337
  default=None,
314
338
  description="Optional entities to combine with auto-extracted entities.",
315
339
  )
340
+ tags: list[str] | None = Field(
341
+ default=None,
342
+ description="Optional tags for visibility scoping. Memories with tags can be filtered during recall.",
343
+ )
316
344
 
317
345
  @field_validator("timestamp", mode="before")
318
346
  @classmethod
@@ -347,6 +375,7 @@ class RetainRequest(BaseModel):
347
375
  },
348
376
  ],
349
377
  "async": False,
378
+ "document_tags": ["user_a", "user_b"],
350
379
  }
351
380
  }
352
381
  )
@@ -357,6 +386,10 @@ class RetainRequest(BaseModel):
357
386
  alias="async",
358
387
  description="If true, process asynchronously in background. If false, wait for completion (default: false)",
359
388
  )
389
+ document_tags: list[str] | None = Field(
390
+ default=None,
391
+ description="Tags applied to all items in this request. These are merged with any item-level tags.",
392
+ )
360
393
 
361
394
 
362
395
  class RetainResponse(BaseModel):
@@ -364,7 +397,15 @@ class RetainResponse(BaseModel):
364
397
 
365
398
  model_config = ConfigDict(
366
399
  populate_by_name=True,
367
- json_schema_extra={"example": {"success": True, "bank_id": "user123", "items_count": 2, "async": False}},
400
+ json_schema_extra={
401
+ "example": {
402
+ "success": True,
403
+ "bank_id": "user123",
404
+ "items_count": 2,
405
+ "async": False,
406
+ "usage": {"input_tokens": 500, "output_tokens": 100, "total_tokens": 600},
407
+ }
408
+ },
368
409
  )
369
410
 
370
411
  success: bool
@@ -373,6 +414,14 @@ class RetainResponse(BaseModel):
373
414
  is_async: bool = Field(
374
415
  alias="async", serialization_alias="async", description="Whether the operation was processed asynchronously"
375
416
  )
417
+ operation_id: str | None = Field(
418
+ default=None,
419
+ description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations and find this ID. Only present when async=true.",
420
+ )
421
+ usage: TokenUsage | None = Field(
422
+ default=None,
423
+ description="Token usage metrics for LLM calls during fact extraction (only present for synchronous operations)",
424
+ )
376
425
 
377
426
 
378
427
  class FactsIncludeOptions(BaseModel):
@@ -381,6 +430,15 @@ class FactsIncludeOptions(BaseModel):
381
430
  pass # No additional options needed, just enable/disable
382
431
 
383
432
 
433
+ class ToolCallsIncludeOptions(BaseModel):
434
+ """Options for including tool calls in reflect results."""
435
+
436
+ output: bool = Field(
437
+ default=True,
438
+ description="Include tool outputs in the trace. Set to false to only include inputs (smaller payload).",
439
+ )
440
+
441
+
384
442
  class ReflectIncludeOptions(BaseModel):
385
443
  """Options for including additional data in reflect results."""
386
444
 
@@ -388,6 +446,10 @@ class ReflectIncludeOptions(BaseModel):
388
446
  default=None,
389
447
  description="Include facts that the answer is based on. Set to {} to enable, null to disable (default: disabled).",
390
448
  )
449
+ tool_calls: ToolCallsIncludeOptions | None = Field(
450
+ default=None,
451
+ description="Include tool calls trace. Set to {} for full trace (input+output), {output: false} for inputs only.",
452
+ )
391
453
 
392
454
 
393
455
  class ReflectRequest(BaseModel):
@@ -398,7 +460,6 @@ class ReflectRequest(BaseModel):
398
460
  "example": {
399
461
  "query": "What do you think about artificial intelligence?",
400
462
  "budget": "low",
401
- "context": "This is for a research paper on AI ethics",
402
463
  "max_tokens": 4096,
403
464
  "include": {"facts": {}},
404
465
  "response_schema": {
@@ -409,13 +470,21 @@ class ReflectRequest(BaseModel):
409
470
  },
410
471
  "required": ["summary", "key_points"],
411
472
  },
473
+ "tags": ["user_a"],
474
+ "tags_match": "any",
412
475
  }
413
476
  }
414
477
  )
415
478
 
416
479
  query: str
417
480
  budget: Budget = Budget.LOW
418
- context: str | None = None
481
+ context: str | None = Field(
482
+ default=None,
483
+ description="DEPRECATED: Additional context is now concatenated with the query. "
484
+ "Pass context directly in the query field instead. "
485
+ "If provided, it will be appended to the query for backward compatibility.",
486
+ deprecated=True,
487
+ )
419
488
  max_tokens: int = Field(default=4096, description="Maximum tokens for the response")
420
489
  include: ReflectIncludeOptions = Field(
421
490
  default_factory=ReflectIncludeOptions, description="Options for including additional data (disabled by default)"
@@ -424,6 +493,15 @@ class ReflectRequest(BaseModel):
424
493
  default=None,
425
494
  description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
426
495
  )
496
+ tags: list[str] | None = Field(
497
+ default=None,
498
+ description="Filter memories by tags during reflection. If not specified, all memories are considered.",
499
+ )
500
+ tags_match: TagsMatch = Field(
501
+ default="any",
502
+ description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
503
+ "'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
504
+ )
427
505
 
428
506
 
429
507
  class OpinionItem(BaseModel):
@@ -457,6 +535,58 @@ class ReflectFact(BaseModel):
457
535
  occurred_end: str | None = None
458
536
 
459
537
 
538
+ class ReflectDirective(BaseModel):
539
+ """A directive applied during reflect."""
540
+
541
+ id: str = Field(description="Directive ID")
542
+ name: str = Field(description="Directive name")
543
+ content: str = Field(description="Directive content")
544
+
545
+
546
+ class ReflectMentalModel(BaseModel):
547
+ """A mental model used during reflect."""
548
+
549
+ id: str = Field(description="Mental model ID")
550
+ text: str = Field(description="Mental model content")
551
+ context: str | None = Field(default=None, description="Additional context")
552
+
553
+
554
+ class ReflectToolCall(BaseModel):
555
+ """A tool call made during reflect agent execution."""
556
+
557
+ tool: str = Field(description="Tool name: lookup, recall, learn, expand")
558
+ input: dict = Field(description="Tool input parameters")
559
+ output: dict | None = Field(
560
+ default=None, description="Tool output (only included when include.tool_calls.output is true)"
561
+ )
562
+ duration_ms: int = Field(description="Execution time in milliseconds")
563
+ iteration: int = Field(default=0, description="Iteration number (1-based) when this tool was called")
564
+
565
+
566
+ class ReflectLLMCall(BaseModel):
567
+ """An LLM call made during reflect agent execution."""
568
+
569
+ scope: str = Field(description="Call scope: agent_1, agent_2, final, etc.")
570
+ duration_ms: int = Field(description="Execution time in milliseconds")
571
+
572
+
573
+ class ReflectBasedOn(BaseModel):
574
+ """Evidence the response is based on: memories, mental models, and directives."""
575
+
576
+ memories: list[ReflectFact] = Field(default_factory=list, description="Memory facts used to generate the response")
577
+ mental_models: list[ReflectMentalModel] = Field(
578
+ default_factory=list, description="Mental models used during reflection"
579
+ )
580
+ directives: list[ReflectDirective] = Field(default_factory=list, description="Directives applied during reflection")
581
+
582
+
583
+ class ReflectTrace(BaseModel):
584
+ """Execution trace of LLM and tool calls during reflection."""
585
+
586
+ tool_calls: list[ReflectToolCall] = Field(default_factory=list, description="Tool calls made during reflection")
587
+ llm_calls: list[ReflectLLMCall] = Field(default_factory=list, description="LLM calls made during reflection")
588
+
589
+
460
590
  class ReflectResponse(BaseModel):
461
591
  """Response model for think endpoint."""
462
592
 
@@ -464,24 +594,50 @@ class ReflectResponse(BaseModel):
464
594
  json_schema_extra={
465
595
  "example": {
466
596
  "text": "Based on my understanding, AI is a transformative technology...",
467
- "based_on": [
468
- {"id": "123", "text": "AI is used in healthcare", "type": "world"},
469
- {"id": "456", "text": "I discussed AI applications last week", "type": "experience"},
470
- ],
597
+ "based_on": {
598
+ "memories": [
599
+ {"id": "123", "text": "AI is used in healthcare", "type": "world"},
600
+ {"id": "456", "text": "I discussed AI applications last week", "type": "experience"},
601
+ ],
602
+ },
471
603
  "structured_output": {
472
604
  "summary": "AI is transformative",
473
605
  "key_points": ["Used in healthcare", "Discussed recently"],
474
606
  },
607
+ "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
608
+ "trace": {
609
+ "tool_calls": [{"tool": "recall", "input": {"query": "AI"}, "duration_ms": 150}],
610
+ "llm_calls": [{"scope": "agent_1", "duration_ms": 1200}],
611
+ "observations": [
612
+ {
613
+ "id": "obs-1",
614
+ "name": "AI Technology",
615
+ "type": "concept",
616
+ "subtype": "structural",
617
+ }
618
+ ],
619
+ },
475
620
  }
476
621
  }
477
622
  )
478
623
 
479
624
  text: str
480
- based_on: list[ReflectFact] = [] # Facts used to generate the response
625
+ based_on: ReflectBasedOn | None = Field(
626
+ default=None,
627
+ description="Evidence used to generate the response. Only present when include.facts is set.",
628
+ )
481
629
  structured_output: dict | None = Field(
482
630
  default=None,
483
631
  description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
484
632
  )
633
+ usage: TokenUsage | None = Field(
634
+ default=None,
635
+ description="Token usage metrics for LLM calls during reflection.",
636
+ )
637
+ trace: ReflectTrace | None = Field(
638
+ default=None,
639
+ description="Execution trace of tool and LLM calls. Only present when include.tool_calls is set.",
640
+ )
485
641
 
486
642
 
487
643
  class BanksResponse(BaseModel):
@@ -511,7 +667,7 @@ class BankProfileResponse(BaseModel):
511
667
  "bank_id": "user123",
512
668
  "name": "Alice",
513
669
  "disposition": {"skepticism": 3, "literalism": 3, "empathy": 3},
514
- "background": "I am a software engineer with 10 years of experience in startups",
670
+ "mission": "I am a software engineer helping my team stay organized and ship quality code",
515
671
  }
516
672
  }
517
673
  )
@@ -519,7 +675,9 @@ class BankProfileResponse(BaseModel):
519
675
  bank_id: str
520
676
  name: str
521
677
  disposition: DispositionTraits
522
- background: str
678
+ mission: str = Field(description="The agent's mission - who they are and what they're trying to accomplish")
679
+ # Deprecated: use mission instead. Kept for backwards compatibility.
680
+ background: str | None = Field(default=None, description="Deprecated: use mission instead")
523
681
 
524
682
 
525
683
  class UpdateDispositionRequest(BaseModel):
@@ -528,8 +686,32 @@ class UpdateDispositionRequest(BaseModel):
528
686
  disposition: DispositionTraits
529
687
 
530
688
 
689
+ class SetMissionRequest(BaseModel):
690
+ """Request model for setting/updating the agent's mission."""
691
+
692
+ model_config = ConfigDict(
693
+ json_schema_extra={"example": {"content": "I am a PM helping my engineering team stay organized"}}
694
+ )
695
+
696
+ content: str = Field(description="The mission content - who you are and what you're trying to accomplish")
697
+
698
+
699
+ class MissionResponse(BaseModel):
700
+ """Response model for mission update."""
701
+
702
+ model_config = ConfigDict(
703
+ json_schema_extra={
704
+ "example": {
705
+ "mission": "I am a PM helping my engineering team stay organized and ship quality code.",
706
+ }
707
+ }
708
+ )
709
+
710
+ mission: str
711
+
712
+
531
713
  class AddBackgroundRequest(BaseModel):
532
- """Request model for adding/merging background information."""
714
+ """Request model for adding/merging background information. Deprecated: use SetMissionRequest instead."""
533
715
 
534
716
  model_config = ConfigDict(
535
717
  json_schema_extra={"example": {"content": "I was born in Texas", "update_disposition": True}}
@@ -537,23 +719,24 @@ class AddBackgroundRequest(BaseModel):
537
719
 
538
720
  content: str = Field(description="New background information to add or merge")
539
721
  update_disposition: bool = Field(
540
- default=True, description="If true, infer disposition traits from the merged background (default: true)"
722
+ default=True, description="Deprecated - disposition is no longer auto-inferred from mission"
541
723
  )
542
724
 
543
725
 
544
726
  class BackgroundResponse(BaseModel):
545
- """Response model for background update."""
727
+ """Response model for background update. Deprecated: use MissionResponse instead."""
546
728
 
547
729
  model_config = ConfigDict(
548
730
  json_schema_extra={
549
731
  "example": {
550
- "background": "I was born in Texas. I am a software engineer with 10 years of experience.",
551
- "disposition": {"skepticism": 3, "literalism": 3, "empathy": 3},
732
+ "mission": "I was born in Texas. I am a software engineer with 10 years of experience.",
552
733
  }
553
734
  }
554
735
  )
555
736
 
556
- background: str
737
+ mission: str
738
+ # Deprecated fields kept for backwards compatibility
739
+ background: str | None = Field(default=None, description="Deprecated: same as mission")
557
740
  disposition: DispositionTraits | None = None
558
741
 
559
742
 
@@ -563,7 +746,7 @@ class BankListItem(BaseModel):
563
746
  bank_id: str
564
747
  name: str | None = None
565
748
  disposition: DispositionTraits
566
- background: str | None = None
749
+ mission: str | None = None
567
750
  created_at: str | None = None
568
751
  updated_at: str | None = None
569
752
 
@@ -579,7 +762,7 @@ class BankListResponse(BaseModel):
579
762
  "bank_id": "user123",
580
763
  "name": "Alice",
581
764
  "disposition": {"skepticism": 3, "literalism": 3, "empathy": 3},
582
- "background": "I am a software engineer",
765
+ "mission": "I am a software engineer helping my team ship quality code",
583
766
  "created_at": "2024-01-15T10:30:00Z",
584
767
  "updated_at": "2024-01-16T14:20:00Z",
585
768
  }
@@ -599,14 +782,16 @@ class CreateBankRequest(BaseModel):
599
782
  "example": {
600
783
  "name": "Alice",
601
784
  "disposition": {"skepticism": 3, "literalism": 3, "empathy": 3},
602
- "background": "I am a creative software engineer with 10 years of experience",
785
+ "mission": "I am a PM helping my engineering team stay organized",
603
786
  }
604
787
  }
605
788
  )
606
789
 
607
790
  name: str | None = None
608
791
  disposition: DispositionTraits | None = None
609
- background: str | None = None
792
+ mission: str | None = Field(default=None, description="The agent's mission")
793
+ # Deprecated: use mission instead
794
+ background: str | None = Field(default=None, description="Deprecated: use mission instead")
610
795
 
611
796
 
612
797
  class GraphDataResponse(BaseModel):
@@ -630,6 +815,7 @@ class GraphDataResponse(BaseModel):
630
815
  }
631
816
  ],
632
817
  "total_units": 2,
818
+ "limit": 1000,
633
819
  }
634
820
  }
635
821
  )
@@ -638,6 +824,7 @@ class GraphDataResponse(BaseModel):
638
824
  edges: list[dict[str, Any]]
639
825
  table_rows: list[dict[str, Any]]
640
826
  total_units: int
827
+ limit: int
641
828
 
642
829
 
643
830
  class ListMemoryUnitsResponse(BaseModel):
@@ -699,6 +886,37 @@ class ListDocumentsResponse(BaseModel):
699
886
  offset: int
700
887
 
701
888
 
889
+ class TagItem(BaseModel):
890
+ """Single tag with usage count."""
891
+
892
+ tag: str = Field(description="The tag value")
893
+ count: int = Field(description="Number of memories with this tag")
894
+
895
+
896
+ class ListTagsResponse(BaseModel):
897
+ """Response model for list tags endpoint."""
898
+
899
+ model_config = ConfigDict(
900
+ json_schema_extra={
901
+ "example": {
902
+ "items": [
903
+ {"tag": "user:alice", "count": 42},
904
+ {"tag": "user:bob", "count": 15},
905
+ {"tag": "session:abc123", "count": 8},
906
+ ],
907
+ "total": 25,
908
+ "limit": 100,
909
+ "offset": 0,
910
+ }
911
+ }
912
+ )
913
+
914
+ items: list[TagItem]
915
+ total: int
916
+ limit: int
917
+ offset: int
918
+
919
+
702
920
  class DocumentResponse(BaseModel):
703
921
  """Response model for get document endpoint."""
704
922
 
@@ -712,6 +930,7 @@ class DocumentResponse(BaseModel):
712
930
  "created_at": "2024-01-15T10:30:00Z",
713
931
  "updated_at": "2024-01-15T10:30:00Z",
714
932
  "memory_unit_count": 15,
933
+ "tags": ["user_a", "session_123"],
715
934
  }
716
935
  }
717
936
  )
@@ -723,6 +942,7 @@ class DocumentResponse(BaseModel):
723
942
  created_at: str
724
943
  updated_at: str
725
944
  memory_unit_count: int
945
+ tags: list[str] = Field(default_factory=list, description="Tags associated with this document")
726
946
 
727
947
 
728
948
  class DeleteDocumentResponse(BaseModel):
@@ -797,6 +1017,9 @@ class BankStatsResponse(BaseModel):
797
1017
  "links_breakdown": {"fact": {"temporal": 100, "semantic": 60, "entity": 40}},
798
1018
  "pending_operations": 2,
799
1019
  "failed_operations": 0,
1020
+ "last_consolidated_at": "2024-01-15T10:30:00Z",
1021
+ "pending_consolidation": 0,
1022
+ "total_observations": 45,
800
1023
  }
801
1024
  }
802
1025
  )
@@ -811,6 +1034,156 @@ class BankStatsResponse(BaseModel):
811
1034
  links_breakdown: dict[str, dict[str, int]]
812
1035
  pending_operations: int
813
1036
  failed_operations: int
1037
+ # Consolidation stats
1038
+ last_consolidated_at: str | None = Field(default=None, description="When consolidation last ran (ISO format)")
1039
+ pending_consolidation: int = Field(default=0, description="Number of memories not yet processed into observations")
1040
+ total_observations: int = Field(default=0, description="Total number of observations")
1041
+
1042
+
1043
+ # Mental Model models
1044
+
1045
+
1046
+ class ObservationEvidenceResponse(BaseModel):
1047
+ """A single piece of evidence supporting an observation."""
1048
+
1049
+ memory_id: str = Field(description="ID of the memory unit this evidence comes from")
1050
+ quote: str = Field(description="Exact quote from the memory supporting the observation")
1051
+ relevance: str = Field(description="Brief explanation of how this quote supports the observation")
1052
+ timestamp: str = Field(description="When the source memory was created (ISO format)")
1053
+
1054
+
1055
+ # =========================================================================
1056
+ # Directive Models
1057
+ # =========================================================================
1058
+
1059
+
1060
+ class DirectiveResponse(BaseModel):
1061
+ """Response model for a directive."""
1062
+
1063
+ id: str
1064
+ bank_id: str
1065
+ name: str
1066
+ content: str
1067
+ priority: int = 0
1068
+ is_active: bool = True
1069
+ tags: list[str] = Field(default_factory=list)
1070
+ created_at: str | None = None
1071
+ updated_at: str | None = None
1072
+
1073
+
1074
+ class DirectiveListResponse(BaseModel):
1075
+ """Response model for listing directives."""
1076
+
1077
+ items: list[DirectiveResponse]
1078
+
1079
+
1080
+ class CreateDirectiveRequest(BaseModel):
1081
+ """Request model for creating a directive."""
1082
+
1083
+ name: str = Field(description="Human-readable name for the directive")
1084
+ content: str = Field(description="The directive text to inject into prompts")
1085
+ priority: int = Field(default=0, description="Higher priority directives are injected first")
1086
+ is_active: bool = Field(default=True, description="Whether this directive is active")
1087
+ tags: list[str] = Field(default_factory=list, description="Tags for filtering")
1088
+
1089
+
1090
+ class UpdateDirectiveRequest(BaseModel):
1091
+ """Request model for updating a directive."""
1092
+
1093
+ name: str | None = Field(default=None, description="New name")
1094
+ content: str | None = Field(default=None, description="New content")
1095
+ priority: int | None = Field(default=None, description="New priority")
1096
+ is_active: bool | None = Field(default=None, description="New active status")
1097
+ tags: list[str] | None = Field(default=None, description="New tags")
1098
+
1099
+
1100
+ # =========================================================================
1101
+ # Mental Models (stored reflect responses)
1102
+ # =========================================================================
1103
+
1104
+
1105
+ class MentalModelTrigger(BaseModel):
1106
+ """Trigger settings for a mental model."""
1107
+
1108
+ refresh_after_consolidation: bool = Field(
1109
+ default=False,
1110
+ description="If true, refresh this mental model after observations consolidation (real-time mode)",
1111
+ )
1112
+
1113
+
1114
+ class MentalModelResponse(BaseModel):
1115
+ """Response model for a mental model (stored reflect response)."""
1116
+
1117
+ id: str
1118
+ bank_id: str
1119
+ name: str
1120
+ source_query: str
1121
+ content: str
1122
+ tags: list[str] = Field(default_factory=list)
1123
+ max_tokens: int = Field(default=2048)
1124
+ trigger: MentalModelTrigger = Field(default_factory=MentalModelTrigger)
1125
+ last_refreshed_at: str | None = None
1126
+ created_at: str | None = None
1127
+ reflect_response: dict | None = Field(
1128
+ default=None,
1129
+ description="Full reflect API response payload including based_on facts and observations",
1130
+ )
1131
+
1132
+
1133
+ class MentalModelListResponse(BaseModel):
1134
+ """Response model for listing mental models."""
1135
+
1136
+ items: list[MentalModelResponse]
1137
+
1138
+
1139
+ class CreateMentalModelRequest(BaseModel):
1140
+ """Request model for creating a mental model."""
1141
+
1142
+ model_config = ConfigDict(
1143
+ json_schema_extra={
1144
+ "example": {
1145
+ "name": "Team Communication Preferences",
1146
+ "source_query": "How does the team prefer to communicate?",
1147
+ "tags": ["team"],
1148
+ "max_tokens": 2048,
1149
+ "trigger": {"refresh_after_consolidation": False},
1150
+ }
1151
+ }
1152
+ )
1153
+
1154
+ name: str = Field(description="Human-readable name for the mental model")
1155
+ source_query: str = Field(description="The query to run to generate content")
1156
+ tags: list[str] = Field(default_factory=list, description="Tags for scoped visibility")
1157
+ max_tokens: int = Field(default=2048, ge=256, le=8192, description="Maximum tokens for generated content")
1158
+ trigger: MentalModelTrigger = Field(default_factory=MentalModelTrigger, description="Trigger settings")
1159
+
1160
+
1161
+ class CreateMentalModelResponse(BaseModel):
1162
+ """Response model for mental model creation."""
1163
+
1164
+ operation_id: str = Field(description="Operation ID to track progress")
1165
+
1166
+
1167
+ class UpdateMentalModelRequest(BaseModel):
1168
+ """Request model for updating a mental model."""
1169
+
1170
+ model_config = ConfigDict(
1171
+ json_schema_extra={
1172
+ "example": {
1173
+ "name": "Updated Team Communication Preferences",
1174
+ "source_query": "How does the team prefer to communicate?",
1175
+ "max_tokens": 4096,
1176
+ "tags": ["team", "communication"],
1177
+ "trigger": {"refresh_after_consolidation": True},
1178
+ }
1179
+ }
1180
+ )
1181
+
1182
+ name: str | None = Field(default=None, description="New name for the mental model")
1183
+ source_query: str | None = Field(default=None, description="New source query for the mental model")
1184
+ max_tokens: int | None = Field(default=None, ge=256, le=8192, description="Maximum tokens for generated content")
1185
+ tags: list[str] | None = Field(default=None, description="Tags for scoped visibility")
1186
+ trigger: MentalModelTrigger | None = Field(default=None, description="Trigger settings")
814
1187
 
815
1188
 
816
1189
  class OperationResponse(BaseModel):
@@ -822,7 +1195,7 @@ class OperationResponse(BaseModel):
822
1195
  "id": "550e8400-e29b-41d4-a716-446655440000",
823
1196
  "task_type": "retain",
824
1197
  "items_count": 5,
825
- "document_id": "meeting-notes-2024",
1198
+ "document_id": None,
826
1199
  "created_at": "2024-01-15T10:30:00Z",
827
1200
  "status": "pending",
828
1201
  "error_message": None,
@@ -833,12 +1206,19 @@ class OperationResponse(BaseModel):
833
1206
  id: str
834
1207
  task_type: str
835
1208
  items_count: int
836
- document_id: str | None
1209
+ document_id: str | None = None
837
1210
  created_at: str
838
1211
  status: str
839
1212
  error_message: str | None
840
1213
 
841
1214
 
1215
+ class ConsolidationResponse(BaseModel):
1216
+ """Response model for consolidation trigger endpoint."""
1217
+
1218
+ operation_id: str = Field(description="ID of the async consolidation operation")
1219
+ deduplicated: bool = Field(default=False, description="True if an existing pending task was reused")
1220
+
1221
+
842
1222
  class OperationsListResponse(BaseModel):
843
1223
  """Response model for list operations endpoint."""
844
1224
 
@@ -846,12 +1226,13 @@ class OperationsListResponse(BaseModel):
846
1226
  json_schema_extra={
847
1227
  "example": {
848
1228
  "bank_id": "user123",
1229
+ "total": 150,
1230
+ "limit": 20,
1231
+ "offset": 0,
849
1232
  "operations": [
850
1233
  {
851
1234
  "id": "550e8400-e29b-41d4-a716-446655440000",
852
1235
  "task_type": "retain",
853
- "items_count": 5,
854
- "document_id": None,
855
1236
  "created_at": "2024-01-15T10:30:00Z",
856
1237
  "status": "pending",
857
1238
  "error_message": None,
@@ -862,6 +1243,9 @@ class OperationsListResponse(BaseModel):
862
1243
  )
863
1244
 
864
1245
  bank_id: str
1246
+ total: int
1247
+ limit: int
1248
+ offset: int
865
1249
  operations: list[OperationResponse]
866
1250
 
867
1251
 
@@ -883,6 +1267,76 @@ class CancelOperationResponse(BaseModel):
883
1267
  operation_id: str
884
1268
 
885
1269
 
1270
+ class OperationStatusResponse(BaseModel):
1271
+ """Response model for getting a single operation status."""
1272
+
1273
+ model_config = ConfigDict(
1274
+ json_schema_extra={
1275
+ "example": {
1276
+ "operation_id": "550e8400-e29b-41d4-a716-446655440000",
1277
+ "status": "completed",
1278
+ "operation_type": "refresh_mental_models",
1279
+ "created_at": "2024-01-15T10:30:00Z",
1280
+ "updated_at": "2024-01-15T10:31:30Z",
1281
+ "completed_at": "2024-01-15T10:31:30Z",
1282
+ "error_message": None,
1283
+ }
1284
+ }
1285
+ )
1286
+
1287
+ operation_id: str
1288
+ status: Literal["pending", "completed", "failed", "not_found"]
1289
+ operation_type: str | None = None
1290
+ created_at: str | None = None
1291
+ updated_at: str | None = None
1292
+ completed_at: str | None = None
1293
+ error_message: str | None = None
1294
+
1295
+
1296
+ class AsyncOperationSubmitResponse(BaseModel):
1297
+ """Response model for submitting an async operation."""
1298
+
1299
+ model_config = ConfigDict(
1300
+ json_schema_extra={
1301
+ "example": {
1302
+ "operation_id": "550e8400-e29b-41d4-a716-446655440000",
1303
+ "status": "queued",
1304
+ }
1305
+ }
1306
+ )
1307
+
1308
+ operation_id: str
1309
+ status: str
1310
+
1311
+
1312
+ class FeaturesInfo(BaseModel):
1313
+ """Feature flags indicating which capabilities are enabled."""
1314
+
1315
+ observations: bool = Field(description="Whether observations (auto-consolidation) are enabled")
1316
+ mcp: bool = Field(description="Whether MCP (Model Context Protocol) server is enabled")
1317
+ worker: bool = Field(description="Whether the background worker is enabled")
1318
+
1319
+
1320
+ class VersionResponse(BaseModel):
1321
+ """Response model for the version/info endpoint."""
1322
+
1323
+ model_config = ConfigDict(
1324
+ json_schema_extra={
1325
+ "example": {
1326
+ "api_version": "1.0.0",
1327
+ "features": {
1328
+ "observations": False,
1329
+ "mcp": True,
1330
+ "worker": True,
1331
+ },
1332
+ }
1333
+ }
1334
+ )
1335
+
1336
+ api_version: str = Field(description="API version string")
1337
+ features: FeaturesInfo = Field(description="Enabled feature flags")
1338
+
1339
+
886
1340
  def create_app(
887
1341
  memory: MemoryEngine,
888
1342
  initialize_memory: bool = True,
@@ -918,6 +1372,16 @@ def create_app(
918
1372
  Lifespan context manager for startup and shutdown events.
919
1373
  Note: This only fires when running the app standalone, not when mounted.
920
1374
  """
1375
+ import asyncio
1376
+ import socket
1377
+
1378
+ from hindsight_api.config import get_config
1379
+ from hindsight_api.worker import WorkerPoller
1380
+
1381
+ config = get_config()
1382
+ poller = None
1383
+ poller_task = None
1384
+
921
1385
  # Initialize OpenTelemetry metrics
922
1386
  try:
923
1387
  prometheus_reader = initialize_metrics(service_name="hindsight-api", service_version="1.0.0")
@@ -934,6 +1398,27 @@ def create_app(
934
1398
  await memory.initialize()
935
1399
  logging.info("Memory system initialized")
936
1400
 
1401
+ # Set up DB pool metrics after memory initialization
1402
+ metrics_collector = get_metrics_collector()
1403
+ if memory._pool is not None and hasattr(metrics_collector, "set_db_pool"):
1404
+ metrics_collector.set_db_pool(memory._pool)
1405
+ logging.info("DB pool metrics configured")
1406
+
1407
+ # Start worker poller if enabled (standalone mode)
1408
+ if config.worker_enabled and memory._pool is not None:
1409
+ worker_id = config.worker_id or socket.gethostname()
1410
+ poller = WorkerPoller(
1411
+ pool=memory._pool,
1412
+ worker_id=worker_id,
1413
+ executor=memory.execute_task,
1414
+ poll_interval_ms=config.worker_poll_interval_ms,
1415
+ batch_size=config.worker_batch_size,
1416
+ max_retries=config.worker_max_retries,
1417
+ tenant_extension=getattr(memory, "_tenant_extension", None),
1418
+ )
1419
+ poller_task = asyncio.create_task(poller.run())
1420
+ logging.info(f"Worker poller started (worker_id={worker_id})")
1421
+
937
1422
  # Call HTTP extension startup hook
938
1423
  if http_extension:
939
1424
  await http_extension.on_startup()
@@ -941,6 +1426,17 @@ def create_app(
941
1426
 
942
1427
  yield
943
1428
 
1429
+ # Shutdown worker poller if running
1430
+ if poller is not None:
1431
+ await poller.shutdown_graceful(timeout=30.0)
1432
+ if poller_task is not None:
1433
+ poller_task.cancel()
1434
+ try:
1435
+ await poller_task
1436
+ except asyncio.CancelledError:
1437
+ pass
1438
+ logging.info("Worker poller stopped")
1439
+
944
1440
  # Call HTTP extension shutdown hook
945
1441
  if http_extension:
946
1442
  await http_extension.on_shutdown()
@@ -970,6 +1466,30 @@ def create_app(
970
1466
  # This is required for mounted sub-applications where lifespan may not fire
971
1467
  app.state.memory = memory
972
1468
 
1469
+ # Add HTTP metrics middleware
1470
+ @app.middleware("http")
1471
+ async def http_metrics_middleware(request, call_next):
1472
+ """Record HTTP request metrics."""
1473
+ # Normalize endpoint path to reduce cardinality
1474
+ # Replace UUIDs and numeric IDs with placeholders
1475
+ import re
1476
+
1477
+ from starlette.requests import Request
1478
+
1479
+ path = request.url.path
1480
+ # Replace UUIDs
1481
+ path = re.sub(r"/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "/{id}", path)
1482
+ # Replace numeric IDs
1483
+ path = re.sub(r"/\d+(?=/|$)", "/{id}", path)
1484
+
1485
+ status_code = [500] # Default to 500, will be updated
1486
+ metrics_collector = get_metrics_collector()
1487
+
1488
+ with metrics_collector.record_http_request(request.method, path, lambda: status_code[0]):
1489
+ response = await call_next(request)
1490
+ status_code[0] = response.status_code
1491
+ return response
1492
+
973
1493
  # Register all routes
974
1494
  _register_routes(app)
975
1495
 
@@ -1031,6 +1551,34 @@ def _register_routes(app: FastAPI):
1031
1551
  status_code = 200 if health.get("status") == "healthy" else 503
1032
1552
  return JSONResponse(content=health, status_code=status_code)
1033
1553
 
1554
+ @app.get(
1555
+ "/version",
1556
+ response_model=VersionResponse,
1557
+ summary="Get API version and feature flags",
1558
+ description="Returns API version information and enabled feature flags. "
1559
+ "Use this to check which capabilities are available in this deployment.",
1560
+ tags=["Monitoring"],
1561
+ operation_id="get_version",
1562
+ )
1563
+ async def version_endpoint() -> VersionResponse:
1564
+ """
1565
+ Get API version and enabled features.
1566
+
1567
+ Returns version info and feature flags that can be used by clients
1568
+ to determine which capabilities are available.
1569
+ """
1570
+ from hindsight_api.config import get_config
1571
+
1572
+ config = get_config()
1573
+ return VersionResponse(
1574
+ api_version="1.0.0",
1575
+ features=FeaturesInfo(
1576
+ observations=config.enable_observations,
1577
+ mcp=config.mcp_enabled,
1578
+ worker=config.worker_enabled,
1579
+ ),
1580
+ )
1581
+
1034
1582
  @app.get(
1035
1583
  "/metrics",
1036
1584
  summary="Prometheus metrics endpoint",
@@ -1049,16 +1597,19 @@ def _register_routes(app: FastAPI):
1049
1597
  "/v1/default/banks/{bank_id}/graph",
1050
1598
  response_model=GraphDataResponse,
1051
1599
  summary="Get memory graph data",
1052
- description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion). Limited to 1000 most recent items.",
1600
+ description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion).",
1053
1601
  operation_id="get_graph",
1054
1602
  tags=["Memory"],
1055
1603
  )
1056
1604
  async def api_graph(
1057
- bank_id: str, type: str | None = None, request_context: RequestContext = Depends(get_request_context)
1605
+ bank_id: str,
1606
+ type: str | None = None,
1607
+ limit: int = 1000,
1608
+ request_context: RequestContext = Depends(get_request_context),
1058
1609
  ):
1059
1610
  """Get graph data from database, filtered by bank_id and optionally by type."""
1060
1611
  try:
1061
- data = await app.state.memory.get_graph_data(bank_id, type, request_context=request_context)
1612
+ data = await app.state.memory.get_graph_data(bank_id, type, limit=limit, request_context=request_context)
1062
1613
  return data
1063
1614
  except (AuthenticationError, HTTPException):
1064
1615
  raise
@@ -1117,11 +1668,42 @@ def _register_routes(app: FastAPI):
1117
1668
  logger.error(f"Error in /v1/default/banks/{bank_id}/memories/list: {error_detail}")
1118
1669
  raise HTTPException(status_code=500, detail=str(e))
1119
1670
 
1120
- @app.post(
1121
- "/v1/default/banks/{bank_id}/memories/recall",
1122
- response_model=RecallResponse,
1123
- summary="Recall memory",
1124
- description="Recall memory using semantic similarity and spreading activation.\n\n"
1671
+ @app.get(
1672
+ "/v1/default/banks/{bank_id}/memories/{memory_id}",
1673
+ summary="Get memory unit",
1674
+ description="Get a single memory unit by ID with all its metadata including entities and tags.",
1675
+ operation_id="get_memory",
1676
+ tags=["Memory"],
1677
+ )
1678
+ async def api_get_memory(
1679
+ bank_id: str,
1680
+ memory_id: str,
1681
+ request_context: RequestContext = Depends(get_request_context),
1682
+ ):
1683
+ """Get a single memory unit by ID."""
1684
+ try:
1685
+ data = await app.state.memory.get_memory_unit(
1686
+ bank_id=bank_id,
1687
+ memory_id=memory_id,
1688
+ request_context=request_context,
1689
+ )
1690
+ if data is None:
1691
+ raise HTTPException(status_code=404, detail=f"Memory unit '{memory_id}' not found")
1692
+ return data
1693
+ except (AuthenticationError, HTTPException):
1694
+ raise
1695
+ except Exception as e:
1696
+ import traceback
1697
+
1698
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
1699
+ logger.error(f"Error in /v1/default/banks/{bank_id}/memories/{memory_id}: {error_detail}")
1700
+ raise HTTPException(status_code=500, detail=str(e))
1701
+
1702
+ @app.post(
1703
+ "/v1/default/banks/{bank_id}/memories/recall",
1704
+ response_model=RecallResponse,
1705
+ summary="Recall memory",
1706
+ description="Recall memory using semantic similarity and spreading activation.\n\n"
1125
1707
  "The type parameter is optional and must be one of:\n"
1126
1708
  "- `world`: General knowledge about people, places, events, and things that happen\n"
1127
1709
  "- `experience`: Memories about experience, conversations, actions taken, and tasks performed\n"
@@ -1134,11 +1716,16 @@ def _register_routes(app: FastAPI):
1134
1716
  bank_id: str, request: RecallRequest, request_context: RequestContext = Depends(get_request_context)
1135
1717
  ):
1136
1718
  """Run a recall and return results with trace."""
1719
+ import time
1720
+
1721
+ handler_start = time.time()
1137
1722
  metrics = get_metrics_collector()
1138
1723
 
1139
1724
  try:
1140
- # Default to world, experience, opinion if not specified (exclude observation by default)
1725
+ # Default to world and experience if not specified (exclude observation and opinion)
1726
+ # Filter out 'opinion' even if requested - opinions are excluded from recall
1141
1727
  fact_types = request.types if request.types else list(VALID_RECALL_FACT_TYPES)
1728
+ fact_types = [ft for ft in fact_types if ft != "opinion"]
1142
1729
 
1143
1730
  # Parse query_timestamp if provided
1144
1731
  question_date = None
@@ -1159,10 +1746,12 @@ def _register_routes(app: FastAPI):
1159
1746
  include_chunks = request.include.chunks is not None
1160
1747
  max_chunk_tokens = request.include.chunks.max_tokens if include_chunks else 8192
1161
1748
 
1749
+ pre_recall = time.time() - handler_start
1162
1750
  # Run recall with tracing (record metrics)
1163
1751
  with metrics.record_operation(
1164
- "recall", bank_id=bank_id, budget=request.budget.value, max_tokens=request.max_tokens
1752
+ "recall", bank_id=bank_id, source="api", budget=request.budget.value, max_tokens=request.max_tokens
1165
1753
  ):
1754
+ recall_start = time.time()
1166
1755
  core_result = await app.state.memory.recall_async(
1167
1756
  bank_id=bank_id,
1168
1757
  query=request.query,
@@ -1176,6 +1765,8 @@ def _register_routes(app: FastAPI):
1176
1765
  include_chunks=include_chunks,
1177
1766
  max_chunk_tokens=max_chunk_tokens,
1178
1767
  request_context=request_context,
1768
+ tags=request.tags,
1769
+ tags_match=request.tags_match,
1179
1770
  )
1180
1771
 
1181
1772
  # Convert core MemoryFact objects to API RecallResult objects (excluding internal metrics)
@@ -1191,6 +1782,7 @@ def _register_routes(app: FastAPI):
1191
1782
  mentioned_at=fact.mentioned_at,
1192
1783
  document_id=fact.document_id,
1193
1784
  chunk_id=fact.chunk_id,
1785
+ tags=fact.tags,
1194
1786
  )
1195
1787
  for fact in core_result.results
1196
1788
  ]
@@ -1221,9 +1813,24 @@ def _register_routes(app: FastAPI):
1221
1813
  ],
1222
1814
  )
1223
1815
 
1224
- return RecallResponse(
1225
- results=recall_results, trace=core_result.trace, entities=entities_response, chunks=chunks_response
1816
+ response = RecallResponse(
1817
+ results=recall_results,
1818
+ trace=core_result.trace,
1819
+ entities=entities_response,
1820
+ chunks=chunks_response,
1226
1821
  )
1822
+
1823
+ handler_duration = time.time() - handler_start
1824
+ recall_duration = time.time() - recall_start
1825
+ post_recall = handler_duration - pre_recall - recall_duration
1826
+ if handler_duration > 1.0:
1827
+ logging.info(
1828
+ f"[RECALL HTTP] bank={bank_id} handler_total={handler_duration:.3f}s "
1829
+ f"pre={pre_recall:.3f}s recall={recall_duration:.3f}s post={post_recall:.3f}s "
1830
+ f"results={len(recall_results)} entities={len(entities_response) if entities_response else 0}"
1831
+ )
1832
+
1833
+ return response
1227
1834
  except HTTPException:
1228
1835
  raise
1229
1836
  except OperationValidationError as e:
@@ -1233,8 +1840,11 @@ def _register_routes(app: FastAPI):
1233
1840
  except Exception as e:
1234
1841
  import traceback
1235
1842
 
1843
+ handler_duration = time.time() - handler_start
1236
1844
  error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
1237
- logger.error(f"Error in /v1/default/banks/{bank_id}/memories/recall: {error_detail}")
1845
+ logger.error(
1846
+ f"[RECALL ERROR] bank={bank_id} handler_duration={handler_duration:.3f}s error={str(e)}\n{error_detail}"
1847
+ )
1238
1848
  raise HTTPException(status_code=500, detail=str(e))
1239
1849
 
1240
1850
  @app.post(
@@ -1258,38 +1868,92 @@ def _register_routes(app: FastAPI):
1258
1868
  metrics = get_metrics_collector()
1259
1869
 
1260
1870
  try:
1871
+ # Handle deprecated context field by concatenating with query
1872
+ query = request.query
1873
+ if request.context:
1874
+ query = f"{request.query}\n\nAdditional context: {request.context}"
1875
+
1261
1876
  # Use the memory system's reflect_async method (record metrics)
1262
- with metrics.record_operation("reflect", bank_id=bank_id, budget=request.budget.value):
1877
+ with metrics.record_operation("reflect", bank_id=bank_id, source="api", budget=request.budget.value):
1263
1878
  core_result = await app.state.memory.reflect_async(
1264
1879
  bank_id=bank_id,
1265
- query=request.query,
1880
+ query=query,
1266
1881
  budget=request.budget,
1267
- context=request.context,
1882
+ context=None, # Deprecated, now concatenated with query
1268
1883
  max_tokens=request.max_tokens,
1269
1884
  response_schema=request.response_schema,
1270
1885
  request_context=request_context,
1886
+ tags=request.tags,
1887
+ tags_match=request.tags_match,
1271
1888
  )
1272
1889
 
1273
- # Convert core MemoryFact objects to API ReflectFact objects if facts are requested
1274
- based_on_facts = []
1890
+ # Build based_on (memories + mental_models + directives) if facts are requested
1891
+ based_on_result: ReflectBasedOn | None = None
1275
1892
  if request.include.facts is not None:
1893
+ memories = []
1894
+ mental_models = []
1895
+ directives = []
1276
1896
  for fact_type, facts in core_result.based_on.items():
1277
- for fact in facts:
1278
- based_on_facts.append(
1279
- ReflectFact(
1280
- id=fact.id,
1281
- text=fact.text,
1282
- type=fact.fact_type,
1283
- context=fact.context,
1284
- occurred_start=fact.occurred_start,
1285
- occurred_end=fact.occurred_end,
1897
+ if fact_type == "directives":
1898
+ # Directives have different structure (id, name, content)
1899
+ for directive in facts:
1900
+ directives.append(
1901
+ ReflectDirective(
1902
+ id=directive.id,
1903
+ name=directive.name,
1904
+ content=directive.content,
1905
+ )
1906
+ )
1907
+ elif fact_type == "mental_models":
1908
+ # Mental models are MemoryFact with type "mental_models"
1909
+ for fact in facts:
1910
+ mental_models.append(
1911
+ ReflectMentalModel(
1912
+ id=fact.id,
1913
+ text=fact.text,
1914
+ context=fact.context,
1915
+ )
1286
1916
  )
1287
- )
1917
+ else:
1918
+ for fact in facts:
1919
+ memories.append(
1920
+ ReflectFact(
1921
+ id=fact.id,
1922
+ text=fact.text,
1923
+ type=fact.fact_type,
1924
+ context=fact.context,
1925
+ occurred_start=fact.occurred_start,
1926
+ occurred_end=fact.occurred_end,
1927
+ )
1928
+ )
1929
+ based_on_result = ReflectBasedOn(memories=memories, mental_models=mental_models, directives=directives)
1930
+
1931
+ # Build trace (tool_calls + llm_calls + observations) if tool_calls is requested
1932
+ trace_result: ReflectTrace | None = None
1933
+ if request.include.tool_calls is not None:
1934
+ include_output = request.include.tool_calls.output
1935
+ tool_calls = [
1936
+ ReflectToolCall(
1937
+ tool=tc.tool,
1938
+ input=tc.input,
1939
+ output=tc.output if include_output else None,
1940
+ duration_ms=tc.duration_ms,
1941
+ iteration=tc.iteration,
1942
+ )
1943
+ for tc in core_result.tool_trace
1944
+ ]
1945
+ llm_calls = [ReflectLLMCall(scope=lc.scope, duration_ms=lc.duration_ms) for lc in core_result.llm_trace]
1946
+ trace_result = ReflectTrace(
1947
+ tool_calls=tool_calls,
1948
+ llm_calls=llm_calls,
1949
+ )
1288
1950
 
1289
1951
  return ReflectResponse(
1290
1952
  text=core_result.text,
1291
- based_on=based_on_facts,
1953
+ based_on=based_on_result,
1292
1954
  structured_output=core_result.structured_output,
1955
+ usage=core_result.usage,
1956
+ trace=trace_result,
1293
1957
  )
1294
1958
 
1295
1959
  except OperationValidationError as e:
@@ -1333,9 +1997,14 @@ def _register_routes(app: FastAPI):
1333
1997
  operation_id="get_agent_stats",
1334
1998
  tags=["Banks"],
1335
1999
  )
1336
- async def api_stats(bank_id: str):
2000
+ async def api_stats(
2001
+ bank_id: str,
2002
+ request_context: RequestContext = Depends(get_request_context),
2003
+ ):
1337
2004
  """Get statistics about memory nodes and links for a memory bank."""
1338
2005
  try:
2006
+ # Authenticate and set tenant schema
2007
+ await app.state.memory._authenticate_tenant(request_context)
1339
2008
  pool = await app.state.memory._get_pool()
1340
2009
  async with acquire_with_retry(pool) as conn:
1341
2010
  # Get node counts by fact_type
@@ -1410,6 +2079,31 @@ def _register_routes(app: FastAPI):
1410
2079
  )
1411
2080
  total_documents = doc_count_result["count"] if doc_count_result else 0
1412
2081
 
2082
+ # Get consolidation stats from memory-level tracking
2083
+ consolidation_stats = await conn.fetchrow(
2084
+ f"""
2085
+ SELECT
2086
+ MAX(consolidated_at) as last_consolidated_at,
2087
+ COUNT(*) FILTER (WHERE consolidated_at IS NULL AND fact_type IN ('experience', 'world')) as pending
2088
+ FROM {fq_table("memory_units")}
2089
+ WHERE bank_id = $1
2090
+ """,
2091
+ bank_id,
2092
+ )
2093
+ last_consolidated_at = consolidation_stats["last_consolidated_at"] if consolidation_stats else None
2094
+ pending_consolidation = consolidation_stats["pending"] if consolidation_stats else 0
2095
+
2096
+ # Count total observations (consolidated knowledge)
2097
+ observation_count_result = await conn.fetchrow(
2098
+ f"""
2099
+ SELECT COUNT(*) as count
2100
+ FROM {fq_table("memory_units")}
2101
+ WHERE bank_id = $1 AND fact_type = 'observation'
2102
+ """,
2103
+ bank_id,
2104
+ )
2105
+ total_observations = observation_count_result["count"] if observation_count_result else 0
2106
+
1413
2107
  # Format results
1414
2108
  nodes_by_type = {row["fact_type"]: row["count"] for row in node_stats}
1415
2109
  links_by_type = {row["link_type"]: row["count"] for row in link_stats}
@@ -1439,6 +2133,9 @@ def _register_routes(app: FastAPI):
1439
2133
  links_breakdown=links_breakdown,
1440
2134
  pending_operations=pending_operations,
1441
2135
  failed_operations=failed_operations,
2136
+ last_consolidated_at=(last_consolidated_at.isoformat() if last_consolidated_at else None),
2137
+ pending_consolidation=pending_consolidation,
2138
+ total_observations=total_observations,
1442
2139
  )
1443
2140
 
1444
2141
  except (AuthenticationError, HTTPException):
@@ -1454,19 +2151,27 @@ def _register_routes(app: FastAPI):
1454
2151
  "/v1/default/banks/{bank_id}/entities",
1455
2152
  response_model=EntityListResponse,
1456
2153
  summary="List entities",
1457
- description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count.",
2154
+ description="List all entities (people, organizations, etc.) known by the bank, ordered by mention count. Supports pagination.",
1458
2155
  operation_id="list_entities",
1459
2156
  tags=["Entities"],
1460
2157
  )
1461
2158
  async def api_list_entities(
1462
2159
  bank_id: str,
1463
2160
  limit: int = Query(default=100, description="Maximum number of entities to return"),
2161
+ offset: int = Query(default=0, description="Offset for pagination"),
1464
2162
  request_context: RequestContext = Depends(get_request_context),
1465
2163
  ):
1466
- """List entities for a memory bank."""
2164
+ """List entities for a memory bank with pagination."""
1467
2165
  try:
1468
- entities = await app.state.memory.list_entities(bank_id, limit=limit, request_context=request_context)
1469
- return EntityListResponse(items=[EntityListItem(**e) for e in entities])
2166
+ data = await app.state.memory.list_entities(
2167
+ bank_id, limit=limit, offset=offset, request_context=request_context
2168
+ )
2169
+ return EntityListResponse(
2170
+ items=[EntityListItem(**e) for e in data["items"]],
2171
+ total=data["total"],
2172
+ limit=data["limit"],
2173
+ offset=data["offset"],
2174
+ )
1470
2175
  except (AuthenticationError, HTTPException):
1471
2176
  raise
1472
2177
  except Exception as e:
@@ -1518,54 +2223,422 @@ def _register_routes(app: FastAPI):
1518
2223
  @app.post(
1519
2224
  "/v1/default/banks/{bank_id}/entities/{entity_id}/regenerate",
1520
2225
  response_model=EntityDetailResponse,
1521
- summary="Regenerate entity observations",
1522
- description="Regenerate observations for an entity based on all facts mentioning it.",
2226
+ summary="Regenerate entity observations (deprecated)",
2227
+ description="This endpoint is deprecated. Entity observations have been replaced by mental models.",
1523
2228
  operation_id="regenerate_entity_observations",
1524
2229
  tags=["Entities"],
2230
+ deprecated=True,
1525
2231
  )
1526
2232
  async def api_regenerate_entity_observations(
1527
2233
  bank_id: str,
1528
2234
  entity_id: str,
1529
2235
  request_context: RequestContext = Depends(get_request_context),
1530
2236
  ):
1531
- """Regenerate observations for an entity."""
2237
+ """Regenerate observations for an entity. DEPRECATED."""
2238
+ raise HTTPException(
2239
+ status_code=410,
2240
+ detail="This endpoint is deprecated. Entity observations are no longer supported.",
2241
+ )
2242
+
2243
+ # =========================================================================
2244
+ # =========================================================================
2245
+ # MENTAL MODELS ENDPOINTS (stored reflect responses)
2246
+ # =========================================================================
2247
+
2248
+ @app.get(
2249
+ "/v1/default/banks/{bank_id}/mental-models",
2250
+ response_model=MentalModelListResponse,
2251
+ summary="List mental models",
2252
+ description="List user-curated living documents that stay current.",
2253
+ operation_id="list_mental_models",
2254
+ tags=["Mental Models"],
2255
+ )
2256
+ async def api_list_mental_models(
2257
+ bank_id: str,
2258
+ tags_filter: list[str] | None = Query(None, alias="tags", description="Filter by tags"),
2259
+ tags_match: Literal["any", "all", "exact"] = Query("any", description="How to match tags"),
2260
+ limit: int = Query(100, ge=1, le=1000),
2261
+ offset: int = Query(0, ge=0),
2262
+ request_context: RequestContext = Depends(get_request_context),
2263
+ ):
2264
+ """List mental models for a bank."""
1532
2265
  try:
1533
- # Get the entity to verify it exists and get canonical_name
1534
- entity = await app.state.memory.get_entity(bank_id, entity_id, request_context=request_context)
2266
+ mental_models = await app.state.memory.list_mental_models(
2267
+ bank_id=bank_id,
2268
+ tags=tags_filter,
2269
+ tags_match=tags_match,
2270
+ limit=limit,
2271
+ offset=offset,
2272
+ request_context=request_context,
2273
+ )
2274
+ return MentalModelListResponse(items=[MentalModelResponse(**m) for m in mental_models])
2275
+ except (AuthenticationError, HTTPException):
2276
+ raise
2277
+ except Exception as e:
2278
+ import traceback
1535
2279
 
1536
- if entity is None:
1537
- raise HTTPException(status_code=404, detail=f"Entity {entity_id} not found")
2280
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
2281
+ logger.error(f"Error in GET /v1/default/banks/{bank_id}/mental-models: {error_detail}")
2282
+ raise HTTPException(status_code=500, detail=str(e))
1538
2283
 
1539
- # Regenerate observations
1540
- await app.state.memory.regenerate_entity_observations(
2284
+ @app.get(
2285
+ "/v1/default/banks/{bank_id}/mental-models/{mental_model_id}",
2286
+ response_model=MentalModelResponse,
2287
+ summary="Get mental model",
2288
+ description="Get a specific mental model by ID.",
2289
+ operation_id="get_mental_model",
2290
+ tags=["Mental Models"],
2291
+ )
2292
+ async def api_get_mental_model(
2293
+ bank_id: str,
2294
+ mental_model_id: str,
2295
+ request_context: RequestContext = Depends(get_request_context),
2296
+ ):
2297
+ """Get a mental model by ID."""
2298
+ try:
2299
+ mental_model = await app.state.memory.get_mental_model(
1541
2300
  bank_id=bank_id,
1542
- entity_id=entity_id,
1543
- entity_name=entity["canonical_name"],
2301
+ mental_model_id=mental_model_id,
1544
2302
  request_context=request_context,
1545
2303
  )
2304
+ if mental_model is None:
2305
+ raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
2306
+ return MentalModelResponse(**mental_model)
2307
+ except (AuthenticationError, HTTPException):
2308
+ raise
2309
+ except Exception as e:
2310
+ import traceback
1546
2311
 
1547
- # Get updated entity with new observations
1548
- entity = await app.state.memory.get_entity(bank_id, entity_id, request_context=request_context)
2312
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
2313
+ logger.error(f"Error in GET /v1/default/banks/{bank_id}/mental-models/{mental_model_id}: {error_detail}")
2314
+ raise HTTPException(status_code=500, detail=str(e))
1549
2315
 
1550
- return EntityDetailResponse(
1551
- id=entity["id"],
1552
- canonical_name=entity["canonical_name"],
1553
- mention_count=entity["mention_count"],
1554
- first_seen=entity["first_seen"],
1555
- last_seen=entity["last_seen"],
1556
- metadata=_parse_metadata(entity["metadata"]),
1557
- observations=[
1558
- EntityObservationResponse(text=obs.text, mentioned_at=obs.mentioned_at)
1559
- for obs in entity["observations"]
1560
- ],
2316
+ @app.post(
2317
+ "/v1/default/banks/{bank_id}/mental-models",
2318
+ response_model=CreateMentalModelResponse,
2319
+ summary="Create mental model",
2320
+ description="Create a mental model by running reflect with the source query in the background. "
2321
+ "Returns an operation ID to track progress. The content is auto-generated by the reflect endpoint. "
2322
+ "Use the operations endpoint to check completion status.",
2323
+ operation_id="create_mental_model",
2324
+ tags=["Mental Models"],
2325
+ )
2326
+ async def api_create_mental_model(
2327
+ bank_id: str,
2328
+ body: CreateMentalModelRequest,
2329
+ request_context: RequestContext = Depends(get_request_context),
2330
+ ):
2331
+ """Create a mental model (async - returns operation_id)."""
2332
+ try:
2333
+ # 1. Create the mental model with placeholder content
2334
+ mental_model = await app.state.memory.create_mental_model(
2335
+ bank_id=bank_id,
2336
+ name=body.name,
2337
+ source_query=body.source_query,
2338
+ content="Generating content...",
2339
+ tags=body.tags if body.tags else None,
2340
+ max_tokens=body.max_tokens,
2341
+ trigger=body.trigger.model_dump() if body.trigger else None,
2342
+ request_context=request_context,
2343
+ )
2344
+ # 2. Schedule a refresh to generate the actual content
2345
+ result = await app.state.memory.submit_async_refresh_mental_model(
2346
+ bank_id=bank_id,
2347
+ mental_model_id=mental_model["id"],
2348
+ request_context=request_context,
2349
+ )
2350
+ return CreateMentalModelResponse(operation_id=result["operation_id"])
2351
+ except ValueError as e:
2352
+ raise HTTPException(status_code=400, detail=str(e))
2353
+ except (AuthenticationError, HTTPException):
2354
+ raise
2355
+ except Exception as e:
2356
+ import traceback
2357
+
2358
+ error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
2359
+ logger.error(f"Error in POST /v1/default/banks/{bank_id}/mental-models: {error_detail}")
2360
+ raise HTTPException(status_code=500, detail=str(e))
2361
+
2362
@app.post(
    "/v1/default/banks/{bank_id}/mental-models/{mental_model_id}/refresh",
    response_model=AsyncOperationSubmitResponse,
    summary="Refresh mental model",
    description="Submit an async task to re-run the source query through reflect and update the content.",
    operation_id="refresh_mental_model",
    tags=["Mental Models"],
)
async def api_refresh_mental_model(
    bank_id: str,
    mental_model_id: str,
    request_context: RequestContext = Depends(get_request_context),
):
    """Queue an asynchronous refresh of a mental model.

    Returns an ``AsyncOperationSubmitResponse`` holding the submitted
    operation's ID with status ``"queued"``.

    Raises:
        HTTPException: 404 when the memory engine raises ``ValueError``;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        submission = await app.state.memory.submit_async_refresh_mental_model(
            bank_id=bank_id,
            mental_model_id=mental_model_id,
            request_context=request_context,
        )
        queued_id = submission["operation_id"]
        return AsyncOperationSubmitResponse(operation_id=queued_id, status="queued")
    except ValueError as exc:
        # The engine signals a rejected refresh request with ValueError.
        raise HTTPException(status_code=404, detail=str(exc))
    except (AuthenticationError, HTTPException):
        # Already meaningful to the framework; let them propagate untouched.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(
            f"Error in POST /v1/default/banks/{bank_id}/mental-models/{mental_model_id}/refresh: {details}"
        )
        raise HTTPException(status_code=500, detail=str(exc))
2395
+
2396
@app.patch(
    "/v1/default/banks/{bank_id}/mental-models/{mental_model_id}",
    response_model=MentalModelResponse,
    summary="Update mental model",
    description="Update a mental model's name, source query, max tokens, tags, and/or trigger.",
    operation_id="update_mental_model",
    tags=["Mental Models"],
)
async def api_update_mental_model(
    bank_id: str,
    mental_model_id: str,
    body: UpdateMentalModelRequest,
    request_context: RequestContext = Depends(get_request_context),
):
    """Update a mental model.

    Forwards ``name``, ``source_query``, ``max_tokens``, ``tags`` and
    ``trigger`` from the request body to the memory engine and returns the
    updated model.

    Raises:
        HTTPException: 404 when the engine returns ``None`` for the given ID;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        mental_model = await app.state.memory.update_mental_model(
            bank_id=bank_id,
            mental_model_id=mental_model_id,
            name=body.name,
            source_query=body.source_query,
            max_tokens=body.max_tokens,
            tags=body.tags,
            # The engine receives a plain dict (or None), not the request model.
            trigger=body.trigger.model_dump() if body.trigger else None,
            request_context=request_context,
        )
        if mental_model is None:
            raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
        return MentalModelResponse(**mental_model)
    except (AuthenticationError, HTTPException):
        # Includes the 404 raised above; propagate unchanged.
        raise
    except Exception as e:
        import traceback

        error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in PATCH /v1/default/banks/{bank_id}/mental-models/{mental_model_id}: {error_detail}")
        raise HTTPException(status_code=500, detail=str(e))
2433
+
2434
@app.delete(
    "/v1/default/banks/{bank_id}/mental-models/{mental_model_id}",
    summary="Delete mental model",
    description="Delete a mental model.",
    operation_id="delete_mental_model",
    tags=["Mental Models"],
)
async def api_delete_mental_model(
    bank_id: str,
    mental_model_id: str,
    request_context: RequestContext = Depends(get_request_context),
):
    """Remove a mental model from a bank.

    Returns ``{"status": "deleted"}`` on success.

    Raises:
        HTTPException: 404 when the engine reports nothing was deleted;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        was_removed = await app.state.memory.delete_mental_model(
            bank_id=bank_id,
            mental_model_id=mental_model_id,
            request_context=request_context,
        )
        if was_removed:
            return {"status": "deleted"}
        raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
    except (AuthenticationError, HTTPException):
        # Includes the 404 raised above; propagate unchanged.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in DELETE /v1/default/banks/{bank_id}/mental-models/{mental_model_id}: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2464
+
2465
+ # =========================================================================
2466
+ # DIRECTIVES ENDPOINTS
2467
+ # =========================================================================
2468
+
2469
@app.get(
    "/v1/default/banks/{bank_id}/directives",
    response_model=DirectiveListResponse,
    summary="List directives",
    description="List hard rules that are injected into prompts.",
    operation_id="list_directives",
    tags=["Directives"],
)
async def api_list_directives(
    bank_id: str,
    tags_filter: list[str] | None = Query(None, alias="tags", description="Filter by tags"),
    tags_match: Literal["any", "all", "exact"] = Query("any", description="How to match tags"),
    active_only: bool = Query(True, description="Only return active directives"),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    request_context: RequestContext = Depends(get_request_context),
):
    """Return the directives of a bank as a ``DirectiveListResponse``.

    The tag filter (query parameter ``tags``), tag matching mode, active-only
    flag and pagination values are passed straight to the memory engine.

    Raises:
        HTTPException: 500 for any unexpected failure (logged with traceback).
    """
    try:
        rows = await app.state.memory.list_directives(
            bank_id=bank_id,
            tags=tags_filter,
            tags_match=tags_match,
            active_only=active_only,
            limit=limit,
            offset=offset,
            request_context=request_context,
        )
        items = []
        for row in rows:
            items.append(DirectiveResponse(**row))
        return DirectiveListResponse(items=items)
    except (AuthenticationError, HTTPException):
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in GET /v1/default/banks/{bank_id}/directives: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2506
+
2507
@app.get(
    "/v1/default/banks/{bank_id}/directives/{directive_id}",
    response_model=DirectiveResponse,
    summary="Get directive",
    description="Get a specific directive by ID.",
    operation_id="get_directive",
    tags=["Directives"],
)
async def api_get_directive(
    bank_id: str,
    directive_id: str,
    request_context: RequestContext = Depends(get_request_context),
):
    """Fetch a single directive by its ID.

    Raises:
        HTTPException: 404 when the engine returns ``None`` for the ID;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        found = await app.state.memory.get_directive(
            bank_id=bank_id,
            directive_id=directive_id,
            request_context=request_context,
        )
        if found is not None:
            return DirectiveResponse(**found)
        raise HTTPException(status_code=404, detail=f"Directive '{directive_id}' not found")
    except (AuthenticationError, HTTPException):
        # Includes the 404 raised above; propagate unchanged.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in GET /v1/default/banks/{bank_id}/directives/{directive_id}: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2538
+
2539
@app.post(
    "/v1/default/banks/{bank_id}/directives",
    response_model=DirectiveResponse,
    summary="Create directive",
    description="Create a hard rule that will be injected into prompts.",
    operation_id="create_directive",
    tags=["Directives"],
)
async def api_create_directive(
    bank_id: str,
    body: CreateDirectiveRequest,
    request_context: RequestContext = Depends(get_request_context),
):
    """Create a new directive in a bank and return it.

    Raises:
        HTTPException: 400 when the memory engine raises ``ValueError``;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        created = await app.state.memory.create_directive(
            bank_id=bank_id,
            name=body.name,
            content=body.content,
            priority=body.priority,
            is_active=body.is_active,
            tags=body.tags,
            request_context=request_context,
        )
        response = DirectiveResponse(**created)
        return response
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except (AuthenticationError, HTTPException):
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in POST /v1/default/banks/{bank_id}/directives: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2574
+
2575
@app.patch(
    "/v1/default/banks/{bank_id}/directives/{directive_id}",
    response_model=DirectiveResponse,
    summary="Update directive",
    description="Update a directive's properties.",
    operation_id="update_directive",
    tags=["Directives"],
)
async def api_update_directive(
    bank_id: str,
    directive_id: str,
    body: UpdateDirectiveRequest,
    request_context: RequestContext = Depends(get_request_context),
):
    """Apply the request body's fields to an existing directive.

    Forwards ``name``, ``content``, ``priority``, ``is_active`` and ``tags``
    to the memory engine and returns the updated directive.

    Raises:
        HTTPException: 404 when the engine returns ``None`` for the ID;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        updated = await app.state.memory.update_directive(
            bank_id=bank_id,
            directive_id=directive_id,
            name=body.name,
            content=body.content,
            priority=body.priority,
            is_active=body.is_active,
            tags=body.tags,
            request_context=request_context,
        )
        if updated is not None:
            return DirectiveResponse(**updated)
        raise HTTPException(status_code=404, detail=f"Directive '{directive_id}' not found")
    except (AuthenticationError, HTTPException):
        # Includes the 404 raised above; propagate unchanged.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in PATCH /v1/default/banks/{bank_id}/directives/{directive_id}: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2612
+
2613
@app.delete(
    "/v1/default/banks/{bank_id}/directives/{directive_id}",
    summary="Delete directive",
    description="Delete a directive.",
    operation_id="delete_directive",
    tags=["Directives"],
)
async def api_delete_directive(
    bank_id: str,
    directive_id: str,
    request_context: RequestContext = Depends(get_request_context),
):
    """Remove a directive from a bank.

    Returns ``{"status": "deleted"}`` on success.

    Raises:
        HTTPException: 404 when the engine reports nothing was deleted;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        was_removed = await app.state.memory.delete_directive(
            bank_id=bank_id,
            directive_id=directive_id,
            request_context=request_context,
        )
        if was_removed:
            return {"status": "deleted"}
        raise HTTPException(status_code=404, detail=f"Directive '{directive_id}' not found")
    except (AuthenticationError, HTTPException):
        # Includes the 404 raised above; propagate unchanged.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in DELETE /v1/default/banks/{bank_id}/directives/{directive_id}: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
1570
2643
 
1571
2644
  @app.get(
@@ -1638,6 +2711,59 @@ def _register_routes(app: FastAPI):
1638
2711
  logger.error(f"Error in /v1/default/banks/{bank_id}/documents/{document_id}: {error_detail}")
1639
2712
  raise HTTPException(status_code=500, detail=str(e))
1640
2713
 
2714
@app.get(
    "/v1/default/banks/{bank_id}/tags",
    response_model=ListTagsResponse,
    summary="List tags",
    description="List all unique tags in a memory bank with usage counts. "
    "Supports wildcard search using '*' (e.g., 'user:*', '*-fred', 'tag*-2'). Case-insensitive.",
    operation_id="list_tags",
    tags=["Memory"],
)
async def api_list_tags(
    bank_id: str,
    q: str | None = Query(
        default=None,
        description="Wildcard pattern to filter tags (e.g., 'user:*' for user:alice, '*-admin' for role-admin). "
        "Use '*' as wildcard. Case-insensitive.",
    ),
    # Bounds mirror the other paginated list endpoints in this module so that
    # out-of-range values are rejected by request validation instead of being
    # forwarded to the memory engine.
    limit: int = Query(default=100, ge=1, description="Maximum number of tags to return"),
    offset: int = Query(default=0, ge=0, description="Offset for pagination"),
    request_context: RequestContext = Depends(get_request_context),
):
    """
    List all unique tags in a memory bank.

    Use this endpoint to discover available tags or expand wildcard patterns.
    Supports '*' wildcards for flexible matching (case-insensitive):
    - 'user:*' matches user:alice, user:bob
    - '*-admin' matches role-admin, super-admin
    - 'env*-prod' matches env-prod, environment-prod

    Args:
        bank_id: Memory Bank ID (from path)
        q: Wildcard pattern to filter tags (use '*' as wildcard)
        limit: Maximum number of tags to return (default: 100, minimum: 1)
        offset: Offset for pagination (default: 0, minimum: 0)

    Raises:
        HTTPException: 500 for any unexpected failure (logged with traceback).
    """
    try:
        data = await app.state.memory.list_tags(
            bank_id=bank_id,
            pattern=q,
            limit=limit,
            offset=offset,
            request_context=request_context,
        )
        # Returned as-is; FastAPI validates it against ListTagsResponse.
        return data
    except (AuthenticationError, HTTPException):
        raise
    except Exception as e:
        import traceback

        error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in /v1/default/banks/{bank_id}/tags: {error_detail}")
        raise HTTPException(status_code=500, detail=str(e))
2766
+
1641
2767
  @app.get(
1642
2768
  "/v1/default/chunks/{chunk_id:path}",
1643
2769
  response_model=ChunkResponse,
@@ -1715,17 +2841,28 @@ def _register_routes(app: FastAPI):
1715
2841
  "/v1/default/banks/{bank_id}/operations",
1716
2842
  response_model=OperationsListResponse,
1717
2843
  summary="List async operations",
1718
- description="Get a list of all async operations (pending and failed) for a specific agent, including error messages for failed operations",
2844
+ description="Get a list of async operations for a specific agent, with optional filtering by status. Results are sorted by most recent first.",
1719
2845
  operation_id="list_operations",
1720
2846
  tags=["Operations"],
1721
2847
  )
1722
- async def api_list_operations(bank_id: str, request_context: RequestContext = Depends(get_request_context)):
1723
- """List all async operations (pending and failed) for a memory bank."""
2848
+ async def api_list_operations(
2849
+ bank_id: str,
2850
+ status: str | None = Query(default=None, description="Filter by status: pending, completed, or failed"),
2851
+ limit: int = Query(default=20, ge=1, le=100, description="Maximum number of operations to return"),
2852
+ offset: int = Query(default=0, ge=0, description="Number of operations to skip"),
2853
+ request_context: RequestContext = Depends(get_request_context),
2854
+ ):
2855
+ """List async operations for a memory bank with optional filtering and pagination."""
1724
2856
  try:
1725
- operations = await app.state.memory.list_operations(bank_id, request_context=request_context)
2857
+ result = await app.state.memory.list_operations(
2858
+ bank_id, status=status, limit=limit, offset=offset, request_context=request_context
2859
+ )
1726
2860
  return OperationsListResponse(
1727
2861
  bank_id=bank_id,
1728
- operations=[OperationResponse(**op) for op in operations],
2862
+ total=result["total"],
2863
+ limit=limit,
2864
+ offset=offset,
2865
+ operations=[OperationResponse(**op) for op in result["operations"]],
1729
2866
  )
1730
2867
  except (AuthenticationError, HTTPException):
1731
2868
  raise
@@ -1736,6 +2873,37 @@ def _register_routes(app: FastAPI):
1736
2873
  logger.error(f"Error in /v1/default/banks/{bank_id}/operations: {error_detail}")
1737
2874
  raise HTTPException(status_code=500, detail=str(e))
1738
2875
 
2876
@app.get(
    "/v1/default/banks/{bank_id}/operations/{operation_id}",
    response_model=OperationStatusResponse,
    summary="Get operation status",
    description="Get the status of a specific async operation. Returns 'pending', 'completed', or 'failed'. "
    "Completed operations are removed from storage, so 'completed' means the operation finished successfully.",
    operation_id="get_operation_status",
    tags=["Operations"],
)
async def api_get_operation_status(
    bank_id: str, operation_id: str, request_context: RequestContext = Depends(get_request_context)
):
    """Look up the current status of one async operation.

    Raises:
        HTTPException: 400 when ``operation_id`` is not a valid UUID string;
            500 for any other unexpected failure (logged with traceback).
    """
    try:
        # Reject malformed IDs up front, before calling the memory engine.
        try:
            uuid.UUID(operation_id)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid operation_id format: {operation_id}")

        status_payload = await app.state.memory.get_operation_status(
            bank_id,
            operation_id,
            request_context=request_context,
        )
        return OperationStatusResponse(**status_payload)
    except (AuthenticationError, HTTPException):
        # Includes the 400 raised above; propagate unchanged.
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in GET /v1/default/banks/{bank_id}/operations/{operation_id}: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
2906
+
1739
2907
  @app.delete(
1740
2908
  "/v1/default/banks/{bank_id}/operations/{operation_id}",
1741
2909
  response_model=CancelOperationResponse,
@@ -1772,12 +2940,12 @@ def _register_routes(app: FastAPI):
1772
2940
  "/v1/default/banks/{bank_id}/profile",
1773
2941
  response_model=BankProfileResponse,
1774
2942
  summary="Get memory bank profile",
1775
- description="Get disposition traits and background for a memory bank. Auto-creates agent with defaults if not exists.",
2943
+ description="Get disposition traits and mission for a memory bank. Auto-creates agent with defaults if not exists.",
1776
2944
  operation_id="get_bank_profile",
1777
2945
  tags=["Banks"],
1778
2946
  )
1779
2947
  async def api_get_bank_profile(bank_id: str, request_context: RequestContext = Depends(get_request_context)):
1780
- """Get memory bank profile (disposition + background)."""
2948
+ """Get memory bank profile (disposition + mission)."""
1781
2949
  try:
1782
2950
  profile = await app.state.memory.get_bank_profile(bank_id, request_context=request_context)
1783
2951
  # Convert DispositionTraits object to dict for Pydantic
@@ -1786,11 +2954,13 @@ def _register_routes(app: FastAPI):
1786
2954
  if hasattr(profile["disposition"], "model_dump")
1787
2955
  else dict(profile["disposition"])
1788
2956
  )
2957
+ mission = profile.get("mission") or ""
1789
2958
  return BankProfileResponse(
1790
2959
  bank_id=bank_id,
1791
2960
  name=profile["name"],
1792
2961
  disposition=DispositionTraits(**disposition_dict),
1793
- background=profile["background"],
2962
+ mission=mission,
2963
+ background=mission, # Backwards compat
1794
2964
  )
1795
2965
  except (AuthenticationError, HTTPException):
1796
2966
  raise
@@ -1826,11 +2996,13 @@ def _register_routes(app: FastAPI):
1826
2996
  if hasattr(profile["disposition"], "model_dump")
1827
2997
  else dict(profile["disposition"])
1828
2998
  )
2999
+ mission = profile.get("mission") or ""
1829
3000
  return BankProfileResponse(
1830
3001
  bank_id=bank_id,
1831
3002
  name=profile["name"],
1832
3003
  disposition=DispositionTraits(**disposition_dict),
1833
- background=profile["background"],
3004
+ mission=mission,
3005
+ background=mission, # Backwards compat
1834
3006
  )
1835
3007
  except (AuthenticationError, HTTPException):
1836
3008
  raise
@@ -1844,25 +3016,22 @@ def _register_routes(app: FastAPI):
1844
3016
  @app.post(
1845
3017
  "/v1/default/banks/{bank_id}/background",
1846
3018
  response_model=BackgroundResponse,
1847
- summary="Add/merge memory bank background",
1848
- description="Add new background information or merge with existing. LLM intelligently resolves conflicts, normalizes to first person, and optionally infers disposition traits.",
3019
+ summary="Add/merge memory bank background (deprecated)",
3020
+ description="Deprecated: Use PUT /mission instead. This endpoint now updates the mission field.",
1849
3021
  operation_id="add_bank_background",
1850
3022
  tags=["Banks"],
3023
+ deprecated=True,
1851
3024
  )
1852
3025
  async def api_add_bank_background(
1853
3026
  bank_id: str, request: AddBackgroundRequest, request_context: RequestContext = Depends(get_request_context)
1854
3027
  ):
1855
- """Add or merge bank background information. Optionally infer disposition traits."""
3028
+ """Deprecated: Add or merge bank background. Now updates mission field."""
1856
3029
  try:
1857
- result = await app.state.memory.merge_bank_background(
1858
- bank_id, request.content, update_disposition=request.update_disposition, request_context=request_context
3030
+ result = await app.state.memory.merge_bank_mission(
3031
+ bank_id, request.content, request_context=request_context
1859
3032
  )
1860
-
1861
- response = BackgroundResponse(background=result["background"])
1862
- if "disposition" in result:
1863
- response.disposition = DispositionTraits(**result["disposition"])
1864
-
1865
- return response
3033
+ mission = result.get("mission") or ""
3034
+ return BackgroundResponse(mission=mission, background=mission)
1866
3035
  except (AuthenticationError, HTTPException):
1867
3036
  raise
1868
3037
  except Exception as e:
@@ -1876,24 +3045,25 @@ def _register_routes(app: FastAPI):
1876
3045
  "/v1/default/banks/{bank_id}",
1877
3046
  response_model=BankProfileResponse,
1878
3047
  summary="Create or update memory bank",
1879
- description="Create a new agent or update existing agent with disposition and background. Auto-fills missing fields with defaults.",
3048
+ description="Create a new agent or update existing agent with disposition and mission. Auto-fills missing fields with defaults.",
1880
3049
  operation_id="create_or_update_bank",
1881
3050
  tags=["Banks"],
1882
3051
  )
1883
3052
  async def api_create_or_update_bank(
1884
3053
  bank_id: str, request: CreateBankRequest, request_context: RequestContext = Depends(get_request_context)
1885
3054
  ):
1886
- """Create or update an agent with disposition and background."""
3055
+ """Create or update an agent with disposition and mission."""
1887
3056
  try:
1888
3057
  # Ensure bank exists by getting profile (auto-creates with defaults)
1889
3058
  await app.state.memory.get_bank_profile(bank_id, request_context=request_context)
1890
3059
 
1891
- # Update name and/or background if provided
1892
- if request.name is not None or request.background is not None:
3060
+ # Update name and/or mission if provided (support both mission and deprecated background)
3061
+ mission_value = request.mission or request.background
3062
+ if request.name is not None or mission_value is not None:
1893
3063
  await app.state.memory.update_bank(
1894
3064
  bank_id,
1895
3065
  name=request.name,
1896
- background=request.background,
3066
+ mission=mission_value,
1897
3067
  request_context=request_context,
1898
3068
  )
1899
3069
 
@@ -1910,11 +3080,13 @@ def _register_routes(app: FastAPI):
1910
3080
  if hasattr(final_profile["disposition"], "model_dump")
1911
3081
  else dict(final_profile["disposition"])
1912
3082
  )
3083
+ mission = final_profile.get("mission") or ""
1913
3084
  return BankProfileResponse(
1914
3085
  bank_id=bank_id,
1915
3086
  name=final_profile["name"],
1916
3087
  disposition=DispositionTraits(**disposition_dict),
1917
- background=final_profile["background"],
3088
+ mission=mission,
3089
+ background=mission, # Backwards compat
1918
3090
  )
1919
3091
  except (AuthenticationError, HTTPException):
1920
3092
  raise
@@ -1925,6 +3097,62 @@ def _register_routes(app: FastAPI):
1925
3097
  logger.error(f"Error in /v1/default/banks/{bank_id}: {error_detail}")
1926
3098
  raise HTTPException(status_code=500, detail=str(e))
1927
3099
 
3100
@app.patch(
    "/v1/default/banks/{bank_id}",
    response_model=BankProfileResponse,
    summary="Partial update memory bank",
    description="Partially update an agent's profile. Only provided fields will be updated.",
    operation_id="update_bank",
    tags=["Banks"],
)
async def api_update_bank(
    bank_id: str, request: CreateBankRequest, request_context: RequestContext = Depends(get_request_context)
):
    """Partially update an agent's profile (name, mission, disposition).

    Fields left as ``None`` in the request are not touched. ``mission`` takes
    precedence over the deprecated ``background`` field whenever it is
    supplied, including as an empty string.

    Returns:
        The bank profile re-read after the updates; ``background`` mirrors
        ``mission`` for backwards compatibility.

    Raises:
        HTTPException: 500 for any unexpected failure (logged with traceback).
    """
    try:
        # Ensure bank exists
        await app.state.memory.get_bank_profile(bank_id, request_context=request_context)

        # Update name and/or mission if provided. Select with ``is not None``
        # rather than truthiness so an explicitly supplied empty mission is
        # honoured instead of falling back to the deprecated background field.
        mission_value = request.mission if request.mission is not None else request.background
        if request.name is not None or mission_value is not None:
            await app.state.memory.update_bank(
                bank_id,
                name=request.name,
                mission=mission_value,
                request_context=request_context,
            )

        # Update disposition if provided
        if request.disposition is not None:
            await app.state.memory.update_bank_disposition(
                bank_id, request.disposition.model_dump(), request_context=request_context
            )

        # Get final profile
        final_profile = await app.state.memory.get_bank_profile(bank_id, request_context=request_context)
        # The stored disposition may be a model object or a plain mapping.
        disposition_dict = (
            final_profile["disposition"].model_dump()
            if hasattr(final_profile["disposition"], "model_dump")
            else dict(final_profile["disposition"])
        )
        mission = final_profile.get("mission") or ""
        return BankProfileResponse(
            bank_id=bank_id,
            name=final_profile["name"],
            disposition=DispositionTraits(**disposition_dict),
            mission=mission,
            background=mission,  # Backwards compat
        )
    except (AuthenticationError, HTTPException):
        raise
    except Exception as e:
        import traceback

        error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in PATCH /v1/default/banks/{bank_id}: {error_detail}")
        raise HTTPException(status_code=500, detail=str(e))
3155
+
1928
3156
  @app.delete(
1929
3157
  "/v1/default/banks/{bank_id}",
1930
3158
  response_model=DeleteResponse,
@@ -1954,6 +3182,57 @@ def _register_routes(app: FastAPI):
1954
3182
  logger.error(f"Error in DELETE /v1/default/banks/{bank_id}: {error_detail}")
1955
3183
  raise HTTPException(status_code=500, detail=str(e))
1956
3184
 
3185
@app.delete(
    "/v1/default/banks/{bank_id}/observations",
    response_model=DeleteResponse,
    summary="Clear all observations",
    description="Delete all observations for a memory bank. This is useful for resetting the consolidated knowledge.",
    operation_id="clear_observations",
    tags=["Banks"],
)
async def api_clear_observations(bank_id: str, request_context: RequestContext = Depends(get_request_context)):
    """Delete every observation stored for a bank.

    Returns a ``DeleteResponse`` whose count comes from the engine result's
    ``deleted_count`` key (0 when the key is absent).

    Raises:
        HTTPException: 500 for any unexpected failure (logged with traceback).
    """
    try:
        outcome = await app.state.memory.clear_observations(bank_id, request_context=request_context)
        removed = outcome.get("deleted_count", 0)
        summary_text = f"Cleared {removed} observations"
        return DeleteResponse(success=True, message=summary_text, deleted_count=removed)
    except (AuthenticationError, HTTPException):
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in DELETE /v1/default/banks/{bank_id}/observations: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
3210
+
3211
@app.post(
    "/v1/default/banks/{bank_id}/consolidate",
    response_model=ConsolidationResponse,
    summary="Trigger consolidation",
    description="Run memory consolidation to create/update observations from recent memories.",
    operation_id="trigger_consolidation",
    tags=["Banks"],
)
async def api_trigger_consolidation(bank_id: str, request_context: RequestContext = Depends(get_request_context)):
    """Submit an asynchronous consolidation run for a bank.

    Returns a ``ConsolidationResponse`` with the submitted operation's ID and
    the engine result's ``deduplicated`` flag (``False`` when absent).

    Raises:
        HTTPException: 500 for any unexpected failure (logged with traceback).
    """
    try:
        submission = await app.state.memory.submit_async_consolidation(
            bank_id=bank_id,
            request_context=request_context,
        )
        queued_id = submission["operation_id"]
        was_deduplicated = submission.get("deduplicated", False)
        return ConsolidationResponse(operation_id=queued_id, deduplicated=was_deduplicated)
    except (AuthenticationError, HTTPException):
        raise
    except Exception as exc:
        import traceback

        details = f"{exc}\n\nTraceback:\n{traceback.format_exc()}"
        logger.error(f"Error in POST /v1/default/banks/{bank_id}/consolidate: {details}")
        raise HTTPException(status_code=500, detail=str(exc))
3235
+
1957
3236
  @app.post(
1958
3237
  "/v1/default/banks/{bank_id}/memories",
1959
3238
  response_model=RetainResponse,
@@ -2000,28 +3279,37 @@ def _register_routes(app: FastAPI):
2000
3279
  content_dict["document_id"] = item.document_id
2001
3280
  if item.entities:
2002
3281
  content_dict["entities"] = [{"text": e.text, "type": e.type or "CONCEPT"} for e in item.entities]
3282
+ if item.tags:
3283
+ content_dict["tags"] = item.tags
2003
3284
  contents.append(content_dict)
2004
3285
 
2005
3286
  if request.async_:
2006
3287
  # Async processing: queue task and return immediately
2007
- result = await app.state.memory.submit_async_retain(bank_id, contents, request_context=request_context)
3288
+ result = await app.state.memory.submit_async_retain(
3289
+ bank_id, contents, document_tags=request.document_tags, request_context=request_context
3290
+ )
2008
3291
  return RetainResponse.model_validate(
2009
3292
  {
2010
3293
  "success": True,
2011
3294
  "bank_id": bank_id,
2012
3295
  "items_count": result["items_count"],
2013
3296
  "async": True,
3297
+ "operation_id": result["operation_id"],
2014
3298
  }
2015
3299
  )
2016
3300
  else:
2017
3301
  # Synchronous processing: wait for completion (record metrics)
2018
- with metrics.record_operation("retain", bank_id=bank_id):
2019
- result = await app.state.memory.retain_batch_async(
2020
- bank_id=bank_id, contents=contents, request_context=request_context
3302
+ with metrics.record_operation("retain", bank_id=bank_id, source="api"):
3303
+ result, usage = await app.state.memory.retain_batch_async(
3304
+ bank_id=bank_id,
3305
+ contents=contents,
3306
+ document_tags=request.document_tags,
3307
+ request_context=request_context,
3308
+ return_usage=True,
2021
3309
  )
2022
3310
 
2023
3311
  return RetainResponse.model_validate(
2024
- {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False}
3312
+ {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False, "usage": usage}
2025
3313
  )
2026
3314
  except OperationValidationError as e:
2027
3315
  raise HTTPException(status_code=e.status_code, detail=e.reason)