hindsight-api 0.2.1-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/engine/reflect/tools.py
@@ -0,0 +1,437 @@
+ """
+ Tool implementations for the reflect agent.
+
+ Implements hierarchical retrieval:
+ 1. search_mental_models - User-curated stored reflect responses (highest quality)
+ 2. search_observations - Consolidated knowledge with freshness
+ 3. recall - Raw facts as ground truth
+ """
+
+ import logging
+ import uuid
+ from datetime import datetime, timedelta, timezone
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     from asyncpg import Connection
+
+     from ...api.http import RequestContext
+     from ..memory_engine import MemoryEngine
+
+ logger = logging.getLogger(__name__)
+
+ # Observation is considered stale if not updated in this many days
+ STALE_THRESHOLD_DAYS = 7
+
+
+ async def tool_search_mental_models(
+     conn: "Connection",
+     bank_id: str,
+     query: str,
+     query_embedding: list[float],
+     max_results: int = 5,
+     tags: list[str] | None = None,
+     tags_match: str = "any",
+     exclude_ids: list[str] | None = None,
+ ) -> dict[str, Any]:
+     """
+     Search user-curated mental models by semantic similarity.
+
+     Mental models are high-quality, manually created summaries about specific topics.
+     They should be searched FIRST as they represent the most reliable synthesized knowledge.
+
+     Args:
+         conn: Database connection
+         bank_id: Bank identifier
+         query: Search query (for logging/tracing)
+         query_embedding: Pre-computed embedding for semantic search
+         max_results: Maximum number of mental models to return
+         tags: Optional tags to filter mental models
+         tags_match: How to match tags - "any" (OR), "all" (AND)
+         exclude_ids: Optional list of mental model IDs to exclude (e.g., when refreshing a mental model)
+
+     Returns:
+         Dict with matching mental models including content and freshness info
+     """
+     from ..memory_engine import fq_table
+
+     # Build filters dynamically
+     filters = ""
+     params: list[Any] = [bank_id, str(query_embedding), max_results]
+     next_param = 4
+
+     if tags:
+         if tags_match == "all":
+             filters += f" AND tags @> ${next_param}::varchar[]"
+         else:
+             filters += f" AND (tags && ${next_param}::varchar[] OR tags IS NULL OR tags = '{{}}')"
+         params.append(tags)
+         next_param += 1
+
+     if exclude_ids:
+         filters += f" AND id != ALL(${next_param}::uuid[])"
+         params.append(exclude_ids)
+         next_param += 1
+
+     # Search mental models by embedding similarity
+     rows = await conn.fetch(
+         f"""
+         SELECT
+             id, name, content,
+             tags, created_at, last_refreshed_at,
+             1 - (embedding <=> $2::vector) as relevance
+         FROM {fq_table("mental_models")}
+         WHERE bank_id = $1 AND embedding IS NOT NULL {filters}
+         ORDER BY embedding <=> $2::vector
+         LIMIT $3
+         """,
+         *params,
+     )
+
+     now = datetime.now(timezone.utc)
+     mental_models = []
+
+     for row in rows:
+         last_refreshed_at = row["last_refreshed_at"]
+         if last_refreshed_at and last_refreshed_at.tzinfo is None:
+             last_refreshed_at = last_refreshed_at.replace(tzinfo=timezone.utc)
+
+         # Calculate freshness
+         is_stale = False
+         if last_refreshed_at:
+             age = now - last_refreshed_at
+             is_stale = age > timedelta(days=STALE_THRESHOLD_DAYS)
+
+         mental_models.append(
+             {
+                 "id": str(row["id"]),
+                 "name": row["name"],
+                 "content": row["content"],
+                 "tags": row["tags"] or [],
+                 "relevance": round(row["relevance"], 4),
+                 "updated_at": last_refreshed_at.isoformat() if last_refreshed_at else None,
+                 "is_stale": is_stale,
+             }
+         )
+
+     return {
+         "query": query,
+         "count": len(mental_models),
+         "mental_models": mental_models,
+     }
+
+
+ async def tool_search_observations(
+     memory_engine: "MemoryEngine",
+     bank_id: str,
+     query: str,
+     request_context: "RequestContext",
+     max_tokens: int = 5000,
+     tags: list[str] | None = None,
+     tags_match: str = "any",
+     last_consolidated_at: datetime | None = None,
+     pending_consolidation: int = 0,
+ ) -> dict[str, Any]:
+     """
+     Search consolidated observations using recall with include_observations.
+
+     Observations are auto-generated from memories. Returns freshness info
+     so the agent knows if it should also verify with recall().
+
+     Args:
+         memory_engine: Memory engine instance
+         bank_id: Bank identifier
+         query: Search query
+         request_context: Request context for authentication
+         max_tokens: Maximum tokens for results (default 5000)
+         tags: Optional tags to filter observations
+         tags_match: How to match tags - "any" (OR), "all" (AND)
+         last_consolidated_at: When consolidation last ran (for staleness check)
+         pending_consolidation: Number of memories waiting to be consolidated
+
+     Returns:
+         Dict with matching observations including freshness info
+     """
+     from ..memory_engine import fq_table
+
+     # Use recall to search observations (they come back in results field when fact_type=["observation"])
+     result = await memory_engine.recall_async(
+         bank_id=bank_id,
+         query=query,
+         fact_type=["observation"],  # Only retrieve observations
+         max_tokens=max_tokens,  # Token budget controls how many observations are returned
+         enable_trace=False,
+         request_context=request_context,
+         tags=tags,
+         tags_match=tags_match,
+         _connection_budget=1,
+         _quiet=True,
+     )
+
+     observations = []
+
+     # When fact_type=["observation"], results come back in `results` field as MemoryFact objects
+     # We need to fetch additional fields (proof_count, source_memory_ids) from the database
+     if result.results:
+         obs_ids = [m.id for m in result.results]
+
+         # Fetch proof_count and source_memory_ids for these observations
+         pool = await memory_engine._get_pool()
+         async with pool.acquire() as conn:
+             obs_rows = await conn.fetch(
+                 f"""
+                 SELECT id, proof_count, source_memory_ids
+                 FROM {fq_table("memory_units")}
+                 WHERE id = ANY($1::uuid[])
+                 """,
+                 obs_ids,
+             )
+         obs_data = {str(row["id"]): row for row in obs_rows}
+
+         for m in result.results:
+             # Get additional data from DB lookup
+             extra = obs_data.get(m.id, {})
+             proof_count = extra.get("proof_count", 1) if extra else 1
+             source_ids = extra.get("source_memory_ids", []) if extra else []
+             # Convert UUIDs to strings
+             source_memory_ids = [str(sid) for sid in (source_ids or [])]
+
+             # Determine staleness
+             is_stale = False
+             staleness_reason = None
+             if pending_consolidation > 0:
+                 is_stale = True
+                 staleness_reason = f"{pending_consolidation} memories pending consolidation"
+
+             observations.append(
+                 {
+                     "id": str(m.id),
+                     "text": m.text,
+                     "proof_count": proof_count,
+                     "source_memory_ids": source_memory_ids,
+                     "tags": m.tags or [],
+                     "is_stale": is_stale,
+                     "staleness_reason": staleness_reason,
+                 }
+             )
+
+     # Return freshness info (more understandable than raw pending_consolidation count)
+     if pending_consolidation == 0:
+         freshness = "up_to_date"
+     elif pending_consolidation < 10:
+         freshness = "slightly_stale"
+     else:
+         freshness = "stale"
+
+     return {
+         "query": query,
+         "count": len(observations),
+         "observations": observations,
+         "freshness": freshness,
+     }
+
+
+ async def tool_recall(
+     memory_engine: "MemoryEngine",
+     bank_id: str,
+     query: str,
+     request_context: "RequestContext",
+     max_tokens: int = 2048,
+     max_results: int = 50,
+     tags: list[str] | None = None,
+     tags_match: str = "any",
+     connection_budget: int = 1,
+ ) -> dict[str, Any]:
+     """
+     Search memories using TEMPR retrieval.
+
+     This is the ground truth - raw facts and experiences.
+     Use when mental models/observations don't exist, are stale, or need verification.
+
+     Args:
+         memory_engine: Memory engine instance
+         bank_id: Bank identifier
+         query: Search query
+         request_context: Request context for authentication
+         max_tokens: Maximum tokens for results (default 2048)
+         max_results: Maximum number of results
+         tags: Filter by tags (includes untagged memories)
+         tags_match: How to match tags - "any" (OR), "all" (AND), or "exact"
+         connection_budget: Max DB connections for this recall (default 1 for internal ops)
+
+     Returns:
+         Dict with list of matching memories
+     """
+     result = await memory_engine.recall_async(
+         bank_id=bank_id,
+         query=query,
+         fact_type=["experience", "world"],  # Exclude opinions and observations
+         max_tokens=max_tokens,
+         enable_trace=False,
+         request_context=request_context,
+         tags=tags,
+         tags_match=tags_match,
+         _connection_budget=connection_budget,
+         _quiet=True,  # Suppress logging for internal operations
+     )
+
+     memories = []
+     for m in result.results[:max_results]:
+         memories.append(
+             {
+                 "id": str(m.id),
+                 "text": m.text,
+                 "type": m.fact_type,
+                 "entities": m.entities or [],
+                 "occurred": m.occurred_start,  # Already ISO format string
+             }
+         )
+
+     return {
+         "query": query,
+         "count": len(memories),
+         "memories": memories,
+     }
+
+
+ async def tool_expand(
+     conn: "Connection",
+     bank_id: str,
+     memory_ids: list[str],
+     depth: str,
+ ) -> dict[str, Any]:
+     """
+     Expand multiple memories to get chunk or document context.
+
+     Args:
+         conn: Database connection
+         bank_id: Bank identifier
+         memory_ids: List of memory unit IDs
+         depth: "chunk" or "document"
+
+     Returns:
+         Dict with results array, each containing memory, chunk, and optionally document data
+     """
+     from ..memory_engine import fq_table
+
+     if not memory_ids:
+         return {"error": "memory_ids is required and must not be empty"}
+
+     # Validate and convert UUIDs
+     valid_uuids: list[uuid.UUID] = []
+     errors: dict[str, str] = {}
+     for mid in memory_ids:
+         try:
+             valid_uuids.append(uuid.UUID(mid))
+         except ValueError:
+             errors[mid] = f"Invalid memory_id format: {mid}"
+
+     if not valid_uuids:
+         return {"error": "No valid memory IDs provided", "details": errors}
+
+     # Batch fetch all memory units
+     memories = await conn.fetch(
+         f"""
+         SELECT id, text, chunk_id, document_id, fact_type, context
+         FROM {fq_table("memory_units")}
+         WHERE id = ANY($1) AND bank_id = $2
+         """,
+         valid_uuids,
+         bank_id,
+     )
+     memory_map = {row["id"]: row for row in memories}
+
+     # Collect chunk_ids and document_ids for batch fetching
+     chunk_ids = [m["chunk_id"] for m in memories if m["chunk_id"]]
+     doc_ids_from_chunks: set[str] = set()
+     doc_ids_direct: set[str] = set()
+
+     # Batch fetch all chunks
+     chunk_map: dict[str, Any] = {}
+     if chunk_ids:
+         chunks = await conn.fetch(
+             f"""
+             SELECT chunk_id, chunk_text, chunk_index, document_id
+             FROM {fq_table("chunks")}
+             WHERE chunk_id = ANY($1)
+             """,
+             chunk_ids,
+         )
+         chunk_map = {row["chunk_id"]: row for row in chunks}
+         if depth == "document":
+             doc_ids_from_chunks = {c["document_id"] for c in chunks if c["document_id"]}
+
+     # Collect direct document IDs (memories without chunks)
+     if depth == "document":
+         for m in memories:
+             if not m["chunk_id"] and m["document_id"]:
+                 doc_ids_direct.add(m["document_id"])
+
+     # Batch fetch all documents
+     doc_map: dict[str, Any] = {}
+     all_doc_ids = list(doc_ids_from_chunks | doc_ids_direct)
+     if all_doc_ids:
+         docs = await conn.fetch(
+             f"""
+             SELECT id, original_text, metadata, retain_params
+             FROM {fq_table("documents")}
+             WHERE id = ANY($1) AND bank_id = $2
+             """,
+             all_doc_ids,
+             bank_id,
+         )
+         doc_map = {row["id"]: row for row in docs}
+
+     # Build results
+     results: list[dict[str, Any]] = []
+     for mid, mem_uuid in zip(memory_ids, valid_uuids):
+         if mid in errors:
+             results.append({"memory_id": mid, "error": errors[mid]})
+             continue
+
+         memory = memory_map.get(mem_uuid)
+         if not memory:
+             results.append({"memory_id": mid, "error": f"Memory not found: {mid}"})
+             continue
+
+         item: dict[str, Any] = {
+             "memory_id": mid,
+             "memory": {
+                 "id": str(memory["id"]),
+                 "text": memory["text"],
+                 "type": memory["fact_type"],
+                 "context": memory["context"],
+             },
+         }
+
+         # Add chunk if available
+         if memory["chunk_id"] and memory["chunk_id"] in chunk_map:
+             chunk = chunk_map[memory["chunk_id"]]
+             item["chunk"] = {
+                 "id": chunk["chunk_id"],
+                 "text": chunk["chunk_text"],
+                 "index": chunk["chunk_index"],
+                 "document_id": chunk["document_id"],
+             }
+             # Add document if depth=document
+             if depth == "document" and chunk["document_id"] in doc_map:
+                 doc = doc_map[chunk["document_id"]]
+                 item["document"] = {
+                     "id": doc["id"],
+                     "full_text": doc["original_text"],
+                     "metadata": doc["metadata"],
+                     "retain_params": doc["retain_params"],
+                 }
+         elif memory["document_id"] and depth == "document" and memory["document_id"] in doc_map:
+             # No chunk, but has document_id
+             doc = doc_map[memory["document_id"]]
+             item["document"] = {
+                 "id": doc["id"],
+                 "full_text": doc["original_text"],
+                 "metadata": doc["metadata"],
+                 "retain_params": doc["retain_params"],
+             }
+
+         results.append(item)
+
+     return {"results": results, "count": len(results)}
hindsight_api/engine/reflect/tools_schema.py
@@ -0,0 +1,250 @@
+ """
+ Tool schema definitions for the reflect agent.
+
+ These are OpenAI-format tool definitions used with native tool calling.
+ The reflect agent uses a hierarchical retrieval strategy:
+ 1. search_mental_models - User-curated stored reflect responses (highest quality, if applicable)
+ 2. search_observations - Consolidated knowledge with freshness awareness
+ 3. recall - Raw facts (world/experience) as ground truth fallback
+ """
+
+ # Tool definitions in OpenAI format
+
+ TOOL_SEARCH_MENTAL_MODELS = {
+     "type": "function",
+     "function": {
+         "name": "search_mental_models",
+         "description": (
+             "Search user-curated mental models (stored reflect responses). These are high-quality, manually created "
+             "summaries about specific topics. Use FIRST when the question might be covered by an "
+             "existing mental model. Returns mental models with their content and last refresh time."
+         ),
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "reason": {
+                     "type": "string",
+                     "description": "Brief explanation of why you're making this search (for debugging)",
+                 },
+                 "query": {
+                     "type": "string",
+                     "description": "Search query to find relevant mental models",
+                 },
+                 "max_results": {
+                     "type": "integer",
+                     "description": "Maximum number of mental models to return (default 5)",
+                 },
+             },
+             "required": ["reason", "query"],
+         },
+     },
+ }
+
+ TOOL_SEARCH_OBSERVATIONS = {
+     "type": "function",
+     "function": {
+         "name": "search_observations",
+         "description": (
+             "Search consolidated observations (auto-generated knowledge). These are automatically "
+             "synthesized from memories. Returns observations with freshness info (updated_at, is_stale). "
+             "If an observation is STALE, you should ALSO use recall() to verify with current facts."
+         ),
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "reason": {
+                     "type": "string",
+                     "description": "Brief explanation of why you're making this search (for debugging)",
+                 },
+                 "query": {
+                     "type": "string",
+                     "description": "Search query to find relevant observations",
+                 },
+                 "max_tokens": {
+                     "type": "integer",
+                     "description": "Maximum tokens for results (default 5000). Use higher values for broader searches.",
+                 },
+             },
+             "required": ["reason", "query"],
+         },
+     },
+ }
+
+ TOOL_RECALL = {
+     "type": "function",
+     "function": {
+         "name": "recall",
+         "description": (
+             "Search raw memories (facts and experiences). This is the ground truth data. "
+             "Use when: (1) no reflections/mental models exist, (2) mental models are stale, "
+             "(3) you need specific details not in synthesized knowledge. "
+             "Returns individual memory facts with their timestamps."
+         ),
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "reason": {
+                     "type": "string",
+                     "description": "Brief explanation of why you're making this search (for debugging)",
+                 },
+                 "query": {
+                     "type": "string",
+                     "description": "Search query string",
+                 },
+                 "max_tokens": {
+                     "type": "integer",
+                     "description": "Optional limit on result size (default 2048). Use higher values for broader searches.",
+                 },
+             },
+             "required": ["reason", "query"],
+         },
+     },
+ }
+
+ TOOL_EXPAND = {
+     "type": "function",
+     "function": {
+         "name": "expand",
+         "description": "Get more context for one or more memories. Memory hierarchy: memory -> chunk -> document.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "reason": {
+                     "type": "string",
+                     "description": "Brief explanation of why you need more context (for debugging)",
+                 },
+                 "memory_ids": {
+                     "type": "array",
+                     "items": {"type": "string"},
+                     "description": "Array of memory IDs from recall results (batch multiple for efficiency)",
+                 },
+                 "depth": {
+                     "type": "string",
+                     "enum": ["chunk", "document"],
+                     "description": "chunk: surrounding text chunk, document: full source document",
+                 },
+             },
+             "required": ["reason", "memory_ids", "depth"],
+         },
+     },
+ }
+
+ TOOL_DONE_ANSWER = {
+     "type": "function",
+     "function": {
+         "name": "done",
+         "description": "Signal completion with your final answer. Use this when you have gathered enough information to answer the question.",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "answer": {
+                     "type": "string",
+                     "description": "Your response as plain text. Do NOT use markdown formatting. NEVER include memory IDs, UUIDs, or 'Memory references' in this text - put IDs only in memory_ids array.",
+                 },
+                 "memory_ids": {
+                     "type": "array",
+                     "items": {"type": "string"},
+                     "description": "Array of memory IDs that support your answer (put IDs here, NOT in answer text)",
+                 },
+                 "mental_model_ids": {
+                     "type": "array",
+                     "items": {"type": "string"},
+                     "description": "Array of mental model IDs that support your answer",
+                 },
+                 "observation_ids": {
+                     "type": "array",
+                     "items": {"type": "string"},
+                     "description": "Array of observation IDs that support your answer",
+                 },
+             },
+             "required": ["answer"],
+         },
+     },
+ }
+
+
+ def _build_done_tool_with_directives(directive_rules: list[str]) -> dict:
+     """
+     Build the done tool schema with directive compliance field.
+
+     When directives are present, adds a required field that forces the agent
+     to confirm compliance with each directive before submitting.
+
+     Args:
+         directive_rules: List of directive rule strings
+     """
+     # Build rules list for description
+     rules_list = "\n".join(f" {i + 1}. {rule}" for i, rule in enumerate(directive_rules))
+
+     # Build the tool with directive compliance field
+     return {
+         "type": "function",
+         "function": {
+             "name": "done",
+             "description": (
+                 "Signal completion with your final answer. IMPORTANT: You must confirm directive compliance before submitting. "
+                 "Your answer will be REJECTED if it violates any directive."
+             ),
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "answer": {
+                         "type": "string",
+                         "description": "Your response as plain text. Do NOT use markdown formatting. NEVER include memory IDs, UUIDs, or 'Memory references' in this text - put IDs only in memory_ids array.",
+                     },
+                     "memory_ids": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "Array of memory IDs that support your answer (put IDs here, NOT in answer text)",
+                     },
+                     "mental_model_ids": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "Array of mental model IDs that support your answer",
+                     },
+                     "observation_ids": {
+                         "type": "array",
+                         "items": {"type": "string"},
+                         "description": "Array of observation IDs that support your answer",
+                     },
+                     "directive_compliance": {
+                         "type": "string",
+                         "description": f"REQUIRED: Confirm your answer complies with ALL directives. List each directive and how your answer follows it:\n{rules_list}\n\nFormat: 'Directive 1: [how answer complies]. Directive 2: [how answer complies]...'",
+                     },
+                 },
+                 "required": ["answer", "directive_compliance"],
+             },
+         },
+     }
+
+
+ def get_reflect_tools(directive_rules: list[str] | None = None) -> list[dict]:
+     """
+     Get the list of tools for the reflect agent.
+
+     The tools support a hierarchical retrieval strategy:
+     1. search_mental_models - User-curated stored reflect responses (try first)
+     2. search_observations - Consolidated knowledge with freshness
+     3. recall - Raw facts as ground truth
+
+     Args:
+         directive_rules: Optional list of directive rule strings. If provided,
+             the done() tool will require directive compliance confirmation.
+
+     Returns:
+         List of tool definitions in OpenAI format
+     """
+     tools = [
+         TOOL_SEARCH_MENTAL_MODELS,
+         TOOL_SEARCH_OBSERVATIONS,
+         TOOL_RECALL,
+         TOOL_EXPAND,
+     ]
+
+     # Use directive-aware done tool if directives are present
+     if directive_rules:
+         tools.append(_build_done_tool_with_directives(directive_rules))
+     else:
+         tools.append(TOOL_DONE_ANSWER)
+
+     return tools
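
As a usage note, get_reflect_tools() returns plain OpenAI-format dictionaries, so they can be handed to any OpenAI-compatible chat completion client. The sketch below is an illustration under stated assumptions, not code from the package: the client, model name, user message, and example directive are placeholders, and the package presumably routes these calls through its own llm_wrapper rather than a raw client.

from openai import OpenAI

from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

client = OpenAI()  # placeholder; any OpenAI-compatible endpoint works the same way
tools = get_reflect_tools(directive_rules=["Answer in one short paragraph."])  # directive adds the compliance field

response = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "What do we know about the Acme account?"}],
    tools=tools,
    tool_choice="auto",
)

# The model responds with tool calls naming search_mental_models, search_observations,
# recall, expand, or done; arguments arrive as JSON strings.
for call in response.choices[0].message.tool_calls or []:
    print(call.function.name, call.function.arguments)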