hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/tags.py
@@ -0,0 +1,172 @@
+"""
+Tags filtering utilities for retrieval.
+
+Provides SQL building functions for filtering memories by tags.
+Supports four matching modes via TagsMatch enum:
+- "any": OR matching, includes untagged memories (default, backward compatible)
+- "all": AND matching, includes untagged memories
+- "any_strict": OR matching, excludes untagged memories
+- "all_strict": AND matching, excludes untagged memories
+
+OR matching (any/any_strict): Memory matches if ANY of its tags overlap with request tags
+AND matching (all/all_strict): Memory matches if ALL request tags are present in its tags
+"""
+
+from typing import Literal
+
+TagsMatch = Literal["any", "all", "any_strict", "all_strict"]
+
+
+def _parse_tags_match(match: TagsMatch) -> tuple[str, bool]:
+    """
+    Parse TagsMatch into operator and include_untagged flag.
+
+    Returns:
+        Tuple of (operator, include_untagged)
+        - operator: "&&" for any/any_strict, "@>" for all/all_strict
+        - include_untagged: True for any/all, False for any_strict/all_strict
+    """
+    if match == "any":
+        return "&&", True
+    elif match == "all":
+        return "@>", True
+    elif match == "any_strict":
+        return "&&", False
+    elif match == "all_strict":
+        return "@>", False
+    else:
+        # Default to "any" behavior
+        return "&&", True
+
+
+def build_tags_where_clause(
+    tags: list[str] | None,
+    param_offset: int = 1,
+    table_alias: str = "",
+    match: TagsMatch = "any",
+) -> tuple[str, list, int]:
+    """
+    Build a SQL WHERE clause for filtering by tags.
+
+    Supports four matching modes:
+    - "any" (default): OR matching, includes untagged memories
+    - "all": AND matching, includes untagged memories
+    - "any_strict": OR matching, excludes untagged memories
+    - "all_strict": AND matching, excludes untagged memories
+
+    Args:
+        tags: List of tags to filter by. If None or empty, returns empty clause (no filtering).
+        param_offset: Starting parameter number for SQL placeholders (default 1).
+        table_alias: Optional table alias prefix (e.g., "mu." for "memory_units mu").
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        Tuple of (sql_clause, params, next_param_offset):
+        - sql_clause: SQL WHERE clause string
+        - params: List of parameter values to bind
+        - next_param_offset: Next available parameter number
+
+    Example:
+        >>> clause, params, next_offset = build_tags_where_clause(['user_a'], 3, 'mu.', 'any_strict')
+        >>> print(clause)  # "AND mu.tags IS NOT NULL AND mu.tags != '{}' AND mu.tags && $3"
+    """
+    if not tags:
+        return "", [], param_offset
+
+    column = f"{table_alias}tags" if table_alias else "tags"
+    operator, include_untagged = _parse_tags_match(match)
+
+    if include_untagged:
+        # Include untagged memories (NULL or empty array) OR matching tags
+        clause = f"AND ({column} IS NULL OR {column} = '{{}}' OR {column} {operator} ${param_offset})"
+    else:
+        # Strict: only memories with matching tags (exclude NULL and empty)
+        clause = f"AND {column} IS NOT NULL AND {column} != '{{}}' AND {column} {operator} ${param_offset}"
+
+    return clause, [tags], param_offset + 1
+
+
+def build_tags_where_clause_simple(
+    tags: list[str] | None,
+    param_num: int,
+    table_alias: str = "",
+    match: TagsMatch = "any",
+) -> str:
+    """
+    Build a simple SQL WHERE clause for tags filtering.
+
+    This is a convenience version that returns just the clause string,
+    assuming the caller will add the tags array to their params list.
+
+    Args:
+        tags: List of tags to filter by. If None or empty, returns empty string.
+        param_num: Parameter number to use in the clause.
+        table_alias: Optional table alias prefix.
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        SQL clause string or empty string.
+    """
+    if not tags:
+        return ""
+
+    column = f"{table_alias}tags" if table_alias else "tags"
+    operator, include_untagged = _parse_tags_match(match)
+
+    if include_untagged:
+        # Include untagged memories (NULL or empty array) OR matching tags
+        return f"AND ({column} IS NULL OR {column} = '{{}}' OR {column} {operator} ${param_num})"
+    else:
+        # Strict: only memories with matching tags (exclude NULL and empty)
+        return f"AND {column} IS NOT NULL AND {column} != '{{}}' AND {column} {operator} ${param_num}"
+
+
+def filter_results_by_tags(
+    results: list,
+    tags: list[str] | None,
+    match: TagsMatch = "any",
+) -> list:
+    """
+    Filter retrieval results by tags in Python (for post-processing).
+
+    Used when SQL filtering isn't possible (e.g., graph traversal results).
+
+    Args:
+        results: List of RetrievalResult objects with a 'tags' attribute.
+        tags: List of tags to filter by. If None or empty, returns all results.
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        Filtered list of results.
+    """
+    if not tags:
+        return results
+
+    _, include_untagged = _parse_tags_match(match)
+    is_any_match = match in ("any", "any_strict")
+
+    tags_set = set(tags)
+    filtered = []

+    for result in results:
+        result_tags = getattr(result, "tags", None)
+
+        # Check if untagged
+        is_untagged = result_tags is None or len(result_tags) == 0
+
+        if is_untagged:
+            if include_untagged:
+                filtered.append(result)
+            # else: skip untagged
+        else:
+            result_tags_set = set(result_tags)
+            if is_any_match:
+                # Any overlap
+                if result_tags_set & tags_set:
+                    filtered.append(result)
+            else:
+                # All tags must be present
+                if tags_set <= result_tags_set:
+                    filtered.append(result)
+
+    return filtered
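
For orientation (an editor's sketch, not part of the diff): the two SQL helpers compose with asyncpg-style $n placeholders, and filter_results_by_tags covers paths where no SQL is issued. Only the "memory_units mu" alias and the clause shapes come from the docstrings above; the surrounding SELECT and the bank_id column are illustrative assumptions.

# Sketch only: the SELECT skeleton and bank_id column are assumptions, not the
# package's actual schema; the clause/params handling follows the docstrings.
from hindsight_api.engine.search.tags import build_tags_where_clause, filter_results_by_tags

base_sql = "SELECT mu.id, mu.text, mu.tags FROM memory_units mu WHERE mu.bank_id = $1"
params: list = ["bank-123"]

# Continue parameter numbering at $2; strict OR-matching excludes untagged rows.
clause, tag_params, next_offset = build_tags_where_clause(
    tags=["user_a", "project_x"],
    param_offset=2,
    table_alias="mu.",
    match="any_strict",
)
sql = f"{base_sql} {clause}"  # ... AND mu.tags IS NOT NULL AND mu.tags != '{}' AND mu.tags && $2
params.extend(tag_params)     # params == ["bank-123", ["user_a", "project_x"]]

# For results that never touch SQL (e.g., graph traversal), filter in Python:
# kept = filter_results_by_tags(results, ["user_a", "project_x"], match="any_strict")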
hindsight_api/engine/search/think_utils.py
@@ -3,31 +3,13 @@ Think operation utilities for formulating answers based on agent and world facts
 """
 
 import logging
-import re
 from datetime import datetime
 
-from pydantic import BaseModel, Field
-
 from ..response_models import DispositionTraits, MemoryFact
 
 logger = logging.getLogger(__name__)
 
 
-class Opinion(BaseModel):
-    """An opinion formed by the bank."""
-
-    opinion: str = Field(description="The opinion or perspective with reasoning included")
-    confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
-
-
-class OpinionExtractionResponse(BaseModel):
-    """Response containing extracted opinions."""
-
-    opinions: list[Opinion] = Field(
-        default_factory=list, description="List of opinions formed with their supporting reasons and confidence scores"
-    )
-
-
 def describe_trait_level(value: int) -> str:
     """Convert trait value (1-5) to descriptive text."""
     levels = {1: "very low", 2: "low", 3: "moderate", 4: "high", 5: "very high"}
@@ -93,17 +75,46 @@ def format_facts_for_prompt(facts: list[MemoryFact]) -> str:
     return json.dumps(formatted, indent=2)
 
 
+def format_entity_summaries_for_prompt(entities: dict) -> str:
+    """Format entity summaries for inclusion in the reflect prompt.
+
+    Args:
+        entities: Dict mapping entity name to EntityState objects
+
+    Returns:
+        Formatted string with entity summaries, or empty string if no summaries
+    """
+    if not entities:
+        return ""
+
+    summaries = []
+    for name, state in entities.items():
+        # Get summary from observations (summary is stored as single observation)
+        if state.observations:
+            summary_text = state.observations[0].text
+            summaries.append(f"## {name}\n{summary_text}")
+
+    if not summaries:
+        return ""
+
+    return "\n\n".join(summaries)
+
+
 def build_think_prompt(
     agent_facts_text: str,
     world_facts_text: str,
-    opinion_facts_text: str,
     query: str,
     name: str,
     disposition: DispositionTraits,
     background: str,
     context: str | None = None,
+    entity_summaries_text: str | None = None,
 ) -> str:
-    """Build the think prompt for the LLM."""
+    """Build the think prompt for the LLM.
+
+    Note: opinion_facts_text parameter removed - opinions are now stored as mental models
+    and included via entity_summaries_text.
+    """
     disposition_desc = build_disposition_description(disposition)
 
     name_section = f"""
@@ -125,6 +136,14 @@ Your background:
 ADDITIONAL CONTEXT:
 {context}
 
+"""
+
+    entity_section = ""
+    if entity_summaries_text:
+        entity_section = f"""
+KEY PEOPLE, PLACES & THINGS I KNOW ABOUT:
+{entity_summaries_text}
+
 """
 
     return f"""Here's what I know and have experienced:
@@ -135,14 +154,11 @@ MY IDENTITY & EXPERIENCES:
 WHAT I KNOW ABOUT THE WORLD:
 {world_facts_text}
 
-MY EXISTING OPINIONS & BELIEFS:
-{opinion_facts_text}
-
-{context_section}{name_section}{disposition_desc}{background_section}
+{entity_section}{context_section}{name_section}{disposition_desc}{background_section}
 
 QUESTION: {query}
 
-Based on everything I know, believe, and who I am (including my name, disposition and background), here's what I genuinely think about this question. I'll draw on my experiences, knowledge, opinions, and personal traits to give you my honest perspective."""
+Based on everything I know, believe, and who I am (including my name, disposition and background), here's what I genuinely think about this question. I'll draw on my experiences, knowledge, and personal traits to give you my honest perspective."""
 
 
 def get_system_message(disposition: DispositionTraits) -> str:
@@ -172,117 +188,7 @@ def get_system_message(disposition: DispositionTraits) -> str:
         " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
     )
 
-    return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
-
-
-async def extract_opinions_from_text(llm_config, text: str, query: str) -> list[Opinion]:
-    """
-    Extract opinions with reasons and confidence from text using LLM.
-
-    Args:
-        llm_config: LLM configuration to use
-        text: Text to extract opinions from
-        query: The original query that prompted this response
-
-    Returns:
-        List of Opinion objects with text and confidence
-    """
-    extraction_prompt = f"""Extract any NEW opinions or perspectives from the answer below and rewrite them in FIRST-PERSON as if YOU are stating the opinion directly.
-
-ORIGINAL QUESTION:
-{query}
-
-ANSWER PROVIDED:
-{text}
-
-Your task: Find opinions in the answer and rewrite them AS IF YOU ARE THE ONE SAYING THEM.
-
-An opinion is a judgment, viewpoint, or conclusion that goes beyond just stating facts.
-
-IMPORTANT: Do NOT extract statements like:
-- "I don't have enough information"
-- "The facts don't contain information about X"
-- "I cannot answer because..."
-
-ONLY extract actual opinions about substantive topics.
-
-CRITICAL FORMAT REQUIREMENTS:
-1. **ALWAYS start with first-person phrases**: "I think...", "I believe...", "In my view...", "I've come to believe...", "Previously I thought... but now..."
-2. **NEVER use third-person**: Do NOT say "The speaker thinks..." or "They believe..." - always use "I"
-3. Include the reasoning naturally within the statement
-4. Provide a confidence score (0.0 to 1.0)
-
-CORRECT Examples (✓ FIRST-PERSON):
-- "I think Alice is more reliable because she consistently delivers on time and writes clean code"
-- "Previously I thought all engineers were equal, but now I feel that experience and track record really matter"
-- "I believe reliability is best measured by consistent output over time"
-- "I've come to believe that track records are more important than potential"
-
-WRONG Examples (✗ THIRD-PERSON - DO NOT USE):
-- "The speaker thinks Alice is more reliable"
-- "They believe reliability matters"
-- "It is believed that Alice is better"
-
-If no genuine opinions are expressed (e.g., the response just says "I don't know"), return an empty list."""
-
-    try:
-        result = await llm_config.call(
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'.",
-                },
-                {"role": "user", "content": extraction_prompt},
-            ],
-            response_format=OpinionExtractionResponse,
-            scope="memory_extract_opinion",
-        )
-
-        # Format opinions with confidence score and convert to first-person
-        formatted_opinions = []
-        for op in result.opinions:
-            # Convert third-person to first-person if needed
-            opinion_text = op.opinion
-
-            # Replace common third-person patterns with first-person
-            def singularize_verb(verb):
-                if verb.endswith("es"):
-                    return verb[:-1]  # believes -> believe
-                elif verb.endswith("s"):
-                    return verb[:-1]  # thinks -> think
-                return verb
-
-            # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
-            match = re.match(
-                r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
-                opinion_text,
-                re.IGNORECASE,
-            )
-            if match:
-                verb = singularize_verb(match.group(2))
-                that_part = match.group(3) or ""  # Keep " that" if present
-                rest = match.group(4)
-                opinion_text = f"I {verb}{that_part}{rest}"
-
-            # If still doesn't start with first-person, prepend "I believe that "
-            first_person_starters = [
-                "I think",
-                "I believe",
-                "I feel",
-                "In my view",
-                "I've come to believe",
-                "Previously I",
-            ]
-            if not any(opinion_text.startswith(starter) for starter in first_person_starters):
-                opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]
-
-            formatted_opinions.append(Opinion(opinion=opinion_text, confidence=op.confidence))
-
-        return formatted_opinions
-
-    except Exception as e:
-        logger.warning(f"Failed to extract opinions: {str(e)}")
-        return []
+    return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting. IMPORTANT: Detect the language of the question and respond in the SAME language. Do not translate to English if the question is in another language."
 
 
 async def reflect(
@@ -290,7 +196,6 @@ async def reflect(
     query: str,
     experience_facts: list[str] = None,
     world_facts: list[str] = None,
-    opinion_facts: list[str] = None,
     name: str = "Assistant",
     disposition: DispositionTraits = None,
     background: str = "",
@@ -307,7 +212,6 @@ async def reflect(
         query: Question to answer
        experience_facts: List of experience/agent fact strings
         world_facts: List of world fact strings
-        opinion_facts: List of opinion fact strings
         name: Name of the agent/persona
        disposition: Disposition traits (defaults to neutral)
         background: Background information
@@ -328,18 +232,15 @@ async def reflect(
 
     agent_results = to_memory_facts(experience_facts or [], "experience")
     world_results = to_memory_facts(world_facts or [], "world")
-    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
 
     # Format facts for prompt
     agent_facts_text = format_facts_for_prompt(agent_results)
     world_facts_text = format_facts_for_prompt(world_results)
-    opinion_facts_text = format_facts_for_prompt(opinion_results)
 
     # Build prompt
     prompt = build_think_prompt(
         agent_facts_text=agent_facts_text,
         world_facts_text=world_facts_text,
-        opinion_facts_text=opinion_facts_text,
         query=query,
         name=name,
         disposition=disposition,
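
To see where the removed opinion plumbing went (an illustrative sketch, not part of the diff): entity summaries now carry this context into the prompt. Observation and EntityState below are hypothetical stand-ins; the diff only requires that each state expose .observations whose first element has a .text attribute.

# Hypothetical stand-in types; the real EntityState lives elsewhere in the package.
from dataclasses import dataclass, field

from hindsight_api.engine.search.think_utils import format_entity_summaries_for_prompt


@dataclass
class Observation:
    text: str


@dataclass
class EntityState:
    observations: list[Observation] = field(default_factory=list)


entities = {
    "Alice": EntityState([Observation("Senior engineer; consistently ships on time.")]),
    "Project X": EntityState([Observation("Migration to the new knowledge architecture.")]),
}

# Each entity becomes a "## <name>" section; states without observations are skipped.
summaries_text = format_entity_summaries_for_prompt(entities)

# Passing summaries_text as build_think_prompt(entity_summaries_text=...) fills the
# "KEY PEOPLE, PLACES & THINGS I KNOW ABOUT:" section shown in the hunk above.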
hindsight_api/engine/search/trace.py
@@ -11,6 +11,13 @@ from typing import Any, Literal
 from pydantic import BaseModel, Field
 
 
+class TemporalConstraint(BaseModel):
+    """Detected temporal constraint from query analysis."""
+
+    start: datetime | None = Field(default=None, description="Start of temporal range")
+    end: datetime | None = Field(default=None, description="End of temporal range")
+
+
 class QueryInfo(BaseModel):
     """Information about the search query."""
 
@@ -19,6 +26,11 @@ class QueryInfo(BaseModel):
     timestamp: datetime = Field(description="When the query was executed")
     budget: int = Field(description="Maximum nodes to explore")
     max_tokens: int = Field(description="Maximum tokens to return in results")
+    tags: list[str] | None = Field(default=None, description="Tags filter applied to recall")
+    tags_match: str | None = Field(default=None, description="Tags matching mode: any, all, any_strict, all_strict")
+    temporal_constraint: TemporalConstraint | None = Field(
+        default=None, description="Detected temporal range from query"
+    )
 
 
 class EntryPoint(BaseModel):
@@ -73,7 +85,6 @@ class NodeVisit(BaseModel):
     text: str = Field(description="Memory unit text content")
     context: str = Field(description="Memory unit context")
     event_date: datetime | None = Field(default=None, description="When the memory occurred")
-    access_count: int = Field(description="Number of times accessed before this search")
 
     # How this node was reached
     is_entry_point: bool = Field(description="Whether this is an entry point")
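
A quick sketch of the new trace model (only the fields shown in this hunk are used; the dates are illustrative):

from datetime import UTC, datetime

from hindsight_api.engine.search.trace import TemporalConstraint

# A query like "what happened last week" yields a bounded range...
last_week = TemporalConstraint(
    start=datetime(2025, 1, 6, tzinfo=UTC),
    end=datetime(2025, 1, 13, tzinfo=UTC),
)

# ...while "since January" leaves the end side as None.
since_january = TemporalConstraint(start=datetime(2025, 1, 1, tzinfo=UTC))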
hindsight_api/engine/search/tracer.py
@@ -22,6 +22,7 @@ from .trace import (
     SearchPhaseMetrics,
     SearchSummary,
     SearchTrace,
+    TemporalConstraint,
     WeightComponents,
 )
 
@@ -45,7 +46,14 @@ class SearchTracer:
         json_output = trace.to_json()
     """
 
-    def __init__(self, query: str, budget: int, max_tokens: int):
+    def __init__(
+        self,
+        query: str,
+        budget: int,
+        max_tokens: int,
+        tags: list[str] | None = None,
+        tags_match: str | None = None,
+    ):
         """
         Initialize tracer.
 
@@ -53,10 +61,14 @@ class SearchTracer:
             query: Search query text
             budget: Maximum nodes to explore
             max_tokens: Maximum tokens to return in results
+            tags: Tags filter applied to recall
+            tags_match: Tags matching mode (any, all, any_strict, all_strict)
         """
         self.query_text = query
         self.budget = budget
         self.max_tokens = max_tokens
+        self.tags = tags
+        self.tags_match = tags_match
 
         # Trace data
         self.query_embedding: list[float] | None = None
@@ -66,6 +78,9 @@ class SearchTracer:
         self.pruned: list[PruningDecision] = []
         self.phase_metrics: list[SearchPhaseMetrics] = []
 
+        # Temporal constraint detected from query
+        self.temporal_constraint: TemporalConstraint | None = None
+
         # New 4-way retrieval tracking
         self.retrieval_results: list[RetrievalMethodResults] = []
         self.rrf_merged: list[RRFMergeResult] = []
@@ -88,6 +103,11 @@ class SearchTracer:
         """Record the query embedding."""
         self.query_embedding = embedding
 
+    def record_temporal_constraint(self, start: datetime | None, end: datetime | None):
+        """Record the detected temporal constraint from query analysis."""
+        if start is not None or end is not None:
+            self.temporal_constraint = TemporalConstraint(start=start, end=end)
+
     def add_entry_point(self, node_id: str, text: str, similarity: float, rank: int):
         """
         Record an entry point.
@@ -116,7 +136,6 @@ class SearchTracer:
         text: str,
         context: str,
         event_date: datetime | None,
-        access_count: int,
         is_entry_point: bool,
         parent_node_id: str | None,
         link_type: Literal["temporal", "semantic", "entity"] | None,
@@ -135,7 +154,6 @@ class SearchTracer:
             text: Memory unit text
             context: Memory unit context
             event_date: When the memory occurred
-            access_count: Access count before this search
             is_entry_point: Whether this is an entry point
             parent_node_id: Node that led here (None for entry points)
             link_type: Type of link from parent
@@ -174,7 +192,6 @@ class SearchTracer:
             text=text,
             context=context,
             event_date=event_date,
-            access_count=access_count,
             is_entry_point=is_entry_point,
             parent_node_id=parent_node_id,
             link_type=link_type,
@@ -313,8 +330,8 @@ class SearchTracer:
                 RetrievalResult(
                     rank=rank,
                     node_id=doc_id,
-                    text=data.get("text", ""),
-                    context=data.get("context", ""),
+                    text=data.get("text") or "",
+                    context=data.get("context") or "",
                     event_date=data.get("event_date"),
                     fact_type=data.get("fact_type") or fact_type,
                     score=score,
@@ -428,6 +445,9 @@ class SearchTracer:
             timestamp=datetime.now(UTC),
             budget=self.budget,
             max_tokens=self.max_tokens,
+            tags=self.tags,
+            tags_match=self.tags_match,
+            temporal_constraint=self.temporal_constraint,
         )
 
         # Create summary
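
Putting the tracer changes together (a sketch using only the signatures shown above; the query and values are illustrative):

from datetime import UTC, datetime

from hindsight_api.engine.search.tracer import SearchTracer

tracer = SearchTracer(
    query="what did Alice ship last week?",
    budget=200,
    max_tokens=4096,
    tags=["user_a"],
    tags_match="any_strict",
)

# Recorded only when at least one bound is set; a both-None call is a no-op.
tracer.record_temporal_constraint(
    start=datetime(2025, 1, 6, tzinfo=UTC),
    end=datetime(2025, 1, 13, tzinfo=UTC),
)
# When the trace is assembled, tags, tags_match, and temporal_constraint flow
# into the QueryInfo fields added in trace.py.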
hindsight_api/engine/search/types.py
@@ -10,6 +10,24 @@ from datetime import datetime
 from typing import Any
 
 
+@dataclass
+class MPFPTimings:
+    """Timing breakdown for a single MPFP retrieval call."""
+
+    fact_type: str
+    edge_count: int = 0  # Total edges loaded
+    db_queries: int = 0  # Number of DB queries for edge loading
+    edge_load_time: float = 0.0  # Time spent loading edges from DB
+    traverse: float = 0.0  # Total traversal time (includes edge loading)
+    pattern_count: int = 0  # Number of patterns executed
+    fusion: float = 0.0  # Time for RRF fusion
+    fetch: float = 0.0  # Time to fetch memory unit details
+    seeds_time: float = 0.0  # Time to find semantic seeds (if fallback used)
+    result_count: int = 0  # Number of results returned
+    # Detailed per-hop timing: list of {hop, exec_time, uncached, load_time, edges_loaded, total_time}
+    hop_details: list[dict] = field(default_factory=list)
+
+
 @dataclass
 class RetrievalResult:
     """
@@ -28,8 +46,8 @@ class RetrievalResult:
     mentioned_at: datetime | None = None
     document_id: str | None = None
     chunk_id: str | None = None
-    access_count: int = 0
     embedding: list[float] | None = None
+    tags: list[str] | None = None  # Visibility scope tags
 
     # Retrieval-specific scores (only one will be set depending on retrieval method)
     similarity: float | None = None  # Semantic retrieval
@@ -52,8 +70,8 @@ class RetrievalResult:
             mentioned_at=row.get("mentioned_at"),
             document_id=row.get("document_id"),
             chunk_id=row.get("chunk_id"),
-            access_count=row.get("access_count", 0),
             embedding=row.get("embedding"),
+            tags=row.get("tags"),
             similarity=row.get("similarity"),
             bm25_score=row.get("bm25_score"),
             activation=row.get("activation"),
@@ -136,8 +154,8 @@ class ScoredResult:
             "mentioned_at": self.retrieval.mentioned_at,
             "document_id": self.retrieval.document_id,
             "chunk_id": self.retrieval.chunk_id,
-            "access_count": self.retrieval.access_count,
             "embedding": self.retrieval.embedding,
+            "tags": self.retrieval.tags,
             "semantic_similarity": self.retrieval.similarity,
             "bm25_score": self.retrieval.bm25_score,
         }
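
Finally, the new MPFPTimings dataclass is fully defined in this hunk, so its intended use can be sketched directly (the field values below are illustrative, not measured):

from hindsight_api.engine.search.types import MPFPTimings

timings = MPFPTimings(fact_type="world")

# Accumulate edge-loading stats per hop during traversal...
timings.db_queries += 1
timings.edge_count += 1842
timings.edge_load_time += 0.031
timings.hop_details.append(
    {"hop": 1, "exec_time": 0.012, "uncached": True,
     "load_time": 0.031, "edges_loaded": 1842, "total_time": 0.043}
)

# ...then record the aggregate phase timings.
timings.traverse = 0.074  # includes edge loading
timings.fusion = 0.004
timings.fetch = 0.019
timings.result_count = 25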