hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +31 -33
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +17 -12
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +23 -27
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +74 -88
  20. hindsight_api/engine/memory_engine.py +663 -673
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +15 -1
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +234 -0
  40. hindsight_api/engine/search/mpfp_retrieval.py +438 -0
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +388 -193
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -38
  48. hindsight_api/engine/search/tracer.py +49 -35
  49. hindsight_api/engine/search/types.py +22 -16
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +64 -337
  58. hindsight_api/server.py +3 -6
  59. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
  60. hindsight_api-0.1.6.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.4.dist-info/RECORD +0 -61
  63. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/scoring.py
@@ -4,11 +4,11 @@ Scoring functions for memory search and retrieval.
 Includes recency weighting, frequency weighting, temporal proximity,
 and similarity calculations used in memory activation and ranking.
 """
+
 from datetime import datetime
-from typing import List
 
 
-def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
+def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
     """
     Calculate cosine similarity between two vectors.
 
@@ -58,6 +58,7 @@ def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
         Weight between 0 and 1
     """
     import math
+
     # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
     # This decays much slower than exponential, giving better long-term differentiation
     normalized_age = days_since / half_life_days
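
For context, a minimal sketch of the logarithmic recency decay described in the comment above, assuming calculate_recency_weight returns exactly the commented expression (the full function body is not shown in this hunk):

    import math

    def recency_weight_sketch(days_since: float, half_life_days: float = 365.0) -> float:
        # logarithmic decay per the comment: 1 / (1 + log(1 + days_since/half_life))
        normalized_age = days_since / half_life_days
        return 1.0 / (1.0 + math.log(1.0 + normalized_age))

    # At one half-life the weight is 1/(1 + ln 2) ≈ 0.59; at ten half-lives it is
    # still ≈ 0.29, decaying far more slowly than exponential half-life weighting.
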
@@ -79,6 +80,7 @@ def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
         Weight between 1.0 and max_boost
     """
     import math
+
     if access_count <= 0:
         return 1.0
 
@@ -116,11 +118,7 @@ def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime)
     return midpoint
 
 
-def calculate_temporal_proximity(
-    anchor_a: datetime,
-    anchor_b: datetime,
-    half_life_days: float = 30.0
-) -> float:
+def calculate_temporal_proximity(anchor_a: datetime, anchor_b: datetime, half_life_days: float = 30.0) -> float:
     """
     Calculate temporal proximity between two temporal anchors.
 
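
The reflowed signature composes with calculate_temporal_anchor, which per the hunk above returns the midpoint of a memory's occurrence window. A usage sketch with illustrative dates:

    from datetime import datetime

    # midpoints of two occurrence windows (illustrative values)
    anchor_a = calculate_temporal_anchor(datetime(2024, 1, 1), datetime(2024, 1, 3))
    anchor_b = calculate_temporal_anchor(datetime(2024, 1, 10), datetime(2024, 1, 12))

    # proximity decays as the gap between anchors grows, on a 30-day half-life
    score = calculate_temporal_proximity(anchor_a, anchor_b, half_life_days=30.0)
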
hindsight_api/engine/search/temporal_extraction.py
@@ -4,16 +4,16 @@ Temporal extraction for time-aware search queries.
 Handles natural language temporal expressions using transformer-based query analysis.
 """
 
-from typing import Optional, Tuple
-from datetime import datetime
 import logging
-from hindsight_api.engine.query_analyzer import QueryAnalyzer, DateparserQueryAnalyzer
+from datetime import datetime
+
+from hindsight_api.engine.query_analyzer import DateparserQueryAnalyzer, QueryAnalyzer
 
 logger = logging.getLogger(__name__)
 
 # Global default analyzer instance
 # Can be overridden by passing a custom analyzer to extract_temporal_constraint
-_default_analyzer: Optional[QueryAnalyzer] = None
+_default_analyzer: QueryAnalyzer | None = None
 
 
 def get_default_analyzer() -> QueryAnalyzer:
@@ -33,9 +33,9 @@ def get_default_analyzer() -> QueryAnalyzer:
 
 def extract_temporal_constraint(
     query: str,
-    reference_date: Optional[datetime] = None,
-    analyzer: Optional[QueryAnalyzer] = None,
-) -> Optional[Tuple[datetime, datetime]]:
+    reference_date: datetime | None = None,
+    analyzer: QueryAnalyzer | None = None,
+) -> tuple[datetime, datetime] | None:
     """
     Extract temporal constraint from query.
 
@@ -55,10 +55,7 @@ def extract_temporal_constraint(
     analysis = analyzer.analyze(query, reference_date)
 
     if analysis.temporal_constraint:
-        result = (
-            analysis.temporal_constraint.start_date,
-            analysis.temporal_constraint.end_date
-        )
+        result = (analysis.temporal_constraint.start_date, analysis.temporal_constraint.end_date)
        return result
 
     return None
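
With the modernized annotations, callers unpack the optional tuple directly. A usage sketch (the query string and reference date are illustrative):

    from datetime import datetime

    constraint = extract_temporal_constraint(
        "what did I work on last week?",
        reference_date=datetime(2024, 6, 1),
    )
    if constraint is not None:
        start, end = constraint  # tuple[datetime, datetime]
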
hindsight_api/engine/search/think_utils.py
@@ -2,41 +2,35 @@
 Think operation utilities for formulating answers based on agent and world facts.
 """
 
-import asyncio
 import logging
 import re
-from datetime import datetime, timezone
-from typing import Dict, List, Any
+from datetime import datetime
+
 from pydantic import BaseModel, Field
 
-from ..response_models import ReflectResult, MemoryFact, DispositionTraits
+from ..response_models import DispositionTraits, MemoryFact
 
 logger = logging.getLogger(__name__)
 
 
 class Opinion(BaseModel):
     """An opinion formed by the bank."""
+
     opinion: str = Field(description="The opinion or perspective with reasoning included")
     confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
 
 
 class OpinionExtractionResponse(BaseModel):
     """Response containing extracted opinions."""
-    opinions: List[Opinion] = Field(
-        default_factory=list,
-        description="List of opinions formed with their supporting reasons and confidence scores"
+
+    opinions: list[Opinion] = Field(
+        default_factory=list, description="List of opinions formed with their supporting reasons and confidence scores"
    )
 
 
 def describe_trait_level(value: int) -> str:
     """Convert trait value (1-5) to descriptive text."""
-    levels = {
-        1: "very low",
-        2: "low",
-        3: "moderate",
-        4: "high",
-        5: "very high"
-    }
+    levels = {1: "very low", 2: "low", 3: "moderate", 4: "high", 5: "very high"}
     return levels.get(value, "moderate")
 
 
@@ -47,7 +41,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to trust information but may question obvious inconsistencies.",
         3: "You have a balanced approach to information, neither too trusting nor too skeptical.",
         4: "You are somewhat skeptical and often question the reliability of information.",
-        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives."
+        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives.",
     }
 
     literalism_desc = {
@@ -55,7 +49,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to consider context and implied meaning alongside literal statements.",
         3: "You balance literal interpretation with contextual understanding.",
         4: "You prefer to interpret information more literally and precisely.",
-        5: "You interpret information very literally and focus on exact wording and commitments."
+        5: "You interpret information very literally and focus on exact wording and commitments.",
     }
 
     empathy_desc = {
@@ -63,7 +57,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You consider facts first but acknowledge emotional factors exist.",
         3: "You balance factual analysis with emotional understanding.",
         4: "You give significant weight to emotional context and human factors.",
-        5: "You strongly consider the emotional state and circumstances of others when forming memories."
+        5: "You strongly consider the emotional state and circumstances of others when forming memories.",
     }
 
     return f"""Your disposition traits:
@@ -72,7 +66,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
 - Empathy ({describe_trait_level(disposition.empathy)}): {empathy_desc.get(disposition.empathy, empathy_desc[3])}"""
 
 
-def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
+def format_facts_for_prompt(facts: list[MemoryFact]) -> str:
     """Format facts as JSON for LLM prompt."""
     import json
 
@@ -80,9 +74,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         return "[]"
     formatted = []
     for fact in facts:
-        fact_obj = {
-            "text": fact.text
-        }
+        fact_obj = {"text": fact.text}
 
         # Add context if available
         if fact.context:
@@ -94,7 +86,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         if isinstance(occurred_start, str):
             fact_obj["occurred_start"] = occurred_start
         elif isinstance(occurred_start, datetime):
-            fact_obj["occurred_start"] = occurred_start.strftime('%Y-%m-%d %H:%M:%S')
+            fact_obj["occurred_start"] = occurred_start.strftime("%Y-%m-%d %H:%M:%S")
 
         formatted.append(fact_obj)
 
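
For reference, each fact serializes with a "text" key, plus "context" and "occurred_start" only when present. Assuming the un-diffed tail of format_facts_for_prompt dumps the list with json.dumps (the tail is not shown in this hunk), two illustrative facts would render roughly as:

    facts_json = format_facts_for_prompt(facts)
    # '[{"text": "Moved to Berlin", "context": "relocation",
    #    "occurred_start": "2024-03-01 09:00:00"},
    #   {"text": "Prefers tea over coffee"}]'
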
@@ -176,16 +168,14 @@ def get_system_message(disposition: DispositionTraits) -> str:
     elif disposition.empathy <= 2:
         instructions.append("Focus on facts and outcomes rather than emotional context.")
 
-    disposition_instruction = " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
+    disposition_instruction = (
+        " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
+    )
 
     return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
 
 
-async def extract_opinions_from_text(
-    llm_config,
-    text: str,
-    query: str
-) -> List[Opinion]:
+async def extract_opinions_from_text(llm_config, text: str, query: str) -> list[Opinion]:
     """
     Extract opinions with reasons and confidence from text using LLM.
 
@@ -238,11 +228,14 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
     try:
         result = await llm_config.call(
             messages=[
-                {"role": "system", "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'."},
-                {"role": "user", "content": extraction_prompt}
+                {
+                    "role": "system",
+                    "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'.",
+                },
+                {"role": "user", "content": extraction_prompt},
             ],
             response_format=OpinionExtractionResponse,
-            scope="memory_extract_opinion"
+            scope="memory_extract_opinion",
         )
 
         # Format opinions with confidence score and convert to first-person
@@ -253,14 +246,18 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
 
             # Replace common third-person patterns with first-person
             def singularize_verb(verb):
-                if verb.endswith('es'):
+                if verb.endswith("es"):
                     return verb[:-1]  # believes -> believe
-                elif verb.endswith('s'):
+                elif verb.endswith("s"):
                     return verb[:-1]  # thinks -> think
                 return verb
 
             # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
-            match = re.match(r'^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$', opinion_text, re.IGNORECASE)
+            match = re.match(
+                r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
+                opinion_text,
+                re.IGNORECASE,
+            )
             if match:
                 verb = singularize_verb(match.group(2))
                 that_part = match.group(3) or ""  # Keep " that" if present
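
To see the rewrite in action, here is a self-contained trace of the reflowed pattern plus the singularize_verb logic (the sentence is illustrative):

    import re

    PATTERN = (
        r"^(The speaker|The user|They|It is believed) "
        r"(believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$"
    )

    text = "The speaker believes that the migration was worth it"
    m = re.match(PATTERN, text, re.IGNORECASE)
    if m:
        verb = m.group(2)                  # "believes"
        if verb.endswith("es") or verb.endswith("s"):
            verb = verb[:-1]               # singularize_verb: believes -> believe
        text = f"I {verb}{m.group(3) or ''}{m.group(4)}"
    # text == "I believe that the migration was worth it"
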
@@ -268,17 +265,96 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
                 opinion_text = f"I {verb}{that_part}{rest}"
 
             # If still doesn't start with first-person, prepend "I believe that "
-            first_person_starters = ["I think", "I believe", "I feel", "In my view", "I've come to believe", "Previously I"]
+            first_person_starters = [
+                "I think",
+                "I believe",
+                "I feel",
+                "In my view",
+                "I've come to believe",
+                "Previously I",
+            ]
             if not any(opinion_text.startswith(starter) for starter in first_person_starters):
                 opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]
 
-            formatted_opinions.append(Opinion(
-                opinion=opinion_text,
-                confidence=op.confidence
-            ))
+            formatted_opinions.append(Opinion(opinion=opinion_text, confidence=op.confidence))
 
         return formatted_opinions
 
     except Exception as e:
         logger.warning(f"Failed to extract opinions: {str(e)}")
         return []
+
+
+async def reflect(
+    llm_config,
+    query: str,
+    experience_facts: list[str] = None,
+    world_facts: list[str] = None,
+    opinion_facts: list[str] = None,
+    name: str = "Assistant",
+    disposition: DispositionTraits = None,
+    background: str = "",
+    context: str = None,
+) -> str:
+    """
+    Standalone reflect function for generating answers based on facts.
+
+    This is a static version of the reflect operation that can be called
+    without a MemoryEngine instance, useful for testing.
+
+    Args:
+        llm_config: LLM provider instance
+        query: Question to answer
+        experience_facts: List of experience/agent fact strings
+        world_facts: List of world fact strings
+        opinion_facts: List of opinion fact strings
+        name: Name of the agent/persona
+        disposition: Disposition traits (defaults to neutral)
+        background: Background information
+        context: Additional context for the prompt
+
+    Returns:
+        Generated answer text
+    """
+    # Default disposition if not provided
+    if disposition is None:
+        disposition = DispositionTraits(skepticism=3, literalism=3, empathy=3)
+
+    # Convert string lists to MemoryFact format for formatting
+    def to_memory_facts(facts: list[str], fact_type: str) -> list[MemoryFact]:
+        if not facts:
+            return []
+        return [MemoryFact(id=f"test-{i}", text=f, fact_type=fact_type) for i, f in enumerate(facts)]
+
+    agent_results = to_memory_facts(experience_facts or [], "experience")
+    world_results = to_memory_facts(world_facts or [], "world")
+    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
+
+    # Format facts for prompt
+    agent_facts_text = format_facts_for_prompt(agent_results)
+    world_facts_text = format_facts_for_prompt(world_results)
+    opinion_facts_text = format_facts_for_prompt(opinion_results)
+
+    # Build prompt
+    prompt = build_think_prompt(
+        agent_facts_text=agent_facts_text,
+        world_facts_text=world_facts_text,
+        opinion_facts_text=opinion_facts_text,
+        query=query,
+        name=name,
+        disposition=disposition,
+        background=background,
+        context=context,
+    )
+
+    system_message = get_system_message(disposition)
+
+    # Call LLM
+    answer_text = await llm_config.call(
+        messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}],
+        scope="memory_think",
+        temperature=0.9,
+        max_completion_tokens=1000,
+    )
+
+    return answer_text.strip()
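
The new module-level reflect coroutine can be exercised without a MemoryEngine. A hedged usage sketch (the llm_config object, which must expose the async call method used above, and all fact strings are illustrative):

    # inside an async context
    answer = await reflect(
        llm_config,                     # any provider exposing async .call(...)
        query="How do you feel about the new search pipeline?",
        experience_facts=["I benchmarked the RRF merge last month"],
        world_facts=["Version 0.1.6 adds graph and MPFP retrieval"],
        opinion_facts=["I think hybrid retrieval beats any single method"],
        name="Hindsight",
        background="An agent memory service",
    )
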
hindsight_api/engine/search/trace.py
@@ -4,15 +4,18 @@ Search trace models for debugging and visualization.
 These Pydantic models define the structure of search traces, capturing
 every step of the spreading activation search process for analysis.
 """
+
 from datetime import datetime
-from typing import List, Optional, Dict, Any, Literal
+from typing import Any, Literal
+
 from pydantic import BaseModel, Field
 
 
 class QueryInfo(BaseModel):
     """Information about the search query."""
+
     query_text: str = Field(description="Original query text")
-    query_embedding: List[float] = Field(description="Generated query embedding vector")
+    query_embedding: list[float] = Field(description="Generated query embedding vector")
     timestamp: datetime = Field(description="When the query was executed")
     budget: int = Field(description="Maximum nodes to explore")
     max_tokens: int = Field(description="Maximum tokens to return in results")
@@ -20,6 +23,7 @@ class QueryInfo(BaseModel):
 
 class EntryPoint(BaseModel):
     """An entry point node selected for search."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     similarity_score: float = Field(description="Cosine similarity to query", ge=0.0, le=1.0)
@@ -28,6 +32,7 @@
 
 class WeightComponents(BaseModel):
     """Breakdown of weight calculation components."""
+
     activation: float = Field(description="Activation from spreading (can exceed 1.0 through accumulation)", ge=0.0)
     semantic_similarity: float = Field(description="Semantic similarity to query", ge=0.0, le=1.0)
     recency: float = Field(description="Recency weight", ge=0.0, le=1.0)
@@ -43,98 +48,120 @@
 
 class LinkInfo(BaseModel):
     """Information about a link to a neighbor."""
+
     to_node_id: str = Field(description="Target node ID")
     link_type: Literal["temporal", "semantic", "entity"] = Field(description="Type of link")
-    link_weight: float = Field(description="Weight of the link (can exceed 1.0 when aggregating multiple connections)", ge=0.0)
-    entity_id: Optional[str] = Field(default=None, description="Entity ID if link_type is 'entity'")
-    new_activation: Optional[float] = Field(default=None, description="Activation that would be passed to neighbor (None for supplementary links)")
+    link_weight: float = Field(
+        description="Weight of the link (can exceed 1.0 when aggregating multiple connections)", ge=0.0
+    )
+    entity_id: str | None = Field(default=None, description="Entity ID if link_type is 'entity'")
+    new_activation: float | None = Field(
+        default=None, description="Activation that would be passed to neighbor (None for supplementary links)"
+    )
     followed: bool = Field(description="Whether this link was followed (or pruned)")
-    prune_reason: Optional[str] = Field(default=None, description="Why link was not followed (if not followed)")
-    is_supplementary: bool = Field(default=False, description="Whether this is a supplementary link (multiple connections to same node)")
+    prune_reason: str | None = Field(default=None, description="Why link was not followed (if not followed)")
+    is_supplementary: bool = Field(
+        default=False, description="Whether this is a supplementary link (multiple connections to same node)"
+    )
 
 
 class NodeVisit(BaseModel):
     """Information about visiting a node during search."""
+
     step: int = Field(description="Step number in search (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(description="Memory unit context")
-    event_date: Optional[datetime] = Field(default=None, description="When the memory occurred")
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
     access_count: int = Field(description="Number of times accessed before this search")
 
     # How this node was reached
     is_entry_point: bool = Field(description="Whether this is an entry point")
-    parent_node_id: Optional[str] = Field(default=None, description="Node that led to this one")
-    link_type: Optional[Literal["temporal", "semantic", "entity"]] = Field(default=None, description="Type of link from parent")
-    link_weight: Optional[float] = Field(default=None, description="Weight of link from parent")
+    parent_node_id: str | None = Field(default=None, description="Node that led to this one")
+    link_type: Literal["temporal", "semantic", "entity"] | None = Field(
+        default=None, description="Type of link from parent"
+    )
+    link_weight: float | None = Field(default=None, description="Weight of link from parent")
 
     # Weights
     weights: WeightComponents = Field(description="Weight calculation breakdown")
 
     # Neighbors discovered from this node
-    neighbors_explored: List[LinkInfo] = Field(default_factory=list, description="Links explored from this node")
+    neighbors_explored: list[LinkInfo] = Field(default_factory=list, description="Links explored from this node")
 
     # Ranking
-    final_rank: Optional[int] = Field(default=None, description="Final rank in results (1-based, None if not in top-k)")
+    final_rank: int | None = Field(default=None, description="Final rank in results (1-based, None if not in top-k)")
 
 
 class PruningDecision(BaseModel):
     """Records when a node was considered but not visited."""
+
     node_id: str = Field(description="Node that was pruned")
-    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(description="Why it was pruned")
+    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(
+        description="Why it was pruned"
+    )
     activation: float = Field(description="Activation value when pruned")
     would_have_been_step: int = Field(description="What step it would have been if visited")
 
 
 class SearchPhaseMetrics(BaseModel):
     """Performance metrics for a search phase."""
+
     phase_name: str = Field(description="Name of the phase")
     duration_seconds: float = Field(description="Time taken in seconds")
-    details: Dict[str, Any] = Field(default_factory=dict, description="Additional phase-specific metrics")
+    details: dict[str, Any] = Field(default_factory=dict, description="Additional phase-specific metrics")
 
 
 class RetrievalResult(BaseModel):
     """A single result from a retrieval method."""
+
     rank: int = Field(description="Rank in this retrieval method (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(default="", description="Memory unit context")
-    event_date: Optional[datetime] = Field(default=None, description="When the memory occurred")
-    fact_type: Optional[str] = Field(default=None, description="Fact type (world, experience, opinion)")
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
+    fact_type: str | None = Field(default=None, description="Fact type (world, experience, opinion)")
     score: float = Field(description="Score from this retrieval method")
     score_name: str = Field(description="Name of the score (e.g., 'similarity', 'bm25_score', 'activation')")
 
 
 class RetrievalMethodResults(BaseModel):
     """Results from a single retrieval method."""
+
     method_name: Literal["semantic", "bm25", "graph", "temporal"] = Field(description="Name of retrieval method")
-    results: List[RetrievalResult] = Field(description="Retrieved results with ranks")
+    fact_type: str | None = Field(
+        default=None, description="Fact type this retrieval was for (world, experience, opinion)"
+    )
+    results: list[RetrievalResult] = Field(description="Retrieved results with ranks")
     duration_seconds: float = Field(description="Time taken for this retrieval")
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Method-specific metadata")
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Method-specific metadata")
 
 
 class RRFMergeResult(BaseModel):
     """A result after RRF merging."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rrf_score: float = Field(description="Reciprocal Rank Fusion score")
-    source_ranks: Dict[str, int] = Field(description="Rank in each source that contributed (method_name -> rank)")
+    source_ranks: dict[str, int] = Field(description="Rank in each source that contributed (method_name -> rank)")
     final_rrf_rank: int = Field(description="Rank after RRF merge (1-based)")
 
 
 class RerankedResult(BaseModel):
     """A result after reranking."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rerank_score: float = Field(description="Final reranking score")
     rerank_rank: int = Field(description="Rank after reranking (1-based)")
     rrf_rank: int = Field(description="Original RRF rank before reranking")
     rank_change: int = Field(description="Change in rank (positive = moved up)")
-    score_components: Dict[str, float] = Field(default_factory=dict, description="Score breakdown")
+    score_components: dict[str, float] = Field(default_factory=dict, description="Score breakdown")
 
 
 class SearchSummary(BaseModel):
     """Summary statistics about the search."""
+
     total_nodes_visited: int = Field(description="Total nodes visited")
     total_nodes_pruned: int = Field(description="Total nodes pruned")
     entry_points_found: int = Field(description="Number of entry points")
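
RRFMergeResult above pairs an rrf_score with per-method source_ranks. The merge itself is not shown in this diff, but the textbook Reciprocal Rank Fusion formula (with the conventional k = 60, an assumption here) would compute the score as:

    def rrf_score(source_ranks: dict[str, int], k: int = 60) -> float:
        # sum over contributing methods of 1 / (k + rank)
        return sum(1.0 / (k + rank) for rank in source_ranks.values())

    # a node ranked 1st by semantic and 3rd by bm25:
    # 1/61 + 1/63 ≈ 0.0164 + 0.0159 ≈ 0.0323
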
@@ -149,33 +176,36 @@
     entity_links_followed: int = Field(default=0, description="Entity links followed")
 
     # Phase timings
-    phase_metrics: List[SearchPhaseMetrics] = Field(default_factory=list, description="Metrics for each phase")
+    phase_metrics: list[SearchPhaseMetrics] = Field(default_factory=list, description="Metrics for each phase")
 
 
 class SearchTrace(BaseModel):
     """Complete trace of a search operation."""
+
     query: QueryInfo = Field(description="Query information")
 
     # New 4-way retrieval architecture
-    retrieval_results: List[RetrievalMethodResults] = Field(default_factory=list, description="Results from each retrieval method")
-    rrf_merged: List[RRFMergeResult] = Field(default_factory=list, description="Results after RRF merging")
-    reranked: List[RerankedResult] = Field(default_factory=list, description="Results after reranking")
+    retrieval_results: list[RetrievalMethodResults] = Field(
+        default_factory=list, description="Results from each retrieval method"
+    )
+    rrf_merged: list[RRFMergeResult] = Field(default_factory=list, description="Results after RRF merging")
+    reranked: list[RerankedResult] = Field(default_factory=list, description="Results after reranking")
 
     # Legacy fields (kept for backward compatibility with graph/temporal visualizations)
-    entry_points: List[EntryPoint] = Field(default_factory=list, description="Entry points selected for search (legacy)")
-    visits: List[NodeVisit] = Field(default_factory=list, description="All nodes visited during search (legacy, for graph viz)")
-    pruned: List[PruningDecision] = Field(default_factory=list, description="Nodes that were pruned (legacy)")
+    entry_points: list[EntryPoint] = Field(
+        default_factory=list, description="Entry points selected for search (legacy)"
+    )
+    visits: list[NodeVisit] = Field(
+        default_factory=list, description="All nodes visited during search (legacy, for graph viz)"
+    )
+    pruned: list[PruningDecision] = Field(default_factory=list, description="Nodes that were pruned (legacy)")
 
     summary: SearchSummary = Field(description="Summary statistics")
 
     # Final results (for comparison with visits)
-    final_results: List[Dict[str, Any]] = Field(description="Final ranked results returned to user")
+    final_results: list[dict[str, Any]] = Field(description="Final ranked results returned to user")
 
-    model_config = {
-        "json_encoders": {
-            datetime: lambda v: v.isoformat()
-        }
-    }
+    model_config = {"json_encoders": {datetime: lambda v: v.isoformat()}}
 
     def to_json(self, **kwargs) -> str:
         """Export trace as JSON string."""
@@ -185,14 +215,14 @@ class SearchTrace(BaseModel):
         """Export trace as dictionary."""
         return self.model_dump()
 
-    def get_visit_by_node_id(self, node_id: str) -> Optional[NodeVisit]:
+    def get_visit_by_node_id(self, node_id: str) -> NodeVisit | None:
         """Find a visit by node ID."""
         for visit in self.visits:
             if visit.node_id == node_id:
                 return visit
         return None
 
-    def get_search_path_to_node(self, node_id: str) -> List[NodeVisit]:
+    def get_search_path_to_node(self, node_id: str) -> list[NodeVisit]:
         """Get the path from entry point to a specific node."""
         path = []
         current_visit = self.get_visit_by_node_id(node_id)
@@ -206,10 +236,10 @@
 
         return path
 
-    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) -> List[NodeVisit]:
+    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) -> list[NodeVisit]:
         """Get all nodes reached via a specific link type."""
         return [v for v in self.visits if v.link_type == link_type]
 
-    def get_entry_point_nodes(self) -> List[NodeVisit]:
+    def get_entry_point_nodes(self) -> list[NodeVisit]:
         """Get all entry point visits."""
         return [v for v in self.visits if v.is_entry_point]
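
The retyped SearchTrace helpers read naturally at call sites. A usage sketch (the trace instance and node id are illustrative):

    visit = trace.get_visit_by_node_id("node-42")          # NodeVisit | None
    path = trace.get_search_path_to_node("node-42")        # entry point -> node
    entity_hops = trace.get_nodes_by_link_type("entity")   # visits reached via entity links
    entries = trace.get_entry_point_nodes()                # all entry-point visits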