hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +31 -33
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +17 -12
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +23 -27
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +74 -88
- hindsight_api/engine/memory_engine.py +663 -673
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +15 -1
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +234 -0
- hindsight_api/engine/search/mpfp_retrieval.py +438 -0
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +388 -193
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -38
- hindsight_api/engine/search/tracer.py +49 -35
- hindsight_api/engine/search/types.py +22 -16
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +64 -337
- hindsight_api/server.py +3 -6
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
- hindsight_api-0.1.6.dist-info/RECORD +64 -0
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.4.dist-info/RECORD +0 -61
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/scoring.py

@@ -4,11 +4,11 @@ Scoring functions for memory search and retrieval.
 Includes recency weighting, frequency weighting, temporal proximity,
 and similarity calculations used in memory activation and ranking.
 """
+
 from datetime import datetime
-from typing import List
 
 
-def cosine_similarity(vec1:
+def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
     """
     Calculate cosine similarity between two vectors.
 

@@ -58,6 +58,7 @@ def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -
     Weight between 0 and 1
     """
     import math
+
     # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
     # This decays much slower than exponential, giving better long-term differentiation
     normalized_age = days_since / half_life_days

@@ -79,6 +80,7 @@ def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> flo
     Weight between 1.0 and max_boost
     """
     import math
+
     if access_count <= 0:
         return 1.0
 

@@ -116,11 +118,7 @@ def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime)
     return midpoint
 
 
-def calculate_temporal_proximity(
-    anchor_a: datetime,
-    anchor_b: datetime,
-    half_life_days: float = 30.0
-) -> float:
+def calculate_temporal_proximity(anchor_a: datetime, anchor_b: datetime, half_life_days: float = 30.0) -> float:
     """
     Calculate temporal proximity between two temporal anchors.
 
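The scoring.py changes are cosmetic (builtin generic type hints, blank lines, a collapsed signature), but the surviving comments document the recency model: a logarithmic decay of the form 1 / (1 + log(1 + days_since/half_life)). A minimal sketch of that formula, assuming the function simply returns the value the comment describes:

```python
import math


def recency_weight_sketch(days_since: float, half_life_days: float = 365.0) -> float:
    """Illustrative only: mirrors the comment in calculate_recency_weight,
    not necessarily the package's exact implementation."""
    normalized_age = days_since / half_life_days
    return 1.0 / (1.0 + math.log(1.0 + normalized_age))


print(recency_weight_sketch(0.0))     # 1.0 for a brand-new memory
print(recency_weight_sketch(365.0))   # ~0.59 after one half-life
print(recency_weight_sketch(3650.0))  # ~0.29 after ten half-lives
```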
hindsight_api/engine/search/temporal_extraction.py

@@ -4,16 +4,16 @@ Temporal extraction for time-aware search queries.
 Handles natural language temporal expressions using transformer-based query analysis.
 """
 
-from typing import Optional, Tuple
-from datetime import datetime
 import logging
-from
+from datetime import datetime
+
+from hindsight_api.engine.query_analyzer import DateparserQueryAnalyzer, QueryAnalyzer
 
 logger = logging.getLogger(__name__)
 
 # Global default analyzer instance
 # Can be overridden by passing a custom analyzer to extract_temporal_constraint
-_default_analyzer:
+_default_analyzer: QueryAnalyzer | None = None
 
 
 def get_default_analyzer() -> QueryAnalyzer:

@@ -33,9 +33,9 @@ def get_default_analyzer() -> QueryAnalyzer:
 
 def extract_temporal_constraint(
     query: str,
-    reference_date:
-    analyzer:
-) ->
+    reference_date: datetime | None = None,
+    analyzer: QueryAnalyzer | None = None,
+) -> tuple[datetime, datetime] | None:
     """
     Extract temporal constraint from query.
 

@@ -55,10 +55,7 @@ def extract_temporal_constraint(
     analysis = analyzer.analyze(query, reference_date)
 
     if analysis.temporal_constraint:
-        result = (
-            analysis.temporal_constraint.start_date,
-            analysis.temporal_constraint.end_date
-        )
+        result = (analysis.temporal_constraint.start_date, analysis.temporal_constraint.end_date)
         return result
 
     return None
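temporal_extraction.py drops typing.Optional/Tuple in favour of PEP 604/585 syntax; the public signature is otherwise unchanged. A hedged usage sketch of the new signature (the query and dates are invented, and which phrases the default DateparserQueryAnalyzer actually resolves is not shown in this diff):

```python
from datetime import datetime

from hindsight_api.engine.search.temporal_extraction import extract_temporal_constraint

# Hypothetical query; resolution of "last week" depends on the analyzer.
constraint = extract_temporal_constraint(
    "what did we discuss last week?",
    reference_date=datetime(2024, 6, 15),
)

if constraint is not None:
    start, end = constraint  # tuple[datetime, datetime]
    print(f"restrict search to {start:%Y-%m-%d} .. {end:%Y-%m-%d}")
else:
    print("no temporal constraint detected")
```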
hindsight_api/engine/search/think_utils.py

@@ -2,41 +2,35 @@
 Think operation utilities for formulating answers based on agent and world facts.
 """
 
-import asyncio
 import logging
 import re
-from datetime import datetime
-
+from datetime import datetime
+
 from pydantic import BaseModel, Field
 
-from ..response_models import
+from ..response_models import DispositionTraits, MemoryFact
 
 logger = logging.getLogger(__name__)
 
 
 class Opinion(BaseModel):
     """An opinion formed by the bank."""
+
     opinion: str = Field(description="The opinion or perspective with reasoning included")
     confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
 
 
 class OpinionExtractionResponse(BaseModel):
     """Response containing extracted opinions."""
-
-
-        description="List of opinions formed with their supporting reasons and confidence scores"
+
+    opinions: list[Opinion] = Field(
+        default_factory=list, description="List of opinions formed with their supporting reasons and confidence scores"
     )
 
 
 def describe_trait_level(value: int) -> str:
     """Convert trait value (1-5) to descriptive text."""
-    levels = {
-        1: "very low",
-        2: "low",
-        3: "moderate",
-        4: "high",
-        5: "very high"
-    }
+    levels = {1: "very low", 2: "low", 3: "moderate", 4: "high", 5: "very high"}
     return levels.get(value, "moderate")
 
 

@@ -47,7 +41,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to trust information but may question obvious inconsistencies.",
         3: "You have a balanced approach to information, neither too trusting nor too skeptical.",
         4: "You are somewhat skeptical and often question the reliability of information.",
-        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives."
+        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives.",
     }
 
     literalism_desc = {

@@ -55,7 +49,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to consider context and implied meaning alongside literal statements.",
         3: "You balance literal interpretation with contextual understanding.",
         4: "You prefer to interpret information more literally and precisely.",
-        5: "You interpret information very literally and focus on exact wording and commitments."
+        5: "You interpret information very literally and focus on exact wording and commitments.",
     }
 
     empathy_desc = {

@@ -63,7 +57,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You consider facts first but acknowledge emotional factors exist.",
         3: "You balance factual analysis with emotional understanding.",
         4: "You give significant weight to emotional context and human factors.",
-        5: "You strongly consider the emotional state and circumstances of others when forming memories."
+        5: "You strongly consider the emotional state and circumstances of others when forming memories.",
     }
 
     return f"""Your disposition traits:

@@ -72,7 +66,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
 - Empathy ({describe_trait_level(disposition.empathy)}): {empathy_desc.get(disposition.empathy, empathy_desc[3])}"""
 
 
-def format_facts_for_prompt(facts:
+def format_facts_for_prompt(facts: list[MemoryFact]) -> str:
     """Format facts as JSON for LLM prompt."""
     import json
 

@@ -80,9 +74,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         return "[]"
     formatted = []
     for fact in facts:
-        fact_obj = {
-            "text": fact.text
-        }
+        fact_obj = {"text": fact.text}
 
         # Add context if available
         if fact.context:

@@ -94,7 +86,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         if isinstance(occurred_start, str):
             fact_obj["occurred_start"] = occurred_start
         elif isinstance(occurred_start, datetime):
-            fact_obj["occurred_start"] = occurred_start.strftime(
+            fact_obj["occurred_start"] = occurred_start.strftime("%Y-%m-%d %H:%M:%S")
 
         formatted.append(fact_obj)
 
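For reference, format_facts_for_prompt serializes each fact into a small JSON object with "text", an optional "context", and occurred_start rendered as "%Y-%m-%d %H:%M:%S". A sketch of that payload shape with made-up values (the real function consumes MemoryFact objects, not dicts):

```python
import json
from datetime import datetime

# Illustrative payload only; field values are invented.
facts = [
    {
        "text": "Alice moved to Berlin",
        "context": "conversation about relocation",
        "occurred_start": datetime(2024, 3, 1, 9, 30).strftime("%Y-%m-%d %H:%M:%S"),
    },
    {"text": "The project deadline slipped by two weeks"},
]

print(json.dumps(facts, indent=2))
```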
@@ -176,16 +168,14 @@ def get_system_message(disposition: DispositionTraits) -> str:
     elif disposition.empathy <= 2:
         instructions.append("Focus on facts and outcomes rather than emotional context.")
 
-    disposition_instruction =
+    disposition_instruction = (
+        " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
+    )
 
     return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
 
 
-async def extract_opinions_from_text(
-    llm_config,
-    text: str,
-    query: str
-) -> List[Opinion]:
+async def extract_opinions_from_text(llm_config, text: str, query: str) -> list[Opinion]:
     """
     Extract opinions with reasons and confidence from text using LLM.
 

@@ -238,11 +228,14 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
     try:
         result = await llm_config.call(
             messages=[
-                {
-
+                {
+                    "role": "system",
+                    "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'.",
+                },
+                {"role": "user", "content": extraction_prompt},
             ],
             response_format=OpinionExtractionResponse,
-            scope="memory_extract_opinion"
+            scope="memory_extract_opinion",
         )
 
         # Format opinions with confidence score and convert to first-person

@@ -253,14 +246,18 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
 
             # Replace common third-person patterns with first-person
             def singularize_verb(verb):
-                if verb.endswith(
+                if verb.endswith("es"):
                     return verb[:-1]  # believes -> believe
-                elif verb.endswith(
+                elif verb.endswith("s"):
                     return verb[:-1]  # thinks -> think
                 return verb
 
             # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
-            match = re.match(
+            match = re.match(
+                r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
+                opinion_text,
+                re.IGNORECASE,
+            )
             if match:
                 verb = singularize_verb(match.group(2))
                 that_part = match.group(3) or ""  # Keep " that" if present
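The reformatted block above is the third-person to first-person rewrite applied to extracted opinions. A standalone sketch using the same regex and verb handling shown in the hunk (packaged as a helper here purely for illustration):

```python
import re


def _singularize_verb(verb: str) -> str:
    # Same idea as the helper in the diff: "believes" -> "believe", "thinks" -> "think".
    if verb.endswith("es") or verb.endswith("s"):
        return verb[:-1]
    return verb


def to_first_person(opinion_text: str) -> str:
    """Illustrative reproduction; in the package this runs inside extract_opinions_from_text."""
    match = re.match(
        r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
        opinion_text,
        re.IGNORECASE,
    )
    if match:
        verb = _singularize_verb(match.group(2))
        that_part = match.group(3) or ""
        rest = match.group(4)
        return f"I {verb}{that_part}{rest}"
    return opinion_text


print(to_first_person("The speaker believes that the plan is too risky"))
# -> "I believe that the plan is too risky"
```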
@@ -268,17 +265,96 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
                 opinion_text = f"I {verb}{that_part}{rest}"
 
             # If still doesn't start with first-person, prepend "I believe that "
-            first_person_starters = [
+            first_person_starters = [
+                "I think",
+                "I believe",
+                "I feel",
+                "In my view",
+                "I've come to believe",
+                "Previously I",
+            ]
             if not any(opinion_text.startswith(starter) for starter in first_person_starters):
                 opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]
 
-            formatted_opinions.append(Opinion(
-                opinion=opinion_text,
-                confidence=op.confidence
-            ))
+            formatted_opinions.append(Opinion(opinion=opinion_text, confidence=op.confidence))
 
         return formatted_opinions
 
     except Exception as e:
         logger.warning(f"Failed to extract opinions: {str(e)}")
         return []
+
+
+async def reflect(
+    llm_config,
+    query: str,
+    experience_facts: list[str] = None,
+    world_facts: list[str] = None,
+    opinion_facts: list[str] = None,
+    name: str = "Assistant",
+    disposition: DispositionTraits = None,
+    background: str = "",
+    context: str = None,
+) -> str:
+    """
+    Standalone reflect function for generating answers based on facts.
+
+    This is a static version of the reflect operation that can be called
+    without a MemoryEngine instance, useful for testing.
+
+    Args:
+        llm_config: LLM provider instance
+        query: Question to answer
+        experience_facts: List of experience/agent fact strings
+        world_facts: List of world fact strings
+        opinion_facts: List of opinion fact strings
+        name: Name of the agent/persona
+        disposition: Disposition traits (defaults to neutral)
+        background: Background information
+        context: Additional context for the prompt
+
+    Returns:
+        Generated answer text
+    """
+    # Default disposition if not provided
+    if disposition is None:
+        disposition = DispositionTraits(skepticism=3, literalism=3, empathy=3)
+
+    # Convert string lists to MemoryFact format for formatting
+    def to_memory_facts(facts: list[str], fact_type: str) -> list[MemoryFact]:
+        if not facts:
+            return []
+        return [MemoryFact(id=f"test-{i}", text=f, fact_type=fact_type) for i, f in enumerate(facts)]
+
+    agent_results = to_memory_facts(experience_facts or [], "experience")
+    world_results = to_memory_facts(world_facts or [], "world")
+    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
+
+    # Format facts for prompt
+    agent_facts_text = format_facts_for_prompt(agent_results)
+    world_facts_text = format_facts_for_prompt(world_results)
+    opinion_facts_text = format_facts_for_prompt(opinion_results)
+
+    # Build prompt
+    prompt = build_think_prompt(
+        agent_facts_text=agent_facts_text,
+        world_facts_text=world_facts_text,
+        opinion_facts_text=opinion_facts_text,
+        query=query,
+        name=name,
+        disposition=disposition,
+        background=background,
+        context=context,
+    )
+
+    system_message = get_system_message(disposition)
+
+    # Call LLM
+    answer_text = await llm_config.call(
+        messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}],
+        scope="memory_think",
+        temperature=0.9,
+        max_completion_tokens=1000,
+    )
+
+    return answer_text.strip()
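The functional addition in think_utils.py is reflect(), which packages fact formatting, prompt construction, and the LLM call so an answer can be generated without a MemoryEngine. A hedged usage sketch: the stub class below only imitates the call() shape reflect() relies on, and the fact strings are invented.

```python
import asyncio

from hindsight_api.engine.response_models import DispositionTraits
from hindsight_api.engine.search.think_utils import reflect


class StubLLM:
    """Stand-in for the package's LLM wrapper, exposing only the call() used by reflect()."""

    async def call(self, messages, scope, temperature, max_completion_tokens):
        return "I think the rollout is worth trying, but I'd stage it carefully."


async def main() -> None:
    answer = await reflect(
        StubLLM(),
        query="Should we adopt the new deployment pipeline?",
        experience_facts=["I migrated the staging environment last month."],
        world_facts=["The new pipeline halved build times for another team."],
        opinion_facts=["I believe gradual rollouts reduce risk."],
        disposition=DispositionTraits(skepticism=4, literalism=2, empathy=3),
    )
    print(answer)


asyncio.run(main())
```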
hindsight_api/engine/search/trace.py

@@ -4,15 +4,18 @@ Search trace models for debugging and visualization.
 These Pydantic models define the structure of search traces, capturing
 every step of the spreading activation search process for analysis.
 """
+
 from datetime import datetime
-from typing import
+from typing import Any, Literal
+
 from pydantic import BaseModel, Field
 
 
 class QueryInfo(BaseModel):
     """Information about the search query."""
+
     query_text: str = Field(description="Original query text")
-    query_embedding:
+    query_embedding: list[float] = Field(description="Generated query embedding vector")
     timestamp: datetime = Field(description="When the query was executed")
     budget: int = Field(description="Maximum nodes to explore")
     max_tokens: int = Field(description="Maximum tokens to return in results")

@@ -20,6 +23,7 @@ class QueryInfo(BaseModel):
 
 class EntryPoint(BaseModel):
     """An entry point node selected for search."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     similarity_score: float = Field(description="Cosine similarity to query", ge=0.0, le=1.0)

@@ -28,6 +32,7 @@ class EntryPoint(BaseModel):
 
 class WeightComponents(BaseModel):
     """Breakdown of weight calculation components."""
+
     activation: float = Field(description="Activation from spreading (can exceed 1.0 through accumulation)", ge=0.0)
     semantic_similarity: float = Field(description="Semantic similarity to query", ge=0.0, le=1.0)
     recency: float = Field(description="Recency weight", ge=0.0, le=1.0)

@@ -43,98 +48,120 @@ class WeightComponents(BaseModel):
 
 class LinkInfo(BaseModel):
     """Information about a link to a neighbor."""
+
     to_node_id: str = Field(description="Target node ID")
     link_type: Literal["temporal", "semantic", "entity"] = Field(description="Type of link")
-    link_weight: float = Field(
-
-
+    link_weight: float = Field(
+        description="Weight of the link (can exceed 1.0 when aggregating multiple connections)", ge=0.0
+    )
+    entity_id: str | None = Field(default=None, description="Entity ID if link_type is 'entity'")
+    new_activation: float | None = Field(
+        default=None, description="Activation that would be passed to neighbor (None for supplementary links)"
+    )
     followed: bool = Field(description="Whether this link was followed (or pruned)")
-    prune_reason:
-    is_supplementary: bool = Field(
+    prune_reason: str | None = Field(default=None, description="Why link was not followed (if not followed)")
+    is_supplementary: bool = Field(
+        default=False, description="Whether this is a supplementary link (multiple connections to same node)"
+    )
 
 
 class NodeVisit(BaseModel):
     """Information about visiting a node during search."""
+
     step: int = Field(description="Step number in search (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(description="Memory unit context")
-    event_date:
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
     access_count: int = Field(description="Number of times accessed before this search")
 
     # How this node was reached
     is_entry_point: bool = Field(description="Whether this is an entry point")
-    parent_node_id:
-    link_type:
-
+    parent_node_id: str | None = Field(default=None, description="Node that led to this one")
+    link_type: Literal["temporal", "semantic", "entity"] | None = Field(
+        default=None, description="Type of link from parent"
+    )
+    link_weight: float | None = Field(default=None, description="Weight of link from parent")
 
     # Weights
     weights: WeightComponents = Field(description="Weight calculation breakdown")
 
     # Neighbors discovered from this node
-    neighbors_explored:
+    neighbors_explored: list[LinkInfo] = Field(default_factory=list, description="Links explored from this node")
 
     # Ranking
-    final_rank:
+    final_rank: int | None = Field(default=None, description="Final rank in results (1-based, None if not in top-k)")
 
 
 class PruningDecision(BaseModel):
     """Records when a node was considered but not visited."""
+
     node_id: str = Field(description="Node that was pruned")
-    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(
+    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(
+        description="Why it was pruned"
+    )
     activation: float = Field(description="Activation value when pruned")
     would_have_been_step: int = Field(description="What step it would have been if visited")
 
 
 class SearchPhaseMetrics(BaseModel):
     """Performance metrics for a search phase."""
+
     phase_name: str = Field(description="Name of the phase")
     duration_seconds: float = Field(description="Time taken in seconds")
-    details:
+    details: dict[str, Any] = Field(default_factory=dict, description="Additional phase-specific metrics")
 
 
 class RetrievalResult(BaseModel):
     """A single result from a retrieval method."""
+
     rank: int = Field(description="Rank in this retrieval method (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(default="", description="Memory unit context")
-    event_date:
-    fact_type:
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
+    fact_type: str | None = Field(default=None, description="Fact type (world, experience, opinion)")
     score: float = Field(description="Score from this retrieval method")
     score_name: str = Field(description="Name of the score (e.g., 'similarity', 'bm25_score', 'activation')")
 
 
 class RetrievalMethodResults(BaseModel):
     """Results from a single retrieval method."""
+
     method_name: Literal["semantic", "bm25", "graph", "temporal"] = Field(description="Name of retrieval method")
-
+    fact_type: str | None = Field(
+        default=None, description="Fact type this retrieval was for (world, experience, opinion)"
+    )
+    results: list[RetrievalResult] = Field(description="Retrieved results with ranks")
     duration_seconds: float = Field(description="Time taken for this retrieval")
-    metadata:
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Method-specific metadata")
 
 
 class RRFMergeResult(BaseModel):
     """A result after RRF merging."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rrf_score: float = Field(description="Reciprocal Rank Fusion score")
-    source_ranks:
+    source_ranks: dict[str, int] = Field(description="Rank in each source that contributed (method_name -> rank)")
     final_rrf_rank: int = Field(description="Rank after RRF merge (1-based)")
 
 
 class RerankedResult(BaseModel):
     """A result after reranking."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rerank_score: float = Field(description="Final reranking score")
     rerank_rank: int = Field(description="Rank after reranking (1-based)")
     rrf_rank: int = Field(description="Original RRF rank before reranking")
     rank_change: int = Field(description="Change in rank (positive = moved up)")
-    score_components:
+    score_components: dict[str, float] = Field(default_factory=dict, description="Score breakdown")
 
 
 class SearchSummary(BaseModel):
     """Summary statistics about the search."""
+
     total_nodes_visited: int = Field(description="Total nodes visited")
     total_nodes_pruned: int = Field(description="Total nodes pruned")
     entry_points_found: int = Field(description="Number of entry points")
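RRFMergeResult records a Reciprocal Rank Fusion score together with the per-method ranks that produced it. The exact constant and formula live in fusion.py and are not part of this diff; the sketch below is the conventional RRF sum of 1/(k + rank) with the commonly used k = 60, shown only to illustrate how source_ranks relates to rrf_score:

```python
def rrf_score(source_ranks: dict[str, int], k: int = 60) -> float:
    """Conventional Reciprocal Rank Fusion over 1-based ranks (illustrative, not fusion.py)."""
    return sum(1.0 / (k + rank) for rank in source_ranks.values())


# A memory ranked 1st by semantic retrieval and 3rd by BM25:
print(rrf_score({"semantic": 1, "bm25": 3}))  # 1/61 + 1/63 ~= 0.0323
```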
@@ -149,33 +176,36 @@
     entity_links_followed: int = Field(default=0, description="Entity links followed")
 
     # Phase timings
-    phase_metrics:
+    phase_metrics: list[SearchPhaseMetrics] = Field(default_factory=list, description="Metrics for each phase")
 
 
 class SearchTrace(BaseModel):
     """Complete trace of a search operation."""
+
     query: QueryInfo = Field(description="Query information")
 
     # New 4-way retrieval architecture
-    retrieval_results:
-
-
+    retrieval_results: list[RetrievalMethodResults] = Field(
+        default_factory=list, description="Results from each retrieval method"
+    )
+    rrf_merged: list[RRFMergeResult] = Field(default_factory=list, description="Results after RRF merging")
+    reranked: list[RerankedResult] = Field(default_factory=list, description="Results after reranking")
 
     # Legacy fields (kept for backward compatibility with graph/temporal visualizations)
-    entry_points:
-
-
+    entry_points: list[EntryPoint] = Field(
+        default_factory=list, description="Entry points selected for search (legacy)"
+    )
+    visits: list[NodeVisit] = Field(
+        default_factory=list, description="All nodes visited during search (legacy, for graph viz)"
+    )
+    pruned: list[PruningDecision] = Field(default_factory=list, description="Nodes that were pruned (legacy)")
 
     summary: SearchSummary = Field(description="Summary statistics")
 
     # Final results (for comparison with visits)
-    final_results:
+    final_results: list[dict[str, Any]] = Field(description="Final ranked results returned to user")
 
-    model_config = {
-        "json_encoders": {
-            datetime: lambda v: v.isoformat()
-        }
-    }
+    model_config = {"json_encoders": {datetime: lambda v: v.isoformat()}}
 
     def to_json(self, **kwargs) -> str:
         """Export trace as JSON string."""
@@ -185,14 +215,14 @@
         """Export trace as dictionary."""
         return self.model_dump()
 
-    def get_visit_by_node_id(self, node_id: str) ->
+    def get_visit_by_node_id(self, node_id: str) -> NodeVisit | None:
         """Find a visit by node ID."""
         for visit in self.visits:
             if visit.node_id == node_id:
                 return visit
         return None
 
-    def get_search_path_to_node(self, node_id: str) ->
+    def get_search_path_to_node(self, node_id: str) -> list[NodeVisit]:
         """Get the path from entry point to a specific node."""
         path = []
         current_visit = self.get_visit_by_node_id(node_id)

@@ -206,10 +236,10 @@
 
         return path
 
-    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) ->
+    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) -> list[NodeVisit]:
         """Get all nodes reached via a specific link type."""
         return [v for v in self.visits if v.link_type == link_type]
 
-    def get_entry_point_nodes(self) ->
+    def get_entry_point_nodes(self) -> list[NodeVisit]:
         """Get all entry point visits."""
         return [v for v in self.visits if v.is_entry_point]
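The trace helper methods keep their behaviour; only the annotations move to builtin generics and unions. A short sketch of how a populated SearchTrace might be inspected after a traced search (how the trace object is obtained is outside this diff):

```python
from hindsight_api.engine.search.trace import SearchTrace


def summarize_trace(trace: SearchTrace) -> None:
    """Illustrative read-only walk over the fields and helpers shown above."""
    summary = trace.summary
    print(f"visited={summary.total_nodes_visited} pruned={summary.total_nodes_pruned}")

    # Per-method retrieval results feeding the RRF merge.
    for method in trace.retrieval_results:
        print(f"{method.method_name}: {len(method.results)} results in {method.duration_seconds:.3f}s")

    # Legacy graph view: entry points and nodes reached via entity links.
    for visit in trace.get_entry_point_nodes():
        print(f"entry point: {visit.node_id} (step {visit.step})")
    for visit in trace.get_nodes_by_link_type("entity"):
        print(f"via entity link: {visit.parent_node_id} -> {visit.node_id}")

    # Full trace as JSON for offline visualization.
    print(trace.to_json()[:200])
```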