hindsight-api 0.2.1-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +311 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1406 -118
- hindsight_api/api/mcp.py +11 -196
- hindsight_api/config.py +359 -27
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +706 -88
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +553 -29
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +40 -17
- hindsight_api/engine/llm_wrapper.py +744 -68
- hindsight_api/engine/memory_engine.py +2505 -1017
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +168 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +424 -195
- hindsight_api/engine/retain/fact_storage.py +35 -12
- hindsight_api/engine/retain/link_utils.py +29 -24
- hindsight_api/engine/retain/orchestrator.py +24 -43
- hindsight_api/engine/retain/types.py +11 -2
- hindsight_api/engine/search/graph_retrieval.py +43 -14
- hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +848 -201
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +42 -141
- hindsight_api/engine/search/trace.py +12 -1
- hindsight_api/engine/search/tracer.py +26 -6
- hindsight_api/engine/search/types.py +21 -3
- hindsight_api/engine/task_backend.py +113 -106
- hindsight_api/engine/utils.py +1 -152
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +69 -6
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -3
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/tags.py (new file)

@@ -0,0 +1,172 @@
+"""
+Tags filtering utilities for retrieval.
+
+Provides SQL building functions for filtering memories by tags.
+Supports four matching modes via TagsMatch enum:
+- "any": OR matching, includes untagged memories (default, backward compatible)
+- "all": AND matching, includes untagged memories
+- "any_strict": OR matching, excludes untagged memories
+- "all_strict": AND matching, excludes untagged memories
+
+OR matching (any/any_strict): Memory matches if ANY of its tags overlap with request tags
+AND matching (all/all_strict): Memory matches if ALL request tags are present in its tags
+"""
+
+from typing import Literal
+
+TagsMatch = Literal["any", "all", "any_strict", "all_strict"]
+
+
+def _parse_tags_match(match: TagsMatch) -> tuple[str, bool]:
+    """
+    Parse TagsMatch into operator and include_untagged flag.
+
+    Returns:
+        Tuple of (operator, include_untagged)
+        - operator: "&&" for any/any_strict, "@>" for all/all_strict
+        - include_untagged: True for any/all, False for any_strict/all_strict
+    """
+    if match == "any":
+        return "&&", True
+    elif match == "all":
+        return "@>", True
+    elif match == "any_strict":
+        return "&&", False
+    elif match == "all_strict":
+        return "@>", False
+    else:
+        # Default to "any" behavior
+        return "&&", True
+
+
+def build_tags_where_clause(
+    tags: list[str] | None,
+    param_offset: int = 1,
+    table_alias: str = "",
+    match: TagsMatch = "any",
+) -> tuple[str, list, int]:
+    """
+    Build a SQL WHERE clause for filtering by tags.
+
+    Supports four matching modes:
+    - "any" (default): OR matching, includes untagged memories
+    - "all": AND matching, includes untagged memories
+    - "any_strict": OR matching, excludes untagged memories
+    - "all_strict": AND matching, excludes untagged memories
+
+    Args:
+        tags: List of tags to filter by. If None or empty, returns empty clause (no filtering).
+        param_offset: Starting parameter number for SQL placeholders (default 1).
+        table_alias: Optional table alias prefix (e.g., "mu." for "memory_units mu").
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        Tuple of (sql_clause, params, next_param_offset):
+        - sql_clause: SQL WHERE clause string
+        - params: List of parameter values to bind
+        - next_param_offset: Next available parameter number
+
+    Example:
+        >>> clause, params, next_offset = build_tags_where_clause(['user_a'], 3, 'mu.', 'any_strict')
+        >>> print(clause)  # "AND mu.tags IS NOT NULL AND mu.tags != '{}' AND mu.tags && $3"
+    """
+    if not tags:
+        return "", [], param_offset
+
+    column = f"{table_alias}tags" if table_alias else "tags"
+    operator, include_untagged = _parse_tags_match(match)
+
+    if include_untagged:
+        # Include untagged memories (NULL or empty array) OR matching tags
+        clause = f"AND ({column} IS NULL OR {column} = '{{}}' OR {column} {operator} ${param_offset})"
+    else:
+        # Strict: only memories with matching tags (exclude NULL and empty)
+        clause = f"AND {column} IS NOT NULL AND {column} != '{{}}' AND {column} {operator} ${param_offset}"
+
+    return clause, [tags], param_offset + 1
+
+
+def build_tags_where_clause_simple(
+    tags: list[str] | None,
+    param_num: int,
+    table_alias: str = "",
+    match: TagsMatch = "any",
+) -> str:
+    """
+    Build a simple SQL WHERE clause for tags filtering.
+
+    This is a convenience version that returns just the clause string,
+    assuming the caller will add the tags array to their params list.
+
+    Args:
+        tags: List of tags to filter by. If None or empty, returns empty string.
+        param_num: Parameter number to use in the clause.
+        table_alias: Optional table alias prefix.
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        SQL clause string or empty string.
+    """
+    if not tags:
+        return ""
+
+    column = f"{table_alias}tags" if table_alias else "tags"
+    operator, include_untagged = _parse_tags_match(match)
+
+    if include_untagged:
+        # Include untagged memories (NULL or empty array) OR matching tags
+        return f"AND ({column} IS NULL OR {column} = '{{}}' OR {column} {operator} ${param_num})"
+    else:
+        # Strict: only memories with matching tags (exclude NULL and empty)
+        return f"AND {column} IS NOT NULL AND {column} != '{{}}' AND {column} {operator} ${param_num}"
+
+
+def filter_results_by_tags(
+    results: list,
+    tags: list[str] | None,
+    match: TagsMatch = "any",
+) -> list:
+    """
+    Filter retrieval results by tags in Python (for post-processing).
+
+    Used when SQL filtering isn't possible (e.g., graph traversal results).
+
+    Args:
+        results: List of RetrievalResult objects with a 'tags' attribute.
+        tags: List of tags to filter by. If None or empty, returns all results.
+        match: Matching mode. Defaults to "any".
+
+    Returns:
+        Filtered list of results.
+    """
+    if not tags:
+        return results
+
+    _, include_untagged = _parse_tags_match(match)
+    is_any_match = match in ("any", "any_strict")
+
+    tags_set = set(tags)
+    filtered = []
+
+    for result in results:
+        result_tags = getattr(result, "tags", None)
+
+        # Check if untagged
+        is_untagged = result_tags is None or len(result_tags) == 0
+
+        if is_untagged:
+            if include_untagged:
+                filtered.append(result)
+            # else: skip untagged
+        else:
+            result_tags_set = set(result_tags)
+            if is_any_match:
+                # Any overlap
+                if result_tags_set & tags_set:
+                    filtered.append(result)
+            else:
+                # All tags must be present
+                if tags_set <= result_tags_set:
+                    filtered.append(result)
+
+    return filtered
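The four modes above map onto two axes: the Postgres array operator ("&&" overlap vs "@>" containment) and whether rows with NULL or empty tags pass. A minimal usage sketch of how the builder composes into a parameterized query; the "memory_units mu" alias comes from the docstring, while the bank_id filter and all values are made-up examples:

# Sketch only: the surrounding query and the bank_id parameter are illustrative.
from hindsight_api.engine.search.tags import build_tags_where_clause

params: list = ["bank-123"]  # binds as $1
clause, tag_params, next_offset = build_tags_where_clause(
    tags=["user_a", "project_x"],
    param_offset=len(params) + 1,  # tags array binds as $2
    table_alias="mu.",
    match="any_strict",  # OR semantics, untagged rows excluded
)
params.extend(tag_params)
sql = f"SELECT mu.id, mu.text FROM memory_units mu WHERE mu.bank_id = $1 {clause}"
# clause == "AND mu.tags IS NOT NULL AND mu.tags != '{}' AND mu.tags && $2"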
hindsight_api/engine/search/think_utils.py

@@ -3,31 +3,13 @@ Think operation utilities for formulating answers based on agent and world facts
 """
 
 import logging
-import re
 from datetime import datetime
 
-from pydantic import BaseModel, Field
-
 from ..response_models import DispositionTraits, MemoryFact
 
 logger = logging.getLogger(__name__)
 
 
-class Opinion(BaseModel):
-    """An opinion formed by the bank."""
-
-    opinion: str = Field(description="The opinion or perspective with reasoning included")
-    confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
-
-
-class OpinionExtractionResponse(BaseModel):
-    """Response containing extracted opinions."""
-
-    opinions: list[Opinion] = Field(
-        default_factory=list, description="List of opinions formed with their supporting reasons and confidence scores"
-    )
-
-
 def describe_trait_level(value: int) -> str:
     """Convert trait value (1-5) to descriptive text."""
     levels = {1: "very low", 2: "low", 3: "moderate", 4: "high", 5: "very high"}

@@ -93,17 +75,46 @@ def format_facts_for_prompt(facts: list[MemoryFact]) -> str:
     return json.dumps(formatted, indent=2)
 
 
+def format_entity_summaries_for_prompt(entities: dict) -> str:
+    """Format entity summaries for inclusion in the reflect prompt.
+
+    Args:
+        entities: Dict mapping entity name to EntityState objects
+
+    Returns:
+        Formatted string with entity summaries, or empty string if no summaries
+    """
+    if not entities:
+        return ""
+
+    summaries = []
+    for name, state in entities.items():
+        # Get summary from observations (summary is stored as single observation)
+        if state.observations:
+            summary_text = state.observations[0].text
+            summaries.append(f"## {name}\n{summary_text}")
+
+    if not summaries:
+        return ""
+
+    return "\n\n".join(summaries)
+
+
 def build_think_prompt(
     agent_facts_text: str,
     world_facts_text: str,
-    opinion_facts_text: str,
     query: str,
     name: str,
    disposition: DispositionTraits,
     background: str,
     context: str | None = None,
+    entity_summaries_text: str | None = None,
 ) -> str:
-    """Build the think prompt for the LLM."""
+    """Build the think prompt for the LLM.
+
+    Note: opinion_facts_text parameter removed - opinions are now stored as mental models
+    and included via entity_summaries_text.
+    """
     disposition_desc = build_disposition_description(disposition)
 
     name_section = f"""

@@ -125,6 +136,14 @@ Your background:
 ADDITIONAL CONTEXT:
 {context}
 
+"""
+
+    entity_section = ""
+    if entity_summaries_text:
+        entity_section = f"""
+KEY PEOPLE, PLACES & THINGS I KNOW ABOUT:
+{entity_summaries_text}
+
 """
 
     return f"""Here's what I know and have experienced:

@@ -135,14 +154,11 @@ MY IDENTITY & EXPERIENCES:
 WHAT I KNOW ABOUT THE WORLD:
 {world_facts_text}
 
-
-{opinion_facts_text}
-
-{context_section}{name_section}{disposition_desc}{background_section}
+{entity_section}{context_section}{name_section}{disposition_desc}{background_section}
 
 QUESTION: {query}
 
-Based on everything I know, believe, and who I am (including my name, disposition and background), here's what I genuinely think about this question. I'll draw on my experiences, knowledge,
+Based on everything I know, believe, and who I am (including my name, disposition and background), here's what I genuinely think about this question. I'll draw on my experiences, knowledge, and personal traits to give you my honest perspective."""

@@ -172,117 +188,7 @@ def get_system_message(disposition: DispositionTraits) -> str:
         " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
     )
 
-    return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
-
-
-async def extract_opinions_from_text(llm_config, text: str, query: str) -> list[Opinion]:
-    """
-    Extract opinions with reasons and confidence from text using LLM.
-
-    Args:
-        llm_config: LLM configuration to use
-        text: Text to extract opinions from
-        query: The original query that prompted this response
-
-    Returns:
-        List of Opinion objects with text and confidence
-    """
-    extraction_prompt = f"""Extract any NEW opinions or perspectives from the answer below and rewrite them in FIRST-PERSON as if YOU are stating the opinion directly.
-
-ORIGINAL QUESTION:
-{query}
-
-ANSWER PROVIDED:
-{text}
-
-Your task: Find opinions in the answer and rewrite them AS IF YOU ARE THE ONE SAYING THEM.
-
-An opinion is a judgment, viewpoint, or conclusion that goes beyond just stating facts.
-
-IMPORTANT: Do NOT extract statements like:
-- "I don't have enough information"
-- "The facts don't contain information about X"
-- "I cannot answer because..."
-
-ONLY extract actual opinions about substantive topics.
-
-CRITICAL FORMAT REQUIREMENTS:
-1. **ALWAYS start with first-person phrases**: "I think...", "I believe...", "In my view...", "I've come to believe...", "Previously I thought... but now..."
-2. **NEVER use third-person**: Do NOT say "The speaker thinks..." or "They believe..." - always use "I"
-3. Include the reasoning naturally within the statement
-4. Provide a confidence score (0.0 to 1.0)
-
-CORRECT Examples (✓ FIRST-PERSON):
-- "I think Alice is more reliable because she consistently delivers on time and writes clean code"
-- "Previously I thought all engineers were equal, but now I feel that experience and track record really matter"
-- "I believe reliability is best measured by consistent output over time"
-- "I've come to believe that track records are more important than potential"
-
-WRONG Examples (✗ THIRD-PERSON - DO NOT USE):
-- "The speaker thinks Alice is more reliable"
-- "They believe reliability matters"
-- "It is believed that Alice is better"
-
-If no genuine opinions are expressed (e.g., the response just says "I don't know"), return an empty list."""
-
-    try:
-        result = await llm_config.call(
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'.",
-                },
-                {"role": "user", "content": extraction_prompt},
-            ],
-            response_format=OpinionExtractionResponse,
-            scope="memory_extract_opinion",
-        )
-
-        # Format opinions with confidence score and convert to first-person
-        formatted_opinions = []
-        for op in result.opinions:
-            # Convert third-person to first-person if needed
-            opinion_text = op.opinion
-
-            # Replace common third-person patterns with first-person
-            def singularize_verb(verb):
-                if verb.endswith("es"):
-                    return verb[:-1]  # believes -> believe
-                elif verb.endswith("s"):
-                    return verb[:-1]  # thinks -> think
-                return verb
-
-            # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
-            match = re.match(
-                r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
-                opinion_text,
-                re.IGNORECASE,
-            )
-            if match:
-                verb = singularize_verb(match.group(2))
-                that_part = match.group(3) or ""  # Keep " that" if present
-                rest = match.group(4)
-                opinion_text = f"I {verb}{that_part}{rest}"
-
-            # If still doesn't start with first-person, prepend "I believe that "
-            first_person_starters = [
-                "I think",
-                "I believe",
-                "I feel",
-                "In my view",
-                "I've come to believe",
-                "Previously I",
-            ]
-            if not any(opinion_text.startswith(starter) for starter in first_person_starters):
-                opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]
-
-            formatted_opinions.append(Opinion(opinion=opinion_text, confidence=op.confidence))
-
-        return formatted_opinions
-
-    except Exception as e:
-        logger.warning(f"Failed to extract opinions: {str(e)}")
-        return []
+    return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting. IMPORTANT: Detect the language of the question and respond in the SAME language. Do not translate to English if the question is in another language."

@@ -290,7 +196,6 @@ async def reflect(
     query: str,
     experience_facts: list[str] = None,
     world_facts: list[str] = None,
-    opinion_facts: list[str] = None,
     name: str = "Assistant",
     disposition: DispositionTraits = None,
     background: str = "",

@@ -307,7 +212,6 @@ async def reflect(
         query: Question to answer
         experience_facts: List of experience/agent fact strings
        world_facts: List of world fact strings
-        opinion_facts: List of opinion fact strings
         name: Name of the agent/persona
         disposition: Disposition traits (defaults to neutral)
         background: Background information

@@ -328,18 +232,15 @@ async def reflect(
 
     agent_results = to_memory_facts(experience_facts or [], "experience")
     world_results = to_memory_facts(world_facts or [], "world")
-    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
 
     # Format facts for prompt
     agent_facts_text = format_facts_for_prompt(agent_results)
     world_facts_text = format_facts_for_prompt(world_results)
-    opinion_facts_text = format_facts_for_prompt(opinion_results)
 
     # Build prompt
     prompt = build_think_prompt(
         agent_facts_text=agent_facts_text,
         world_facts_text=world_facts_text,
-        opinion_facts_text=opinion_facts_text,
         query=query,
         name=name,
         disposition=disposition,
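Net effect of these hunks: the LLM opinion-extraction pass (and its regex post-processing) is gone, and entity summaries now feed the think prompt instead. A minimal sketch of the new path; Obs and EntityState below are stand-in dataclasses, since format_entity_summaries_for_prompt only relies on an .observations[0].text shape:

# Stand-in types; the package's real EntityState is not defined in this sketch.
from dataclasses import dataclass, field

from hindsight_api.engine.search.think_utils import format_entity_summaries_for_prompt

@dataclass
class Obs:
    text: str

@dataclass
class EntityState:
    observations: list = field(default_factory=list)

entities = {
    "Alice": EntityState([Obs("Senior engineer; ships reliably.")]),
    "Redis": EntityState(),  # no summary stored yet -> skipped by the formatter
}

text = format_entity_summaries_for_prompt(entities)
# text == "## Alice\nSenior engineer; ships reliably."
# Passed as build_think_prompt(entity_summaries_text=text), this renders under
# "KEY PEOPLE, PLACES & THINGS I KNOW ABOUT:" in place of the old opinion facts.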
hindsight_api/engine/search/trace.py

@@ -11,6 +11,13 @@ from typing import Any, Literal
 from pydantic import BaseModel, Field
 
 
+class TemporalConstraint(BaseModel):
+    """Detected temporal constraint from query analysis."""
+
+    start: datetime | None = Field(default=None, description="Start of temporal range")
+    end: datetime | None = Field(default=None, description="End of temporal range")
+
+
 class QueryInfo(BaseModel):
     """Information about the search query."""
 

@@ -19,6 +26,11 @@ class QueryInfo(BaseModel):
     timestamp: datetime = Field(description="When the query was executed")
     budget: int = Field(description="Maximum nodes to explore")
     max_tokens: int = Field(description="Maximum tokens to return in results")
+    tags: list[str] | None = Field(default=None, description="Tags filter applied to recall")
+    tags_match: str | None = Field(default=None, description="Tags matching mode: any, all, any_strict, all_strict")
+    temporal_constraint: TemporalConstraint | None = Field(
+        default=None, description="Detected temporal range from query"
+    )
 
 
 class EntryPoint(BaseModel):

@@ -73,7 +85,6 @@ class NodeVisit(BaseModel):
     text: str = Field(description="Memory unit text content")
     context: str = Field(description="Memory unit context")
     event_date: datetime | None = Field(default=None, description="When the memory occurred")
-    access_count: int = Field(description="Number of times accessed before this search")
 
     # How this node was reached
     is_entry_point: bool = Field(description="Whether this is an entry point")
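Both bounds of the new TemporalConstraint default to None, so one-sided windows ("since Monday", "before 2024") are representable without sentinel dates. A quick sketch with illustrative values:

# Sketch: one-sided temporal windows.
from datetime import datetime, timezone

from hindsight_api.engine.search.trace import TemporalConstraint

since = TemporalConstraint(start=datetime(2025, 1, 6, tzinfo=timezone.utc))
before = TemporalConstraint(end=datetime(2024, 1, 1, tzinfo=timezone.utc))
assert since.end is None and before.start is None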
hindsight_api/engine/search/tracer.py

@@ -22,6 +22,7 @@ from .trace import (
     SearchPhaseMetrics,
     SearchSummary,
     SearchTrace,
+    TemporalConstraint,
     WeightComponents,
 )
 

@@ -45,7 +46,14 @@ class SearchTracer:
         json_output = trace.to_json()
     """
 
-    def __init__(self, query: str, budget: int, max_tokens: int):
+    def __init__(
+        self,
+        query: str,
+        budget: int,
+        max_tokens: int,
+        tags: list[str] | None = None,
+        tags_match: str | None = None,
+    ):
         """
         Initialize tracer.
 

@@ -53,10 +61,14 @@
             query: Search query text
             budget: Maximum nodes to explore
             max_tokens: Maximum tokens to return in results
+            tags: Tags filter applied to recall
+            tags_match: Tags matching mode (any, all, any_strict, all_strict)
         """
         self.query_text = query
         self.budget = budget
         self.max_tokens = max_tokens
+        self.tags = tags
+        self.tags_match = tags_match
 
         # Trace data
         self.query_embedding: list[float] | None = None

@@ -66,6 +78,9 @@
         self.pruned: list[PruningDecision] = []
         self.phase_metrics: list[SearchPhaseMetrics] = []
 
+        # Temporal constraint detected from query
+        self.temporal_constraint: TemporalConstraint | None = None
+
         # New 4-way retrieval tracking
         self.retrieval_results: list[RetrievalMethodResults] = []
         self.rrf_merged: list[RRFMergeResult] = []

@@ -88,6 +103,11 @@
         """Record the query embedding."""
         self.query_embedding = embedding
 
+    def record_temporal_constraint(self, start: datetime | None, end: datetime | None):
+        """Record the detected temporal constraint from query analysis."""
+        if start is not None or end is not None:
+            self.temporal_constraint = TemporalConstraint(start=start, end=end)
+
     def add_entry_point(self, node_id: str, text: str, similarity: float, rank: int):
         """
         Record an entry point.

@@ -116,7 +136,6 @@
         text: str,
         context: str,
         event_date: datetime | None,
-        access_count: int,
         is_entry_point: bool,
         parent_node_id: str | None,
         link_type: Literal["temporal", "semantic", "entity"] | None,

@@ -135,7 +154,6 @@
             text: Memory unit text
             context: Memory unit context
             event_date: When the memory occurred
-            access_count: Access count before this search
             is_entry_point: Whether this is an entry point
             parent_node_id: Node that led here (None for entry points)
             link_type: Type of link from parent

@@ -174,7 +192,6 @@
             text=text,
             context=context,
             event_date=event_date,
-            access_count=access_count,
             is_entry_point=is_entry_point,
             parent_node_id=parent_node_id,
             link_type=link_type,

@@ -313,8 +330,8 @@
                 RetrievalResult(
                     rank=rank,
                     node_id=doc_id,
-                    text=data.get("text"),
-                    context=data.get("context"),
+                    text=data.get("text") or "",
+                    context=data.get("context") or "",
                     event_date=data.get("event_date"),
                     fact_type=data.get("fact_type") or fact_type,
                     score=score,

@@ -428,6 +445,9 @@
             timestamp=datetime.now(UTC),
             budget=self.budget,
             max_tokens=self.max_tokens,
+            tags=self.tags,
+            tags_match=self.tags_match,
+            temporal_constraint=self.temporal_constraint,
         )
 
         # Create summary
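The tracer now accepts the tags filter at construction and exposes a hook for the detected time window; both land on QueryInfo when the trace is finalized. A sketch of the wiring, with the signatures taken from the diff above and all values illustrative:

# Sketch only; argument values are made up.
from datetime import datetime, timezone

from hindsight_api.engine.search.tracer import SearchTracer

tracer = SearchTracer(
    query="deploys last week",
    budget=50,
    max_tokens=2048,
    tags=["team_infra"],
    tags_match="any_strict",
)
# Stored only if query analysis found at least one bound:
tracer.record_temporal_constraint(
    start=datetime(2025, 1, 6, tzinfo=timezone.utc),
    end=None,
)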
hindsight_api/engine/search/types.py

@@ -10,6 +10,24 @@ from datetime import datetime
 from typing import Any
 
 
+@dataclass
+class MPFPTimings:
+    """Timing breakdown for a single MPFP retrieval call."""
+
+    fact_type: str
+    edge_count: int = 0  # Total edges loaded
+    db_queries: int = 0  # Number of DB queries for edge loading
+    edge_load_time: float = 0.0  # Time spent loading edges from DB
+    traverse: float = 0.0  # Total traversal time (includes edge loading)
+    pattern_count: int = 0  # Number of patterns executed
+    fusion: float = 0.0  # Time for RRF fusion
+    fetch: float = 0.0  # Time to fetch memory unit details
+    seeds_time: float = 0.0  # Time to find semantic seeds (if fallback used)
+    result_count: int = 0  # Number of results returned
+    # Detailed per-hop timing: list of {hop, exec_time, uncached, load_time, edges_loaded, total_time}
+    hop_details: list[dict] = field(default_factory=list)
+
+
 @dataclass
 class RetrievalResult:
     """

@@ -28,8 +46,8 @@ class RetrievalResult:
     mentioned_at: datetime | None = None
     document_id: str | None = None
     chunk_id: str | None = None
-    access_count: int = 0
     embedding: list[float] | None = None
+    tags: list[str] | None = None  # Visibility scope tags
 
     # Retrieval-specific scores (only one will be set depending on retrieval method)
     similarity: float | None = None  # Semantic retrieval

@@ -52,8 +70,8 @@
             mentioned_at=row.get("mentioned_at"),
             document_id=row.get("document_id"),
             chunk_id=row.get("chunk_id"),
-            access_count=row.get("access_count", 0),
             embedding=row.get("embedding"),
+            tags=row.get("tags"),
             similarity=row.get("similarity"),
             bm25_score=row.get("bm25_score"),
             activation=row.get("activation"),

@@ -136,8 +154,8 @@ class ScoredResult:
             "mentioned_at": self.retrieval.mentioned_at,
             "document_id": self.retrieval.document_id,
             "chunk_id": self.retrieval.chunk_id,
-            "access_count": self.retrieval.access_count,
             "embedding": self.retrieval.embedding,
+            "tags": self.retrieval.tags,
             "semantic_similarity": self.retrieval.similarity,
             "bm25_score": self.retrieval.bm25_score,
         }
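Because filter_results_by_tags reads tags via getattr, any object exposing a tags attribute works; RetrievalResult now carries that attribute straight from the database row. A self-contained sketch of the post-hoc path (FakeResult is a hypothetical stand-in, not a package type):

# Sketch: Python-side filtering for results that bypassed SQL (e.g., graph traversal).
from dataclasses import dataclass

from hindsight_api.engine.search.tags import filter_results_by_tags

@dataclass
class FakeResult:
    node_id: str
    tags: list[str] | None

results = [FakeResult("a", ["user_a"]), FakeResult("b", None)]

assert len(filter_results_by_tags(results, ["user_a"], match="any_strict")) == 1
assert len(filter_results_by_tags(results, ["user_a"], match="any")) == 2  # untagged kept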