omni-cortex 1.17.3__py3-none-any.whl → 1.17.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omni_cortex/_bundled/dashboard/backend/main.py +2 -2
- omni_cortex/_bundled/dashboard/backend/test_database.py +301 -0
- omni_cortex/_bundled/dashboard/backend/tmpclaude-2dfa-cwd +1 -0
- omni_cortex/_bundled/dashboard/backend/tmpclaude-c460-cwd +1 -0
- omni_cortex/_bundled/dashboard/frontend/dist/assets/index-CQlQK3nE.js +551 -0
- omni_cortex/_bundled/dashboard/frontend/dist/assets/index-CmUNNfe4.css +1 -0
- omni_cortex/_bundled/dashboard/frontend/dist/index.html +14 -0
- omni_cortex/_bundled/hooks/user_prompt.py +113 -2
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/main.py +2 -2
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/user_prompt.py +113 -2
- {omni_cortex-1.17.3.dist-info → omni_cortex-1.17.4.dist-info}/METADATA +6 -1
- omni_cortex-1.17.4.dist-info/RECORD +53 -0
- omni_cortex/__init__.py +0 -3
- omni_cortex/categorization/__init__.py +0 -9
- omni_cortex/categorization/auto_tags.py +0 -166
- omni_cortex/categorization/auto_type.py +0 -165
- omni_cortex/config.py +0 -141
- omni_cortex/dashboard.py +0 -238
- omni_cortex/database/__init__.py +0 -24
- omni_cortex/database/connection.py +0 -137
- omni_cortex/database/migrations.py +0 -210
- omni_cortex/database/schema.py +0 -212
- omni_cortex/database/sync.py +0 -421
- omni_cortex/decay/__init__.py +0 -7
- omni_cortex/decay/importance.py +0 -147
- omni_cortex/embeddings/__init__.py +0 -35
- omni_cortex/embeddings/local.py +0 -442
- omni_cortex/models/__init__.py +0 -20
- omni_cortex/models/activity.py +0 -265
- omni_cortex/models/agent.py +0 -144
- omni_cortex/models/memory.py +0 -395
- omni_cortex/models/relationship.py +0 -206
- omni_cortex/models/session.py +0 -290
- omni_cortex/resources/__init__.py +0 -1
- omni_cortex/search/__init__.py +0 -22
- omni_cortex/search/hybrid.py +0 -197
- omni_cortex/search/keyword.py +0 -204
- omni_cortex/search/ranking.py +0 -127
- omni_cortex/search/semantic.py +0 -232
- omni_cortex/server.py +0 -360
- omni_cortex/setup.py +0 -284
- omni_cortex/tools/__init__.py +0 -13
- omni_cortex/tools/activities.py +0 -453
- omni_cortex/tools/memories.py +0 -536
- omni_cortex/tools/sessions.py +0 -311
- omni_cortex/tools/utilities.py +0 -477
- omni_cortex/utils/__init__.py +0 -13
- omni_cortex/utils/formatting.py +0 -282
- omni_cortex/utils/ids.py +0 -72
- omni_cortex/utils/timestamps.py +0 -129
- omni_cortex/utils/truncation.py +0 -111
- omni_cortex-1.17.3.dist-info/RECORD +0 -86
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/.env.example +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/backfill_summaries.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/chat_service.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/database.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/image_service.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/logging_config.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/models.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/project_config.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/project_scanner.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/prompt_security.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/pyproject.toml +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/security.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/uv.lock +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/dashboard/backend/websocket_manager.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/post_tool_use.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/pre_tool_use.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/session_utils.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/stop.py +0 -0
- {omni_cortex-1.17.3.data → omni_cortex-1.17.4.data}/data/share/omni-cortex/hooks/subagent_stop.py +0 -0
- {omni_cortex-1.17.3.dist-info → omni_cortex-1.17.4.dist-info}/WHEEL +0 -0
- {omni_cortex-1.17.3.dist-info → omni_cortex-1.17.4.dist-info}/entry_points.txt +0 -0
- {omni_cortex-1.17.3.dist-info → omni_cortex-1.17.4.dist-info}/licenses/LICENSE +0 -0
omni_cortex/search/keyword.py
DELETED
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
"""Keyword search using SQLite FTS5."""
|
|
2
|
-
|
|
3
|
-
import sqlite3
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
from ..models.memory import Memory, _row_to_memory
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def keyword_search(
    conn: sqlite3.Connection,
    query: str,
    type_filter: Optional[str] = None,
    tags_filter: Optional[list[str]] = None,
    status_filter: Optional[str] = None,
    min_importance: Optional[int] = None,
    include_archived: bool = False,
    limit: int = 10,
) -> list[tuple[Memory, float]]:
    """Run an FTS5 full-text search over memories.

    Args:
        conn: Open SQLite connection.
        query: Raw search text; escaped before being passed to MATCH.
        type_filter: Restrict results to one memory type.
        tags_filter: Keep memories carrying at least one of these tags.
        status_filter: Restrict results to an exact status value.
        min_importance: Drop memories below this importance score.
        include_archived: When no status filter is given, also return
            archived memories.
        limit: Maximum number of rows returned.

    Returns:
        List of (Memory, score) pairs; higher score means better match.
    """
    fts_query = _escape_fts_query(query)

    filters: list[str] = []
    params: list = [fts_query]

    if type_filter:
        filters.append("m.type = ?")
        params.append(type_filter)

    if status_filter:
        filters.append("m.status = ?")
        params.append(status_filter)
    elif not include_archived:
        # No explicit status requested: hide archived memories by default.
        filters.append("m.status != 'archived'")

    if min_importance is not None:
        filters.append("m.importance_score >= ?")
        params.append(min_importance)

    if tags_filter:
        like_clauses = []
        for tag in tags_filter:
            like_clauses.append("m.tags LIKE ?")
            params.append(f'%"{tag}"%')
        filters.append(f"({' OR '.join(like_clauses)})")

    where_sql = f"AND {' AND '.join(filters)}" if filters else ""

    params.append(limit)

    cursor = conn.cursor()

    try:
        cursor.execute(
            f"""
            SELECT m.*, bm25(memories_fts) as score
            FROM memories_fts fts
            JOIN memories m ON fts.rowid = m.rowid
            WHERE memories_fts MATCH ?
            {where_sql}
            ORDER BY score
            LIMIT ?
            """,
            params,
        )
        # bm25() reports lower-is-better negative values; negate so that
        # larger scores mean stronger matches.
        return [(_row_to_memory(row), -row["score"]) for row in cursor.fetchall()]

    except sqlite3.OperationalError as exc:
        message = str(exc).lower()
        if "fts5" in message or "match" in message:
            # FTS table missing or query unsupported: degrade to LIKE scan.
            return _fallback_like_search(
                conn, query, type_filter, tags_filter, status_filter,
                min_importance, include_archived, limit,
            )
        raise
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def _escape_fts_query(query: str) -> str:
|
|
107
|
-
"""Escape special characters for FTS5 query.
|
|
108
|
-
|
|
109
|
-
Args:
|
|
110
|
-
query: Raw search query
|
|
111
|
-
|
|
112
|
-
Returns:
|
|
113
|
-
Escaped FTS5 query
|
|
114
|
-
"""
|
|
115
|
-
# Remove FTS5 special characters that could cause syntax errors
|
|
116
|
-
special_chars = ['"', "'", "(", ")", "*", ":", "^", "-", "+"]
|
|
117
|
-
escaped = query
|
|
118
|
-
for char in special_chars:
|
|
119
|
-
escaped = escaped.replace(char, " ")
|
|
120
|
-
|
|
121
|
-
# Clean up whitespace
|
|
122
|
-
words = escaped.split()
|
|
123
|
-
|
|
124
|
-
# Handle empty query
|
|
125
|
-
if not words:
|
|
126
|
-
return '""'
|
|
127
|
-
|
|
128
|
-
# For simple queries, just use OR matching
|
|
129
|
-
if len(words) == 1:
|
|
130
|
-
return f'"{words[0]}"'
|
|
131
|
-
|
|
132
|
-
# For multi-word queries, match any word
|
|
133
|
-
return " OR ".join(f'"{word}"' for word in words)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def _fallback_like_search(
    conn: sqlite3.Connection,
    query: str,
    type_filter: Optional[str],
    tags_filter: Optional[list[str]],
    status_filter: Optional[str],
    min_importance: Optional[int],
    include_archived: bool,
    limit: int,
) -> list[tuple[Memory, float]]:
    """Case-insensitive LIKE scan used when the FTS5 query cannot run."""
    words = query.lower().split()
    if not words:
        return []

    params: list = []

    # Any query word may appear in either the content or context column.
    per_word = "(LOWER(content) LIKE ? OR LOWER(context) LIKE ?)"
    word_clause = " OR ".join(per_word for _ in words)
    for word in words:
        pattern = f"%{word}%"
        params.extend((pattern, pattern))
    conditions = [f"({word_clause})"]

    if type_filter:
        conditions.append("type = ?")
        params.append(type_filter)

    if status_filter:
        conditions.append("status = ?")
        params.append(status_filter)
    elif not include_archived:
        conditions.append("status != 'archived'")

    if min_importance is not None:
        conditions.append("importance_score >= ?")
        params.append(min_importance)

    if tags_filter:
        tag_likes = []
        for tag in tags_filter:
            tag_likes.append("tags LIKE ?")
            params.append(f'%"{tag}"%')
        conditions.append(f"({' OR '.join(tag_likes)})")

    params.append(limit)

    cursor = conn.cursor()
    cursor.execute(
        f"""
        SELECT *
        FROM memories
        WHERE {' AND '.join(conditions)}
        ORDER BY importance_score DESC, last_accessed DESC
        LIMIT ?
        """,
        params,
    )

    ranked = []
    for row in cursor.fetchall():
        memory = _row_to_memory(row)
        haystack = (memory.content + " " + (memory.context or "")).lower()
        # Crude relevance: one point for each query word present anywhere.
        hits = sum(1 for word in words if word in haystack)
        ranked.append((memory, float(hits)))

    return ranked
|
omni_cortex/search/ranking.py
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
"""Multi-factor relevance ranking for search results."""
|
|
2
|
-
|
|
3
|
-
import math
|
|
4
|
-
from datetime import datetime, timezone
|
|
5
|
-
from typing import Optional
|
|
6
|
-
|
|
7
|
-
from ..models.memory import Memory
|
|
8
|
-
from ..utils.timestamps import parse_iso
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def calculate_relevance_score(
    memory: Memory,
    keyword_score: float = 0.0,
    semantic_score: float = 0.0,
    query: Optional[str] = None,
) -> float:
    """Calculate multi-factor relevance score for a memory.

    Scoring factors:
    - Keyword match score (40%)
    - Semantic similarity score (40%)
    - Access frequency (log scale, max +20)
    - Recency (exponential decay, max +15)
    - Freshness status bonus/penalty
    - Importance score (0-15)
    - Exact phrase match bonus (+10)

    Args:
        memory: Memory object to score
        keyword_score: Score from keyword search (0-1 normalized)
        semantic_score: Score from semantic search (0-1 normalized)
        query: Optional query string for additional matching

    Returns:
        Combined relevance score (higher = more relevant)
    """
    score = 0.0

    # Base scores from search: each contributes up to 40 points (80 combined).
    score += min(40.0, keyword_score * 40.0)
    score += min(40.0, semantic_score * 40.0)

    # Access frequency bonus (log scale, max +20).
    # More frequently accessed memories are likely more useful.
    access_count = memory.access_count or 0
    score += min(20.0, math.log1p(access_count) * 5.0)

    # Recency bonus (exponential decay over 30 days, max +15).
    # Clamp the day delta at zero: a future last_accessed timestamp
    # (clock skew, bad data) would otherwise make exp() exceed 1 and
    # push the bonus past its intended +15 cap.
    last_accessed = parse_iso(memory.last_accessed)
    now = datetime.now(timezone.utc)
    days_since_access = max(0, (now - last_accessed).days)
    score += 15.0 * math.exp(-days_since_access / 30.0)

    # Freshness status bonus/penalty.
    freshness_bonus = {
        "fresh": 10.0,
        "needs_review": 0.0,
        "outdated": -10.0,
        "archived": -30.0,
    }
    score += freshness_bonus.get(memory.status, 0.0)

    # Importance contribution (0-100 scaled to 0-15). Use an explicit
    # None check: the previous `importance_score or 50.0` treated a
    # legitimate importance of 0 as missing and substituted 50.
    importance = 50.0 if memory.importance_score is None else memory.importance_score
    score += importance * 0.15

    # Exact phrase match bonus (if query provided).
    if query and query.lower() in memory.content.lower():
        score += 10.0

    return score
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def rank_memories(
    memories_with_scores: list[tuple[Memory, float, float]],
    query: Optional[str] = None,
) -> list[tuple[Memory, float]]:
    """Order memories by their combined relevance.

    Args:
        memories_with_scores: (Memory, keyword_score, semantic_score) triples.
        query: Optional raw query string, forwarded for exact-match bonuses.

    Returns:
        (Memory, final_score) pairs sorted best-first.
    """
    scored = [
        (
            memory,
            calculate_relevance_score(
                memory,
                keyword_score=kw,
                semantic_score=sem,
                query=query,
            ),
        )
        for memory, kw, sem in memories_with_scores
    ]
    # Highest combined score first.
    return sorted(scored, key=lambda pair: pair[1], reverse=True)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def normalize_scores(scores: list[float]) -> list[float]:
    """Min-max scale a list of scores into the 0-1 range.

    An empty input yields an empty list. When every score is identical
    there is no spread to scale by, so each entry maps to 1.0.

    Args:
        scores: List of raw scores.

    Returns:
        List of normalized scores.
    """
    if not scores:
        return []

    lowest = min(scores)
    spread = max(scores) - lowest

    if spread == 0:
        return [1.0] * len(scores)

    return [(value - lowest) / spread for value in scores]
|
omni_cortex/search/semantic.py
DELETED
|
@@ -1,232 +0,0 @@
|
|
|
1
|
-
"""Semantic search using vector embeddings."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import sqlite3
|
|
5
|
-
from typing import Optional
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
|
|
9
|
-
from ..models.memory import Memory, _row_to_memory
|
|
10
|
-
from ..embeddings.local import (
|
|
11
|
-
generate_embedding,
|
|
12
|
-
blob_to_vector,
|
|
13
|
-
DEFAULT_MODEL_NAME,
|
|
14
|
-
)
|
|
15
|
-
from ..config import load_config
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two vectors, in [-1, 1].

    A zero-length vector has no direction, so the similarity is defined
    as 0.0 whenever either input has zero norm.

    Args:
        a: First vector
        b: Second vector

    Returns:
        Similarity score between -1 and 1
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)

    if norm_product == 0:
        return 0.0

    return float(np.dot(a, b) / norm_product)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def semantic_search(
    conn: sqlite3.Connection,
    query: str,
    type_filter: Optional[str] = None,
    tags_filter: Optional[list[str]] = None,
    status_filter: Optional[str] = None,
    min_importance: Optional[int] = None,
    include_archived: bool = False,
    limit: int = 10,
    similarity_threshold: float = 0.3,
    model_name: str = DEFAULT_MODEL_NAME,
) -> list[tuple[Memory, float]]:
    """Search memories by embedding similarity to the query text.

    Args:
        conn: Database connection.
        query: Search query string.
        type_filter: Filter by memory type.
        tags_filter: Filter by tags.
        status_filter: Filter by status.
        min_importance: Minimum importance score.
        include_archived: Include archived memories.
        limit: Maximum results.
        similarity_threshold: Minimum similarity score to keep a match.
        model_name: Embedding model to use.

    Returns:
        Up to *limit* (Memory, similarity_score) pairs, best matches first.
    """
    # Semantic search is a no-op when embeddings are turned off in config.
    config = load_config()
    if not config.embedding_enabled:
        logger.debug("Embeddings disabled, skipping semantic search")
        return []

    # Embed the query; any failure degrades gracefully to "no results".
    try:
        query_vector = generate_embedding(query, model_name)
    except ImportError:
        logger.warning("sentence-transformers not available, cannot perform semantic search")
        return []
    except Exception as e:
        logger.error(f"Failed to generate query embedding: {e}")
        return []

    # Only rows that actually have a stored embedding can be scored.
    conditions = ["m.has_embedding = 1"]
    params: list = []

    if type_filter:
        conditions.append("m.type = ?")
        params.append(type_filter)

    if status_filter:
        conditions.append("m.status = ?")
        params.append(status_filter)
    elif not include_archived:
        conditions.append("m.status != 'archived'")

    if min_importance is not None:
        conditions.append("m.importance_score >= ?")
        params.append(min_importance)

    if tags_filter:
        tag_likes = []
        for tag in tags_filter:
            tag_likes.append("m.tags LIKE ?")
            params.append(f'%"{tag}"%')
        conditions.append(f"({' OR '.join(tag_likes)})")

    where_sql = "WHERE " + " AND ".join(conditions)

    cursor = conn.cursor()
    cursor.execute(
        f"""
        SELECT m.*, e.vector
        FROM memories m
        JOIN embeddings e ON m.id = e.memory_id
        {where_sql}
        """,
        params,
    )

    # Score every candidate against the query vector, keeping only those
    # above the threshold.
    scored = []
    for row in cursor.fetchall():
        candidate_vector = blob_to_vector(row["vector"])
        similarity = cosine_similarity(query_vector, candidate_vector)
        if similarity >= similarity_threshold:
            scored.append((_row_to_memory(row), similarity))

    # Best matches first, truncated to the requested count.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def find_similar_memories(
    conn: sqlite3.Connection,
    memory_id: str,
    limit: int = 10,
    similarity_threshold: float = 0.5,
    exclude_ids: Optional[list[str]] = None,
) -> list[tuple[Memory, float]]:
    """Find memories whose embeddings are close to a given memory's.

    Args:
        conn: Database connection.
        memory_id: ID of the source memory.
        limit: Maximum results.
        similarity_threshold: Minimum similarity score.
        exclude_ids: Memory IDs to exclude.

    Returns:
        Up to *limit* (Memory, similarity_score) pairs, best matches first.
    """
    # Look up the embedding of the source memory.
    cursor = conn.cursor()
    cursor.execute(
        "SELECT vector FROM embeddings WHERE memory_id = ?",
        (memory_id,),
    )
    source_row = cursor.fetchone()

    if not source_row:
        logger.warning(f"No embedding found for memory {memory_id}")
        return []

    anchor_vector = blob_to_vector(source_row["vector"])

    # The source never matches itself; callers may exclude more ids.
    skip = set(exclude_ids or [])
    skip.add(memory_id)

    # Scan every other embedded, non-archived memory.
    cursor.execute(
        """
        SELECT m.*, e.vector
        FROM memories m
        JOIN embeddings e ON m.id = e.memory_id
        WHERE m.has_embedding = 1 AND m.status != 'archived'
        """
    )

    matches = []
    for row in cursor.fetchall():
        if row["id"] in skip:
            continue

        similarity = cosine_similarity(anchor_vector, blob_to_vector(row["vector"]))
        if similarity >= similarity_threshold:
            matches.append((_row_to_memory(row), similarity))

    # Best matches first, truncated to the requested count.
    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches[:limit]
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def get_embedding_coverage(conn: sqlite3.Connection) -> dict:
    """Summarize how many memories have an embedding vector.

    Returns:
        Dict with total_memories, with_embeddings, without_embeddings,
        and coverage_pct (a percentage rounded to one decimal; 0.0 when
        the memories table is empty).
    """
    cursor = conn.cursor()

    # cursor.execute returns the cursor itself, so the count can be
    # fetched in one chained expression.
    total = cursor.execute("SELECT COUNT(*) FROM memories").fetchone()[0]
    embedded = cursor.execute(
        "SELECT COUNT(*) FROM memories WHERE has_embedding = 1"
    ).fetchone()[0]

    # Guard against division by zero on an empty table.
    pct = (embedded / total * 100) if total > 0 else 0.0

    return {
        "total_memories": total,
        "with_embeddings": embedded,
        "without_embeddings": total - embedded,
        "coverage_pct": round(pct, 1),
    }
|