omni-cortex 1.17.0-py3-none-any.whl → 1.17.2-py3-none-any.whl
- omni_cortex/__init__.py +3 -0
- omni_cortex/categorization/__init__.py +9 -0
- omni_cortex/categorization/auto_tags.py +166 -0
- omni_cortex/categorization/auto_type.py +165 -0
- omni_cortex/config.py +141 -0
- omni_cortex/dashboard.py +232 -0
- omni_cortex/database/__init__.py +24 -0
- omni_cortex/database/connection.py +137 -0
- omni_cortex/database/migrations.py +210 -0
- omni_cortex/database/schema.py +212 -0
- omni_cortex/database/sync.py +421 -0
- omni_cortex/decay/__init__.py +7 -0
- omni_cortex/decay/importance.py +147 -0
- omni_cortex/embeddings/__init__.py +35 -0
- omni_cortex/embeddings/local.py +442 -0
- omni_cortex/models/__init__.py +20 -0
- omni_cortex/models/activity.py +265 -0
- omni_cortex/models/agent.py +144 -0
- omni_cortex/models/memory.py +395 -0
- omni_cortex/models/relationship.py +206 -0
- omni_cortex/models/session.py +290 -0
- omni_cortex/resources/__init__.py +1 -0
- omni_cortex/search/__init__.py +22 -0
- omni_cortex/search/hybrid.py +197 -0
- omni_cortex/search/keyword.py +204 -0
- omni_cortex/search/ranking.py +127 -0
- omni_cortex/search/semantic.py +232 -0
- omni_cortex/server.py +360 -0
- omni_cortex/setup.py +278 -0
- omni_cortex/tools/__init__.py +13 -0
- omni_cortex/tools/activities.py +453 -0
- omni_cortex/tools/memories.py +536 -0
- omni_cortex/tools/sessions.py +311 -0
- omni_cortex/tools/utilities.py +477 -0
- omni_cortex/utils/__init__.py +13 -0
- omni_cortex/utils/formatting.py +282 -0
- omni_cortex/utils/ids.py +72 -0
- omni_cortex/utils/timestamps.py +129 -0
- omni_cortex/utils/truncation.py +111 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/main.py +43 -13
- {omni_cortex-1.17.0.dist-info → omni_cortex-1.17.2.dist-info}/METADATA +1 -1
- omni_cortex-1.17.2.dist-info/RECORD +65 -0
- omni_cortex-1.17.0.dist-info/RECORD +0 -26
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/.env.example +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/backfill_summaries.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/chat_service.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/database.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/image_service.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/logging_config.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/models.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/project_config.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/project_scanner.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/prompt_security.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/pyproject.toml +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/security.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/uv.lock +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/dashboard/backend/websocket_manager.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/post_tool_use.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/pre_tool_use.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/session_utils.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/stop.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/subagent_stop.py +0 -0
- {omni_cortex-1.17.0.data → omni_cortex-1.17.2.data}/data/share/omni-cortex/hooks/user_prompt.py +0 -0
- {omni_cortex-1.17.0.dist-info → omni_cortex-1.17.2.dist-info}/WHEEL +0 -0
- {omni_cortex-1.17.0.dist-info → omni_cortex-1.17.2.dist-info}/entry_points.txt +0 -0
- {omni_cortex-1.17.0.dist-info → omni_cortex-1.17.2.dist-info}/licenses/LICENSE +0 -0
omni_cortex/decay/importance.py (new file)
@@ -0,0 +1,147 @@
```python
"""Importance decay algorithm for memories."""

import math
from datetime import datetime, timezone
from typing import Optional

from ..utils.timestamps import parse_iso


def calculate_decayed_importance(
    base_importance: float,
    last_accessed: str,
    access_count: int,
    manual_importance: Optional[int] = None,
    decay_rate: float = 0.5,
) -> float:
    """Calculate the decayed importance score for a memory.

    The decay formula:
    - Starts from base importance (typically 50 or user-set value)
    - Decays linearly by decay_rate points per day since last access
    - Gains back importance from access frequency (log scale)
    - Manual importance overrides all calculations

    Args:
        base_importance: Original importance score (0-100)
        last_accessed: ISO timestamp of last access
        access_count: Number of times memory was accessed
        manual_importance: User-set importance (overrides calculation)
        decay_rate: Points to decay per day (default 0.5)

    Returns:
        Current importance score (0-100)
    """
    # Manual importance always wins
    if manual_importance is not None:
        return float(manual_importance)

    # Calculate days since last access
    last_dt = parse_iso(last_accessed)
    now = datetime.now(timezone.utc)
    days = (now - last_dt).days

    # Apply decay
    decayed = base_importance - (days * decay_rate)

    # Access boost: frequently used memories resist decay
    # log1p(10) ≈ 2.4, so 10 accesses = +12 importance
    access_boost = math.log1p(access_count) * 5.0

    # Calculate final score
    final = decayed + access_boost

    # Clamp to 0-100
    return max(0.0, min(100.0, final))


def should_mark_for_review(
    last_verified: Optional[str],
    review_days: int = 30,
) -> bool:
    """Check if a memory should be marked for review.

    Args:
        last_verified: ISO timestamp of last verification, or None
        review_days: Days threshold for review

    Returns:
        True if memory should be reviewed
    """
    if last_verified is None:
        return False  # Never verified, use created_at logic elsewhere

    verified_dt = parse_iso(last_verified)
    now = datetime.now(timezone.utc)
    days = (now - verified_dt).days

    return days >= review_days


def get_freshness_status(
    created_at: str,
    last_verified: Optional[str],
    current_status: str,
    review_days: int = 30,
) -> str:
    """Determine the freshness status for a memory.

    Args:
        created_at: ISO timestamp of creation
        last_verified: ISO timestamp of last verification
        current_status: Current status value
        review_days: Days threshold for review

    Returns:
        New status: fresh, needs_review, outdated, or archived
    """
    # Archived stays archived until explicitly changed
    if current_status == "archived":
        return "archived"

    # Outdated stays outdated until verified
    if current_status == "outdated":
        return "outdated"

    # Check if needs review
    reference_date = last_verified or created_at
    reference_dt = parse_iso(reference_date)
    now = datetime.now(timezone.utc)
    days = (now - reference_dt).days

    if days >= review_days * 2:
        return "outdated"
    elif days >= review_days:
        return "needs_review"

    return "fresh"


def apply_decay_to_memory(
    importance_score: float,
    last_accessed: str,
    access_count: int,
    manual_importance: Optional[int],
    decay_rate: float = 0.5,
) -> float:
    """Apply decay calculation to a memory's importance.

    This is a convenience function that wraps calculate_decayed_importance.

    Args:
        importance_score: Current stored importance
        last_accessed: Last access timestamp
        access_count: Access count
        manual_importance: Manual override if any
        decay_rate: Decay rate

    Returns:
        Updated importance score
    """
    return calculate_decayed_importance(
        base_importance=importance_score,
        last_accessed=last_accessed,
        access_count=access_count,
        manual_importance=manual_importance,
        decay_rate=decay_rate,
    )
```
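To make the decay formula concrete, a quick arithmetic check with illustrative values (not part of the package): a memory with base importance 50, last accessed 20 days ago, with 10 recorded accesses.

```python
import math

# Hypothetical inputs: base importance 50, 20 days since access, 10 accesses.
decayed = 50 - 20 * 0.5            # linear decay: 50 - 10 = 40
boost = math.log1p(10) * 5.0       # ln(11) * 5 ≈ 12
score = max(0.0, min(100.0, decayed + boost))
print(round(score, 1))             # ≈ 52.0 — frequent access offsets the decay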
omni_cortex/embeddings/__init__.py (new file)
@@ -0,0 +1,35 @@
```python
"""Embedding generation for semantic search."""

from .local import (
    DEFAULT_MODEL_NAME,
    EMBEDDING_DIMENSIONS,
    generate_embedding,
    generate_embeddings_batch,
    generate_and_store_embedding,
    get_embedding,
    get_all_embeddings,
    store_embedding,
    delete_embedding,
    vector_to_blob,
    blob_to_vector,
    get_memories_without_embeddings,
    backfill_embeddings,
    is_model_available,
)

__all__ = [
    "DEFAULT_MODEL_NAME",
    "EMBEDDING_DIMENSIONS",
    "generate_embedding",
    "generate_embeddings_batch",
    "generate_and_store_embedding",
    "get_embedding",
    "get_all_embeddings",
    "store_embedding",
    "delete_embedding",
    "vector_to_blob",
    "blob_to_vector",
    "get_memories_without_embeddings",
    "backfill_embeddings",
    "is_model_available",
]
```
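For orientation, a hedged usage sketch of this re-exported API. The database path is a placeholder, and `sqlite3.Row` is assumed because the queries in `local.py` (the next hunk) index rows by column name; in the real package the connection presumably comes from `omni_cortex.database` instead.

```python
# Sketch only: assumes omni-cortex and sentence-transformers are installed,
# and that "cortex.db" (hypothetical path) has the package's schema.
import sqlite3

from omni_cortex.embeddings import backfill_embeddings, is_model_available

conn = sqlite3.connect("cortex.db")
conn.row_factory = sqlite3.Row  # local.py reads rows by column name

if is_model_available():
    count = backfill_embeddings(conn)
    print(f"generated {count} embeddings")
else:
    print("sentence-transformers not installed; skipping backfill")
```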
omni_cortex/embeddings/local.py (new file)
@@ -0,0 +1,442 @@
```python
"""Local embedding generation using sentence-transformers.

This module provides embedding generation with robust timeout handling
to prevent hangs during model loading. The model loading happens in a
subprocess that can be killed if it takes too long.
"""

import json
import logging
import re
import sqlite3
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Optional

import numpy as np

from ..utils.ids import generate_embedding_id
from ..utils.timestamps import now_iso

logger = logging.getLogger(__name__)

# Model configuration
DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2"
EMBEDDING_DIMENSIONS = 384
EMBEDDING_TIMEOUT = 60  # seconds - timeout for embedding generation

# Security: allowed model name pattern (alphanumeric, hyphens, underscores, forward slashes for org/model)
MODEL_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-/]+$')


def is_model_available() -> bool:
    """Check if sentence-transformers is available.

    Returns:
        True if the package is installed
    """
    try:
        import sentence_transformers
        return True
    except ImportError:
        return False


def _validate_model_name(model_name: str) -> None:
    """Validate model name to prevent code injection.

    Args:
        model_name: The model name to validate

    Raises:
        ValueError: If model name contains invalid characters
    """
    if not MODEL_NAME_PATTERN.match(model_name):
        raise ValueError(
            f"Invalid model name '{model_name}'. "
            "Model names may only contain letters, numbers, hyphens, underscores, and forward slashes."
        )


def _generate_embedding_subprocess(text: str, model_name: str, timeout: float) -> Optional[np.ndarray]:
    """Generate embedding using a subprocess with timeout.

    This runs the embedding generation in a completely separate process
    that can be killed if it hangs during model loading.

    Args:
        text: Text to embed
        model_name: Model name
        timeout: Timeout in seconds

    Returns:
        Numpy array of embedding values, or None if failed/timed out
    """
    # Validate model name to prevent code injection
    _validate_model_name(model_name)

    # Python script to run in subprocess
    script = f'''
import sys
import json
import numpy as np

try:
    from sentence_transformers import SentenceTransformer

    # Load model and generate embedding
    model = SentenceTransformer("{model_name}")
    embedding = model.encode(sys.stdin.read(), convert_to_numpy=True)

    # Output as JSON list
    print(json.dumps(embedding.tolist()))
except Exception as e:
    print(json.dumps({{"error": str(e)}}))
    sys.exit(1)
'''

    try:
        # Run embedding generation in subprocess
        result = subprocess.run(
            [sys.executable, "-c", script],
            input=text,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else "Unknown error"
            logger.error(f"Embedding subprocess failed: {error_msg}")
            return None

        # Parse output
        output = result.stdout.strip()
        if not output:
            logger.error("Embedding subprocess returned empty output")
            return None

        data = json.loads(output)

        if isinstance(data, dict) and "error" in data:
            logger.error(f"Embedding generation error: {data['error']}")
            return None

        return np.array(data, dtype=np.float32)

    except subprocess.TimeoutExpired:
        logger.warning(f"Embedding generation timed out after {timeout}s")
        return None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse embedding output: {e}")
        return None
    except Exception as e:
        logger.error(f"Embedding subprocess error: {e}")
        return None


def generate_embedding(
    text: str,
    model_name: str = DEFAULT_MODEL_NAME,
    timeout: float = EMBEDDING_TIMEOUT,
) -> np.ndarray:
    """Generate embedding for a text string.

    Uses subprocess with timeout to prevent hangs during model loading.

    Args:
        text: Text to embed
        model_name: Name of the model to use
        timeout: Timeout in seconds

    Returns:
        Numpy array of embedding values (384 dimensions)

    Raises:
        RuntimeError: If embedding generation fails or times out
    """
    if not is_model_available():
        raise ImportError(
            "sentence-transformers is required for embeddings. "
            "Install with: pip install sentence-transformers"
        )

    embedding = _generate_embedding_subprocess(text, model_name, timeout)

    if embedding is None:
        raise RuntimeError(
            f"Embedding generation failed or timed out after {timeout}s. "
            "This may happen on first run while the model downloads (~90MB). "
            "Try again or disable embeddings with embedding_enabled: false in config."
        )

    return embedding


def generate_embeddings_batch(
    texts: list[str],
    model_name: str = DEFAULT_MODEL_NAME,
    timeout: float = EMBEDDING_TIMEOUT,
) -> list[np.ndarray]:
    """Generate embeddings for multiple texts.

    Note: Currently processes one at a time for reliability.
    Batch processing could be added later for performance.

    Args:
        texts: List of texts to embed
        model_name: Name of the model to use
        timeout: Timeout per text in seconds

    Returns:
        List of embedding arrays (may be shorter than input if some fail)
    """
    embeddings = []
    for text in texts:
        try:
            embedding = generate_embedding(text, model_name, timeout)
            embeddings.append(embedding)
        except Exception as e:
            logger.warning(f"Failed to generate embedding: {e}")
            # Continue with remaining texts
    return embeddings


def vector_to_blob(vector: np.ndarray) -> bytes:
    """Convert numpy array to SQLite BLOB.

    Args:
        vector: Numpy array of float32 values

    Returns:
        Bytes representation
    """
    vector = vector.astype(np.float32)
    return vector.tobytes()


def blob_to_vector(blob: bytes) -> np.ndarray:
    """Convert SQLite BLOB to numpy array.

    Args:
        blob: Bytes from database

    Returns:
        Numpy array of float32 values
    """
    return np.frombuffer(blob, dtype=np.float32)


def store_embedding(
    conn: sqlite3.Connection,
    memory_id: str,
    vector: np.ndarray,
    model_name: str = DEFAULT_MODEL_NAME,
) -> str:
    """Store an embedding in the database.

    Args:
        conn: Database connection
        memory_id: ID of the memory
        vector: Embedding vector
        model_name: Model used to generate the embedding

    Returns:
        Embedding ID
    """
    embedding_id = generate_embedding_id()
    blob = vector_to_blob(vector)

    cursor = conn.cursor()

    # Insert or replace embedding
    cursor.execute(
        """
        INSERT OR REPLACE INTO embeddings (id, memory_id, model_name, vector, dimensions, created_at)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (embedding_id, memory_id, model_name, blob, len(vector), now_iso()),
    )

    # Update memory's has_embedding flag
    cursor.execute(
        "UPDATE memories SET has_embedding = 1 WHERE id = ?",
        (memory_id,),
    )

    conn.commit()
    return embedding_id


def get_embedding(
    conn: sqlite3.Connection,
    memory_id: str,
) -> Optional[np.ndarray]:
    """Get the embedding for a memory.

    Args:
        conn: Database connection
        memory_id: Memory ID

    Returns:
        Embedding vector or None if not found
    """
    cursor = conn.cursor()
    cursor.execute(
        "SELECT vector FROM embeddings WHERE memory_id = ?",
        (memory_id,),
    )
    row = cursor.fetchone()

    if not row:
        return None

    return blob_to_vector(row["vector"])


def get_all_embeddings(
    conn: sqlite3.Connection,
) -> list[tuple[str, np.ndarray]]:
    """Get all embeddings from the database.

    Returns:
        List of (memory_id, vector) tuples
    """
    cursor = conn.cursor()
    cursor.execute("SELECT memory_id, vector FROM embeddings")

    results = []
    for row in cursor.fetchall():
        vector = blob_to_vector(row["vector"])
        results.append((row["memory_id"], vector))

    return results


def delete_embedding(
    conn: sqlite3.Connection,
    memory_id: str,
) -> bool:
    """Delete embedding for a memory.

    Args:
        conn: Database connection
        memory_id: Memory ID

    Returns:
        True if deleted
    """
    cursor = conn.cursor()
    cursor.execute("DELETE FROM embeddings WHERE memory_id = ?", (memory_id,))

    if cursor.rowcount > 0:
        cursor.execute(
            "UPDATE memories SET has_embedding = 0 WHERE id = ?",
            (memory_id,),
        )
        conn.commit()
        return True

    return False


def generate_and_store_embedding(
    conn: sqlite3.Connection,
    memory_id: str,
    content: str,
    context: Optional[str] = None,
    model_name: str = DEFAULT_MODEL_NAME,
    timeout: float = EMBEDDING_TIMEOUT,
) -> Optional[str]:
    """Generate and store embedding for a memory.

    Args:
        conn: Database connection
        memory_id: Memory ID
        content: Memory content
        context: Optional context
        model_name: Model to use
        timeout: Timeout in seconds

    Returns:
        Embedding ID or None if failed
    """
    try:
        # Combine content and context for embedding
        text = content
        if context:
            text = f"{content}\n\nContext: {context}"

        vector = generate_embedding(text, model_name, timeout)
        embedding_id = store_embedding(conn, memory_id, vector, model_name)

        logger.debug(f"Generated embedding for memory {memory_id}")
        return embedding_id

    except Exception as e:
        logger.warning(f"Failed to generate embedding for {memory_id}: {e}")
        return None


def get_memories_without_embeddings(
    conn: sqlite3.Connection,
    limit: int = 100,
) -> list[tuple[str, str, Optional[str]]]:
    """Get memories that don't have embeddings.

    Args:
        conn: Database connection
        limit: Maximum number to return

    Returns:
        List of (memory_id, content, context) tuples
    """
    cursor = conn.cursor()
    cursor.execute(
        """
        SELECT id, content, context
        FROM memories
        WHERE has_embedding = 0
        LIMIT ?
        """,
        (limit,),
    )

    return [(row["id"], row["content"], row["context"]) for row in cursor.fetchall()]


def backfill_embeddings(
    conn: sqlite3.Connection,
    model_name: str = DEFAULT_MODEL_NAME,
    timeout_per_memory: float = EMBEDDING_TIMEOUT,
) -> int:
    """Generate embeddings for all memories that don't have them.

    Args:
        conn: Database connection
        model_name: Model to use
        timeout_per_memory: Timeout per memory in seconds

    Returns:
        Number of embeddings generated
    """
    total_generated = 0

    while True:
        # Get batch of memories without embeddings
        memories = get_memories_without_embeddings(conn, limit=10)

        if not memories:
            break

        for memory_id, content, context in memories:
            result = generate_and_store_embedding(
                conn, memory_id, content, context, model_name, timeout_per_memory
            )
            if result:
                total_generated += 1
                logger.info(f"Generated embedding {total_generated}: {memory_id}")

    return total_generated
```
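The BLOB encoding above is just raw float32 bytes in native byte order, so the round trip is easy to sanity-check. A minimal sketch using the functions defined in this hunk:

```python
import numpy as np

from omni_cortex.embeddings import (
    EMBEDDING_DIMENSIONS,
    blob_to_vector,
    vector_to_blob,
)

vec = np.random.rand(EMBEDDING_DIMENSIONS).astype(np.float32)
blob = vector_to_blob(vec)                    # 384 floats -> 1536 raw bytes
assert len(blob) == EMBEDDING_DIMENSIONS * 4  # 4 bytes per float32
assert np.array_equal(blob_to_vector(blob), vec)
```

One reading of `backfill_embeddings` worth noting: a memory whose generation keeps failing stays at `has_embedding = 0`, so the `while True` loop would refetch the same batch indefinitely; callers may want their own retry cap.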
omni_cortex/models/__init__.py (new file)
@@ -0,0 +1,20 @@
```python
"""Pydantic models for Omni Cortex entities."""

from .memory import Memory, MemoryCreate, MemoryUpdate
from .activity import Activity, ActivityCreate
from .session import Session, SessionCreate, SessionSummary
from .agent import Agent
from .relationship import MemoryRelationship

__all__ = [
    "Memory",
    "MemoryCreate",
    "MemoryUpdate",
    "Activity",
    "ActivityCreate",
    "Session",
    "SessionCreate",
    "SessionSummary",
    "Agent",
    "MemoryRelationship",
]
```