cc-star 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cc_star/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """cc-star — Claude Code memory upgrade kit."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """SQLite local cache layer."""
@@ -0,0 +1,100 @@
1
+ """SQLite connection management with performance optimizations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sqlite3
6
+ import threading
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
+ # Shared statement cache across threads
12
+ _STMT_CACHE: dict[str, sqlite3.Cursor] = {}
13
+
14
+
15
class CacheConnection:
    """Thread-safe SQLite connection manager with performance tuning.

    Each thread lazily opens its own connection (kept in thread-local
    storage). Because SQLite pragmas such as ``foreign_keys`` and
    ``busy_timeout`` are per-connection settings, the tuning pragmas are
    applied once for *every* connection, not once for the whole manager.

    Optimizations:
    - WAL mode for concurrent reads
    - 64MB cache for hot data
    - memory-mapped I/O (256MB)
    - Lazy pragma initialization (deferred until first query)
    """

    def __init__(self, db_path: str, wal_mode: bool = True):
        """Remember the database location and create its parent directory.

        Args:
            db_path: Path of the SQLite file; ``~`` is expanded.
            wal_mode: When true, each connection switches to WAL journaling.
        """
        self._db_path = str(Path(db_path).expanduser())
        self._wal = wal_mode
        self._local = threading.local()
        self._lock = threading.Lock()
        # Legacy flag, kept so existing readers do not break. Decisions are
        # made from the per-thread flag stored on ``self._local`` instead.
        self._initialized = False

        # Ensure parent directory exists
        Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)

    def _ensure_init(self) -> None:
        """Apply performance pragmas once per (thread-local) connection."""
        if getattr(self._local, "initialized", False):
            return
        conn = self._get_conn_raw()
        if self._wal:
            conn.execute("PRAGMA journal_mode=WAL")
        conn.executescript("""
            PRAGMA synchronous=NORMAL;
            PRAGMA foreign_keys=ON;
            PRAGMA cache_size=-65536;
            PRAGMA mmap_size=268435456;
            PRAGMA temp_store=MEMORY;
            PRAGMA busy_timeout=5000;
        """)
        self._local.initialized = True
        self._initialized = True

    def _get_conn_raw(self) -> sqlite3.Connection:
        """Return the calling thread's connection, opening it if needed.

        No pragma setup happens here; see ``_ensure_init``.
        """
        conn = getattr(self._local, "conn", None)
        if conn is None:
            conn = sqlite3.connect(
                self._db_path,
                check_same_thread=False,
                isolation_level=None,  # autocommit mode
            )
            conn.row_factory = sqlite3.Row
            self._local.conn = conn
            # A brand-new connection has not been tuned yet.
            self._local.initialized = False
        return conn

    @property
    def conn(self) -> sqlite3.Connection:
        """The calling thread's connection, with pragmas applied."""
        self._ensure_init()
        return self._get_conn_raw()

    def execute(self, sql: str, params: tuple = ()) -> sqlite3.Cursor:
        """Execute with automatic pragma init."""
        self._ensure_init()
        return self._get_conn_raw().execute(sql, params)

    def executemany(self, sql: str, params: list[tuple]) -> sqlite3.Cursor:
        """Batch execute with automatic pragma init."""
        self._ensure_init()
        return self._get_conn_raw().executemany(sql, params)

    def close(self) -> None:
        """Close the connection for the current thread.

        Connections owned by other threads are left untouched; the next
        use from this thread opens and tunes a fresh connection.
        """
        conn = getattr(self._local, "conn", None)
        if conn is None:
            return
        try:
            # Best effort: let SQLite refresh planner statistics on the way out.
            conn.execute("PRAGMA optimize")
        except sqlite3.Error:
            pass
        conn.close()
        self._local.conn = None
        self._local.initialized = False
        self._initialized = False

    def close_all(self) -> None:
        """Close the calling thread's connection while holding the lock.

        Connections live in thread-local storage, so only the connection
        of the thread that calls this method can be reached and closed.
        """
        with self._lock:
            self.close()
@@ -0,0 +1,94 @@
1
+ """Policy repository — local SQLite CRUD for policies."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, Optional
7
+
8
+ from cc_star.cache.connection import CacheConnection
9
+ from cc_star.cache.schema import ensure_schema
10
+ from cc_star.memos.types import PolicyRow
11
+
12
+
13
class PolicyRepository:
    """Local SQLite store for policy records."""

    def __init__(self, cache: CacheConnection):
        """Keep the cache handle and make sure the tables exist."""
        self._cache = cache
        ensure_schema(cache)

    def insert(self, policy: PolicyRow) -> None:
        """Write a policy to the local cache, replacing any row with the same id.

        List/dict fields are stored as JSON text; the row is always written
        with ``synced`` set to 0.
        """
        db = self._cache.conn
        embedding_json = json.dumps(policy.embedding) if policy.embedding else None
        values = (
            policy.id,
            policy.name,
            policy.description,
            policy.trigger_pattern,
            policy.action_template,
            embedding_json,
            policy.confidence,
            policy.activation_count,
            json.dumps(policy.source_trace_ids, ensure_ascii=False),
            json.dumps(policy.metadata, ensure_ascii=False, default=str),
            policy.created_at,
            0,
        )
        db.execute(
            """
            INSERT OR REPLACE INTO policies
            (id, name, description, trigger_pattern, action_template,
            embedding, confidence, activation_count, source_trace_ids,
            metadata, created_at, synced)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        db.commit()

    def get(self, policy_id: str) -> Optional[PolicyRow]:
        """Look up one policy by id; returns None when it does not exist."""
        cursor = self._cache.conn.execute(
            "SELECT * FROM policies WHERE id = ?", (policy_id,)
        )
        found = cursor.fetchone()
        return None if found is None else self._row_to_policy(found)

    def list_active(self, min_confidence: float = 0.3, limit: int = 20) -> list[PolicyRow]:
        """Return up to ``limit`` policies at or above ``min_confidence``, best first."""
        cursor = self._cache.conn.execute(
            "SELECT * FROM policies WHERE confidence >= ? ORDER BY confidence DESC LIMIT ?",
            (min_confidence, limit),
        )
        return list(map(self._row_to_policy, cursor.fetchall()))

    def increment_activation(self, policy_id: str) -> None:
        """Add one to the activation counter of the given policy."""
        db = self._cache.conn
        db.execute(
            "UPDATE policies SET activation_count = activation_count + 1 WHERE id = ?",
            (policy_id,),
        )
        db.commit()

    def count(self) -> int:
        """Number of policies stored."""
        result = self._cache.conn.execute("SELECT COUNT(*) as cnt FROM policies").fetchone()
        if not result:
            return 0
        return result["cnt"]

    @staticmethod
    def _row_to_policy(row: Any) -> PolicyRow:
        """Convert a database row into a PolicyRow, decoding the JSON columns."""
        raw_embedding = row["embedding"]
        raw_sources = row["source_trace_ids"]
        raw_metadata = row["metadata"]

        embedding = json.loads(raw_embedding) if raw_embedding else None
        # Non-text values (e.g. NULL) fall back to empty containers.
        sources = json.loads(raw_sources) if isinstance(raw_sources, str) else []
        metadata = json.loads(raw_metadata) if isinstance(raw_metadata, str) else {}

        return PolicyRow(
            id=row["id"],
            name=row["name"],
            description=row["description"],
            trigger_pattern=row["trigger_pattern"],
            action_template=row["action_template"],
            embedding=embedding,
            confidence=row["confidence"],
            activation_count=row["activation_count"],
            source_trace_ids=sources,
            metadata=metadata,
            created_at=row["created_at"],
        )
@@ -0,0 +1,100 @@
1
+ """SQLite schema — traces, policies, skills tables with FTS5."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from cc_star.cache.connection import CacheConnection
6
+
7
+
8
def ensure_schema(conn_or_cache: CacheConnection) -> None:
    """Create the traces/policies/skills tables, indexes, FTS table and triggers.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is safe.
    Accepts either a CacheConnection (its ``conn`` is used) or an object that
    already behaves like a sqlite3 connection.
    """
    if isinstance(conn_or_cache, CacheConnection):
        db = conn_or_cache.conn
    else:
        db = conn_or_cache

    ddl = """
        CREATE TABLE IF NOT EXISTS traces (
            id TEXT PRIMARY KEY,
            session_id TEXT NOT NULL,
            turn_index INTEGER NOT NULL DEFAULT 0,
            user_content TEXT NOT NULL,
            assistant_content TEXT NOT NULL DEFAULT '',
            embedding BLOB,
            reward REAL NOT NULL DEFAULT 0.0,
            tags TEXT DEFAULT '',
            metadata TEXT DEFAULT '{}',
            created_at TEXT NOT NULL,
            synced INTEGER NOT NULL DEFAULT 0
        );

        CREATE INDEX IF NOT EXISTS idx_traces_session
            ON traces(session_id);
        CREATE INDEX IF NOT EXISTS idx_traces_created
            ON traces(created_at);
        CREATE INDEX IF NOT EXISTS idx_traces_synced
            ON traces(synced);

        CREATE TABLE IF NOT EXISTS policies (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            description TEXT NOT NULL DEFAULT '',
            trigger_pattern TEXT NOT NULL DEFAULT '',
            action_template TEXT NOT NULL DEFAULT '',
            embedding BLOB,
            confidence REAL NOT NULL DEFAULT 0.0,
            activation_count INTEGER NOT NULL DEFAULT 0,
            source_trace_ids TEXT DEFAULT '[]',
            metadata TEXT DEFAULT '{}',
            created_at TEXT NOT NULL,
            synced INTEGER NOT NULL DEFAULT 0
        );

        CREATE INDEX IF NOT EXISTS idx_policies_confidence
            ON policies(confidence DESC);

        CREATE TABLE IF NOT EXISTS skills (
            name TEXT PRIMARY KEY,
            description TEXT NOT NULL DEFAULT '',
            usage_guide TEXT NOT NULL DEFAULT '',
            source_policy_ids TEXT DEFAULT '[]',
            version INTEGER NOT NULL DEFAULT 1,
            metadata TEXT DEFAULT '{}',
            created_at TEXT NOT NULL
        );

        CREATE VIRTUAL TABLE IF NOT EXISTS traces_fts
        USING fts5(
            user_content,
            assistant_content,
            tags,
            content='traces',
            content_rowid='rowid'
        );

        CREATE TRIGGER IF NOT EXISTS traces_ai AFTER INSERT ON traces BEGIN
            INSERT INTO traces_fts(rowid, user_content, assistant_content, tags)
            VALUES (new.rowid, new.user_content, new.assistant_content, new.tags);
        END;

        CREATE TRIGGER IF NOT EXISTS traces_ad AFTER DELETE ON traces BEGIN
            INSERT INTO traces_fts(traces_fts, rowid, user_content, assistant_content, tags)
            VALUES ('delete', old.rowid, old.user_content, old.assistant_content, old.tags);
        END;

        CREATE TRIGGER IF NOT EXISTS traces_au AFTER UPDATE ON traces BEGIN
            INSERT INTO traces_fts(traces_fts, rowid, user_content, assistant_content, tags)
            VALUES ('delete', old.rowid, old.user_content, old.assistant_content, old.tags);
            INSERT INTO traces_fts(rowid, user_content, assistant_content, tags)
            VALUES (new.rowid, new.user_content, new.assistant_content, new.tags);
        END;
    """
    # The three triggers mirror inserts, deletes and updates on ``traces``
    # into the external-content FTS index ``traces_fts``.
    db.executescript(ddl)
    db.commit()
89
+
90
+
91
def drop_schema(conn_or_cache: CacheConnection) -> None:
    """Remove the FTS table and the skills, policies and traces tables (for testing).

    Accepts either a CacheConnection (its ``conn`` is used) or an object that
    already behaves like a sqlite3 connection.
    """
    if isinstance(conn_or_cache, CacheConnection):
        db = conn_or_cache.conn
    else:
        db = conn_or_cache

    teardown_sql = """
        DROP TABLE IF EXISTS traces_fts;
        DROP TABLE IF EXISTS skills;
        DROP TABLE IF EXISTS policies;
        DROP TABLE IF EXISTS traces;
    """
    db.executescript(teardown_sql)
    db.commit()
@@ -0,0 +1,89 @@
1
+ """Skill repository — local SQLite CRUD for skills."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, Optional
7
+
8
+ from cc_star.cache.connection import CacheConnection
9
+ from cc_star.cache.schema import ensure_schema
10
+ from cc_star.memos.types import SkillRow
11
+
12
+
13
class SkillRepository:
    """Persist and query skills locally."""

    def __init__(self, cache: CacheConnection):
        """Keep the cache handle and make sure the tables exist."""
        self._cache = cache
        ensure_schema(cache)

    def insert(self, skill: SkillRow) -> None:
        """Insert a skill into local cache, replacing any skill with the same name.

        ``source_policy_ids`` and ``metadata`` are stored as JSON text.
        """
        conn = self._cache.conn
        conn.execute(
            """
            INSERT OR REPLACE INTO skills
            (name, description, usage_guide, source_policy_ids,
            version, metadata, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                skill.name,
                skill.description,
                skill.usage_guide,
                json.dumps(skill.source_policy_ids, ensure_ascii=False),
                skill.version,
                json.dumps(skill.metadata, ensure_ascii=False, default=str),
                skill.created_at,
            ),
        )
        conn.commit()

    def get(self, name: str) -> Optional[SkillRow]:
        """Get a skill by name, or None when it does not exist."""
        row = self._cache.conn.execute(
            "SELECT * FROM skills WHERE name = ?", (name,)
        ).fetchone()
        if row is None:
            return None
        return self._row_to_skill(row)

    def list_all(self) -> list[SkillRow]:
        """List all skills, ordered by name."""
        rows = self._cache.conn.execute(
            "SELECT * FROM skills ORDER BY name ASC"
        ).fetchall()
        return [self._row_to_skill(r) for r in rows]

    def search(self, query: str, limit: int = 10) -> list[SkillRow]:
        """Search skills whose name or description contains ``query``.

        The query is matched literally: ``%``, ``_`` and ``\\`` are escaped
        so they are not interpreted as LIKE wildcards.
        """
        escaped = (
            query.replace("\\", "\\\\")  # escape the escape character first
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like = f"%{escaped}%"
        rows = self._cache.conn.execute(
            "SELECT * FROM skills "
            "WHERE name LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\' "
            "LIMIT ?",
            (like, like, limit),
        ).fetchall()
        return [self._row_to_skill(r) for r in rows]

    def delete(self, name: str) -> None:
        """Delete a skill by name (no error when it does not exist)."""
        conn = self._cache.conn
        conn.execute("DELETE FROM skills WHERE name = ?", (name,))
        conn.commit()

    def count(self) -> int:
        """Total skill count."""
        row = self._cache.conn.execute("SELECT COUNT(*) as cnt FROM skills").fetchone()
        return row["cnt"] if row else 0

    @staticmethod
    def _row_to_skill(row: Any) -> SkillRow:
        """Convert a database row into a SkillRow, decoding the JSON columns."""
        raw_sources = row["source_policy_ids"]
        raw_metadata = row["metadata"]
        return SkillRow(
            name=row["name"],
            description=row["description"],
            usage_guide=row["usage_guide"],
            # NULL or empty text falls back to an empty container instead of
            # crashing in json.loads.
            source_policy_ids=json.loads(raw_sources)
            if isinstance(raw_sources, str) and raw_sources
            else [],
            version=row["version"],
            metadata=json.loads(raw_metadata)
            if isinstance(raw_metadata, str) and raw_metadata
            else {},
            created_at=row["created_at"],
        )
@@ -0,0 +1,163 @@
1
+ """Trace repository — local SQLite CRUD for traces with batch operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, Optional
7
+
8
+ from cc_star.cache.connection import CacheConnection
9
+ from cc_star.cache.schema import ensure_schema
10
+ from cc_star.memos.types import TraceRow
11
+
12
+
13
class TraceRepository:
    """Persist and query traces locally."""

    # Upsert instead of INSERT OR REPLACE: REPLACE removes the old row without
    # firing the AFTER DELETE trigger (unless recursive triggers are enabled),
    # which leaves stale entries in the external-content FTS index. The
    # DO UPDATE path fires the AFTER UPDATE trigger and keeps the rowid stable.
    _insert_sql = (
        "INSERT INTO traces "
        "(id, session_id, turn_index, user_content, assistant_content, "
        "embedding, reward, tags, metadata, created_at, synced) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) "
        "ON CONFLICT(id) DO UPDATE SET "
        "session_id = excluded.session_id, "
        "turn_index = excluded.turn_index, "
        "user_content = excluded.user_content, "
        "assistant_content = excluded.assistant_content, "
        "embedding = excluded.embedding, "
        "reward = excluded.reward, "
        "tags = excluded.tags, "
        "metadata = excluded.metadata, "
        "created_at = excluded.created_at, "
        "synced = excluded.synced"
    )

    def __init__(self, cache: CacheConnection):
        """Keep the cache handle and make sure the tables exist."""
        self._cache = cache
        ensure_schema(cache)

    def insert(self, trace: TraceRow) -> None:
        """Insert a single trace, overwriting any trace with the same id."""
        self._cache.execute(
            self._insert_sql,
            self._trace_to_row(trace),
        )

    def insert_batch(self, traces: list[TraceRow]) -> None:
        """Batch insert multiple traces (faster than individual inserts)."""
        rows = [self._trace_to_row(t) for t in traces]
        self._cache.executemany(self._insert_sql, rows)

    def get(self, trace_id: str) -> Optional[TraceRow]:
        """Get a trace by ID, or None when it does not exist."""
        row = self._cache.execute(
            "SELECT * FROM traces WHERE id = ?", (trace_id,)
        ).fetchone()
        if row is None:
            return None
        return self._row_to_trace(row)

    def list_by_session(self, session_id: str, limit: int = 50) -> list[TraceRow]:
        """List traces for a session, ordered by turn index."""
        rows = self._cache.execute(
            "SELECT * FROM traces WHERE session_id = ? ORDER BY turn_index ASC LIMIT ?",
            (session_id, limit),
        ).fetchall()
        return [self._row_to_trace(r) for r in rows]

    def search_fts(self, query: str, limit: int = 8) -> list[TraceRow]:
        """Full-text search on traces using FTS5.

        The query is treated as plain text: every whitespace-separated term
        is passed to FTS5 as a quoted string, so punctuation and FTS5
        operators in user input cannot cause a syntax error. All terms must
        match. An empty query returns an empty list.
        """
        # Strip surrogate characters and control chars that crash FTS5
        query = query.encode("utf-8", "surrogatepass").decode("utf-8", "replace")
        query = "".join(c for c in query if c.isprintable() or c in (" ", "\n", "\t"))
        terms = query.split()
        if not terms:
            return []
        # Inside an FTS5 string a double quote is escaped by doubling it.
        safe = " ".join('"' + term.replace('"', '""') + '"' for term in terms)
        rows = self._cache.execute(
            """
            SELECT t.* FROM traces t
            JOIN traces_fts fts ON t.rowid = fts.rowid
            WHERE traces_fts MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
            (safe, limit),
        ).fetchall()
        return [self._row_to_trace(r) for r in rows]

    def list_recent(self, limit: int = 20) -> list[TraceRow]:
        """List most recent traces."""
        rows = self._cache.execute(
            "SELECT * FROM traces ORDER BY created_at DESC LIMIT ?",
            (limit,),
        ).fetchall()
        return [self._row_to_trace(r) for r in rows]

    def count(self) -> int:
        """Total trace count."""
        row = self._cache.execute(
            "SELECT COUNT(*) as cnt FROM traces"
        ).fetchone()
        return row["cnt"] if row else 0

    def mark_synced(self, trace_id: str) -> None:
        """Mark a trace as synced to OpenViking."""
        self._cache.execute(
            "UPDATE traces SET synced = 1 WHERE id = ?", (trace_id,)
        )

    def mark_synced_batch(self, trace_ids: list[str]) -> None:
        """Batch mark multiple traces as synced."""
        rows = [(tid,) for tid in trace_ids]
        self._cache.executemany(
            "UPDATE traces SET synced = 1 WHERE id = ?", rows,
        )

    def get_unsynced(self, limit: int = 50) -> list[TraceRow]:
        """Get traces that haven't been synced to OpenViking yet, oldest first."""
        rows = self._cache.execute(
            "SELECT * FROM traces WHERE synced = 0 ORDER BY created_at ASC LIMIT ?",
            (limit,),
        ).fetchall()
        return [self._row_to_trace(r) for r in rows]

    def delete_old(self, before_timestamp: str) -> int:
        """Delete traces older than a timestamp. Returns count deleted."""
        cursor = self._cache.execute(
            "DELETE FROM traces WHERE created_at < ?", (before_timestamp,)
        )
        return cursor.rowcount

    def get_all_embeddings(self, limit: int = 1000) -> list[tuple[str, list[float]]]:
        """Get up to ``limit`` (id, embedding) pairs for bulk similarity search.

        Rows whose embedding is empty or not valid JSON are skipped.
        """
        rows = self._cache.execute(
            "SELECT id, embedding FROM traces WHERE embedding IS NOT NULL LIMIT ?",
            (limit,),
        ).fetchall()
        result = []
        for r in rows:
            if not r["embedding"]:
                continue
            try:
                emb = json.loads(r["embedding"])
            except (json.JSONDecodeError, TypeError):
                continue
            if emb:
                result.append((r["id"], emb))
        return result

    @staticmethod
    def _trace_to_row(trace: TraceRow) -> tuple:
        """Flatten a trace into the parameter tuple expected by ``_insert_sql``.

        The last element is the ``synced`` flag, always written as 0.
        """
        return (
            trace.id,
            trace.session_id,
            trace.turn_index,
            trace.user_content,
            trace.assistant_content,
            json.dumps(trace.embedding) if trace.embedding else None,
            trace.reward,
            json.dumps(trace.tags, ensure_ascii=False),
            json.dumps(trace.metadata, ensure_ascii=False, default=str),
            trace.created_at,
            0,
        )

    @staticmethod
    def _row_to_trace(row: Any) -> TraceRow:
        """Convert a database row into a TraceRow, decoding the JSON columns."""
        raw_tags = row["tags"]
        raw_metadata = row["metadata"]
        return TraceRow(
            id=row["id"],
            session_id=row["session_id"],
            turn_index=row["turn_index"],
            user_content=row["user_content"],
            assistant_content=row["assistant_content"],
            embedding=json.loads(row["embedding"]) if row["embedding"] else None,
            reward=row["reward"],
            # The column default for tags is '' which is not valid JSON, so
            # empty text (and NULL) maps to an empty container.
            tags=json.loads(raw_tags) if isinstance(raw_tags, str) and raw_tags else [],
            metadata=json.loads(raw_metadata)
            if isinstance(raw_metadata, str) and raw_metadata
            else {},
            created_at=row["created_at"],
        )
@@ -0,0 +1,58 @@
1
+ """Local cosine similarity search using numpy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ import numpy as np
8
+
9
+
10
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    A vector whose length is (numerically) zero has no direction, so the
    similarity is reported as 0.0 in that case.
    """
    left = np.array(a, dtype=np.float64)
    right = np.array(b, dtype=np.float64)

    left_norm = np.linalg.norm(left)
    right_norm = np.linalg.norm(right)
    if left_norm < 1e-12 or right_norm < 1e-12:
        return 0.0

    scale = left_norm * right_norm
    return float(np.dot(left, right) / scale)
19
+
20
+
21
def search_by_embedding(
    query_embedding: list[float],
    candidates: list[tuple[str, list[float]]],
    k: int = 8,
) -> list[tuple[str, float]]:
    """Search nearest neighbors by cosine similarity.

    Candidates with a missing/empty embedding, or with an embedding whose
    dimension differs from the query's, are skipped rather than aborting
    the whole search.

    Args:
        query_embedding: Query vector.
        candidates: List of (id, embedding_vector) tuples.
        k: Number of results to return; values <= 0 yield an empty list.

    Returns:
        List of (id, score) tuples sorted by descending similarity.
    """
    if not candidates or k <= 0:
        return []

    # Convert the query and compute its norm once, not once per candidate.
    query = np.asarray(query_embedding, dtype=np.float64)
    query_norm = float(np.linalg.norm(query))

    scores: list[tuple[str, float]] = []
    for cid, emb in candidates:
        if emb is None or len(emb) == 0:
            continue
        vec = np.asarray(emb, dtype=np.float64)
        if vec.shape != query.shape:
            # Vectors of different dimension cannot be compared.
            continue
        vec_norm = float(np.linalg.norm(vec))
        if query_norm < 1e-12 or vec_norm < 1e-12:
            # A zero-length vector has no direction; score it as 0.0.
            sim = 0.0
        else:
            sim = float(np.dot(query, vec) / (query_norm * vec_norm))
        scores.append((cid, sim))

    scores.sort(key=lambda item: item[1], reverse=True)
    return scores[:k]
47
+
48
+
49
def compute_embedding(text: str, dim: int = 384) -> list[float]:
    """Compute a deterministic pseudo-random unit vector for ``text``.

    This is a lightweight fallback when the OpenViking embed API is unavailable.
    For production use, use OpenViking's native embedding instead.

    The vector is drawn from a random generator seeded by a SHA-256 digest of
    the text, so the same text always maps to the same vector, including
    across processes. (The built-in ``hash()`` is salted per process for
    strings, which would make stored vectors incomparable between runs.)
    Identical texts get identical vectors; the vector carries no information
    about how similar two different texts are.

    Args:
        text: Input text.
        dim: Length of the returned vector.

    Returns:
        A list of ``dim`` floats with (approximately) unit Euclidean norm.
    """
    import hashlib

    # "surrogatepass" keeps lone surrogates encodable instead of raising.
    digest = hashlib.sha256(text.encode("utf-8", "surrogatepass")).digest()
    seed = int.from_bytes(digest[:4], "big")  # RandomState needs a 32-bit seed
    rng = np.random.RandomState(seed)
    vec = rng.randn(dim)
    # The small epsilon avoids division by zero for degenerate vectors.
    vec = vec / (np.linalg.norm(vec) + 1e-12)
    return vec.tolist()