arkaos 2.8.0 → 2.10.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- package/VERSION +1 -1
- package/config/cognition/prompts/dreaming.md +208 -0
- package/config/cognition/prompts/research.md +194 -0
- package/config/cognition/schedules.yaml +25 -0
- package/core/cognition/__init__.py +7 -0
- package/core/cognition/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/capture/__init__.py +5 -0
- package/core/cognition/capture/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/capture/__pycache__/collector.cpython-313.pyc +0 -0
- package/core/cognition/capture/__pycache__/store.cpython-313.pyc +0 -0
- package/core/cognition/capture/collector.py +80 -0
- package/core/cognition/capture/store.py +158 -0
- package/core/cognition/insights/__init__.py +5 -0
- package/core/cognition/insights/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/insights/__pycache__/store.cpython-313.pyc +0 -0
- package/core/cognition/insights/store.py +155 -0
- package/core/cognition/memory/__init__.py +9 -0
- package/core/cognition/memory/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/memory/__pycache__/obsidian.cpython-313.pyc +0 -0
- package/core/cognition/memory/__pycache__/schemas.cpython-313.pyc +0 -0
- package/core/cognition/memory/__pycache__/vector.cpython-313.pyc +0 -0
- package/core/cognition/memory/__pycache__/writer.cpython-313.pyc +0 -0
- package/core/cognition/memory/obsidian.py +73 -0
- package/core/cognition/memory/schemas.py +141 -0
- package/core/cognition/memory/vector.py +223 -0
- package/core/cognition/memory/writer.py +57 -0
- package/core/cognition/research/__init__.py +5 -0
- package/core/cognition/research/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/research/__pycache__/profiler.cpython-313.pyc +0 -0
- package/core/cognition/research/profiler.py +256 -0
- package/core/cognition/scheduler/__init__.py +5 -0
- package/core/cognition/scheduler/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/cognition/scheduler/__pycache__/cli.cpython-313.pyc +0 -0
- package/core/cognition/scheduler/__pycache__/daemon.cpython-313.pyc +0 -0
- package/core/cognition/scheduler/__pycache__/platform.cpython-313.pyc +0 -0
- package/core/cognition/scheduler/cli.py +86 -0
- package/core/cognition/scheduler/daemon.py +172 -0
- package/core/cognition/scheduler/platform.py +292 -0
- package/knowledge/ecosystems.json +362 -10
- package/package.json +1 -1
- package/pyproject.toml +1 -1
Two binary files (.pyc bytecode caches listed above) follow; their content is not shown.
package/core/cognition/insights/store.py (new file)
@@ -0,0 +1,155 @@

"""SQLite CRUD store for actionable insights.

Persists ActionableInsight instances with support for status-based retrieval,
project filtering, presentation lifecycle, and dismissal analytics.
"""

import sqlite3
from datetime import datetime, timezone
from pathlib import Path

from core.cognition.memory.schemas import ActionableInsight


class InsightStore:
    """SQLite-backed store for actionable insights."""

    def __init__(self, db_path: str) -> None:
        """Connect to SQLite database and initialize tables."""
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self._db_path = db_path
        self._init_db()

    def _conn(self) -> sqlite3.Connection:
        conn = sqlite3.connect(self._db_path)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        return conn

    def _init_db(self) -> None:
        with self._conn() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS insights (
                    id TEXT PRIMARY KEY,
                    project TEXT NOT NULL,
                    trigger_source TEXT NOT NULL,
                    date_generated TEXT NOT NULL,
                    category TEXT NOT NULL,
                    severity TEXT NOT NULL,
                    title TEXT NOT NULL,
                    description TEXT NOT NULL,
                    recommendation TEXT NOT NULL,
                    context TEXT NOT NULL,
                    status TEXT NOT NULL DEFAULT 'pending',
                    presented_at TEXT
                )
            """)
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_insights_project ON insights (project)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_insights_status ON insights (status)"
            )

    def _row_to_insight(self, row: sqlite3.Row) -> ActionableInsight:
        data = dict(row)
        # Map trigger_source back to trigger (SQL keyword workaround)
        data["trigger"] = data.pop("trigger_source")
        # Parse datetime fields
        data["date_generated"] = datetime.fromisoformat(data["date_generated"])
        if data["presented_at"] is not None:
            data["presented_at"] = datetime.fromisoformat(data["presented_at"])
        return ActionableInsight(**data)

    def save(self, insight: ActionableInsight) -> None:
        """Insert or replace an ActionableInsight record."""
        with self._conn() as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO insights
                (id, project, trigger_source, date_generated, category,
                 severity, title, description, recommendation, context,
                 status, presented_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    insight.id,
                    insight.project,
                    insight.trigger,
                    insight.date_generated.isoformat(),
                    insight.category,
                    insight.severity,
                    insight.title,
                    insight.description,
                    insight.recommendation,
                    insight.context,
                    insight.status,
                    insight.presented_at.isoformat() if insight.presented_at else None,
                ),
            )

    def get_pending(self, project: str) -> list[ActionableInsight]:
        """Return pending insights for a specific project."""
        with self._conn() as conn:
            rows = conn.execute(
                """
                SELECT * FROM insights
                WHERE project = ? AND status = 'pending'
                ORDER BY date_generated ASC
                """,
                (project,),
            ).fetchall()
        return [self._row_to_insight(r) for r in rows]

    def get_all_pending(self) -> list[ActionableInsight]:
        """Return all pending insights across all projects."""
        with self._conn() as conn:
            rows = conn.execute(
                "SELECT * FROM insights WHERE status = 'pending' ORDER BY date_generated ASC"
            ).fetchall()
        return [self._row_to_insight(r) for r in rows]

    def get_by_project(self, project: str) -> list[ActionableInsight]:
        """Return all insights for a project regardless of status."""
        with self._conn() as conn:
            rows = conn.execute(
                "SELECT * FROM insights WHERE project = ? ORDER BY date_generated ASC",
                (project,),
            ).fetchall()
        return [self._row_to_insight(r) for r in rows]

    def update_status(self, insight_id: str, status: str) -> None:
        """Update the status of a single insight."""
        with self._conn() as conn:
            conn.execute(
                "UPDATE insights SET status = ? WHERE id = ?",
                (status, insight_id),
            )

    def mark_presented(self, ids: list[str]) -> None:
        """Mark insights as presented and record the timestamp."""
        if not ids:
            return
        now = datetime.now(timezone.utc).isoformat()
        placeholders = ",".join("?" * len(ids))
        with self._conn() as conn:
            conn.execute(
                f"UPDATE insights SET status = 'presented', presented_at = ? WHERE id IN ({placeholders})",
                [now, *ids],
            )

    def dismissed_counts(self, project: str) -> dict[str, int]:
        """Return count of dismissed insights grouped by category for a project."""
        with self._conn() as conn:
            rows = conn.execute(
                """
                SELECT category, COUNT(*) as cnt FROM insights
                WHERE project = ? AND status = 'dismissed'
                GROUP BY category
                """,
                (project,),
            ).fetchall()
        return {r["category"]: r["cnt"] for r in rows}

    def close(self) -> None:
        """No-op — connections are opened per-operation."""
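For orientation, a minimal usage sketch of the new InsightStore follows; the database path and all field values are hypothetical, not taken from the package:

```python
from core.cognition.insights.store import InsightStore
from core.cognition.memory.schemas import ActionableInsight

# Hypothetical database location; the package decides the real path elsewhere.
store = InsightStore(db_path="/tmp/arkaos-insights.db")

insight = ActionableInsight(
    project="demo-project",          # hypothetical values for illustration
    trigger="dreaming",
    category="technical",            # must be in ACTIONABLE_INSIGHT_CATEGORIES
    severity="improve",              # must be in ACTIONABLE_INSIGHT_SEVERITIES
    title="Add an index on the events table",
    description="Query latency grows with table size.",
    recommendation="Create a covering index for the hot query.",
    context="Observed during a nightly profiling run.",
)

store.save(insight)
pending = store.get_pending("demo-project")       # status == 'pending', oldest first
store.mark_presented([i.id for i in pending])     # flips status and stamps presented_at
print(store.dismissed_counts("demo-project"))     # {} until something is dismissed
```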
package/core/cognition/memory/__init__.py (new file)
@@ -0,0 +1,9 @@

"""Cognitive Layer memory module — Pydantic schemas for capture, knowledge, and insights."""

from core.cognition.memory.schemas import (
    ActionableInsight,
    KnowledgeEntry,
    RawCapture,
)

__all__ = ["RawCapture", "KnowledgeEntry", "ActionableInsight"]
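Given these re-exports, callers can import the schemas from the package root rather than from the schemas module, for example:

```python
from core.cognition.memory import ActionableInsight, KnowledgeEntry, RawCapture
```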
Five binary files (.pyc bytecode caches listed above) follow; their content is not shown.
package/core/cognition/memory/obsidian.py (new file)
@@ -0,0 +1,73 @@

"""ObsidianWriter — persists KnowledgeEntry objects as Obsidian markdown notes."""

import re
from pathlib import Path

from core.cognition.memory.schemas import KnowledgeEntry


CATEGORY_FOLDERS = {
    "pattern": "Patterns",
    "anti_pattern": "Anti-Patterns",
    "solution": "Solutions",
    "architecture": "Architecture",
    "config": "Config",
    "lesson": "Lessons",
    "improvement": "Improvements",
}


def _slugify(title: str, max_len: int = 80) -> str:
    """Convert title to a safe filename slug."""
    slug = title.lower()
    slug = re.sub(r"[^a-z0-9\s-]", "", slug)
    slug = re.sub(r"[\s]+", "-", slug.strip())
    slug = re.sub(r"-+", "-", slug)
    return slug[:max_len].rstrip("-")


def _format_frontmatter(entry: KnowledgeEntry) -> str:
    """Build YAML frontmatter string from a KnowledgeEntry."""
    tags_inline = "[" + ", ".join(entry.tags) + "]"
    stacks_inline = "[" + ", ".join(entry.stacks) + "]"
    created = entry.created_at.isoformat()
    updated = entry.updated_at.isoformat()

    lines = [
        "---",
        f"title: {entry.title}",
        f"id: {entry.id}",
        f"category: {entry.category}",
        f"tags: {tags_inline}",
        f"stacks: {stacks_inline}",
        f"source_project: {entry.source_project}",
        f"applicable_to: {entry.applicable_to}",
        f"confidence: {entry.confidence}",
        f"times_used: {entry.times_used}",
        f"created_at: {created}",
        f"updated_at: {updated}",
        "---",
    ]
    return "\n".join(lines)


class ObsidianWriter:
    """Writes KnowledgeEntry objects as Obsidian-compatible markdown notes."""

    def __init__(self, vault_base_path: str) -> None:
        self._vault = Path(vault_base_path)

    def write(self, entry: KnowledgeEntry) -> str:
        """Persist a KnowledgeEntry as a markdown note. Returns the file path."""
        folder_name = CATEGORY_FOLDERS.get(entry.category, "Knowledge")
        folder = self._vault / folder_name
        folder.mkdir(parents=True, exist_ok=True)

        slug = _slugify(entry.title)
        file_path = folder / f"{slug}.md"

        frontmatter = _format_frontmatter(entry)
        note = f"{frontmatter}\n\n{entry.content}\n"

        file_path.write_text(note, encoding="utf-8")
        return str(file_path)
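A short sketch of how ObsidianWriter routes a note into the vault; the vault path and entry values below are hypothetical:

```python
from core.cognition.memory.obsidian import ObsidianWriter
from core.cognition.memory.schemas import KnowledgeEntry

writer = ObsidianWriter(vault_base_path="/tmp/arkaos-vault")  # hypothetical vault path
entry = KnowledgeEntry(
    title="Use WAL mode for concurrent SQLite readers!",
    category="pattern",                       # routed to the "Patterns" folder
    tags=["sqlite", "performance"],
    stacks=["python"],
    content="Enable PRAGMA journal_mode=WAL so readers never block the writer.",
    source_project="demo-project",
)
path = writer.write(entry)
# -> /tmp/arkaos-vault/Patterns/use-wal-mode-for-concurrent-sqlite-readers.md
# The note starts with the YAML frontmatter built by _format_frontmatter,
# followed by a blank line and entry.content.
```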
package/core/cognition/memory/schemas.py (new file)
@@ -0,0 +1,141 @@

"""Pydantic schemas for the ArkaOS Cognitive Layer memory system.

Three core models:
- RawCapture: Raw session captures during the day
- KnowledgeEntry: Curated knowledge for dual-write (SQLite + Obsidian)
- ActionableInsight: Insights for proactive presentation to the user
"""

import uuid
from datetime import datetime, timezone
from typing import Literal

from pydantic import BaseModel, Field, field_validator


# --- Allowed categories and statuses ---

RAW_CAPTURE_CATEGORIES = ("decision", "solution", "pattern", "error", "config")

KNOWLEDGE_ENTRY_CATEGORIES = (
    "pattern",
    "anti_pattern",
    "solution",
    "architecture",
    "config",
    "lesson",
    "improvement",
)

ACTIONABLE_INSIGHT_CATEGORIES = ("business", "technical", "ux", "strategy")

ACTIONABLE_INSIGHT_SEVERITIES = ("rethink", "improve", "consider")

ACTIONABLE_INSIGHT_STATUSES = ("pending", "presented", "accepted", "dismissed")


def _utc_now() -> datetime:
    return datetime.now(timezone.utc)


def _new_uuid() -> str:
    return str(uuid.uuid4())


# --- Models ---

class RawCapture(BaseModel):
    """Raw session capture — recorded during active work sessions."""

    id: str = Field(default_factory=_new_uuid)
    timestamp: datetime = Field(default_factory=_utc_now)
    session_id: str
    project_path: str
    project_name: str
    category: str
    content: str
    context: dict = Field(default_factory=dict)

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        if v not in RAW_CAPTURE_CATEGORIES:
            raise ValueError(
                f"Invalid category '{v}'. Must be one of: {RAW_CAPTURE_CATEGORIES}"
            )
        return v


class KnowledgeEntry(BaseModel):
    """Curated knowledge entry — written to SQLite and Obsidian on dual-write."""

    id: str = Field(default_factory=_new_uuid)
    title: str
    category: str
    tags: list[str] = Field(default_factory=list)
    stacks: list[str] = Field(default_factory=list)
    content: str
    source_project: str
    applicable_to: str = "any"
    confidence: float = 0.5
    times_used: int = 0
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        if v not in KNOWLEDGE_ENTRY_CATEGORIES:
            raise ValueError(
                f"Invalid category '{v}'. Must be one of: {KNOWLEDGE_ENTRY_CATEGORIES}"
            )
        return v

    @field_validator("confidence")
    @classmethod
    def clamp_confidence(cls, v: float) -> float:
        return max(0.0, min(1.0, v))


class ActionableInsight(BaseModel):
    """Actionable insight — generated by agents for proactive presentation."""

    id: str = Field(default_factory=_new_uuid)
    project: str
    trigger: str
    date_generated: datetime = Field(default_factory=_utc_now)
    category: str
    severity: str
    title: str
    description: str
    recommendation: str
    context: str
    status: str = "pending"
    presented_at: datetime | None = None

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        if v not in ACTIONABLE_INSIGHT_CATEGORIES:
            raise ValueError(
                f"Invalid category '{v}'. Must be one of: {ACTIONABLE_INSIGHT_CATEGORIES}"
            )
        return v

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, v: str) -> str:
        if v not in ACTIONABLE_INSIGHT_SEVERITIES:
            raise ValueError(
                f"Invalid severity '{v}'. Must be one of: {ACTIONABLE_INSIGHT_SEVERITIES}"
            )
        return v

    @field_validator("status")
    @classmethod
    def validate_status(cls, v: str) -> str:
        if v not in ACTIONABLE_INSIGHT_STATUSES:
            raise ValueError(
                f"Invalid status '{v}'. Must be one of: {ACTIONABLE_INSIGHT_STATUSES}"
            )
        return v
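A quick sketch of the validators above in action; all values are hypothetical:

```python
from pydantic import ValidationError

from core.cognition.memory.schemas import KnowledgeEntry, RawCapture

# Defaults: id and timestamps are generated, confidence is clamped to [0.0, 1.0].
entry = KnowledgeEntry(
    title="Prefer UPSERT over INSERT OR REPLACE",   # hypothetical content
    category="lesson",
    content="ON CONFLICT ... DO UPDATE preserves columns you did not touch.",
    source_project="demo-project",
    confidence=1.7,
)
assert entry.confidence == 1.0          # clamp_confidence
assert entry.applicable_to == "any"     # field default

try:
    RawCapture(
        session_id="s1",
        project_path="/tmp/demo",
        project_name="demo",
        category="speculation",          # not in RAW_CAPTURE_CATEGORIES
        content="...",
    )
except ValidationError as err:
    print(err)                           # reports the invalid category
```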
package/core/cognition/memory/vector.py (new file)
@@ -0,0 +1,223 @@

"""VectorWriter — stores KnowledgeEntry objects in SQLite with optional embeddings.

Graceful degradation: if fastembed is not installed, falls back to text-based
keyword matching for search.
"""

import json
import math
import sqlite3
import struct
from typing import Optional

from core.cognition.memory.schemas import KnowledgeEntry

# Optional embedder — imported lazily to survive missing fastembed
try:
    from core.knowledge import embedder as _embedder
except ImportError:
    _embedder = None  # type: ignore[assignment]


_CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS knowledge_entries (
    id TEXT PRIMARY KEY,
    entry_id TEXT UNIQUE NOT NULL,
    title TEXT NOT NULL,
    category TEXT NOT NULL,
    tags TEXT NOT NULL,
    stacks TEXT NOT NULL,
    content TEXT NOT NULL,
    source_project TEXT NOT NULL,
    applicable_to TEXT NOT NULL,
    confidence REAL NOT NULL,
    times_used INTEGER NOT NULL,
    embedding BLOB,
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL
)
"""

_CREATE_IDX_ENTRY_ID = "CREATE INDEX IF NOT EXISTS idx_entry_id ON knowledge_entries (entry_id)"
_CREATE_IDX_CATEGORY = "CREATE INDEX IF NOT EXISTS idx_category ON knowledge_entries (category)"
_CREATE_IDX_APPLICABLE = "CREATE INDEX IF NOT EXISTS idx_applicable_to ON knowledge_entries (applicable_to)"

_UPSERT = """
INSERT INTO knowledge_entries
    (id, entry_id, title, category, tags, stacks, content, source_project,
     applicable_to, confidence, times_used, embedding, created_at, updated_at)
VALUES
    (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(entry_id) DO UPDATE SET
    title = excluded.title,
    category = excluded.category,
    tags = excluded.tags,
    stacks = excluded.stacks,
    content = excluded.content,
    source_project = excluded.source_project,
    applicable_to = excluded.applicable_to,
    confidence = excluded.confidence,
    times_used = excluded.times_used,
    embedding = excluded.embedding,
    updated_at = excluded.updated_at
"""


def _pack_embedding(floats: list[float]) -> bytes:
    """Pack a list of floats into a compact binary blob."""
    return struct.pack(f"{len(floats)}f", *floats)


def _unpack_embedding(blob: bytes) -> list[float]:
    """Unpack a binary blob back into a list of floats."""
    count = len(blob) // struct.calcsize("f")
    return list(struct.unpack(f"{count}f", blob))


def _cosine_similarity(a: list[float], b: list[float]) -> float:
    """Compute cosine similarity between two vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)


def _text_score(query: str, row: dict) -> int:
    """Count keyword matches in title, tags, and content."""
    terms = query.lower().split()
    haystack = " ".join([
        row["title"],
        " ".join(json.loads(row["tags"])),
        row["content"],
    ]).lower()
    return sum(1 for term in terms if term in haystack)


def _row_to_dict(row: sqlite3.Row) -> dict:
    """Convert a sqlite3.Row to the public result dict."""
    return {
        "entry_id": row["entry_id"],
        "title": row["title"],
        "category": row["category"],
        "tags": json.loads(row["tags"]),
        "stacks": json.loads(row["stacks"]),
        "content": row["content"],
        "source_project": row["source_project"],
        "applicable_to": row["applicable_to"],
        "confidence": row["confidence"],
        "times_used": row["times_used"],
    }


class VectorWriter:
    """Writes KnowledgeEntry objects to SQLite, optionally with embeddings."""

    def __init__(self, db_path: str) -> None:
        self._db_path = db_path
        self._conn = sqlite3.connect(db_path)
        self._conn.row_factory = sqlite3.Row
        self._bootstrap()

    # --- Setup ---

    def _bootstrap(self) -> None:
        cur = self._conn.cursor()
        cur.execute(_CREATE_TABLE)
        cur.execute(_CREATE_IDX_ENTRY_ID)
        cur.execute(_CREATE_IDX_CATEGORY)
        cur.execute(_CREATE_IDX_APPLICABLE)
        self._conn.commit()

    # --- Public API ---

    def write(self, entry: KnowledgeEntry) -> bool:
        """UPSERT a KnowledgeEntry. Returns True on success."""
        embedding_blob: Optional[bytes] = None
        vector = self._embed(entry)
        if vector is not None:
            embedding_blob = _pack_embedding(vector)

        self._conn.execute(
            _UPSERT,
            (
                entry.id,
                entry.id,  # entry_id mirrors id (stable natural key)
                entry.title,
                entry.category,
                json.dumps(entry.tags),
                json.dumps(entry.stacks),
                entry.content,
                entry.source_project,
                entry.applicable_to,
                entry.confidence,
                entry.times_used,
                embedding_blob,
                entry.created_at.isoformat(),
                entry.updated_at.isoformat(),
            ),
        )
        self._conn.commit()
        return True

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        """Search entries. Semantic when embeddings available, text fallback otherwise."""
        rows = self._conn.execute(
            "SELECT * FROM knowledge_entries"
        ).fetchall()

        if not rows:
            return []

        use_semantic = self._can_embed() and any(r["embedding"] is not None for r in rows)

        if use_semantic:
            return self._semantic_search(query, rows, top_k)
        return self._text_search(query, rows, top_k)

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()

    # --- Internal helpers ---

    def _embed(self, entry: KnowledgeEntry) -> Optional[list[float]]:
        """Try to embed the entry's title + content. Returns None on failure."""
        if not self._can_embed():
            return None
        text = f"{entry.title}\n{entry.content}"
        return _embedder.embed(text)  # type: ignore[union-attr]

    def _can_embed(self) -> bool:
        return _embedder is not None and _embedder.is_available()

    def _semantic_search(
        self, query: str, rows: list[sqlite3.Row], top_k: int
    ) -> list[dict]:
        query_vec = _embedder.embed(query)  # type: ignore[union-attr]
        if query_vec is None:
            return self._text_search(query, rows, top_k)

        scored = []
        for row in rows:
            if row["embedding"] is None:
                continue
            entry_vec = _unpack_embedding(row["embedding"])
            score = _cosine_similarity(query_vec, entry_vec)
            scored.append((score, row))

        scored.sort(key=lambda x: x[0], reverse=True)
        return [_row_to_dict(r) for _, r in scored[:top_k]]

    def _text_search(
        self, query: str, rows: list[sqlite3.Row], top_k: int
    ) -> list[dict]:
        scored = []
        for row in rows:
            score = _text_score(query, dict(row))
            if score > 0:
                scored.append((score, row))

        scored.sort(key=lambda x: x[0], reverse=True)
        return [_row_to_dict(r) for _, r in scored[:top_k]]
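A usage sketch for VectorWriter; the path and entry are hypothetical, and whether the optional core.knowledge.embedder module is importable only changes how search() ranks results:

```python
from core.cognition.memory.schemas import KnowledgeEntry
from core.cognition.memory.vector import VectorWriter

vw = VectorWriter(db_path="/tmp/arkaos-knowledge.db")   # hypothetical path

vw.write(KnowledgeEntry(
    title="Batch SQLite writes inside one transaction",
    category="pattern",
    tags=["sqlite", "performance"],
    content="Group many INSERTs under a single commit to avoid per-row fsyncs.",
    source_project="demo-project",
))

# With the optional embedder available, search() ranks rows by cosine similarity
# of the stored embedding blobs; otherwise it falls back to keyword counting.
for hit in vw.search("sqlite transaction performance", top_k=3):
    print(hit["title"], hit["confidence"])

vw.close()
```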
package/core/cognition/memory/writer.py (new file)
@@ -0,0 +1,57 @@

"""DualWriter — unified interface that writes KnowledgeEntry to Obsidian and Vector DB."""

from dataclasses import dataclass, field

from core.cognition.memory.obsidian import ObsidianWriter
from core.cognition.memory.schemas import KnowledgeEntry
from core.cognition.memory.vector import VectorWriter


@dataclass
class WriteResult:
    """Result of a dual-write operation, with independent success/error per backend."""

    obsidian_path: str | None = None
    obsidian_error: str | None = None
    vector_indexed: bool = False
    vector_error: str | None = None


class DualWriter:
    """Writes KnowledgeEntry objects to both Obsidian and Vector DB in one call.

    Each backend fails independently — if Obsidian fails, Vector still writes,
    and vice versa. All results are captured in WriteResult.
    """

    def __init__(self, obsidian_base: str, vector_db_path: str) -> None:
        self._obsidian = ObsidianWriter(vault_base_path=obsidian_base)
        self._vector = VectorWriter(db_path=vector_db_path)

    def write(self, entry: KnowledgeEntry) -> WriteResult:
        """Write a single entry to both backends. Returns a WriteResult."""
        result = WriteResult()

        try:
            result.obsidian_path = self._obsidian.write(entry)
        except Exception as exc:  # noqa: BLE001
            result.obsidian_error = str(exc)

        try:
            result.vector_indexed = self._vector.write(entry)
        except Exception as exc:  # noqa: BLE001
            result.vector_error = str(exc)

        return result

    def write_batch(self, entries: list[KnowledgeEntry]) -> list[WriteResult]:
        """Write a list of entries. Returns one WriteResult per entry."""
        return [self.write(entry) for entry in entries]

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        """Search the vector store. Delegates directly to VectorWriter."""
        return self._vector.search(query, top_k=top_k)

    def close(self) -> None:
        """Close the vector DB connection."""
        self._vector.close()
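Finally, a sketch of the dual-write flow; paths and values are hypothetical:

```python
from core.cognition.memory.schemas import KnowledgeEntry
from core.cognition.memory.writer import DualWriter

dw = DualWriter(
    obsidian_base="/tmp/arkaos-vault",          # hypothetical paths
    vector_db_path="/tmp/arkaos-knowledge.db",
)

result = dw.write(KnowledgeEntry(
    title="Keep backends independent in dual writes",
    category="architecture",
    content="A failure in one sink should not lose the write to the other.",
    source_project="demo-project",
))

# Each backend reports independently: a failed Obsidian write leaves
# obsidian_error set while vector_indexed can still be True, and vice versa.
print(result.obsidian_path, result.vector_indexed,
      result.obsidian_error, result.vector_error)

dw.close()
```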