@geravant/sinain 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +4 -2
- package/install.js +79 -12
- package/package.json +2 -2
- package/sinain-agent/CLAUDE.md +14 -7
- package/sinain-core/.env.example +0 -1
- package/sinain-core/package.json +1 -1
- package/sinain-core/src/config.ts +0 -1
- package/sinain-core/src/escalation/escalator.ts +56 -7
- package/sinain-core/src/escalation/message-builder.ts +54 -1
- package/sinain-core/src/index.ts +37 -0
- package/sinain-core/src/overlay/commands.ts +8 -10
- package/sinain-core/src/overlay/ws-handler.ts +3 -0
- package/sinain-core/src/server.ts +54 -0
- package/sinain-core/src/types.ts +14 -2
- package/sinain-knowledge/curation/engine.ts +137 -24
- package/sinain-knowledge/data/git-store.ts +24 -0
- package/sinain-knowledge/data/store.ts +117 -0
- package/sinain-mcp-server/index.ts +109 -14
- package/sinain-memory/graph_query.py +210 -0
- package/sinain-memory/knowledge_integrator.py +519 -0
- package/sinain-memory/memory-config.json +3 -1
- package/sinain-memory/session_distiller.py +162 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Knowledge Integrator — update playbook + knowledge graph from a SessionDigest.
|
|
3
|
+
|
|
4
|
+
Takes a session digest (from session_distiller.py), the current playbook, and
|
|
5
|
+
the knowledge graph, then produces:
|
|
6
|
+
1. Updated playbook (working memory)
|
|
7
|
+
2. Graph operations (long-term memory: assert/reinforce/retract facts)
|
|
8
|
+
|
|
9
|
+
Single LLM call, ~15s. Replaces: playbook_curator + feedback_analyzer +
|
|
10
|
+
triple_extractor + triple_ingest.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python3 knowledge_integrator.py --memory-dir memory/ \
|
|
14
|
+
--digest '{"whatHappened":"...","patterns":[...]}' \
|
|
15
|
+
[--bootstrap] # one-time: seed graph from current playbook
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import hashlib
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
import shutil
|
|
23
|
+
import sys
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from common import (
|
|
28
|
+
LLMError,
|
|
29
|
+
call_llm_with_fallback,
|
|
30
|
+
extract_json,
|
|
31
|
+
output_json,
|
|
32
|
+
read_playbook,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
SYSTEM_PROMPT = """\
|
|
36
|
+
You are a knowledge integrator for a personal AI overlay system (sinain).
|
|
37
|
+
You maintain TWO knowledge stores:
|
|
38
|
+
|
|
39
|
+
1. PLAYBOOK (working memory, ~50 lines): actively curated patterns, anti-patterns,
|
|
40
|
+
and preferences. Injected into every agent prompt. Must be concise and current.
|
|
41
|
+
|
|
42
|
+
2. KNOWLEDGE GRAPH (long-term memory): durable facts that survive playbook pruning.
|
|
43
|
+
Stored as entity-attribute-value triples. Facts can be reinforced (seen again),
|
|
44
|
+
retracted (contradicted or outdated), or newly asserted.
|
|
45
|
+
|
|
46
|
+
Given a session digest (what happened), the current playbook, and existing graph facts:
|
|
47
|
+
|
|
48
|
+
FOR THE PLAYBOOK:
|
|
49
|
+
- ADD patterns from the digest that are novel (not already in playbook)
|
|
50
|
+
- REINFORCE existing patterns that the session confirms (increment "seen" count)
|
|
51
|
+
- PRUNE patterns contradicted by session evidence
|
|
52
|
+
- PROMOTE frequently-reinforced patterns (seen 3+) to "established"
|
|
53
|
+
- Keep under 50 lines. Density over completeness.
|
|
54
|
+
- DO NOT modify header/footer comments (<!-- mining-index ... --> and <!-- effectiveness ... -->)
|
|
55
|
+
- Three Laws: (1) don't remove error-prevention patterns, (2) preserve high-scoring approaches, (3) then evolve
|
|
56
|
+
|
|
57
|
+
FOR THE KNOWLEDGE GRAPH:
|
|
58
|
+
- ASSERT new durable facts (error→fix mappings, domain knowledge, user expertise)
|
|
59
|
+
- REINFORCE existing facts confirmed by the session (list their entity_ids)
|
|
60
|
+
- RETRACT facts contradicted by session evidence (list their entity_ids)
|
|
61
|
+
- Each fact needs: entity (domain/tool/workflow), attribute (relationship type), value (the knowledge), confidence (0.0-1.0), domain (for module scoping)
|
|
62
|
+
- Entity naming: use lowercase-hyphenated slugs (e.g., "react-native", "metro-bundler")
|
|
63
|
+
- Only assert DURABLE facts — not ephemeral session details
|
|
64
|
+
|
|
65
|
+
If the session was empty/idle, return minimal changes.
|
|
66
|
+
|
|
67
|
+
Respond with ONLY a JSON object:
|
|
68
|
+
{
|
|
69
|
+
"updatedPlaybook": "full playbook body text (between header and footer comments)",
|
|
70
|
+
"changes": {
|
|
71
|
+
"added": ["pattern text", ...],
|
|
72
|
+
"pruned": ["pattern text", ...],
|
|
73
|
+
"promoted": ["pattern text", ...],
|
|
74
|
+
"reinforced": ["pattern text", ...]
|
|
75
|
+
},
|
|
76
|
+
"graphOps": [
|
|
77
|
+
{"op": "assert", "entity": "entity-slug", "attribute": "attr-name", "value": "fact text", "confidence": 0.8, "domain": "domain-name"},
|
|
78
|
+
{"op": "reinforce", "entityId": "fact:existing-slug"},
|
|
79
|
+
{"op": "retract", "entityId": "fact:existing-slug", "reason": "why"}
|
|
80
|
+
]
|
|
81
|
+
}"""
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
_STOPWORDS = frozenset({
|
|
85
|
+
"the", "and", "for", "when", "with", "that", "this", "from", "into",
|
|
86
|
+
"after", "before", "during", "should", "would", "could", "been", "have",
|
|
87
|
+
"will", "also", "then", "than", "not", "but", "are", "was", "were",
|
|
88
|
+
"can", "may", "use", "run", "set", "get", "try", "all", "any", "new",
|
|
89
|
+
"score", "seen",
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _extract_tags(value: str) -> list[str]:
|
|
94
|
+
"""Extract searchable keyword tags from fact value text.
|
|
95
|
+
|
|
96
|
+
Returns up to 10 deduplicated lowercase tags suitable for AVET-indexed lookup.
|
|
97
|
+
"""
|
|
98
|
+
# Lowercase words (including hyphenated compounds like "react-native")
|
|
99
|
+
words = re.findall(r"[a-z][a-z0-9-]+", value.lower())
|
|
100
|
+
tags = [w for w in words if len(w) > 2 and w not in _STOPWORDS]
|
|
101
|
+
# Detect compound terms from CamelCase or "Title Case" patterns
|
|
102
|
+
compounds = re.findall(r"[A-Z][a-z]+ [A-Z][a-z]+", value)
|
|
103
|
+
for c in compounds:
|
|
104
|
+
tags.append(c.lower().replace(" ", "-"))
|
|
105
|
+
# Numeric tokens that look meaningful (error codes, port numbers)
|
|
106
|
+
nums = re.findall(r"\b\d{3,5}\b", value)
|
|
107
|
+
tags.extend(nums)
|
|
108
|
+
# Deduplicate preserving order, cap at 10
|
|
109
|
+
return list(dict.fromkeys(tags))[:10]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _fact_id(entity: str, attribute: str, value: str) -> str:
|
|
113
|
+
"""Generate a deterministic fact entity ID from entity+attribute+value."""
|
|
114
|
+
content = f"{entity}:{attribute}:{value}"
|
|
115
|
+
h = hashlib.sha256(content.encode()).hexdigest()[:12]
|
|
116
|
+
slug = entity.replace(" ", "-").lower()[:30]
|
|
117
|
+
return f"fact:{slug}-{h}"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _load_graph_facts(db_path: str, entities: list[str] | None = None, limit: int = 50) -> list[dict]:
|
|
121
|
+
"""Load relevant facts from the knowledge graph for LLM context."""
|
|
122
|
+
if not Path(db_path).exists():
|
|
123
|
+
return []
|
|
124
|
+
|
|
125
|
+
try:
|
|
126
|
+
from triplestore import TripleStore
|
|
127
|
+
store = TripleStore(db_path)
|
|
128
|
+
|
|
129
|
+
# Get all non-retracted fact entities with their attributes
|
|
130
|
+
if entities:
|
|
131
|
+
# Tag-based search: find facts whose tags match any of the keywords
|
|
132
|
+
# Normalize keywords to lowercase for tag matching
|
|
133
|
+
keywords = [e.lower().replace(" ", "-") for e in entities]
|
|
134
|
+
placeholders = ",".join(["?" for _ in keywords])
|
|
135
|
+
rows = store._conn.execute(
|
|
136
|
+
f"""SELECT entity_id, COUNT(*) as matches
|
|
137
|
+
FROM triples
|
|
138
|
+
WHERE attribute = 'tag' AND NOT retracted
|
|
139
|
+
AND value IN ({placeholders})
|
|
140
|
+
GROUP BY entity_id
|
|
141
|
+
ORDER BY matches DESC
|
|
142
|
+
LIMIT ?""",
|
|
143
|
+
(*keywords, limit),
|
|
144
|
+
).fetchall()
|
|
145
|
+
fact_ids = [r["entity_id"] for r in rows]
|
|
146
|
+
else:
|
|
147
|
+
# Top-N by confidence
|
|
148
|
+
rows = store._conn.execute(
|
|
149
|
+
"""SELECT entity_id, CAST(value AS REAL) as conf
|
|
150
|
+
FROM triples
|
|
151
|
+
WHERE attribute = 'confidence' AND NOT retracted
|
|
152
|
+
AND entity_id LIKE 'fact:%'
|
|
153
|
+
ORDER BY conf DESC
|
|
154
|
+
LIMIT ?""",
|
|
155
|
+
(limit,),
|
|
156
|
+
).fetchall()
|
|
157
|
+
fact_ids = [r["entity_id"] for r in rows]
|
|
158
|
+
|
|
159
|
+
facts = []
|
|
160
|
+
for fid in fact_ids:
|
|
161
|
+
attrs = store.entity(fid)
|
|
162
|
+
if attrs:
|
|
163
|
+
fact = {"entityId": fid}
|
|
164
|
+
for attr_name, values in attrs.items():
|
|
165
|
+
fact[attr_name] = values[0] if len(values) == 1 else values
|
|
166
|
+
facts.append(fact)
|
|
167
|
+
|
|
168
|
+
store.close()
|
|
169
|
+
return facts
|
|
170
|
+
except Exception as e:
|
|
171
|
+
print(f"[warn] Failed to load graph facts: {e}", file=sys.stderr)
|
|
172
|
+
return []
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str) -> dict:
|
|
176
|
+
"""Execute graph operations (assert/reinforce/retract) on the knowledge graph."""
|
|
177
|
+
if not ops:
|
|
178
|
+
return {"asserted": 0, "reinforced": 0, "retracted": 0}
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
from triplestore import TripleStore
|
|
182
|
+
store = TripleStore(db_path)
|
|
183
|
+
stats = {"asserted": 0, "reinforced": 0, "retracted": 0}
|
|
184
|
+
|
|
185
|
+
for op_data in ops:
|
|
186
|
+
op = op_data.get("op", "")
|
|
187
|
+
|
|
188
|
+
if op == "assert":
|
|
189
|
+
entity = op_data.get("entity", "")
|
|
190
|
+
attribute = op_data.get("attribute", "")
|
|
191
|
+
value = op_data.get("value", "")
|
|
192
|
+
confidence = op_data.get("confidence", 0.7)
|
|
193
|
+
domain = op_data.get("domain", "")
|
|
194
|
+
|
|
195
|
+
if not entity or not attribute or not value:
|
|
196
|
+
continue
|
|
197
|
+
|
|
198
|
+
entity_id = _fact_id(entity, attribute, value)
|
|
199
|
+
tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({"digest_ts": digest_ts}))
|
|
200
|
+
store.assert_triple(tx, entity_id, "entity", entity)
|
|
201
|
+
store.assert_triple(tx, entity_id, "attribute", attribute)
|
|
202
|
+
store.assert_triple(tx, entity_id, "value", value)
|
|
203
|
+
store.assert_triple(tx, entity_id, "confidence", str(confidence))
|
|
204
|
+
store.assert_triple(tx, entity_id, "first_seen", digest_ts)
|
|
205
|
+
store.assert_triple(tx, entity_id, "last_reinforced", digest_ts)
|
|
206
|
+
store.assert_triple(tx, entity_id, "reinforce_count", "1")
|
|
207
|
+
if domain:
|
|
208
|
+
store.assert_triple(tx, entity_id, "domain", domain)
|
|
209
|
+
# Auto-tag for keyword-based discovery
|
|
210
|
+
for tag in _extract_tags(value):
|
|
211
|
+
store.assert_triple(tx, entity_id, "tag", tag)
|
|
212
|
+
stats["asserted"] += 1
|
|
213
|
+
|
|
214
|
+
elif op == "reinforce":
|
|
215
|
+
entity_id = op_data.get("entityId", "")
|
|
216
|
+
if not entity_id:
|
|
217
|
+
continue
|
|
218
|
+
|
|
219
|
+
# Read current confidence and reinforce count
|
|
220
|
+
attrs = store.entity(entity_id)
|
|
221
|
+
if not attrs:
|
|
222
|
+
continue
|
|
223
|
+
|
|
224
|
+
cur_conf = 0.5
|
|
225
|
+
cur_count = 0
|
|
226
|
+
if "confidence" in attrs:
|
|
227
|
+
try:
|
|
228
|
+
cur_conf = float(attrs["confidence"][0])
|
|
229
|
+
except (ValueError, IndexError):
|
|
230
|
+
pass
|
|
231
|
+
if "reinforce_count" in attrs:
|
|
232
|
+
try:
|
|
233
|
+
cur_count = int(attrs["reinforce_count"][0])
|
|
234
|
+
except (ValueError, IndexError):
|
|
235
|
+
pass
|
|
236
|
+
|
|
237
|
+
new_conf = min(1.0, cur_conf + 0.15)
|
|
238
|
+
new_count = cur_count + 1
|
|
239
|
+
|
|
240
|
+
tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({
|
|
241
|
+
"op": "reinforce", "entity_id": entity_id, "digest_ts": digest_ts
|
|
242
|
+
}))
|
|
243
|
+
# Retract old values, assert new
|
|
244
|
+
store.retract_triple(tx, entity_id, "confidence", str(cur_conf))
|
|
245
|
+
store.assert_triple(tx, entity_id, "confidence", str(round(new_conf, 2)))
|
|
246
|
+
store.retract_triple(tx, entity_id, "reinforce_count", str(cur_count))
|
|
247
|
+
store.assert_triple(tx, entity_id, "reinforce_count", str(new_count))
|
|
248
|
+
# Retract old last_reinforced if present
|
|
249
|
+
old_reinforced = attrs.get("last_reinforced", [])
|
|
250
|
+
for val in old_reinforced:
|
|
251
|
+
store.retract_triple(tx, entity_id, "last_reinforced", val)
|
|
252
|
+
store.assert_triple(tx, entity_id, "last_reinforced", digest_ts)
|
|
253
|
+
stats["reinforced"] += 1
|
|
254
|
+
|
|
255
|
+
elif op == "retract":
|
|
256
|
+
entity_id = op_data.get("entityId", "")
|
|
257
|
+
reason = op_data.get("reason", "")
|
|
258
|
+
if not entity_id:
|
|
259
|
+
continue
|
|
260
|
+
|
|
261
|
+
tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({
|
|
262
|
+
"op": "retract", "entity_id": entity_id, "reason": reason, "digest_ts": digest_ts
|
|
263
|
+
}))
|
|
264
|
+
# Retract all attributes of this entity
|
|
265
|
+
attrs = store.entity(entity_id)
|
|
266
|
+
for attr_name, values in attrs.items():
|
|
267
|
+
for val in values:
|
|
268
|
+
store.retract_triple(tx, entity_id, attr_name, val)
|
|
269
|
+
stats["retracted"] += 1
|
|
270
|
+
|
|
271
|
+
store.close()
|
|
272
|
+
return stats
|
|
273
|
+
except Exception as e:
|
|
274
|
+
print(f"[warn] Failed to execute graph ops: {e}", file=sys.stderr)
|
|
275
|
+
return {"asserted": 0, "reinforced": 0, "retracted": 0, "error": str(e)}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _extract_header_footer(playbook: str) -> tuple[str, str, str]:
|
|
279
|
+
"""Split playbook into (header, body, footer)."""
|
|
280
|
+
lines = playbook.splitlines()
|
|
281
|
+
header_lines: list[str] = []
|
|
282
|
+
footer_lines: list[str] = []
|
|
283
|
+
body_lines: list[str] = []
|
|
284
|
+
|
|
285
|
+
in_header = True
|
|
286
|
+
for line in lines:
|
|
287
|
+
stripped = line.strip()
|
|
288
|
+
if in_header and stripped.startswith("<!--"):
|
|
289
|
+
header_lines.append(line)
|
|
290
|
+
continue
|
|
291
|
+
in_header = False
|
|
292
|
+
if stripped.startswith("<!-- effectiveness"):
|
|
293
|
+
footer_lines.append(line)
|
|
294
|
+
else:
|
|
295
|
+
body_lines.append(line)
|
|
296
|
+
|
|
297
|
+
return "\n".join(header_lines), "\n".join(body_lines), "\n".join(footer_lines)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _archive_playbook(memory_dir: str) -> str | None:
|
|
301
|
+
"""Archive current playbook. Returns archive path or None."""
|
|
302
|
+
src = Path(memory_dir) / "sinain-playbook.md"
|
|
303
|
+
if not src.exists():
|
|
304
|
+
return None
|
|
305
|
+
|
|
306
|
+
archive_dir = Path(memory_dir) / "playbook-archive"
|
|
307
|
+
archive_dir.mkdir(parents=True, exist_ok=True)
|
|
308
|
+
|
|
309
|
+
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M")
|
|
310
|
+
dest = archive_dir / f"sinain-playbook-{ts}.md"
|
|
311
|
+
shutil.copy2(src, dest)
|
|
312
|
+
return str(dest)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# Keyword -> domain mapping used to scope bootstrapped facts.
# (Fix: hoisted to module level — the original rebuilt this dict on every
# loop iteration.)
_DOMAIN_KEYWORDS = {
    "react": "react-native", "metro": "react-native", "flutter": "flutter",
    "ocr": "vision", "audio": "audio", "hud": "sinain-hud",
    "docker": "infrastructure", "ssh": "infrastructure", "deploy": "infrastructure",
    "intellij": "intellij", "psi": "intellij", "claude": "ai-agents",
    "gemini": "ai-agents", "openrouter": "ai-agents", "escalation": "sinain-core",
}


def _bootstrap_graph(memory_dir: str, db_path: str) -> dict:
    """One-time: seed the knowledge graph from current playbook patterns.

    Playbook bullet lines mentioning "score" or "seen" become assert ops, with
    confidence taken from the score when present (default 0.6) and a domain
    guessed from keyword matching. Returns {"bootstrapped": <count asserted>}.
    """
    playbook = read_playbook(memory_dir)
    if not playbook:
        return {"bootstrapped": 0}

    # Extract patterns from the playbook: "- " bullets that carry scoring metadata.
    # (Fix: the original also re-imported `re` here — it is already imported
    # at module level.)
    patterns = [
        line.strip()[2:]
        for line in playbook.splitlines()
        if line.strip().startswith("- ") and ("score" in line or "seen" in line)
    ]
    if not patterns:
        return {"bootstrapped": 0}

    ops = []
    for pattern in patterns:
        # Confidence from an inline "score X.Y" annotation, if any.
        # (Fix: single capture-group regex replaces the original's fragile
        # regex-inside-regex double search.)
        confidence = 0.6
        score_match = re.search(r"score\s*([\d.]+)", pattern)
        if score_match:
            try:
                confidence = float(score_match.group(1))
            except ValueError:
                pass  # e.g. a bare "." — keep the default

        # Basic keyword heuristic to scope the fact to a domain module.
        lower = pattern.lower()
        domain = next((dom for kw, dom in _DOMAIN_KEYWORDS.items() if kw in lower), "general")

        ops.append({
            "op": "assert",
            "entity": domain,
            "attribute": "pattern",
            "value": pattern[:200],  # keep fact values bounded
            "confidence": confidence,
            "domain": domain,
        })

    now = datetime.now(timezone.utc).isoformat()
    stats = _execute_graph_ops(db_path, ops, now)
    return {"bootstrapped": stats.get("asserted", 0)}
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def main() -> None:
    """CLI entry point: dispatch to bootstrap / retag / digest-integration modes.

    Modes (checked in this order, first match wins):
      --bootstrap  seed the knowledge graph from the current playbook, then exit
      --retag      backfill 'tag' triples for existing facts, then exit
      --digest     normal path: one LLM call merges the digest into the
                   playbook and emits graph ops, which are then executed

    Always emits a single JSON object on stdout (via output_json), even on error.
    """
    parser = argparse.ArgumentParser(description="Knowledge Integrator")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--digest", default=None, help="SessionDigest JSON string")
    parser.add_argument("--bootstrap", action="store_true", help="One-time: seed graph from playbook")
    parser.add_argument("--retag", action="store_true", help="Re-extract tags for all existing facts")
    args = parser.parse_args()

    memory_dir = args.memory_dir
    db_path = str(Path(memory_dir) / "knowledge-graph.db")

    # Bootstrap mode: seed graph from current playbook
    if args.bootstrap:
        result = _bootstrap_graph(memory_dir, db_path)
        output_json(result)
        return

    # Retag mode: extract tags for all existing facts
    if args.retag:
        if not Path(db_path).exists():
            output_json({"error": "knowledge-graph.db not found"})
            return
        from triplestore import TripleStore
        store = TripleStore(db_path)
        # Get all fact entities that have a 'value' attribute.
        # NOTE(review): reaches into the store's private _conn — assumes a
        # sqlite3 connection with name-addressable rows; confirm against the
        # TripleStore implementation.
        rows = store._conn.execute(
            "SELECT DISTINCT entity_id FROM triples WHERE attribute = 'value' AND NOT retracted AND entity_id LIKE 'fact:%'"
        ).fetchall()
        tagged = 0
        for row in rows:
            fid = row["entity_id"]
            attrs = store.entity(fid)
            value_text = attrs.get("value", [""])[0] if attrs else ""
            existing_tags = set(attrs.get("tag", [])) if attrs else set()
            new_tags = _extract_tags(value_text)
            missing = [t for t in new_tags if t not in existing_tags]
            if missing:
                # One transaction per fact keeps the audit trail per-entity.
                tx = store.begin_tx("retag", metadata=json.dumps({"entity_id": fid}))
                for tag in missing:
                    store.assert_triple(tx, fid, "tag", tag)
                tagged += 1
        store.close()
        output_json({"retagged": tagged, "total_facts": len(rows)})
        return

    # Normal mode: integrate session digest
    if not args.digest:
        print("--digest is required (unless --bootstrap or --retag)", file=sys.stderr)
        output_json({"error": "--digest required"})
        return

    try:
        digest = json.loads(args.digest)
    except json.JSONDecodeError as e:
        output_json({"error": f"Invalid digest JSON: {e}"})
        return

    # Skip if digest indicates empty session
    if digest.get("isEmpty", False):
        output_json({"skipped": True, "reason": "empty session"})
        return

    # Read current playbook; header/footer comment blocks are preserved
    # verbatim and only the body is handed to the LLM for rewriting.
    playbook = read_playbook(memory_dir)
    header, body, footer = _extract_header_footer(playbook)

    # Load relevant graph facts for LLM context (tag-matched when the digest
    # names entities, otherwise top-by-confidence).
    digest_entities = digest.get("entities", [])
    existing_facts = _load_graph_facts(db_path, entities=digest_entities if digest_entities else None)

    # Build user prompt
    facts_text = ""
    if existing_facts:
        facts_lines = []
        for f in existing_facts[:30]:  # cap the fact list to keep the prompt bounded
            eid = f.get("entityId", "?")
            val = f.get("value", "")
            conf = f.get("confidence", "?")
            domain = f.get("domain", "?")
            facts_lines.append(f"- [{eid}] ({domain}, confidence={conf}) {val}")
        facts_text = f"\n\n## Existing Graph Facts (for reference — reinforce or retract as needed)\n" + "\n".join(facts_lines)

    user_prompt = f"""## Session Digest
{json.dumps(digest, indent=2, ensure_ascii=False)}

## Current Playbook Body
{body}{facts_text}"""

    try:
        raw = call_llm_with_fallback(
            SYSTEM_PROMPT,
            user_prompt,
            script="knowledge_integrator",
            json_mode=True,
        )
        result = extract_json(raw)
    except (ValueError, LLMError) as e:
        print(f"LLM integration failed: {e}", file=sys.stderr)
        output_json({"error": str(e)})
        return

    # Archive current playbook before mutation
    archive_path = _archive_playbook(memory_dir)

    # Write updated playbook: re-wrap the LLM body in the preserved header and
    # footer. Falls back to the unchanged body if the response omitted
    # updatedPlaybook.
    updated_body = result.get("updatedPlaybook", body)
    new_playbook = f"{header}\n\n{updated_body}\n\n{footer}".strip() + "\n"
    playbook_path = Path(memory_dir) / "sinain-playbook.md"
    playbook_path.write_text(new_playbook, encoding="utf-8")

    # Execute graph operations; digest timestamp preferred, "now" as fallback.
    graph_ops = result.get("graphOps", [])
    digest_ts = digest.get("ts", datetime.now(timezone.utc).isoformat())
    graph_stats = _execute_graph_ops(db_path, graph_ops, digest_ts)

    # Append digest to session-digests.jsonl
    digests_path = Path(memory_dir) / "session-digests.jsonl"
    with open(digests_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(digest, ensure_ascii=False) + "\n")

    # Write integration log (one JSONL file per UTC day)
    log_entry = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "_type": "integration",
        "changes": result.get("changes", {}),
        "graphStats": graph_stats,
        "digestEntities": digest_entities,
        "archivePath": archive_path,
        "playbookLines": len(new_playbook.splitlines()),
    }
    log_dir = Path(memory_dir) / "playbook-logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    log_file = log_dir / f"{today}.jsonl"
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

    output_json({
        "status": "ok",
        "changes": result.get("changes", {}),
        "graphStats": graph_stats,
        "playbookLines": len(new_playbook.splitlines()),
    })
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
if __name__ == "__main__":
|
|
519
|
+
main()
|
|
@@ -12,7 +12,9 @@
|
|
|
12
12
|
"module_manager": { "model": "fast", "maxTokens": 2000 },
|
|
13
13
|
"tick_evaluator": { "model": "smart", "maxTokens": 200, "timeout": 30 },
|
|
14
14
|
"eval_reporter": { "model": "smart", "maxTokens": 1000 },
|
|
15
|
-
"triple_extractor": { "model": "fast", "maxTokens": 1500, "timeout": 30 }
|
|
15
|
+
"triple_extractor": { "model": "fast", "maxTokens": 1500, "timeout": 30 },
|
|
16
|
+
"session_distiller": { "model": "smart", "maxTokens": 1500, "timeout": 30 },
|
|
17
|
+
"knowledge_integrator": { "model": "smart", "maxTokens": 3000, "timeout": 60 }
|
|
16
18
|
},
|
|
17
19
|
"defaults": { "model": "fast", "maxTokens": 1500 },
|
|
18
20
|
"triplestore": {
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Session Distiller — condense session transcript into a SessionDigest.
|
|
3
|
+
|
|
4
|
+
Takes feed items + agent digests from sinain-core and produces a structured
|
|
5
|
+
digest of what happened, what patterns emerged, and what was learned.
|
|
6
|
+
|
|
7
|
+
Single LLM call, ~10s. Replaces: signal_analyzer + insight_synthesizer +
|
|
8
|
+
memory_miner for the purpose of knowledge extraction.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python3 session_distiller.py --memory-dir memory/ \
|
|
12
|
+
--transcript '[ ... feed items ... ]' \
|
|
13
|
+
--session-meta '{"sessionKey":"...","durationMs":...}'
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import json
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from common import (
|
|
22
|
+
LLMError,
|
|
23
|
+
call_llm_with_fallback,
|
|
24
|
+
extract_json,
|
|
25
|
+
output_json,
|
|
26
|
+
read_effective_playbook,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
SYSTEM_PROMPT = """\
|
|
30
|
+
You are a session distiller for a personal AI overlay system (sinain).
|
|
31
|
+
Your job: analyze a session transcript and extract structured knowledge.
|
|
32
|
+
|
|
33
|
+
The transcript contains feed items from sinain-core:
|
|
34
|
+
- audio: transcribed speech from the user's environment
|
|
35
|
+
- agent: sinain's analysis digests and HUD messages
|
|
36
|
+
- openclaw: responses from the AI escalation system
|
|
37
|
+
- system: system events and status messages
|
|
38
|
+
|
|
39
|
+
Extract:
|
|
40
|
+
1. whatHappened: 2-3 sentences summarizing what was accomplished in this session
|
|
41
|
+
2. patterns: up to 5 reusable patterns discovered (things that worked, techniques used)
|
|
42
|
+
3. antiPatterns: up to 3 things that failed and why
|
|
43
|
+
4. preferences: up to 3 user preferences or workflow habits observed
|
|
44
|
+
5. entities: key domains, tools, technologies, or topics worked with (for graph linking)
|
|
45
|
+
6. toolInsights: tool usage insights (e.g., "grep before read reduces misses")
|
|
46
|
+
|
|
47
|
+
Focus on ACTIONABLE knowledge that would help a future agent in similar contexts.
|
|
48
|
+
Skip trivial observations. If the session was idle or empty, say so briefly.
|
|
49
|
+
|
|
50
|
+
Respond with ONLY a JSON object:
|
|
51
|
+
{
|
|
52
|
+
"whatHappened": "string",
|
|
53
|
+
"patterns": ["string", ...],
|
|
54
|
+
"antiPatterns": ["string", ...],
|
|
55
|
+
"preferences": ["string", ...],
|
|
56
|
+
"entities": ["string", ...],
|
|
57
|
+
"toolInsights": ["string", ...],
|
|
58
|
+
"isEmpty": false
|
|
59
|
+
}"""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _truncate_transcript(items: list[dict], max_chars: int = 100_000) -> str:
|
|
63
|
+
"""Format and truncate feed items to fit context window."""
|
|
64
|
+
lines: list[str] = []
|
|
65
|
+
total = 0
|
|
66
|
+
for item in items:
|
|
67
|
+
source = item.get("source", "?")
|
|
68
|
+
text = item.get("text", "")
|
|
69
|
+
ts = item.get("ts", "")
|
|
70
|
+
|
|
71
|
+
# Strip [PERIODIC] items — they're overlay refresh noise
|
|
72
|
+
if text.startswith("[PERIODIC]"):
|
|
73
|
+
continue
|
|
74
|
+
|
|
75
|
+
# Format timestamp as HH:MM:SS if numeric
|
|
76
|
+
ts_str = ""
|
|
77
|
+
if isinstance(ts, (int, float)) and ts > 0:
|
|
78
|
+
from datetime import datetime, timezone
|
|
79
|
+
ts_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime("%H:%M:%S")
|
|
80
|
+
elif isinstance(ts, str):
|
|
81
|
+
ts_str = ts[-8:] if len(ts) > 8 else ts
|
|
82
|
+
|
|
83
|
+
line = f"[{ts_str}] ({source}) {text}"
|
|
84
|
+
if total + len(line) > max_chars:
|
|
85
|
+
lines.append(f"... truncated ({len(items) - len(lines)} more items)")
|
|
86
|
+
break
|
|
87
|
+
lines.append(line)
|
|
88
|
+
total += len(line)
|
|
89
|
+
|
|
90
|
+
return "\n".join(lines)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def main() -> None:
    """CLI entry point: distill a feed-item transcript into a SessionDigest JSON.

    Always emits exactly one JSON object on stdout (via output_json); failures
    are reported as {"error": ..., "isEmpty": true} rather than raised.
    """
    parser = argparse.ArgumentParser(description="Session Distiller")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--transcript", required=True, help="JSON array of feed items")
    parser.add_argument("--session-meta", default="{}", help="JSON session metadata")
    args = parser.parse_args()

    # Parse inputs
    try:
        items = json.loads(args.transcript)
    except json.JSONDecodeError as e:
        print(f"Invalid transcript JSON: {e}", file=sys.stderr)
        output_json({"error": f"Invalid transcript JSON: {e}", "isEmpty": True})
        return

    # NOTE(review): unlike --transcript, malformed --session-meta raises an
    # uncaught JSONDecodeError here — confirm callers always pass valid JSON.
    meta = json.loads(args.session_meta) if args.session_meta else {}

    # Skip if transcript is trivially empty (< 2 items): emit a well-formed
    # empty digest so downstream consumers need no special case.
    if not items or len(items) < 2:
        output_json({
            "whatHappened": "Empty or trivial session",
            "patterns": [],
            "antiPatterns": [],
            "preferences": [],
            "entities": [],
            "toolInsights": [],
            "isEmpty": True,
        })
        return

    # Format transcript
    transcript_text = _truncate_transcript(items)

    # Include current playbook for context (helps avoid re-discovering known patterns)
    playbook = read_effective_playbook(args.memory_dir)
    playbook_summary = ""
    if playbook:
        # Drop blank and HTML-comment lines; cap at 30 lines to bound the prompt.
        lines = [l for l in playbook.splitlines() if l.strip() and not l.startswith("<!--")]
        playbook_summary = f"\n\n## Current Playbook (for reference — don't repeat known patterns)\n{chr(10).join(lines[:30])}"

    user_prompt = f"""## Session Transcript ({len(items)} items)
{transcript_text}

## Session Metadata
{json.dumps(meta, indent=2)}{playbook_summary}"""

    try:
        raw = call_llm_with_fallback(
            SYSTEM_PROMPT,
            user_prompt,
            script="session_distiller",
            json_mode=True,
        )
        result = extract_json(raw)
    except (ValueError, LLMError) as e:
        print(f"LLM distillation failed: {e}", file=sys.stderr)
        output_json({"error": str(e), "isEmpty": True})
        return

    # Add metadata so the digest is self-describing for the downstream integrator.
    result["ts"] = meta.get("ts", "")
    result["sessionKey"] = meta.get("sessionKey", "")
    result["durationMs"] = meta.get("durationMs", 0)
    result["feedItemCount"] = len(items)

    output_json(result)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
if __name__ == "__main__":
|
|
162
|
+
main()
|