@geravant/sinain 1.0.19 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +10 -1
  2. package/cli.js +176 -0
  3. package/index.ts +4 -2
  4. package/install.js +89 -14
  5. package/launcher.js +622 -0
  6. package/openclaw.plugin.json +4 -0
  7. package/pack-prepare.js +48 -0
  8. package/package.json +24 -5
  9. package/sense_client/README.md +82 -0
  10. package/sense_client/__init__.py +1 -0
  11. package/sense_client/__main__.py +462 -0
  12. package/sense_client/app_detector.py +54 -0
  13. package/sense_client/app_detector_win.py +83 -0
  14. package/sense_client/capture.py +215 -0
  15. package/sense_client/capture_win.py +88 -0
  16. package/sense_client/change_detector.py +86 -0
  17. package/sense_client/config.py +64 -0
  18. package/sense_client/gate.py +145 -0
  19. package/sense_client/ocr.py +347 -0
  20. package/sense_client/privacy.py +65 -0
  21. package/sense_client/requirements.txt +13 -0
  22. package/sense_client/roi_extractor.py +84 -0
  23. package/sense_client/sender.py +173 -0
  24. package/sense_client/tests/__init__.py +0 -0
  25. package/sense_client/tests/test_stream1_optimizations.py +234 -0
  26. package/setup-overlay.js +82 -0
  27. package/sinain-agent/.env.example +17 -0
  28. package/sinain-agent/CLAUDE.md +87 -0
  29. package/sinain-agent/mcp-config.json +12 -0
  30. package/sinain-agent/run.sh +248 -0
  31. package/sinain-core/.env.example +93 -0
  32. package/sinain-core/package-lock.json +552 -0
  33. package/sinain-core/package.json +21 -0
  34. package/sinain-core/src/agent/analyzer.ts +366 -0
  35. package/sinain-core/src/agent/context-window.ts +172 -0
  36. package/sinain-core/src/agent/loop.ts +404 -0
  37. package/sinain-core/src/agent/situation-writer.ts +187 -0
  38. package/sinain-core/src/agent/traits.ts +520 -0
  39. package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
  40. package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
  41. package/sinain-core/src/audio/capture-spawner.ts +14 -0
  42. package/sinain-core/src/audio/pipeline.ts +335 -0
  43. package/sinain-core/src/audio/transcription-local.ts +141 -0
  44. package/sinain-core/src/audio/transcription.ts +278 -0
  45. package/sinain-core/src/buffers/feed-buffer.ts +71 -0
  46. package/sinain-core/src/buffers/sense-buffer.ts +425 -0
  47. package/sinain-core/src/config.ts +245 -0
  48. package/sinain-core/src/escalation/escalation-slot.ts +136 -0
  49. package/sinain-core/src/escalation/escalator.ts +828 -0
  50. package/sinain-core/src/escalation/message-builder.ts +370 -0
  51. package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
  52. package/sinain-core/src/escalation/scorer.ts +166 -0
  53. package/sinain-core/src/index.ts +537 -0
  54. package/sinain-core/src/learning/feedback-store.ts +253 -0
  55. package/sinain-core/src/learning/signal-collector.ts +218 -0
  56. package/sinain-core/src/log.ts +24 -0
  57. package/sinain-core/src/overlay/commands.ts +126 -0
  58. package/sinain-core/src/overlay/ws-handler.ts +267 -0
  59. package/sinain-core/src/privacy/index.ts +18 -0
  60. package/sinain-core/src/privacy/presets.ts +40 -0
  61. package/sinain-core/src/privacy/redact.ts +92 -0
  62. package/sinain-core/src/profiler.ts +181 -0
  63. package/sinain-core/src/recorder.ts +186 -0
  64. package/sinain-core/src/server.ts +456 -0
  65. package/sinain-core/src/trace/trace-store.ts +73 -0
  66. package/sinain-core/src/trace/tracer.ts +94 -0
  67. package/sinain-core/src/types.ts +427 -0
  68. package/sinain-core/src/util/dedup.ts +48 -0
  69. package/sinain-core/src/util/task-store.ts +84 -0
  70. package/sinain-core/tsconfig.json +18 -0
  71. package/sinain-knowledge/curation/engine.ts +137 -24
  72. package/sinain-knowledge/data/git-store.ts +26 -0
  73. package/sinain-knowledge/data/store.ts +117 -0
  74. package/sinain-mcp-server/index.ts +417 -0
  75. package/sinain-mcp-server/package.json +19 -0
  76. package/sinain-mcp-server/tsconfig.json +15 -0
  77. package/sinain-memory/graph_query.py +185 -0
  78. package/sinain-memory/knowledge_integrator.py +450 -0
  79. package/sinain-memory/memory-config.json +3 -1
  80. package/sinain-memory/session_distiller.py +162 -0
@@ -0,0 +1,450 @@
1
+ #!/usr/bin/env python3
2
+ """Knowledge Integrator — update playbook + knowledge graph from a SessionDigest.
3
+
4
+ Takes a session digest (from session_distiller.py), the current playbook, and
5
+ the knowledge graph, then produces:
6
+ 1. Updated playbook (working memory)
7
+ 2. Graph operations (long-term memory: assert/reinforce/retract facts)
8
+
9
+ Single LLM call, ~15s. Replaces: playbook_curator + feedback_analyzer +
10
+ triple_extractor + triple_ingest.
11
+
12
+ Usage:
13
+ python3 knowledge_integrator.py --memory-dir memory/ \
14
+ --digest '{"whatHappened":"...","patterns":[...]}' \
15
+ [--bootstrap] # one-time: seed graph from current playbook
16
+ """
17
+
18
+ import argparse
19
+ import hashlib
20
+ import json
21
+ import shutil
22
+ import sys
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
+ from common import (
27
+ LLMError,
28
+ call_llm_with_fallback,
29
+ extract_json,
30
+ output_json,
31
+ read_playbook,
32
+ )
33
+
34
+ SYSTEM_PROMPT = """\
35
+ You are a knowledge integrator for a personal AI overlay system (sinain).
36
+ You maintain TWO knowledge stores:
37
+
38
+ 1. PLAYBOOK (working memory, ~50 lines): actively curated patterns, anti-patterns,
39
+ and preferences. Injected into every agent prompt. Must be concise and current.
40
+
41
+ 2. KNOWLEDGE GRAPH (long-term memory): durable facts that survive playbook pruning.
42
+ Stored as entity-attribute-value triples. Facts can be reinforced (seen again),
43
+ retracted (contradicted or outdated), or newly asserted.
44
+
45
+ Given a session digest (what happened), the current playbook, and existing graph facts:
46
+
47
+ FOR THE PLAYBOOK:
48
+ - ADD patterns from the digest that are novel (not already in playbook)
49
+ - REINFORCE existing patterns that the session confirms (increment "seen" count)
50
+ - PRUNE patterns contradicted by session evidence
51
+ - PROMOTE frequently-reinforced patterns (seen 3+) to "established"
52
+ - Keep under 50 lines. Density over completeness.
53
+ - DO NOT modify header/footer comments (<!-- mining-index ... --> and <!-- effectiveness ... -->)
54
+ - Three Laws: (1) don't remove error-prevention patterns, (2) preserve high-scoring approaches, (3) then evolve
55
+
56
+ FOR THE KNOWLEDGE GRAPH:
57
+ - ASSERT new durable facts (error→fix mappings, domain knowledge, user expertise)
58
+ - REINFORCE existing facts confirmed by the session (list their entity_ids)
59
+ - RETRACT facts contradicted by session evidence (list their entity_ids)
60
+ - Each fact needs: entity (domain/tool/workflow), attribute (relationship type), value (the knowledge), confidence (0.0-1.0), domain (for module scoping)
61
+ - Entity naming: use lowercase-hyphenated slugs (e.g., "react-native", "metro-bundler")
62
+ - Only assert DURABLE facts — not ephemeral session details
63
+
64
+ If the session was empty/idle, return minimal changes.
65
+
66
+ Respond with ONLY a JSON object:
67
+ {
68
+ "updatedPlaybook": "full playbook body text (between header and footer comments)",
69
+ "changes": {
70
+ "added": ["pattern text", ...],
71
+ "pruned": ["pattern text", ...],
72
+ "promoted": ["pattern text", ...],
73
+ "reinforced": ["pattern text", ...]
74
+ },
75
+ "graphOps": [
76
+ {"op": "assert", "entity": "entity-slug", "attribute": "attr-name", "value": "fact text", "confidence": 0.8, "domain": "domain-name"},
77
+ {"op": "reinforce", "entityId": "fact:existing-slug"},
78
+ {"op": "retract", "entityId": "fact:existing-slug", "reason": "why"}
79
+ ]
80
+ }"""
81
+
82
+
83
+ def _fact_id(entity: str, attribute: str, value: str) -> str:
84
+ """Generate a deterministic fact entity ID from entity+attribute+value."""
85
+ content = f"{entity}:{attribute}:{value}"
86
+ h = hashlib.sha256(content.encode()).hexdigest()[:12]
87
+ slug = entity.replace(" ", "-").lower()[:30]
88
+ return f"fact:{slug}-{h}"
89
+
90
+
91
+ def _load_graph_facts(db_path: str, entities: list[str] | None = None, limit: int = 50) -> list[dict]:
92
+ """Load relevant facts from the knowledge graph for LLM context."""
93
+ if not Path(db_path).exists():
94
+ return []
95
+
96
+ try:
97
+ from triplestore import TripleStore
98
+ store = TripleStore(db_path)
99
+
100
+ # Get all non-retracted fact entities with their attributes
101
+ if entities:
102
+ # Entity-scoped query: find facts related to specified domains
103
+ domain_clause = " OR ".join([f"value = ?" for _ in entities])
104
+ rows = store._conn.execute(
105
+ f"""SELECT DISTINCT entity_id FROM triples
106
+ WHERE attribute = 'domain' AND NOT retracted
107
+ AND ({domain_clause})
108
+ LIMIT ?""",
109
+ (*entities, limit),
110
+ ).fetchall()
111
+ fact_ids = [r["entity_id"] for r in rows]
112
+ else:
113
+ # Top-N by confidence
114
+ rows = store._conn.execute(
115
+ """SELECT entity_id, CAST(value AS REAL) as conf
116
+ FROM triples
117
+ WHERE attribute = 'confidence' AND NOT retracted
118
+ AND entity_id LIKE 'fact:%'
119
+ ORDER BY conf DESC
120
+ LIMIT ?""",
121
+ (limit,),
122
+ ).fetchall()
123
+ fact_ids = [r["entity_id"] for r in rows]
124
+
125
+ facts = []
126
+ for fid in fact_ids:
127
+ attrs = store.entity(fid)
128
+ if attrs:
129
+ fact = {"entityId": fid}
130
+ for a in attrs:
131
+ fact[a["attribute"]] = a["value"]
132
+ facts.append(fact)
133
+
134
+ store.close()
135
+ return facts
136
+ except Exception as e:
137
+ print(f"[warn] Failed to load graph facts: {e}", file=sys.stderr)
138
+ return []
139
+
140
+
141
+ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str) -> dict:
142
+ """Execute graph operations (assert/reinforce/retract) on the knowledge graph."""
143
+ if not ops:
144
+ return {"asserted": 0, "reinforced": 0, "retracted": 0}
145
+
146
+ try:
147
+ from triplestore import TripleStore
148
+ store = TripleStore(db_path)
149
+ stats = {"asserted": 0, "reinforced": 0, "retracted": 0}
150
+
151
+ for op_data in ops:
152
+ op = op_data.get("op", "")
153
+
154
+ if op == "assert":
155
+ entity = op_data.get("entity", "")
156
+ attribute = op_data.get("attribute", "")
157
+ value = op_data.get("value", "")
158
+ confidence = op_data.get("confidence", 0.7)
159
+ domain = op_data.get("domain", "")
160
+
161
+ if not entity or not attribute or not value:
162
+ continue
163
+
164
+ entity_id = _fact_id(entity, attribute, value)
165
+ tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({"digest_ts": digest_ts}))
166
+ store.assert_triple(tx, entity_id, "entity", entity)
167
+ store.assert_triple(tx, entity_id, "attribute", attribute)
168
+ store.assert_triple(tx, entity_id, "value", value)
169
+ store.assert_triple(tx, entity_id, "confidence", str(confidence))
170
+ store.assert_triple(tx, entity_id, "first_seen", digest_ts)
171
+ store.assert_triple(tx, entity_id, "last_reinforced", digest_ts)
172
+ store.assert_triple(tx, entity_id, "reinforce_count", "1")
173
+ if domain:
174
+ store.assert_triple(tx, entity_id, "domain", domain)
175
+ stats["asserted"] += 1
176
+
177
+ elif op == "reinforce":
178
+ entity_id = op_data.get("entityId", "")
179
+ if not entity_id:
180
+ continue
181
+
182
+ # Read current confidence and reinforce count
183
+ attrs = store.entity(entity_id)
184
+ if not attrs:
185
+ continue
186
+
187
+ cur_conf = 0.5
188
+ cur_count = 0
189
+ for a in attrs:
190
+ if a["attribute"] == "confidence":
191
+ try:
192
+ cur_conf = float(a["value"])
193
+ except ValueError:
194
+ pass
195
+ elif a["attribute"] == "reinforce_count":
196
+ try:
197
+ cur_count = int(a["value"])
198
+ except ValueError:
199
+ pass
200
+
201
+ new_conf = min(1.0, cur_conf + 0.15)
202
+ new_count = cur_count + 1
203
+
204
+ tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({
205
+ "op": "reinforce", "entity_id": entity_id, "digest_ts": digest_ts
206
+ }))
207
+ # Retract old values, assert new
208
+ store.retract_triple(tx, entity_id, "confidence", str(cur_conf))
209
+ store.assert_triple(tx, entity_id, "confidence", str(round(new_conf, 2)))
210
+ store.retract_triple(tx, entity_id, "reinforce_count", str(cur_count))
211
+ store.assert_triple(tx, entity_id, "reinforce_count", str(new_count))
212
+ store.retract_triple(tx, entity_id, "last_reinforced", "") # retract any
213
+ store.assert_triple(tx, entity_id, "last_reinforced", digest_ts)
214
+ stats["reinforced"] += 1
215
+
216
+ elif op == "retract":
217
+ entity_id = op_data.get("entityId", "")
218
+ reason = op_data.get("reason", "")
219
+ if not entity_id:
220
+ continue
221
+
222
+ tx = store.begin_tx("knowledge_integrator", metadata=json.dumps({
223
+ "op": "retract", "entity_id": entity_id, "reason": reason, "digest_ts": digest_ts
224
+ }))
225
+ # Retract all attributes of this entity
226
+ attrs = store.entity(entity_id)
227
+ for a in attrs:
228
+ store.retract_triple(tx, entity_id, a["attribute"], a["value"])
229
+ stats["retracted"] += 1
230
+
231
+ store.close()
232
+ return stats
233
+ except Exception as e:
234
+ print(f"[warn] Failed to execute graph ops: {e}", file=sys.stderr)
235
+ return {"asserted": 0, "reinforced": 0, "retracted": 0, "error": str(e)}
236
+
237
+
238
+ def _extract_header_footer(playbook: str) -> tuple[str, str, str]:
239
+ """Split playbook into (header, body, footer)."""
240
+ lines = playbook.splitlines()
241
+ header_lines: list[str] = []
242
+ footer_lines: list[str] = []
243
+ body_lines: list[str] = []
244
+
245
+ in_header = True
246
+ for line in lines:
247
+ stripped = line.strip()
248
+ if in_header and stripped.startswith("<!--"):
249
+ header_lines.append(line)
250
+ continue
251
+ in_header = False
252
+ if stripped.startswith("<!-- effectiveness"):
253
+ footer_lines.append(line)
254
+ else:
255
+ body_lines.append(line)
256
+
257
+ return "\n".join(header_lines), "\n".join(body_lines), "\n".join(footer_lines)
258
+
259
+
260
+ def _archive_playbook(memory_dir: str) -> str | None:
261
+ """Archive current playbook. Returns archive path or None."""
262
+ src = Path(memory_dir) / "sinain-playbook.md"
263
+ if not src.exists():
264
+ return None
265
+
266
+ archive_dir = Path(memory_dir) / "playbook-archive"
267
+ archive_dir.mkdir(parents=True, exist_ok=True)
268
+
269
+ ts = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M")
270
+ dest = archive_dir / f"sinain-playbook-{ts}.md"
271
+ shutil.copy2(src, dest)
272
+ return str(dest)
273
+
274
+
275
def _bootstrap_graph(memory_dir: str, db_path: str) -> dict:
    """One-time: seed the knowledge graph from current playbook patterns."""
    playbook = read_playbook(memory_dir)
    if not playbook:
        return {"bootstrapped": 0}

    import re

    # Bullet lines that carry a score/seen marker are treated as patterns.
    patterns = [
        stripped[2:]
        for stripped in (raw.strip() for raw in playbook.splitlines())
        if stripped.startswith("- ") and ("score" in stripped or "seen" in stripped)
    ]
    if not patterns:
        return {"bootstrapped": 0}

    # Keyword → domain mapping (basic heuristic; first hit wins).
    keyword_domains = {
        "react": "react-native", "metro": "react-native", "flutter": "flutter",
        "ocr": "vision", "audio": "audio", "hud": "sinain-hud",
        "docker": "infrastructure", "ssh": "infrastructure", "deploy": "infrastructure",
        "intellij": "intellij", "psi": "intellij", "claude": "ai-agents",
        "gemini": "ai-agents", "openrouter": "ai-agents", "escalation": "sinain-core",
    }

    ops = []
    for pattern in patterns:
        # Use an explicit score as the confidence when one is present.
        confidence = 0.6
        score_match = re.search(r"score\s*([\d.]+)", pattern)
        if score_match:
            try:
                confidence = float(score_match.group(1))
            except ValueError:
                pass

        domain = "general"
        text = pattern.lower()
        for keyword, mapped in keyword_domains.items():
            if keyword in text:
                domain = mapped
                break

        ops.append({
            "op": "assert",
            "entity": domain,
            "attribute": "pattern",
            "value": pattern[:200],
            "confidence": confidence,
            "domain": domain,
        })

    now = datetime.now(timezone.utc).isoformat()
    stats = _execute_graph_ops(db_path, ops, now)
    return {"bootstrapped": stats.get("asserted", 0)}
331
+
332
+
333
def main() -> None:
    """CLI entry point: seed the graph (--bootstrap) or integrate one digest.

    Normal mode: parses the digest JSON, loads the playbook and relevant graph
    facts, makes a single LLM call for the updated playbook body plus graph
    ops, then archives/rewrites the playbook, applies the ops, and appends
    audit records under memory-dir. All results go to stdout via output_json.
    """
    parser = argparse.ArgumentParser(description="Knowledge Integrator")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--digest", default=None, help="SessionDigest JSON string")
    parser.add_argument("--bootstrap", action="store_true", help="One-time: seed graph from playbook")
    args = parser.parse_args()

    memory_dir = args.memory_dir
    db_path = str(Path(memory_dir) / "knowledge-graph.db")

    # Bootstrap mode: seed graph from current playbook
    if args.bootstrap:
        result = _bootstrap_graph(memory_dir, db_path)
        output_json(result)
        return

    # Normal mode: integrate session digest
    if not args.digest:
        print("--digest is required (unless --bootstrap)", file=sys.stderr)
        output_json({"error": "--digest required"})
        return

    try:
        digest = json.loads(args.digest)
    except json.JSONDecodeError as e:
        output_json({"error": f"Invalid digest JSON: {e}"})
        return

    # Skip if digest indicates empty session
    if digest.get("isEmpty", False):
        output_json({"skipped": True, "reason": "empty session"})
        return

    # Read current playbook
    playbook = read_playbook(memory_dir)
    header, body, footer = _extract_header_footer(playbook)

    # Load relevant graph facts for LLM context
    digest_entities = digest.get("entities", [])
    existing_facts = _load_graph_facts(db_path, entities=digest_entities if digest_entities else None)

    # Build user prompt (facts capped at 30 to bound prompt size)
    facts_text = ""
    if existing_facts:
        facts_lines = []
        for f in existing_facts[:30]:
            eid = f.get("entityId", "?")
            val = f.get("value", "")
            conf = f.get("confidence", "?")
            domain = f.get("domain", "?")
            facts_lines.append(f"- [{eid}] ({domain}, confidence={conf}) {val}")
        facts_text = f"\n\n## Existing Graph Facts (for reference — reinforce or retract as needed)\n" + "\n".join(facts_lines)

    user_prompt = f"""## Session Digest
{json.dumps(digest, indent=2, ensure_ascii=False)}

## Current Playbook Body
{body}{facts_text}"""

    try:
        raw = call_llm_with_fallback(
            SYSTEM_PROMPT,
            user_prompt,
            script="knowledge_integrator",
            json_mode=True,
        )
        result = extract_json(raw)
    except (ValueError, LLMError) as e:
        # LLM or JSON-extraction failure: report and leave playbook/graph untouched.
        print(f"LLM integration failed: {e}", file=sys.stderr)
        output_json({"error": str(e)})
        return

    # Archive current playbook before mutation
    archive_path = _archive_playbook(memory_dir)

    # Write updated playbook (header/footer preserved; LLM supplies body only;
    # falls back to the old body if the LLM omitted updatedPlaybook)
    updated_body = result.get("updatedPlaybook", body)
    new_playbook = f"{header}\n\n{updated_body}\n\n{footer}".strip() + "\n"
    playbook_path = Path(memory_dir) / "sinain-playbook.md"
    playbook_path.write_text(new_playbook, encoding="utf-8")

    # Execute graph operations
    graph_ops = result.get("graphOps", [])
    digest_ts = digest.get("ts", datetime.now(timezone.utc).isoformat())
    graph_stats = _execute_graph_ops(db_path, graph_ops, digest_ts)

    # Append digest to session-digests.jsonl
    digests_path = Path(memory_dir) / "session-digests.jsonl"
    with open(digests_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(digest, ensure_ascii=False) + "\n")

    # Write integration log (one JSONL file per UTC day)
    log_entry = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "_type": "integration",
        "changes": result.get("changes", {}),
        "graphStats": graph_stats,
        "digestEntities": digest_entities,
        "archivePath": archive_path,
        "playbookLines": len(new_playbook.splitlines()),
    }
    log_dir = Path(memory_dir) / "playbook-logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    log_file = log_dir / f"{today}.jsonl"
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

    output_json({
        "status": "ok",
        "changes": result.get("changes", {}),
        "graphStats": graph_stats,
        "playbookLines": len(new_playbook.splitlines()),
    })
447
+
448
+
449
+ if __name__ == "__main__":
450
+ main()
@@ -12,7 +12,9 @@
12
12
  "module_manager": { "model": "fast", "maxTokens": 2000 },
13
13
  "tick_evaluator": { "model": "smart", "maxTokens": 200, "timeout": 30 },
14
14
  "eval_reporter": { "model": "smart", "maxTokens": 1000 },
15
- "triple_extractor": { "model": "fast", "maxTokens": 1500, "timeout": 30 }
15
+ "triple_extractor": { "model": "fast", "maxTokens": 1500, "timeout": 30 },
16
+ "session_distiller": { "model": "smart", "maxTokens": 1500, "timeout": 30 },
17
+ "knowledge_integrator": { "model": "smart", "maxTokens": 3000, "timeout": 60 }
16
18
  },
17
19
  "defaults": { "model": "fast", "maxTokens": 1500 },
18
20
  "triplestore": {
@@ -0,0 +1,162 @@
1
+ #!/usr/bin/env python3
2
+ """Session Distiller — condense session transcript into a SessionDigest.
3
+
4
+ Takes feed items + agent digests from sinain-core and produces a structured
5
+ digest of what happened, what patterns emerged, and what was learned.
6
+
7
+ Single LLM call, ~10s. Replaces: signal_analyzer + insight_synthesizer +
8
+ memory_miner for the purpose of knowledge extraction.
9
+
10
+ Usage:
11
+ python3 session_distiller.py --memory-dir memory/ \
12
+ --transcript '[ ... feed items ... ]' \
13
+ --session-meta '{"sessionKey":"...","durationMs":...}'
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ from common import (
22
+ LLMError,
23
+ call_llm_with_fallback,
24
+ extract_json,
25
+ output_json,
26
+ read_effective_playbook,
27
+ )
28
+
29
+ SYSTEM_PROMPT = """\
30
+ You are a session distiller for a personal AI overlay system (sinain).
31
+ Your job: analyze a session transcript and extract structured knowledge.
32
+
33
+ The transcript contains feed items from sinain-core:
34
+ - audio: transcribed speech from the user's environment
35
+ - agent: sinain's analysis digests and HUD messages
36
+ - openclaw: responses from the AI escalation system
37
+ - system: system events and status messages
38
+
39
+ Extract:
40
+ 1. whatHappened: 2-3 sentences summarizing what was accomplished in this session
41
+ 2. patterns: up to 5 reusable patterns discovered (things that worked, techniques used)
42
+ 3. antiPatterns: up to 3 things that failed and why
43
+ 4. preferences: up to 3 user preferences or workflow habits observed
44
+ 5. entities: key domains, tools, technologies, or topics worked with (for graph linking)
45
+ 6. toolInsights: tool usage insights (e.g., "grep before read reduces misses")
46
+
47
+ Focus on ACTIONABLE knowledge that would help a future agent in similar contexts.
48
+ Skip trivial observations. If the session was idle or empty, say so briefly.
49
+
50
+ Respond with ONLY a JSON object:
51
+ {
52
+ "whatHappened": "string",
53
+ "patterns": ["string", ...],
54
+ "antiPatterns": ["string", ...],
55
+ "preferences": ["string", ...],
56
+ "entities": ["string", ...],
57
+ "toolInsights": ["string", ...],
58
+ "isEmpty": false
59
+ }"""
60
+
61
+
62
+ def _truncate_transcript(items: list[dict], max_chars: int = 100_000) -> str:
63
+ """Format and truncate feed items to fit context window."""
64
+ lines: list[str] = []
65
+ total = 0
66
+ for item in items:
67
+ source = item.get("source", "?")
68
+ text = item.get("text", "")
69
+ ts = item.get("ts", "")
70
+
71
+ # Strip [PERIODIC] items — they're overlay refresh noise
72
+ if text.startswith("[PERIODIC]"):
73
+ continue
74
+
75
+ # Format timestamp as HH:MM:SS if numeric
76
+ ts_str = ""
77
+ if isinstance(ts, (int, float)) and ts > 0:
78
+ from datetime import datetime, timezone
79
+ ts_str = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime("%H:%M:%S")
80
+ elif isinstance(ts, str):
81
+ ts_str = ts[-8:] if len(ts) > 8 else ts
82
+
83
+ line = f"[{ts_str}] ({source}) {text}"
84
+ if total + len(line) > max_chars:
85
+ lines.append(f"... truncated ({len(items) - len(lines)} more items)")
86
+ break
87
+ lines.append(line)
88
+ total += len(line)
89
+
90
+ return "\n".join(lines)
91
+
92
+
93
def main() -> None:
    """CLI entry point: distill a session transcript into a SessionDigest.

    Parses the transcript JSON, short-circuits trivially empty sessions,
    builds a prompt (transcript + metadata + current playbook for context),
    makes a single LLM call, stamps session metadata onto the result, and
    prints the digest as JSON via output_json.
    """
    parser = argparse.ArgumentParser(description="Session Distiller")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--transcript", required=True, help="JSON array of feed items")
    parser.add_argument("--session-meta", default="{}", help="JSON session metadata")
    args = parser.parse_args()

    # Parse inputs
    try:
        items = json.loads(args.transcript)
    except json.JSONDecodeError as e:
        print(f"Invalid transcript JSON: {e}", file=sys.stderr)
        output_json({"error": f"Invalid transcript JSON: {e}", "isEmpty": True})
        return

    # NOTE(review): malformed --session-meta JSON raises here (uncaught),
    # unlike --transcript above — confirm callers always pass valid JSON.
    meta = json.loads(args.session_meta) if args.session_meta else {}

    # Skip if transcript is trivially empty (fewer than 2 items)
    if not items or len(items) < 2:
        output_json({
            "whatHappened": "Empty or trivial session",
            "patterns": [],
            "antiPatterns": [],
            "preferences": [],
            "entities": [],
            "toolInsights": [],
            "isEmpty": True,
        })
        return

    # Format transcript
    transcript_text = _truncate_transcript(items)

    # Include current playbook for context (helps avoid re-discovering known patterns);
    # comment/blank lines are dropped and the summary is capped at 30 lines
    playbook = read_effective_playbook(args.memory_dir)
    playbook_summary = ""
    if playbook:
        lines = [l for l in playbook.splitlines() if l.strip() and not l.startswith("<!--")]
        playbook_summary = f"\n\n## Current Playbook (for reference — don't repeat known patterns)\n{chr(10).join(lines[:30])}"

    user_prompt = f"""## Session Transcript ({len(items)} items)
{transcript_text}

## Session Metadata
{json.dumps(meta, indent=2)}{playbook_summary}"""

    try:
        raw = call_llm_with_fallback(
            SYSTEM_PROMPT,
            user_prompt,
            script="session_distiller",
            json_mode=True,
        )
        result = extract_json(raw)
    except (ValueError, LLMError) as e:
        # LLM or JSON-extraction failure: emit an error digest marked empty.
        print(f"LLM distillation failed: {e}", file=sys.stderr)
        output_json({"error": str(e), "isEmpty": True})
        return

    # Add metadata from --session-meta onto the digest
    result["ts"] = meta.get("ts", "")
    result["sessionKey"] = meta.get("sessionKey", "")
    result["durationMs"] = meta.get("durationMs", 0)
    result["feedItemCount"] = len(items)

    output_json(result)
159
+
160
+
161
+ if __name__ == "__main__":
162
+ main()