@geravant/sinain 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +183 -0
  2. package/index.ts +2096 -0
  3. package/install.js +155 -0
  4. package/openclaw.plugin.json +59 -0
  5. package/package.json +21 -0
  6. package/sinain-memory/common.py +403 -0
  7. package/sinain-memory/demo_knowledge_transfer.sh +85 -0
  8. package/sinain-memory/embedder.py +268 -0
  9. package/sinain-memory/eval/__init__.py +0 -0
  10. package/sinain-memory/eval/assertions.py +288 -0
  11. package/sinain-memory/eval/judges/__init__.py +0 -0
  12. package/sinain-memory/eval/judges/base_judge.py +61 -0
  13. package/sinain-memory/eval/judges/curation_judge.py +46 -0
  14. package/sinain-memory/eval/judges/insight_judge.py +48 -0
  15. package/sinain-memory/eval/judges/mining_judge.py +42 -0
  16. package/sinain-memory/eval/judges/signal_judge.py +45 -0
  17. package/sinain-memory/eval/schemas.py +247 -0
  18. package/sinain-memory/eval_delta.py +109 -0
  19. package/sinain-memory/eval_reporter.py +642 -0
  20. package/sinain-memory/feedback_analyzer.py +221 -0
  21. package/sinain-memory/git_backup.sh +19 -0
  22. package/sinain-memory/insight_synthesizer.py +181 -0
  23. package/sinain-memory/memory/2026-03-01.md +11 -0
  24. package/sinain-memory/memory/playbook-archive/sinain-playbook-2026-03-01-1418.md +15 -0
  25. package/sinain-memory/memory/playbook-logs/2026-03-01.jsonl +1 -0
  26. package/sinain-memory/memory/sinain-playbook.md +21 -0
  27. package/sinain-memory/memory-config.json +39 -0
  28. package/sinain-memory/memory_miner.py +183 -0
  29. package/sinain-memory/module_manager.py +695 -0
  30. package/sinain-memory/playbook_curator.py +225 -0
  31. package/sinain-memory/requirements.txt +3 -0
  32. package/sinain-memory/signal_analyzer.py +141 -0
  33. package/sinain-memory/test_local.py +402 -0
  34. package/sinain-memory/tests/__init__.py +0 -0
  35. package/sinain-memory/tests/conftest.py +189 -0
  36. package/sinain-memory/tests/test_curator_helpers.py +94 -0
  37. package/sinain-memory/tests/test_embedder.py +210 -0
  38. package/sinain-memory/tests/test_extract_json.py +124 -0
  39. package/sinain-memory/tests/test_feedback_computation.py +121 -0
  40. package/sinain-memory/tests/test_miner_helpers.py +71 -0
  41. package/sinain-memory/tests/test_module_management.py +458 -0
  42. package/sinain-memory/tests/test_parsers.py +96 -0
  43. package/sinain-memory/tests/test_tick_evaluator.py +430 -0
  44. package/sinain-memory/tests/test_triple_extractor.py +255 -0
  45. package/sinain-memory/tests/test_triple_ingest.py +191 -0
  46. package/sinain-memory/tests/test_triple_migrate.py +138 -0
  47. package/sinain-memory/tests/test_triplestore.py +248 -0
  48. package/sinain-memory/tick_evaluator.py +392 -0
  49. package/sinain-memory/triple_extractor.py +402 -0
  50. package/sinain-memory/triple_ingest.py +290 -0
  51. package/sinain-memory/triple_migrate.py +275 -0
  52. package/sinain-memory/triple_query.py +184 -0
  53. package/sinain-memory/triplestore.py +498 -0
@@ -0,0 +1,402 @@
1
+ #!/usr/bin/env python3
2
+ """Triple Extractor — 3-tier extraction from sinain data into EAV triples.
3
+
4
+ Tier 1: JSON direct (~70%) — structured data maps directly to triples.
5
+ Tier 2: Regex + validate (~20%) — semi-structured text (playbooks, patterns.md).
6
+ Tier 3: LLM fallback (~10%) — free-form text where regex fails.
7
+
8
+ Usage:
9
+ from triple_extractor import TripleExtractor
10
+ extractor = TripleExtractor(store)
11
+ triples = extractor.extract_signal(signal_data, tick_ts)
12
+ """
13
+
14
+ import json
15
+ import re
16
+ import sys
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import TYPE_CHECKING
20
+
21
+ if TYPE_CHECKING:
22
+ from triplestore import TripleStore
23
+
24
+
25
+ @dataclass
26
+ class Triple:
27
+ """A single entity-attribute-value triple to be asserted."""
28
+ entity_id: str
29
+ attribute: str
30
+ value: str
31
+ value_type: str = "string"
32
+
33
+
34
+ def _make_slug(text: str) -> str:
35
+ """Convert text to a lowercase hyphen-separated slug.
36
+
37
+ >>> _make_slug("Frame Batching Improves OCR")
38
+ 'frame-batching-improves-ocr'
39
+ """
40
+ slug = re.sub(r"[^a-z0-9]+", "-", text.lower().strip())
41
+ return slug.strip("-")[:80] # cap length
42
+
43
+
44
+ class TripleExtractor:
45
+ """Extracts EAV triples from various sinain data sources."""
46
+
47
+ def __init__(self, store: "TripleStore") -> None:
48
+ self.store = store
49
+ self._vocab_cache: list[tuple[str, str]] | None = None
50
+
51
+ # ----- Tier 1: JSON direct extraction -----
52
+
53
+ def extract_signal(self, signal_data: dict, tick_ts: str) -> list[Triple]:
54
+ """Extract triples from a signal analysis result (Tier 1).
55
+
56
+ Creates signal:{tick_ts} entity + concept refs for signals.
57
+ """
58
+ triples: list[Triple] = []
59
+ entity_id = f"signal:{tick_ts}"
60
+
61
+ # Core signal attributes
62
+ if "sessionSummary" in signal_data:
63
+ triples.append(Triple(entity_id, "summary", str(signal_data["sessionSummary"])))
64
+ if "idle" in signal_data:
65
+ triples.append(Triple(entity_id, "idle", str(signal_data["idle"]).lower()))
66
+
67
+ # Individual signals
68
+ for i, sig in enumerate(signal_data.get("signals", [])):
69
+ desc = sig.get("description", "")
70
+ priority = sig.get("priority", "medium")
71
+ if desc:
72
+ triples.append(Triple(entity_id, "description", desc))
73
+ triples.append(Triple(entity_id, "priority", priority))
74
+ # Extract concepts from signal description
75
+ for concept_triple in self.extract_concepts(desc):
76
+ triples.append(concept_triple)
77
+ if concept_triple.entity_id.startswith("concept:"):
78
+ triples.append(Triple(
79
+ entity_id, "related_to", concept_triple.entity_id, "ref"
80
+ ))
81
+
82
+ # Recommended action
83
+ action = signal_data.get("recommendedAction")
84
+ if action and isinstance(action, dict):
85
+ triples.append(Triple(entity_id, "action", action.get("action", "")))
86
+ if "task" in action:
87
+ triples.append(Triple(entity_id, "action_task", action["task"]))
88
+ if "confidence" in action:
89
+ triples.append(Triple(entity_id, "action_confidence", str(action["confidence"])))
90
+
91
+ # Playbook changes
92
+ changes = (signal_data.get("playbookChanges") or {}).get("changes", {})
93
+ for added in changes.get("added", []):
94
+ slug = _make_slug(added)
95
+ if slug:
96
+ pattern_id = f"pattern:{slug}"
97
+ triples.append(Triple(pattern_id, "text", added))
98
+ triples.append(Triple(pattern_id, "source", "signal_analyzer"))
99
+ triples.append(Triple(entity_id, "added_pattern", pattern_id, "ref"))
100
+
101
+ # Output
102
+ output = signal_data.get("output", {})
103
+ if isinstance(output, dict):
104
+ if output.get("suggestion"):
105
+ triples.append(Triple(entity_id, "suggestion", output["suggestion"]))
106
+ if output.get("insight"):
107
+ triples.append(Triple(entity_id, "insight", output["insight"]))
108
+
109
+ return triples
110
+
111
+ def extract_session(self, session_data: dict) -> list[Triple]:
112
+ """Extract triples from a session summary (Tier 1).
113
+
114
+ Creates session:{ts} entity with summary, tool refs, etc.
115
+ """
116
+ triples: list[Triple] = []
117
+ ts = session_data.get("ts", session_data.get("timestamp", "unknown"))
118
+ entity_id = f"session:{ts}"
119
+
120
+ if "summary" in session_data:
121
+ triples.append(Triple(entity_id, "summary", session_data["summary"]))
122
+ if "sessionSummary" in session_data:
123
+ triples.append(Triple(entity_id, "summary", session_data["sessionSummary"]))
124
+
125
+ # Tool usage
126
+ for tool in session_data.get("toolsUsed", []):
127
+ tool_name = tool if isinstance(tool, str) else tool.get("name", "")
128
+ if tool_name:
129
+ tool_id = f"tool:{_make_slug(tool_name)}"
130
+ triples.append(Triple(tool_id, "name", tool_name))
131
+ triples.append(Triple(entity_id, "used_tool", tool_id, "ref"))
132
+
133
+ # Duration
134
+ if "durationMs" in session_data:
135
+ triples.append(Triple(entity_id, "duration_ms", str(session_data["durationMs"])))
136
+
137
+ # Extract concepts from summary
138
+ summary_text = session_data.get("summary", session_data.get("sessionSummary", ""))
139
+ if summary_text:
140
+ for concept_triple in self.extract_concepts(summary_text):
141
+ triples.append(concept_triple)
142
+ if concept_triple.entity_id.startswith("concept:"):
143
+ triples.append(Triple(
144
+ entity_id, "related_to", concept_triple.entity_id, "ref"
145
+ ))
146
+
147
+ return triples
148
+
149
+ def extract_mining(self, mining_data: dict) -> list[Triple]:
150
+ """Extract triples from memory mining results (Tier 1).
151
+
152
+ New patterns → pattern:{slug} entities.
153
+ """
154
+ triples: list[Triple] = []
155
+
156
+ for pattern_text in mining_data.get("newPatterns", []):
157
+ slug = _make_slug(pattern_text)
158
+ if not slug:
159
+ continue
160
+ pattern_id = f"pattern:{slug}"
161
+ triples.append(Triple(pattern_id, "text", pattern_text))
162
+ triples.append(Triple(pattern_id, "source", "memory_miner"))
163
+ # Extract concepts
164
+ for ct in self.extract_concepts(pattern_text):
165
+ triples.append(ct)
166
+ if ct.entity_id.startswith("concept:"):
167
+ triples.append(Triple(pattern_id, "related_to", ct.entity_id, "ref"))
168
+
169
+ for pref in mining_data.get("preferences", []):
170
+ slug = _make_slug(pref)
171
+ if slug:
172
+ pref_id = f"pattern:{slug}"
173
+ triples.append(Triple(pref_id, "text", pref))
174
+ triples.append(Triple(pref_id, "source", "memory_miner"))
175
+ triples.append(Triple(pref_id, "pattern_type", "preference"))
176
+
177
+ for contradiction in mining_data.get("contradictions", []):
178
+ slug = _make_slug(contradiction)
179
+ if slug:
180
+ c_id = f"pattern:{slug}"
181
+ triples.append(Triple(c_id, "text", contradiction))
182
+ triples.append(Triple(c_id, "source", "memory_miner"))
183
+ triples.append(Triple(c_id, "pattern_type", "contradiction"))
184
+
185
+ return triples
186
+
187
+ # ----- Tier 2: Regex extraction -----
188
+
189
+ def extract_playbook(self, playbook_text: str) -> list[Triple]:
190
+ """Extract triples from playbook markdown (Tier 2: regex).
191
+
192
+ Pattern: ^- text (score: N.N)?
193
+ Falls back to Tier 3 if <3 patterns extracted from non-empty input.
194
+ """
195
+ triples: list[Triple] = []
196
+ pattern_re = re.compile(r"^-\s+(.+?)(?:\s*\(score:\s*([\d.]+)\))?\s*$", re.MULTILINE)
197
+
198
+ for match in pattern_re.finditer(playbook_text):
199
+ text = match.group(1).strip()
200
+ score = match.group(2)
201
+
202
+ # Skip HTML comments and metadata
203
+ if text.startswith("<!--") or text.startswith("[since:"):
204
+ continue
205
+
206
+ slug = _make_slug(text)
207
+ if not slug:
208
+ continue
209
+
210
+ pattern_id = f"pattern:{slug}"
211
+ triples.append(Triple(pattern_id, "text", text))
212
+ triples.append(Triple(pattern_id, "source", "playbook"))
213
+ if score:
214
+ triples.append(Triple(pattern_id, "score", score))
215
+
216
+ # Extract concepts from pattern text
217
+ for ct in self.extract_concepts(text):
218
+ triples.append(ct)
219
+ if ct.entity_id.startswith("concept:"):
220
+ triples.append(Triple(pattern_id, "related_to", ct.entity_id, "ref"))
221
+
222
+ # Tier 3 fallback: if we got <3 patterns from non-trivial input
223
+ non_comment = re.sub(r"<!--.*?-->", "", playbook_text, flags=re.DOTALL).strip()
224
+ if len(non_comment) > 100 and sum(1 for t in triples if t.attribute == "text") < 3:
225
+ tier3 = self._extract_patterns_llm(playbook_text)
226
+ triples.extend(tier3)
227
+
228
+ return triples
229
+
230
+ def extract_module(
231
+ self, module_id: str, manifest: dict, patterns_text: str,
232
+ guidance_text: str = "",
233
+ ) -> list[Triple]:
234
+ """Extract triples from a module's manifest (Tier 1) + patterns.md + guidance.md (Tier 2).
235
+
236
+ Creates module:{id} entity + pattern entities from patterns.md
237
+ + guidance entities from guidance.md.
238
+ """
239
+ triples: list[Triple] = []
240
+ entity_id = f"module:{module_id}"
241
+
242
+ # Tier 1: manifest fields
243
+ triples.append(Triple(entity_id, "name", manifest.get("name", module_id)))
244
+ if "description" in manifest:
245
+ triples.append(Triple(entity_id, "description", manifest["description"]))
246
+ if "version" in manifest:
247
+ triples.append(Triple(entity_id, "version", manifest["version"]))
248
+
249
+ # Tier 2: extract patterns from patterns.md
250
+ if patterns_text:
251
+ pattern_triples = self.extract_playbook(patterns_text)
252
+ for pt in pattern_triples:
253
+ triples.append(pt)
254
+ # Link patterns to module
255
+ if pt.attribute == "text" and pt.entity_id.startswith("pattern:"):
256
+ triples.append(Triple(pt.entity_id, "belongs_to", entity_id, "ref"))
257
+
258
+ # Tier 2b: extract guidance items from guidance.md
259
+ if guidance_text:
260
+ guidance_triples = self.extract_playbook(guidance_text)
261
+ for gt in guidance_triples:
262
+ # Remap pattern: → guidance: entity prefix
263
+ if gt.entity_id.startswith("pattern:"):
264
+ gt = Triple(
265
+ gt.entity_id.replace("pattern:", "guidance:", 1),
266
+ gt.attribute, gt.value, gt.value_type,
267
+ )
268
+ triples.append(gt)
269
+ if gt.attribute == "text" and gt.entity_id.startswith("guidance:"):
270
+ triples.append(Triple(gt.entity_id, "type", "guidance"))
271
+ triples.append(Triple(gt.entity_id, "belongs_to", entity_id, "ref"))
272
+
273
+ return triples
274
+
275
+ # ----- Concept extraction (3-tier) -----
276
+
277
+ def extract_concepts(self, text: str) -> list[Triple]:
278
+ """Extract concept entities from text using 3-tier strategy.
279
+
280
+ Tier 1: Match against vocabulary cache from store.
281
+ Tier 2: Regex noun-phrase extraction.
282
+ Tier 3: LLM fallback (only if tiers 1+2 yield nothing from substantial text).
283
+ """
284
+ concepts: set[str] = set()
285
+
286
+ # Tier 1: vocabulary cache matching
287
+ vocab = self._get_vocab_cache()
288
+ text_lower = text.lower()
289
+ for concept_name, concept_id in vocab:
290
+ if concept_name in text_lower:
291
+ concepts.add(concept_id)
292
+
293
+ # Tier 2: regex noun-phrase extraction
294
+ # Match capitalized multi-word phrases and technical terms
295
+ noun_phrases = set()
296
+ # Capitalized phrases (2+ words)
297
+ for m in re.finditer(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b", text):
298
+ noun_phrases.add(m.group(1))
299
+ # Technical terms: word-word patterns (e.g., "frame-batching", "OCR-pipeline")
300
+ for m in re.finditer(r"\b([a-zA-Z]+-[a-zA-Z]+(?:-[a-zA-Z]+)*)\b", text):
301
+ term = m.group(1)
302
+ if len(term) > 4: # skip short terms like "is-a"
303
+ noun_phrases.add(term)
304
+ # ALL-CAPS acronyms (2+ chars)
305
+ for m in re.finditer(r"\b([A-Z]{2,})\b", text):
306
+ noun_phrases.add(m.group(1))
307
+
308
+ # Convert noun phrases to concept triples
309
+ triples: list[Triple] = []
310
+ for concept_id in concepts:
311
+ # Already exists in store — just reference it
312
+ pass # caller will create ref triples
313
+
314
+ for phrase in noun_phrases:
315
+ slug = _make_slug(phrase)
316
+ if not slug or len(slug) < 2:
317
+ continue
318
+ concept_id = f"concept:{slug}"
319
+ if concept_id not in concepts:
320
+ concepts.add(concept_id)
321
+ triples.append(Triple(concept_id, "name", phrase))
322
+
323
+ # Return known concepts as triples too (for caller to create refs)
324
+ for cid in concepts:
325
+ if not any(t.entity_id == cid for t in triples):
326
+ # Concept from vocab cache — ensure it's in the output
327
+ triples.append(Triple(cid, "name", cid.split(":", 1)[1] if ":" in cid else cid))
328
+
329
+ # Tier 3: LLM fallback only if we found nothing from substantial text
330
+ if not concepts and len(text) > 100:
331
+ tier3 = self._extract_concepts_llm(text)
332
+ triples.extend(tier3)
333
+
334
+ return triples
335
+
336
+ def _get_vocab_cache(self) -> list[tuple[str, str]]:
337
+ """Load vocabulary from store: all (name, entity_id) for concept: entities."""
338
+ if self._vocab_cache is not None:
339
+ return self._vocab_cache
340
+ try:
341
+ results = self.store.entities_with_attr("name")
342
+ self._vocab_cache = [
343
+ (val.lower(), eid)
344
+ for eid, val in results
345
+ if eid.startswith("concept:")
346
+ ]
347
+ except Exception:
348
+ self._vocab_cache = []
349
+ return self._vocab_cache
350
+
351
+ # ----- Tier 3: LLM fallback -----
352
+
353
+ def _extract_patterns_llm(self, text: str) -> list[Triple]:
354
+ """Use LLM to extract patterns from unstructured text."""
355
+ try:
356
+ from common import call_llm_with_fallback, extract_json
357
+ except ImportError:
358
+ return []
359
+
360
+ system = (
361
+ "Extract actionable patterns from this text. Return JSON: "
362
+ '{"patterns": ["pattern 1", "pattern 2", ...]}'
363
+ )
364
+ try:
365
+ raw = call_llm_with_fallback(system, text[:4000], script="triple_extractor")
366
+ data = extract_json(raw)
367
+ triples: list[Triple] = []
368
+ for p in data.get("patterns", []):
369
+ slug = _make_slug(p)
370
+ if slug:
371
+ pid = f"pattern:{slug}"
372
+ triples.append(Triple(pid, "text", p))
373
+ triples.append(Triple(pid, "source", "llm_extraction"))
374
+ return triples
375
+ except Exception as e:
376
+ print(f"[warn] Tier 3 pattern extraction failed: {e}", file=sys.stderr)
377
+ return []
378
+
379
+ def _extract_concepts_llm(self, text: str) -> list[Triple]:
380
+ """Use LLM to extract concepts from text."""
381
+ try:
382
+ from common import call_llm_with_fallback, extract_json
383
+ except ImportError:
384
+ return []
385
+
386
+ system = (
387
+ "Extract key concepts/entities from this text. Return JSON: "
388
+ '{"concepts": ["concept 1", "concept 2", ...]}'
389
+ )
390
+ try:
391
+ raw = call_llm_with_fallback(system, text[:4000], script="triple_extractor")
392
+ data = extract_json(raw)
393
+ triples: list[Triple] = []
394
+ for c in data.get("concepts", []):
395
+ slug = _make_slug(c)
396
+ if slug:
397
+ cid = f"concept:{slug}"
398
+ triples.append(Triple(cid, "name", c))
399
+ return triples
400
+ except Exception as e:
401
+ print(f"[warn] Tier 3 concept extraction failed: {e}", file=sys.stderr)
402
+ return []
@@ -0,0 +1,290 @@
1
+ #!/usr/bin/env python3
2
+ """Triple Ingest — CLI entry point for ingesting data into the triple store.
3
+
4
+ Called by the sinain-hud plugin via runScript() for fire-and-forget ingestion.
5
+
6
+ Usage:
7
+ python3 triple_ingest.py --memory-dir memory/ --signal-result '{"signals":[...]}' --tick-ts 2026-03-01T10:00:00Z
8
+ python3 triple_ingest.py --memory-dir memory/ --ingest-playbook
9
+ python3 triple_ingest.py --memory-dir memory/ --ingest-session '{"ts":"...","summary":"..."}'
10
+ python3 triple_ingest.py --memory-dir memory/ --ingest-mining '{"newPatterns":[...]}'
11
+ python3 triple_ingest.py --memory-dir memory/ --ingest-module react-native-dev --modules-dir modules/
12
+ python3 triple_ingest.py --memory-dir memory/ --retract-module react-native-dev
13
+ python3 triple_ingest.py --memory-dir memory/ --embed (add --embed to any mode to trigger embedding)
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import os
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ # Ensure sinain-koog is on path for local imports
23
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
24
+
25
+ from triplestore import TripleStore
26
+ from triple_extractor import TripleExtractor
27
+ from common import output_json, read_effective_playbook, read_file_safe
28
+
29
+
30
+ # ── Privacy matrix helpers ────────────────────────────────────────────────────
31
+
32
+ def _privacy_level(data_type: str, dest: str) -> str:
33
+ """Read PRIVACY_<DATA_TYPE>_<DEST> env var, default 'full'."""
34
+ key = f"PRIVACY_{data_type.upper()}_{dest.upper()}"
35
+ val = os.environ.get(key, "full")
36
+ if val not in ("full", "redacted", "summary", "none"):
37
+ return "full"
38
+ return val
39
+
40
+
41
+ def _should_ingest(data_type: str) -> bool:
42
+ """Return True if the data type is allowed to reach the triple store."""
43
+ level = _privacy_level(data_type, "TRIPLE_STORE")
44
+ return level != "none"
45
+
46
+
47
+ def _db_path(memory_dir: str) -> str:
48
+ return str(Path(memory_dir) / "triplestore.db")
49
+
50
+
51
def _assert_triples(store: TripleStore, tx_id: int, triples: list) -> int:
    """Assert every triple in *triples* under transaction *tx_id*.

    Returns the number of triples asserted.
    """
    asserted = 0
    for triple in triples:
        store.assert_triple(tx_id, triple.entity_id, triple.attribute,
                            triple.value, triple.value_type)
        asserted += 1
    return asserted
58
+
59
+
60
def _run_embeddings(store: TripleStore, memory_dir: str) -> None:
    """Run Phase 2 embeddings on recent entities (best-effort).

    Embeds entities touched by the latest transaction. Any failure —
    missing embedder module or a runtime error — is logged to stderr and
    never propagates to the caller.
    """
    try:
        from embedder import Embedder

        embedder = Embedder(_db_path(memory_dir))
        latest = store.latest_tx()
        if latest == 0:
            return  # nothing ingested yet
        # Entities that gained triples since the previous transaction.
        novelties = store.novelty(max(0, latest - 1))
        recent_ids = list({n["entity_id"] for n in novelties})
        if not recent_ids:
            return

        # Build per-entity source text; cap at 50 entities per batch.
        entity_texts: dict[str, str] = {}
        for eid in recent_ids[:50]:
            text = _build_embed_text(eid, store.entity(eid))
            if text:
                entity_texts[eid] = text

        if entity_texts:
            embedder.store_embeddings(entity_texts)
            print(f"[embed] Embedded {len(entity_texts)} entities", file=sys.stderr)
    except ImportError:
        print("[embed] embedder not available, skipping", file=sys.stderr)
    except Exception as e:
        print(f"[embed] Error: {e}", file=sys.stderr)
89
+
90
+
91
+ def _build_embed_text(entity_id: str, attrs: dict[str, list[str]]) -> str:
92
+ """Build embedding source text from entity attributes.
93
+
94
+ Templates per entity type (from design doc §5.3).
95
+ """
96
+ etype = entity_id.split(":")[0] if ":" in entity_id else "unknown"
97
+
98
+ if etype == "pattern":
99
+ text = attrs.get("text", [""])[0]
100
+ concepts = ", ".join(attrs.get("related_to", []))
101
+ return f"pattern: {text} (concepts: {concepts})" if text else ""
102
+
103
+ if etype == "concept":
104
+ name = attrs.get("name", [""])[0]
105
+ return f"concept: {name}" if name else ""
106
+
107
+ if etype == "session":
108
+ summary = attrs.get("summary", [""])[0]
109
+ return f"session: {summary}" if summary else ""
110
+
111
+ if etype == "signal":
112
+ desc = attrs.get("description", [""])[0]
113
+ priority = attrs.get("priority", ["medium"])[0]
114
+ return f"signal: {desc} (priority: {priority})" if desc else ""
115
+
116
+ if etype == "guidance":
117
+ text = attrs.get("text", [""])[0]
118
+ return f"guidance: {text}" if text else ""
119
+
120
+ if etype == "module":
121
+ name = attrs.get("name", [""])[0]
122
+ description = attrs.get("description", [""])[0]
123
+ return f"module: {name} — {description}" if name else ""
124
+
125
+ return ""
126
+
127
+
128
def cmd_signal(args: argparse.Namespace) -> None:
    """Ingest a signal analysis result into the triple store."""
    # Privacy gate: skip only when both audio and OCR are blocked for the
    # triple store (either source alone may still justify ingestion).
    if not _should_ingest("AUDIO") and not _should_ingest("OCR"):
        output_json({"ingested": 0, "source": "signal", "skipped": "privacy_gate"})
        return
    signal_data = json.loads(args.signal_result)
    store = TripleStore(_db_path(args.memory_dir))
    try:
        triples = TripleExtractor(store).extract_signal(signal_data, args.tick_ts)
        tx = store.begin_tx("signal_analyzer", metadata={"tick_ts": args.tick_ts})
        count = _assert_triples(store, tx, triples)
        if args.embed:
            _run_embeddings(store, args.memory_dir)
        output_json({
            "ingested": count,
            "entities": len({t.entity_id for t in triples}),
            "source": "signal",
            "txId": tx,
        })
    finally:
        store.close()
146
+
147
+
148
def cmd_playbook(args: argparse.Namespace) -> None:
    """Ingest the current effective playbook into the triple store."""
    playbook = read_effective_playbook(args.memory_dir)
    if not playbook:
        # Nothing to ingest; report and bail without opening the store.
        output_json({"ingested": 0, "source": "playbook", "error": "empty"})
        return
    store = TripleStore(_db_path(args.memory_dir))
    try:
        triples = TripleExtractor(store).extract_playbook(playbook)
        tx = store.begin_tx("playbook_curator")
        count = _assert_triples(store, tx, triples)
        if args.embed:
            _run_embeddings(store, args.memory_dir)
        output_json({
            "ingested": count,
            "entities": len({t.entity_id for t in triples}),
            "source": "playbook",
            "txId": tx,
        })
    finally:
        store.close()
165
+
166
+
167
def cmd_session(args: argparse.Namespace) -> None:
    """Ingest a session summary into the triple store."""
    session_data = json.loads(args.ingest_session)
    store = TripleStore(_db_path(args.memory_dir))
    try:
        triples = TripleExtractor(store).extract_session(session_data)
        tx = store.begin_tx("agent_end", metadata={"session": session_data.get("ts")})
        count = _assert_triples(store, tx, triples)
        if args.embed:
            _run_embeddings(store, args.memory_dir)
        output_json({
            "ingested": count,
            "entities": len({t.entity_id for t in triples}),
            "source": "session",
            "txId": tx,
        })
    finally:
        store.close()
181
+
182
+
183
def cmd_mining(args: argparse.Namespace) -> None:
    """Ingest memory mining results into the triple store."""
    mining_data = json.loads(args.ingest_mining)
    store = TripleStore(_db_path(args.memory_dir))
    try:
        triples = TripleExtractor(store).extract_mining(mining_data)
        tx = store.begin_tx("memory_miner")
        count = _assert_triples(store, tx, triples)
        if args.embed:
            _run_embeddings(store, args.memory_dir)
        output_json({
            "ingested": count,
            "entities": len({t.entity_id for t in triples}),
            "source": "mining",
            "txId": tx,
        })
    finally:
        store.close()
197
+
198
+
199
def cmd_module(args: argparse.Namespace) -> None:
    """Ingest a module's manifest, patterns.md and guidance.md into the triple store."""
    module_id = args.ingest_module
    module_root = Path(args.modules_dir) / module_id
    manifest_path = module_root / "manifest.json"

    if not manifest_path.exists():
        output_json({"ingested": 0, "source": "module", "error": f"manifest not found: {manifest_path}"})
        return

    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    patterns_text = read_file_safe(str(module_root / "patterns.md"))
    guidance_text = read_file_safe(str(module_root / "guidance.md"))

    store = TripleStore(_db_path(args.memory_dir))
    try:
        triples = TripleExtractor(store).extract_module(
            module_id, manifest, patterns_text, guidance_text
        )
        tx = store.begin_tx("module_ingest", metadata={"module_id": module_id})
        count = _assert_triples(store, tx, triples)
        if args.embed:
            _run_embeddings(store, args.memory_dir)
        output_json({
            "ingested": count,
            "entities": len({t.entity_id for t in triples}),
            "source": "module",
            "module": module_id,
            "txId": tx,
        })
    finally:
        store.close()
226
+
227
+
228
def cmd_retract_module(args: argparse.Namespace) -> None:
    """Retract a module's triples: the module entity plus its owned patterns."""
    module_id = args.retract_module
    module_eid = f"module:{module_id}"
    store = TripleStore(_db_path(args.memory_dir))
    try:
        tx = store.begin_tx("module_retract", metadata={"module_id": module_id})
        retracted = 0
        # Retract every attribute on the module entity itself.
        for attr in store.entity(module_eid):
            retracted += store.retract_triple(tx, module_eid, attr)
        # Retract all entities that declare belongs_to → this module.
        for child_eid, _ in store.backrefs(module_eid, attribute="belongs_to"):
            for attr in store.entity(child_eid):
                retracted += store.retract_triple(tx, child_eid, attr)
        output_json({"retracted": retracted, "source": "module", "module": module_id, "txId": tx})
    finally:
        store.close()
249
+
250
+
251
def main() -> None:
    """Parse CLI arguments and dispatch to the matching ingestion command."""
    parser = argparse.ArgumentParser(description="Triple Store Ingestion CLI")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--embed", action="store_true", help="Trigger embedding after ingestion")

    # Exactly one ingestion mode must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--signal-result", help="JSON: signal analysis result")
    group.add_argument("--ingest-playbook", action="store_true", help="Ingest current playbook")
    group.add_argument("--ingest-session", help="JSON: session summary")
    group.add_argument("--ingest-mining", help="JSON: mining results")
    group.add_argument("--ingest-module", help="Module ID to ingest")
    group.add_argument("--retract-module", help="Module ID to retract")

    # Mode-dependent extras.
    parser.add_argument("--tick-ts", help="Tick timestamp (required with --signal-result)")
    parser.add_argument("--modules-dir", help="Path to modules/ directory (required with --ingest-module)")

    args = parser.parse_args()

    # Validate mode-dependent requirements up front; parser.error() exits.
    if args.signal_result and not args.tick_ts:
        parser.error("--tick-ts required with --signal-result")
    if args.ingest_module and not args.modules_dir:
        parser.error("--modules-dir required with --ingest-module")

    if args.signal_result:
        cmd_signal(args)
    elif args.ingest_playbook:
        cmd_playbook(args)
    elif args.ingest_session:
        cmd_session(args)
    elif args.ingest_mining:
        cmd_mining(args)
    elif args.ingest_module:
        cmd_module(args)
    elif args.retract_module:
        cmd_retract_module(args)
287
+
288
+
289
if __name__ == "__main__":
    # CLI entry point — invoked by the sinain-hud plugin via runScript().
    main()