@geravant/sinain 1.9.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,6 @@ function writeDistillState(workspaceDir: string, state: DistillState): void {
43
43
 
44
44
  export type HeartbeatResult = {
45
45
  status: string;
46
- gitBackup: string | null;
47
46
  signals: unknown[];
48
47
  recommendedAction: { action: string; task: string | null; confidence: number };
49
48
  output: unknown | null;
@@ -95,7 +94,6 @@ export class CurationEngine {
95
94
  const workspaceDir = this.store.getWorkspaceDir();
96
95
  const result: HeartbeatResult = {
97
96
  status: "ok",
98
- gitBackup: null,
99
97
  signals: [],
100
98
  recommendedAction: { action: "skip", task: null, confidence: 0 },
101
99
  output: null,
@@ -131,20 +129,6 @@ export class CurationEngine {
131
129
  const latencyMs: Record<string, number> = {};
132
130
  const heartbeatStart = Date.now();
133
131
 
134
- // 1. Git backup (30s timeout)
135
- try {
136
- const t0 = Date.now();
137
- const gitOut = await this.runScript(
138
- ["bash", "sinain-memory/git_backup.sh"],
139
- { timeoutMs: 30_000, cwd: workspaceDir },
140
- );
141
- latencyMs.gitBackup = Date.now() - t0;
142
- result.gitBackup = gitOut.stdout.trim() || "nothing to commit";
143
- } catch (err) {
144
- this.logger.warn(`sinain-hud: git backup error: ${String(err)}`);
145
- result.gitBackup = `error: ${String(err)}`;
146
- }
147
-
148
132
  // Current time string for memory scripts
149
133
  const hbTz = this.config.userTimezone;
150
134
  const currentTimeStr = new Date().toLocaleString("en-GB", {
@@ -291,7 +275,6 @@ export class CurationEngine {
291
275
  output: result.output,
292
276
  skipped: result.skipped,
293
277
  skipReason: result.skipReason,
294
- gitBackup: result.gitBackup,
295
278
  latencyMs,
296
279
  totalLatencyMs,
297
280
  };
@@ -59,4 +59,4 @@ SINAIN_BACKUP_REPO=<git-url> npx sinain
59
59
  - Token printed at end (or visible in Brev dashboard → Gateway Token)
60
60
  - Mac side: `./setup-nemoclaw.sh` → 5 prompts → overlay starts
61
61
 
62
- Memory is git-backed via `git_backup.sh` on every heartbeat tick. New instances restore instantly via `SINAIN_BACKUP_REPO`.
62
+ Memory is backed up via knowledge snapshots to `~/.sinain/knowledge-snapshots/`. New instances restore instantly via `SINAIN_BACKUP_REPO`.
@@ -203,19 +203,30 @@ server.tool(
203
203
  );
204
204
 
205
205
  // 8. sinain_get_knowledge
206
+ // Queries sinain-core's /knowledge API which merges both local and workspace DBs.
207
+ // Falls back to reading the workspace knowledge doc directly if sinain-core is unreachable.
206
208
  server.tool(
207
209
  "sinain_get_knowledge",
208
- "Get the portable knowledge document (playbook + long-term facts + recent sessions)",
210
+ "Get the portable knowledge document (playbook + long-term facts from both local and workspace databases)",
209
211
  {},
210
212
  async () => {
213
+ // Try sinain-core API first (merges both DBs)
214
+ try {
215
+ const data = await coreRequest("GET", "/knowledge");
216
+ if (data.ok && data.content) {
217
+ return textResult(stripPrivateTags(data.content));
218
+ }
219
+ } catch {
220
+ // sinain-core unreachable — fall through to local files
221
+ }
222
+
223
+ // Fallback: read workspace files directly
211
224
  try {
212
- // Read pre-rendered knowledge doc (fast, no subprocess)
213
225
  const docPath = resolve(MEMORY_DIR, "sinain-knowledge.md");
214
226
  if (existsSync(docPath)) {
215
227
  const content = readFileSync(docPath, "utf-8");
216
228
  return textResult(stripPrivateTags(content));
217
229
  }
218
- // Fallback: read playbook directly
219
230
  const playbookPath = resolve(MEMORY_DIR, "sinain-playbook.md");
220
231
  if (existsSync(playbookPath)) {
221
232
  return textResult(stripPrivateTags(readFileSync(playbookPath, "utf-8")));
@@ -228,14 +239,33 @@ server.tool(
228
239
  );
229
240
 
230
241
  // 8b. sinain_knowledge_query (graph query — entity-based lookup)
242
+ // Queries sinain-core's /knowledge/facts API which merges both local and workspace DBs.
243
+ // Falls back to local graph_query.py (workspace DB only) if sinain-core is unreachable.
231
244
  server.tool(
232
245
  "sinain_knowledge_query",
233
- "Query the knowledge graph for facts about specific entities/domains",
246
+ "Query the knowledge graph for facts about specific entities/domains (searches both local and workspace databases)",
234
247
  {
235
248
  entities: z.array(z.string()).optional().default([]),
236
249
  max_facts: z.number().optional().default(5),
237
250
  },
238
251
  async ({ entities, max_facts }) => {
252
+ // Try sinain-core API first (merges both local + workspace DBs)
253
+ if (entities.length > 0) {
254
+ try {
255
+ const params = new URLSearchParams({
256
+ entities: entities.join(","),
257
+ max: String(max_facts),
258
+ });
259
+ const data = await coreRequest("GET", `/knowledge/facts?${params}`);
260
+ if (data.ok && data.facts) {
261
+ return textResult(stripPrivateTags(data.facts));
262
+ }
263
+ } catch {
264
+ // sinain-core unreachable — fall through to local script
265
+ }
266
+ }
267
+
268
+ // Fallback: query workspace DB directly via graph_query.py
239
269
  try {
240
270
  const dbPath = resolve(MEMORY_DIR, "knowledge-graph.db");
241
271
  const scriptPath = resolve(SCRIPTS_DIR, "graph_query.py");
@@ -317,23 +347,7 @@ server.tool(
317
347
  const results: string[] = [];
318
348
  const now = new Date().toISOString();
319
349
 
320
- // Step 1: git_backup.sh
321
- const gitBackupPath = resolve(SCRIPTS_DIR, "git_backup.sh");
322
- if (existsSync(gitBackupPath)) {
323
- try {
324
- const out = await new Promise<string>((res, rej) => {
325
- execFile("bash", [gitBackupPath, MEMORY_DIR], { timeout: 30_000 }, (err, stdout, stderr) => {
326
- if (err) rej(new Error(`git_backup failed: ${err.message}\n${stderr}`));
327
- else res(stdout);
328
- });
329
- });
330
- results.push(`[git_backup] ${out.trim() || "OK"}`);
331
- } catch (err: any) {
332
- results.push(`[git_backup] FAILED: ${err.message}`);
333
- }
334
- }
335
-
336
- // Step 2: signal_analyzer.py
350
+ // Step 1: signal_analyzer.py
337
351
  try {
338
352
  const out = await runScript([
339
353
  resolve(SCRIPTS_DIR, "signal_analyzer.py"),
@@ -346,7 +360,7 @@ server.tool(
346
360
  results.push(`[signal_analyzer] FAILED: ${err.message}`);
347
361
  }
348
362
 
349
- // Step 3: insight_synthesizer.py
363
+ // Step 2: insight_synthesizer.py
350
364
  try {
351
365
  const out = await runScript([
352
366
  resolve(SCRIPTS_DIR, "insight_synthesizer.py"),
@@ -358,7 +372,7 @@ server.tool(
358
372
  results.push(`[insight_synthesizer] FAILED: ${err.message}`);
359
373
  }
360
374
 
361
- // Step 4: memory_miner.py
375
+ // Step 3: memory_miner.py
362
376
  try {
363
377
  const out = await runScript([
364
378
  resolve(SCRIPTS_DIR, "memory_miner.py"),
@@ -369,7 +383,7 @@ server.tool(
369
383
  results.push(`[memory_miner] FAILED: ${err.message}`);
370
384
  }
371
385
 
372
- // Step 5: playbook_curator.py
386
+ // Step 4: playbook_curator.py
373
387
  try {
374
388
  const out = await runScript([
375
389
  resolve(SCRIPTS_DIR, "playbook_curator.py"),
@@ -0,0 +1,12 @@
1
+ {"query": "OCR pipeline stalls on macOS 14", "expected_entities": ["fact:ocr-backpressure", "fact:sck-capture"], "category": "error-resolution"}
2
+ {"query": "camera conflicts with screen capture", "expected_entities": ["fact:camera-conflict", "fact:coremediaio"], "category": "error-resolution"}
3
+ {"query": "audio gain not applied in pipeline", "expected_entities": ["fact:audio-gain"], "category": "bug-fix"}
4
+ {"query": "Flutter ProviderNotFoundException in secondary window", "expected_entities": ["fact:flutter-provider", "fact:multi-window"], "category": "error-resolution"}
5
+ {"query": "user prefers concise Telegram messages", "expected_entities": ["fact:telegram-preference"], "category": "user-preference"}
6
+ {"query": "PyObjC performRequests_error_ returns bool not tuple", "expected_entities": ["fact:pyobjc-api"], "category": "bug-fix"}
7
+ {"query": "ScreenCaptureKit zero-copy IOSurface", "expected_entities": ["fact:sck-capture", "fact:iosurface"], "category": "tool-knowledge"}
8
+ {"query": "OpenClaw gateway workspace not initialized", "expected_entities": ["fact:workspace-init"], "category": "error-resolution"}
9
+ {"query": "react-native metro bundler cache invalidation", "expected_entities": ["fact:react-native-metro"], "category": "tool-knowledge"}
10
+ {"query": "sinain agent session key format", "expected_entities": ["fact:session-key"], "category": "tool-knowledge"}
11
+ {"query": "what was the OCR backend last month", "expected_entities": ["fact:ocr-backend"], "category": "temporal"}
12
+ {"query": "when did we switch from CGDisplayCreateImage to ScreenCaptureKit", "expected_entities": ["fact:sck-capture", "fact:cgdisplay-deprecation"], "category": "temporal"}
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ """Retrieval Quality Evaluator — Recall@k and NDCG@k for knowledge graph queries.
3
+
4
+ Inspired by mempalace's LongMemEval benchmark infrastructure. Measures whether the
5
+ right knowledge surfaces when the agent needs it, complementing sinain's existing
6
+ output quality evaluation (schemas + assertions + LLM judges).
7
+
8
+ Usage:
9
+ python3 eval/retrieval_evaluator.py \
10
+ --db memory/knowledge-graph.db \
11
+ --benchmark eval/retrieval_benchmark.jsonl \
12
+ [--k 1,3,5] [--format json|text]
13
+
14
+ Benchmark dataset format (JSONL):
15
+ {"query": "OCR pipeline stalls on macOS 14", "expected_entities": ["fact:sck-capture-fix"], "category": "error-resolution"}
16
+ """
17
+
18
+ import argparse
19
+ import json
20
+ import math
21
+ import sys
22
+ from collections import defaultdict
23
+ from pathlib import Path
24
+
25
+
26
def load_benchmark(path: str) -> list[dict]:
    """Load benchmark QA pairs from a JSONL file.

    Args:
        path: Path to a JSON Lines file; every non-blank line is one JSON value.

    Returns:
        The parsed values in file order. Blank lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The message
            is prefixed with ``path:lineno`` so the bad record can be located.
    """
    items = []
    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, so the result
    # does not depend on the platform's locale encoding or on the editor that
    # saved the benchmark file.
    with open(path, encoding="utf-8-sig") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                items.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Same exception type as before, with the location added.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return items
35
+
36
+
37
def extract_keywords(query: str) -> list[str]:
    """Turn a natural-language query into a list of search keywords.

    The query is lowercased and scanned for tokens that start with a letter and
    continue with at least one letter, digit or hyphen. Tokens of two characters
    or fewer, and tokens in the stopword set, are dropped. Order and duplicates
    are preserved.
    """
    import re

    ignored = {"the", "is", "in", "on", "for", "and", "or", "of", "to", "a", "an", "it", "was", "not", "how", "what", "when", "does"}
    keywords = []
    for match in re.finditer(r"[a-zA-Z][a-zA-Z0-9-]+", query.lower()):
        token = match.group(0)
        if len(token) <= 2 or token in ignored:
            continue
        keywords.append(token)
    return keywords
43
+
44
+
45
def dcg_at_k(relevant_positions: list[int], k: int) -> float:
    """Discounted Cumulative Gain over the top-k results with binary relevance.

    Each 0-indexed position ``p`` that is below ``k`` contributes
    ``1 / log2(p + 2)``; positions at or beyond ``k`` contribute nothing.
    """
    # The explicit 0.0 start keeps the return type float for an empty input.
    return sum(
        (1.0 / math.log2(rank + 2) for rank in relevant_positions if rank < k),
        0.0,
    )
52
+
53
+
54
def ndcg_at_k(relevant_positions: list[int], num_relevant: int, k: int) -> float:
    """Normalized DCG at k: the achieved DCG divided by the best possible DCG.

    The ideal ranking places ``min(num_relevant, k)`` relevant items at the top
    positions. Returns 0.0 when that ideal DCG is zero, which happens when there
    are no relevant items or ``k`` is 0.
    """
    best_case = dcg_at_k(list(range(min(num_relevant, k))), k)
    if best_case > 0:
        return dcg_at_k(relevant_positions, k) / best_case
    return 0.0
61
+
62
+
63
+ def evaluate_retrieval(
64
+ benchmark_path: str,
65
+ db_path: str,
66
+ k_values: list[int] = [1, 3, 5],
67
+ ) -> dict:
68
+ """Run benchmark queries against graph_query.py, compute Recall@k and NDCG@k."""
69
+ # Import graph_query from parent dir
70
+ sys.path.insert(0, str(Path(__file__).parent.parent))
71
+ from graph_query import query_facts_by_entities
72
+
73
+ items = load_benchmark(benchmark_path)
74
+ if not items:
75
+ return {"error": "Empty benchmark dataset"}
76
+
77
+ max_k = max(k_values)
78
+ metrics: dict[str, list[float]] = defaultdict(list)
79
+ category_metrics: dict[str, dict[str, list[float]]] = defaultdict(lambda: defaultdict(list))
80
+ details: list[dict] = []
81
+
82
+ for item in items:
83
+ query = item["query"]
84
+ expected = set(item.get("expected_entities", []))
85
+ category = item.get("category", "general")
86
+ keywords = extract_keywords(query)
87
+
88
+ if not keywords or not expected:
89
+ continue
90
+
91
+ results = query_facts_by_entities(db_path, keywords, max_facts=max_k)
92
+ result_ids = [r["entityId"] for r in results]
93
+
94
+ # Find positions of relevant results
95
+ relevant_positions = []
96
+ for i, rid in enumerate(result_ids):
97
+ if rid in expected:
98
+ relevant_positions.append(i)
99
+
100
+ for k in k_values:
101
+ hit = any(pos < k for pos in relevant_positions)
102
+ recall = 1.0 if hit else 0.0
103
+ ndcg = ndcg_at_k(relevant_positions, len(expected), k)
104
+
105
+ metrics[f"recall@{k}"].append(recall)
106
+ metrics[f"ndcg@{k}"].append(ndcg)
107
+ category_metrics[category][f"recall@{k}"].append(recall)
108
+ category_metrics[category][f"ndcg@{k}"].append(ndcg)
109
+
110
+ details.append({
111
+ "query": query,
112
+ "category": category,
113
+ "expected": list(expected),
114
+ "retrieved": result_ids[:max_k],
115
+ "hit@1": any(pos < 1 for pos in relevant_positions),
116
+ "hit@5": any(pos < 5 for pos in relevant_positions),
117
+ })
118
+
119
+ # Aggregate
120
+ summary = {
121
+ "total_queries": len(items),
122
+ "evaluated": len(details),
123
+ }
124
+ for metric_name, values in sorted(metrics.items()):
125
+ summary[metric_name] = round(sum(values) / len(values), 4) if values else 0.0
126
+
127
+ # Per-category breakdown
128
+ categories = {}
129
+ for cat, cat_metrics in sorted(category_metrics.items()):
130
+ categories[cat] = {
131
+ "count": len(next(iter(cat_metrics.values()))),
132
+ }
133
+ for metric_name, values in sorted(cat_metrics.items()):
134
+ categories[cat][metric_name] = round(sum(values) / len(values), 4) if values else 0.0
135
+
136
+ return {
137
+ "summary": summary,
138
+ "categories": categories,
139
+ "details": details,
140
+ }
141
+
142
+
143
def format_report_text(result: dict) -> str:
    """Format an evaluation result as human-readable text for daily report injection.

    Args:
        result: Either a dict with a ``summary`` key (and optionally
            ``categories``), or an error dict of the form ``{"error": "..."}``.

    Returns:
        A Markdown snippet starting with ``## Retrieval Quality``. An error
        result is rendered as a single ``- error: ...`` bullet, not raised.
        With categories present, a per-category recall line is added, plus a
        "Weakest" line when the lowest category recall is below 80%.
    """
    lines = ["## Retrieval Quality"]

    summary = result.get("summary")
    if summary is None:
        # No summary to report (for example an error dict): show why.
        lines.append(f"- error: {result.get('error', 'no summary available')}")
        return "\n".join(lines)

    for key in sorted(summary):
        if key.startswith(("recall@", "ndcg@")):
            lines.append(f"- {key}: {summary[key]:.2%}")

    cats = result.get("categories") or {}
    if cats:
        # Report recall@5 when it was evaluated; otherwise use the largest
        # evaluated cut-off, so a missing metric is not shown as a misleading 0%.
        prefix = "recall@"
        evaluated_ks = sorted(
            int(key[len(prefix):])
            for key in summary
            if key.startswith(prefix) and key[len(prefix):].isdigit()
        )
        if 5 in evaluated_ks or not evaluated_ks:
            headline = "recall@5"
        else:
            headline = f"recall@{evaluated_ks[-1]}"

        lines.append("")
        lines.append("**By category:**")
        for cat, cm in sorted(cats.items()):
            lines.append(f"- {cat} (n={cm['count']}): {headline}={cm.get(headline, 0):.0%}")

        # Weakest category; one without the metric counts as 1.0 and is never picked.
        weakest_name, weakest_metrics = min(
            cats.items(), key=lambda entry: entry[1].get(headline, 1.0)
        )
        if weakest_metrics.get(headline, 1.0) < 0.8:
            lines.append(f"\n**Weakest**: {weakest_name} ({weakest_metrics.get(headline, 0):.0%})")

    return "\n".join(lines)
166
+
167
+
168
def main() -> None:
    """CLI entry point: parse arguments, run the evaluation, print the report.

    ``--k`` is split on commas and each part converted with ``int``. The result
    is printed as text (``--format text``) or as indented JSON (the default).
    """
    cli = argparse.ArgumentParser(description="Retrieval Quality Evaluator")
    cli.add_argument("--db", required=True, help="Path to knowledge-graph.db")
    cli.add_argument("--benchmark", required=True, help="Path to retrieval_benchmark.jsonl")
    cli.add_argument("--k", default="1,3,5", help="Comma-separated k values for Recall@k")
    cli.add_argument("--format", choices=["json", "text"], default="json", help="Output format")
    opts = cli.parse_args()

    cutoffs = [int(part) for part in opts.k.split(",")]
    report = evaluate_retrieval(opts.benchmark, opts.db, cutoffs)

    rendered = (
        format_report_text(report)
        if opts.format == "text"
        else json.dumps(report, indent=2, ensure_ascii=False)
    )
    print(rendered)
183
+
184
+
185
+ if __name__ == "__main__":
186
+ main()
@@ -154,6 +154,37 @@ def format_facts_text(facts: list[dict], max_chars: int = 500) -> str:
154
154
  return "\n".join(lines)
155
155
 
156
156
 
157
def format_facts_compact(facts: list[dict], max_chars: int = 400) -> str:
    """Encode facts for efficient escalation context injection.

    Compact format: ``domain/entity: value (conf,Nx)``, with the ``domain/``
    prefix omitted when the fact has no domain. Entries are joined with ``"; "``.
    Inspired by mempalace AAAK compression.

    Args:
        facts: Fact dicts. The keys read are ``entityId``, ``value``,
            ``confidence``, ``reinforce_count`` and ``domain``; all are optional.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The encoded facts, in input order, stopping at the first fact that
        would push the joined string past ``max_chars``. Empty string when
        ``facts`` is empty or the first fact alone does not fit.
    """
    if not facts:
        return ""

    separator = "; "
    lines = []
    used = 0
    for f in facts:
        # Coerce to str so a None or numeric field cannot raise on slicing.
        entity = str(f.get("entityId") or "").split(":")[-1][:20]
        raw_value = f.get("value")
        value = ("" if raw_value is None else str(raw_value))[:60]
        conf = f.get("confidence", "?")
        count = f.get("reinforce_count", "1")
        domain = f.get("domain", "")

        if domain:
            line = f"{domain}/{entity}: {value} ({conf},{count}x)"
        else:
            line = f"{entity}: {value} ({conf},{count}x)"

        # A separator is only emitted between entries, so the first entry costs
        # just its own length. Charging every entry for one would drop facts
        # that actually fit within max_chars.
        cost = len(line) + (len(separator) if lines else 0)
        if used + cost > max_chars:
            break
        lines.append(line)
        used += cost

    return separator.join(lines)
186
+
187
+
157
188
  def domain_fact_counts(db_path: str) -> dict[str, int]:
158
189
  """Count facts per domain for module emergence detection."""
159
190
  if not Path(db_path).exists():
@@ -184,7 +215,7 @@ def main() -> None:
184
215
  parser.add_argument("--top", type=int, default=None, help="Query top-N facts by confidence")
185
216
  parser.add_argument("--domain-counts", action="store_true", help="Show fact counts per domain")
186
217
  parser.add_argument("--max-facts", type=int, default=5, help="Maximum facts to return")
187
- parser.add_argument("--format", choices=["text", "json"], default="json", help="Output format")
218
+ parser.add_argument("--format", choices=["text", "json", "compact"], default="json", help="Output format")
188
219
  args = parser.parse_args()
189
220
 
190
221
  if args.domain_counts:
@@ -202,6 +233,8 @@ def main() -> None:
202
233
 
203
234
  if args.format == "text":
204
235
  print(format_facts_text(facts))
236
+ elif args.format == "compact":
237
+ print(format_facts_compact(facts))
205
238
  else:
206
239
  print(json.dumps({"facts": facts, "count": len(facts)}, indent=2, ensure_ascii=False))
207
240
 
@@ -117,6 +117,55 @@ def _fact_id(entity: str, attribute: str, value: str) -> str:
117
117
  return f"fact:{slug}-{h}"
118
118
 
119
119
 
120
+ def _normalize_entity(name: str) -> str:
121
+ """Normalize entity name to canonical form: lowercase, hyphenated, no punctuation."""
122
+ return re.sub(r"[^a-z0-9-]", "", name.lower().replace(" ", "-").replace("_", "-"))
123
+
124
+
125
+ def _canonicalize_ops(ops: list[dict], existing_entities: list[str]) -> list[dict]:
126
+ """Map variant entity names to canonical forms before graph execution.
127
+
128
+ Inspired by mempalace entity detection — uses simple heuristic instead of
129
+ rule-based signal detection: normalize names, merge on edit distance or substring match.
130
+ Converts duplicate assert → reinforce when a near-match exists.
131
+ """
132
+ canonical_map: dict[str, str] = {} # normalized → existing entity name
133
+ for eid in existing_entities:
134
+ # Extract entity name from the entity_id's attributes (stored as "entity" attr)
135
+ canonical_map[_normalize_entity(eid)] = eid
136
+
137
+ result = []
138
+ for op in ops:
139
+ if op.get("op") != "assert":
140
+ result.append(op)
141
+ continue
142
+
143
+ entity = op.get("entity", "")
144
+ normalized = _normalize_entity(entity)
145
+
146
+ # Check for near-match in existing entities
147
+ matched_id = None
148
+ for existing_norm, existing_eid in canonical_map.items():
149
+ if existing_norm == normalized:
150
+ matched_id = existing_eid
151
+ break
152
+ # Substring match: "react-router" matches "react-router-dom"
153
+ if len(normalized) >= 4 and (normalized in existing_norm or existing_norm in normalized):
154
+ matched_id = existing_eid
155
+ break
156
+
157
+ if matched_id:
158
+ # Convert assert → reinforce (entity already exists under different name)
159
+ result.append({"op": "reinforce", "entityId": matched_id})
160
+ print(f" [canon] merged '{entity}' → existing '{matched_id}'", file=sys.stderr)
161
+ else:
162
+ result.append(op)
163
+ # Register the new canonical form
164
+ canonical_map[normalized] = _fact_id(entity, op.get("attribute", ""), op.get("value", ""))
165
+
166
+ return result
167
+
168
+
120
169
  def _load_graph_facts(db_path: str, entities: list[str] | None = None, limit: int = 50) -> list[dict]:
121
170
  """Load relevant facts from the knowledge graph for LLM context."""
122
171
  if not Path(db_path).exists():
@@ -180,6 +229,11 @@ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str) -> dict:
180
229
  try:
181
230
  from triplestore import TripleStore
182
231
  store = TripleStore(db_path)
232
+
233
+ # Canonicalize entity names to prevent fragmentation
234
+ existing_ids = [r[0] for r in store.entities_with_attr("entity")]
235
+ ops = _canonicalize_ops(ops, existing_ids)
236
+
183
237
  stats = {"asserted": 0, "reinforced": 0, "retracted": 0}
184
238
 
185
239
  for op_data in ops:
@@ -21,6 +21,7 @@ Self-test:
21
21
  """
22
22
 
23
23
  import json
24
+ import math
24
25
  import os
25
26
  import sqlite3
26
27
  import sys
@@ -51,6 +52,7 @@ CREATE TABLE IF NOT EXISTS triples (
51
52
  value_type TEXT NOT NULL DEFAULT 'string',
52
53
  retracted INTEGER NOT NULL DEFAULT 0,
53
54
  retracted_tx INTEGER,
55
+ valid_to TEXT,
54
56
  created_at TEXT NOT NULL
55
57
  );
56
58
 
@@ -88,6 +90,24 @@ def _entity_type(entity_id: str) -> str:
88
90
  return entity_id[:colon] if colon > 0 else "unknown"
89
91
 
90
92
 
93
def decayed_confidence(
    confidence: float, created_at: str, half_life_days: int = 60
) -> float:
    """Apply exponential time-decay to a confidence score.

    Facts lose half their confidence every `half_life_days` without reinforcement.
    Inspired by mempalace's temporal validity model.

    Args:
        confidence: The stored confidence.
        created_at: ISO-8601 timestamp; a trailing ``Z`` is accepted. A
            timestamp without a UTC offset is interpreted as UTC.
        half_life_days: Days after which the confidence has halved.

    Returns:
        ``confidence * 0.5 ** (age_days / half_life_days)``, where ``age_days``
        is the age in whole days. The confidence is returned unchanged when the
        fact is less than a day old, dated in the future, or when ``created_at``
        cannot be parsed.

    Raises:
        ValueError: If ``half_life_days`` is not positive.
    """
    if half_life_days <= 0:
        raise ValueError("half_life_days must be positive")

    try:
        created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
    except (AttributeError, ValueError, TypeError):
        # Unparseable or non-string timestamp: leave the confidence untouched.
        return confidence

    # A naive timestamp cannot be subtracted from an aware "now". Treat it as
    # UTC so such facts still decay, where the old TypeError path left them
    # at full confidence.
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)

    age_days = (datetime.now(timezone.utc) - created).days
    if age_days <= 0:
        return confidence
    # exp(-ln2 * t / T) == 0.5 ** (t / T)
    return confidence * math.exp(-math.log(2) * age_days / half_life_days)
109
+
110
+
91
111
  class TripleStore:
92
112
  """SQLite-backed EAV triple store with WAL mode and 4 covering indexes."""
93
113
 
@@ -99,8 +119,16 @@ class TripleStore:
99
119
  self._conn.execute("PRAGMA journal_mode=WAL")
100
120
  self._conn.execute("PRAGMA busy_timeout=10000")
101
121
  self._conn.executescript(_SCHEMA_SQL)
122
+ self._migrate()
102
123
  self._conn.commit()
103
124
 
125
+ def _migrate(self) -> None:
126
+ """Run schema migrations for existing databases."""
127
+ # Add valid_to column if missing (added in memory-improvements)
128
+ cols = [r[1] for r in self._conn.execute("PRAGMA table_info(triples)").fetchall()]
129
+ if "valid_to" not in cols:
130
+ self._conn.execute("ALTER TABLE triples ADD COLUMN valid_to TEXT")
131
+
104
132
  def close(self) -> None:
105
133
  self._conn.close()
106
134
 
@@ -173,21 +201,22 @@ class TripleStore:
173
201
  ) -> int:
174
202
  """Retract triples matching entity+attribute (and optionally value).
175
203
 
176
- Sets retracted=1 and retracted_tx to the retraction transaction.
204
+ Sets retracted=1, retracted_tx, and valid_to (temporal closure).
177
205
  The original tx_id is preserved for temporal (as_of_tx) queries.
178
206
  Returns the count of triples retracted.
179
207
  """
208
+ now = _now_iso()
180
209
  if value is not None:
181
210
  cur = self._conn.execute(
182
- "UPDATE triples SET retracted = 1, retracted_tx = ? "
211
+ "UPDATE triples SET retracted = 1, retracted_tx = ?, valid_to = ? "
183
212
  "WHERE entity_id = ? AND attribute = ? AND value = ? AND retracted = 0",
184
- (tx_id, entity_id, attribute, value),
213
+ (tx_id, now, entity_id, attribute, value),
185
214
  )
186
215
  else:
187
216
  cur = self._conn.execute(
188
- "UPDATE triples SET retracted = 1, retracted_tx = ? "
217
+ "UPDATE triples SET retracted = 1, retracted_tx = ?, valid_to = ? "
189
218
  "WHERE entity_id = ? AND attribute = ? AND retracted = 0",
190
- (tx_id, entity_id, attribute),
219
+ (tx_id, now, entity_id, attribute),
191
220
  )
192
221
  self._conn.commit()
193
222
  return cur.rowcount
@@ -220,6 +249,26 @@ class TripleStore:
220
249
  result.setdefault(row["attribute"], []).append(row["value"])
221
250
  return result
222
251
 
252
+ def entity_as_of(self, entity_id: str, date: datetime) -> dict[str, list[str]]:
253
+ """Return entity attributes as they were on a specific date.
254
+
255
+ Uses created_at and valid_to for date-based temporal queries
256
+ (vs as_of_tx which uses transaction ordering).
257
+ """
258
+ date_iso = date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
259
+ rows = self._conn.execute(
260
+ "SELECT attribute, value FROM triples "
261
+ "WHERE entity_id = ? AND created_at <= ? "
262
+ "AND (valid_to IS NULL OR valid_to > ?) "
263
+ "AND retracted = 0 "
264
+ "ORDER BY attribute, id",
265
+ (entity_id, date_iso, date_iso),
266
+ ).fetchall()
267
+ result: dict[str, list[str]] = {}
268
+ for row in rows:
269
+ result.setdefault(row["attribute"], []).append(row["value"])
270
+ return result
271
+
223
272
  # ----- Query: AEVT (attribute scan) -----
224
273
 
225
274
  def entities_with_attr(
@@ -473,6 +522,28 @@ def _self_test() -> None:
473
522
  assert "priority" in ent_before, "as_of_tx should see pre-retraction state"
474
523
  print(" [OK] as_of_tx isolation")
475
524
 
525
+ # valid_to set on retraction
526
+ retracted_row = store._conn.execute(
527
+ "SELECT valid_to FROM triples WHERE entity_id = 'signal:2026-03-01' AND attribute = 'priority'"
528
+ ).fetchone()
529
+ assert retracted_row and retracted_row["valid_to"] is not None, "valid_to should be set on retraction"
530
+ print(" [OK] valid_to set on retraction")
531
+
532
+ # entity_as_of
533
+ future = datetime.now(timezone.utc) + timedelta(days=1)
534
+ ent_future = store.entity_as_of("signal:2026-03-01", future)
535
+ assert "description" in ent_future, "entity_as_of should find active triples"
536
+ assert "priority" not in ent_future, "entity_as_of should exclude retracted triples"
537
+ print(" [OK] entity_as_of temporal query")
538
+
539
+ # Confidence decay utility
540
+ fresh_conf = decayed_confidence(0.8, _now_iso())
541
+ assert abs(fresh_conf - 0.8) < 0.01, f"Fresh fact should keep confidence: {fresh_conf}"
542
+ old_date = (datetime.now(timezone.utc) - timedelta(days=60)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
543
+ old_conf = decayed_confidence(0.8, old_date)
544
+ assert 0.35 < old_conf < 0.45, f"60-day-old fact should decay to ~0.4: {old_conf}"
545
+ print(f" [OK] Confidence decay: fresh=0.8→{fresh_conf:.2f}, 60d=0.8→{old_conf:.2f}")
546
+
476
547
  # GC (retracted triples are fresh, so gc with 0 days should get them)
477
548
  gc_count = store.gc(older_than_days=0)
478
549
  assert gc_count >= 1
@@ -1,23 +0,0 @@
1
- # sinain-agent configuration
2
- # Copy to .env and customize: cp .env.example .env
3
-
4
- # ── Agent ──
5
- SINAIN_AGENT=claude # claude | codex | junie | goose | aider | <custom command>
6
- # MCP agents (claude, codex, junie, goose) call sinain tools directly
7
- # Pipe agents (aider, custom) receive escalation text on stdin
8
-
9
- # ── Core connection ──
10
- SINAIN_CORE_URL=http://localhost:9500
11
-
12
- # ── Timing ──
13
- SINAIN_POLL_INTERVAL=5 # seconds between escalation polls
14
- SINAIN_HEARTBEAT_INTERVAL=900 # seconds between heartbeat ticks (15 min)
15
-
16
- # ── Workspace ──
17
- SINAIN_WORKSPACE=~/.openclaw/workspace # knowledge files, curation scripts, playbook
18
-
19
- # ── Tool permissions (Claude only) ──
20
- # Tools auto-approved without prompting (space-separated).
21
- # Default: auto-derived from MCP config server names (e.g. mcp__sinain).
22
- # Format: mcp__<server> (all tools) | mcp__<server>__<tool> (specific) | Bash(pattern)
23
- # SINAIN_ALLOWED_TOOLS=mcp__sinain mcp__github Bash(git *)