@geravant/sinain 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -0
- package/index.ts +2096 -0
- package/install.js +155 -0
- package/openclaw.plugin.json +59 -0
- package/package.json +21 -0
- package/sinain-memory/common.py +403 -0
- package/sinain-memory/demo_knowledge_transfer.sh +85 -0
- package/sinain-memory/embedder.py +268 -0
- package/sinain-memory/eval/__init__.py +0 -0
- package/sinain-memory/eval/assertions.py +288 -0
- package/sinain-memory/eval/judges/__init__.py +0 -0
- package/sinain-memory/eval/judges/base_judge.py +61 -0
- package/sinain-memory/eval/judges/curation_judge.py +46 -0
- package/sinain-memory/eval/judges/insight_judge.py +48 -0
- package/sinain-memory/eval/judges/mining_judge.py +42 -0
- package/sinain-memory/eval/judges/signal_judge.py +45 -0
- package/sinain-memory/eval/schemas.py +247 -0
- package/sinain-memory/eval_delta.py +109 -0
- package/sinain-memory/eval_reporter.py +642 -0
- package/sinain-memory/feedback_analyzer.py +221 -0
- package/sinain-memory/git_backup.sh +19 -0
- package/sinain-memory/insight_synthesizer.py +181 -0
- package/sinain-memory/memory/2026-03-01.md +11 -0
- package/sinain-memory/memory/playbook-archive/sinain-playbook-2026-03-01-1418.md +15 -0
- package/sinain-memory/memory/playbook-logs/2026-03-01.jsonl +1 -0
- package/sinain-memory/memory/sinain-playbook.md +21 -0
- package/sinain-memory/memory-config.json +39 -0
- package/sinain-memory/memory_miner.py +183 -0
- package/sinain-memory/module_manager.py +695 -0
- package/sinain-memory/playbook_curator.py +225 -0
- package/sinain-memory/requirements.txt +3 -0
- package/sinain-memory/signal_analyzer.py +141 -0
- package/sinain-memory/test_local.py +402 -0
- package/sinain-memory/tests/__init__.py +0 -0
- package/sinain-memory/tests/conftest.py +189 -0
- package/sinain-memory/tests/test_curator_helpers.py +94 -0
- package/sinain-memory/tests/test_embedder.py +210 -0
- package/sinain-memory/tests/test_extract_json.py +124 -0
- package/sinain-memory/tests/test_feedback_computation.py +121 -0
- package/sinain-memory/tests/test_miner_helpers.py +71 -0
- package/sinain-memory/tests/test_module_management.py +458 -0
- package/sinain-memory/tests/test_parsers.py +96 -0
- package/sinain-memory/tests/test_tick_evaluator.py +430 -0
- package/sinain-memory/tests/test_triple_extractor.py +255 -0
- package/sinain-memory/tests/test_triple_ingest.py +191 -0
- package/sinain-memory/tests/test_triple_migrate.py +138 -0
- package/sinain-memory/tests/test_triplestore.py +248 -0
- package/sinain-memory/tick_evaluator.py +392 -0
- package/sinain-memory/triple_extractor.py +402 -0
- package/sinain-memory/triple_ingest.py +290 -0
- package/sinain-memory/triple_migrate.py +275 -0
- package/sinain-memory/triple_query.py +184 -0
- package/sinain-memory/triplestore.py +498 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# demo_knowledge_transfer.sh — Demonstrates sinain knowledge transfer flow.
#
# Scenario: Expert workspace has extracted a module → export it → import into
# a fresh (novice) workspace → verify the module stack and attribution.
#
# Usage:
#   ./demo_knowledge_transfer.sh <expert-workspace> <novice-workspace> <module-id>
#
# Example:
#   ./demo_knowledge_transfer.sh /mnt/openclaw-state /tmp/novice-workspace ocr-vision-pipeline

# Abort on any error, unset variable, or failed pipeline stage.
set -euo pipefail

# Required positional args; ${N:?...} exits with the usage string if missing.
EXPERT_WS="${1:?Usage: $0 <expert-workspace> <novice-workspace> <module-id>}"
NOVICE_WS="${2:?Usage: $0 <expert-workspace> <novice-workspace> <module-id>}"
MODULE_ID="${3:?Usage: $0 <expert-workspace> <novice-workspace> <module-id>}"

# Resolve this script's directory so sibling tools (module_manager.py,
# triple_ingest.py) can be invoked regardless of the caller's CWD.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# NOTE(review): bundle is written to /tmp and silently overwritten if present.
BUNDLE_PATH="/tmp/${MODULE_ID}.sinain-module.json"

echo "=== sinain Knowledge Transfer Demo ==="
echo ""
echo "Expert workspace: $EXPERT_WS"
echo "Novice workspace: $NOVICE_WS"
echo "Module to transfer: $MODULE_ID"
echo ""

# Step 1: Show expert's module info
echo "--- Step 1: Expert's module info ---"
python3 "$SCRIPT_DIR/module_manager.py" \
    --modules-dir "$EXPERT_WS/modules" info "$MODULE_ID"
echo ""

# Step 2: Export from expert workspace
echo "--- Step 2: Export module as portable bundle ---"
python3 "$SCRIPT_DIR/module_manager.py" \
    --modules-dir "$EXPERT_WS/modules" export "$MODULE_ID" \
    --output "$BUNDLE_PATH"
echo ""
echo "Bundle size: $(wc -c < "$BUNDLE_PATH") bytes"
echo ""

# Step 3: Ensure novice workspace has modules directory
echo "--- Step 3: Prepare novice workspace ---"
mkdir -p "$NOVICE_WS/modules" "$NOVICE_WS/memory"
echo "Created $NOVICE_WS/modules/"
echo ""

# Step 4: Import into novice workspace with activation
echo "--- Step 4: Import into novice workspace (with --activate) ---"
python3 "$SCRIPT_DIR/module_manager.py" \
    --modules-dir "$NOVICE_WS/modules" import "$BUNDLE_PATH" --activate
echo ""

# Step 5: Fire KG ingestion (optional, may fail if triple store not configured)
# The `|| echo` keeps set -e from aborting the demo on this best-effort step.
echo "--- Step 5: KG ingestion (optional) ---"
python3 "$SCRIPT_DIR/triple_ingest.py" \
    --memory-dir "$NOVICE_WS/memory" \
    --ingest-module "$MODULE_ID" \
    --modules-dir "$NOVICE_WS/modules" \
    --embed 2>/dev/null || echo "(triple store not available — skipped)"
echo ""

# Step 6: Verify module stack
echo "--- Step 6: Verify novice module stack ---"
python3 "$SCRIPT_DIR/module_manager.py" \
    --modules-dir "$NOVICE_WS/modules" stack
echo ""

# Step 7: Show imported module details
echo "--- Step 7: Imported module details ---"
python3 "$SCRIPT_DIR/module_manager.py" \
    --modules-dir "$NOVICE_WS/modules" info "$MODULE_ID"
echo ""

echo "=== Transfer complete ==="
echo ""
echo "The novice workspace now has the expert's '$MODULE_ID' module active."
echo "When sinain runs a heartbeat in the novice workspace, it will:"
echo "  1. Include transferred patterns in the effective playbook"
echo "  2. Tag them with [Transferred knowledge: ...]"
echo "  3. Cite the origin in suggestions and insights"
echo ""
echo "Bundle file: $BUNDLE_PATH"
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Embedder — dual-strategy embedding generation and vector search.
|
|
3
|
+
|
|
4
|
+
Primary: OpenRouter text-embedding-3-small (1536-dim, multilingual)
|
|
5
|
+
Fallback: Local all-MiniLM-L6-v2 (384-dim, English-only)
|
|
6
|
+
|
|
7
|
+
Extends the triplestore.db with an `embeddings` table for vector storage.
|
|
8
|
+
Uses brute-force cosine similarity for search (<1ms for 10K entities).
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
from embedder import Embedder
|
|
12
|
+
embedder = Embedder("memory/triplestore.db")
|
|
13
|
+
vecs = embedder.embed(["OCR pipeline optimization"])
|
|
14
|
+
embedder.store_embeddings({"pattern:ocr-opt": "pattern: OCR optimization"})
|
|
15
|
+
results = embedder.vector_search(vecs[0], top_k=5)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import sqlite3
|
|
22
|
+
import struct
|
|
23
|
+
import sys
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ── Privacy helpers ───────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
def _openrouter_allowed_for(data_type: str) -> bool:
|
|
31
|
+
"""Return True if the data type is allowed to be sent to OpenRouter for embedding.
|
|
32
|
+
|
|
33
|
+
Reads PRIVACY_<DATA_TYPE>_OPENROUTER env var.
|
|
34
|
+
data_type examples: "AUDIO", "OCR", "METADATA"
|
|
35
|
+
"""
|
|
36
|
+
key = f"PRIVACY_{data_type.upper()}_OPENROUTER"
|
|
37
|
+
level = os.environ.get(key, "full")
|
|
38
|
+
return level not in ("none",)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
_EMBEDDINGS_SCHEMA = """
|
|
42
|
+
CREATE TABLE IF NOT EXISTS embeddings (
|
|
43
|
+
entity_id TEXT PRIMARY KEY,
|
|
44
|
+
vector BLOB NOT NULL,
|
|
45
|
+
text_hash TEXT NOT NULL,
|
|
46
|
+
model TEXT NOT NULL,
|
|
47
|
+
dimensions INTEGER NOT NULL,
|
|
48
|
+
created_at TEXT NOT NULL
|
|
49
|
+
);
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _now_iso() -> str:
|
|
54
|
+
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _text_hash(text: str) -> str:
|
|
58
|
+
return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _vec_to_blob(vec: list[float]) -> bytes:
|
|
62
|
+
"""Pack a float vector into a compact binary blob."""
|
|
63
|
+
return struct.pack(f"{len(vec)}f", *vec)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _blob_to_vec(blob: bytes) -> list[float]:
|
|
67
|
+
"""Unpack a binary blob into a float vector."""
|
|
68
|
+
n = len(blob) // 4 # 4 bytes per float32
|
|
69
|
+
return list(struct.unpack(f"{n}f", blob))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Embedder:
    """Dual-strategy embedder with OpenRouter primary + local MiniLM fallback.

    Vectors are persisted in the ``embeddings`` table of the SQLite DB at
    ``db_path`` (table created on demand) and searched with brute-force
    cosine similarity.
    """

    def __init__(self, db_path: str | Path) -> None:
        """Open the SQLite DB at *db_path* and ensure the embeddings table exists."""
        self.db_path = str(db_path)
        self._conn = sqlite3.connect(self.db_path, timeout=10)
        self._conn.row_factory = sqlite3.Row
        self._conn.executescript(_EMBEDDINGS_SCHEMA)
        self._conn.commit()
        self._local_model = None  # lazy-loaded sentence-transformers model

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()

    # ----- Embedding generation -----

    def embed(self, texts: list[str], data_type: str = "METADATA") -> list[list[float]]:
        """Generate embeddings for a list of texts.

        Tries OpenRouter first (if allowed by privacy policy), falls back to
        the local model. On total failure returns one *empty* vector per input
        (not an empty list), so callers can still zip results against inputs.

        Args:
            texts: List of texts to embed.
            data_type: Privacy data type key (e.g. "AUDIO", "OCR", "METADATA").
                Controls whether OpenRouter is allowed for this data.
        """
        if not texts:
            return []

        # Try OpenRouter first (unless privacy policy blocks it)
        if _openrouter_allowed_for(data_type):
            try:
                return self._embed_openrouter(texts)
            except Exception as e:
                print(f"[embed] OpenRouter failed: {e}, trying local model", file=sys.stderr)
        else:
            print(f"[embed] OpenRouter blocked by privacy policy for {data_type}, using local model", file=sys.stderr)

        # Fallback to local model
        try:
            return self._embed_local(texts)
        except Exception as e:
            print(f"[embed] Local model also failed: {e}", file=sys.stderr)
            return [[] for _ in texts]

    def _embed_openrouter(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings via the OpenRouter API.

        Raises on missing API key, network failure, or non-2xx response;
        the caller (`embed`) treats any exception as "fall back to local".
        """
        import requests

        api_key = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY_REFLECTION")
        if not api_key:
            raise RuntimeError("No API key for embeddings")

        resp = requests.post(
            "https://openrouter.ai/api/v1/embeddings",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "openai/text-embedding-3-small",
                "input": texts,
            },
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()

        # Sort by index to maintain input order (the API may reorder rows)
        embeddings = sorted(data.get("data", []), key=lambda x: x.get("index", 0))
        return [e["embedding"] for e in embeddings]

    def _embed_local(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings using the local sentence-transformers model."""
        if self._local_model is None:
            # Deferred import: sentence-transformers is heavy and optional.
            from sentence_transformers import SentenceTransformer
            self._local_model = SentenceTransformer("all-MiniLM-L6-v2")

        embeddings = self._local_model.encode(texts, convert_to_numpy=True)
        return [vec.tolist() for vec in embeddings]

    # ----- Storage -----

    def store_embeddings(self, entity_texts: dict[str, str]) -> int:
        """Embed and upsert entities into the embeddings table.

        Deduplicates via text_hash — skips re-embedding if text hasn't changed.
        Returns count of newly embedded entities.
        """
        if not entity_texts:
            return 0

        # Check which entities need (re-)embedding
        to_embed: dict[str, str] = {}
        for entity_id, text in entity_texts.items():
            th = _text_hash(text)
            existing = self._conn.execute(
                "SELECT text_hash FROM embeddings WHERE entity_id = ?",
                (entity_id,),
            ).fetchone()
            if existing and existing["text_hash"] == th:
                continue  # text unchanged, skip
            to_embed[entity_id] = text

        if not to_embed:
            return 0

        # Generate embeddings
        texts = list(to_embed.values())
        entity_ids = list(to_embed.keys())
        vectors = self.embed(texts)

        if not vectors or not vectors[0]:
            return 0  # embedding failed entirely

        # Infer which backend produced the vectors from the dimensionality:
        # OpenRouter text-embedding-3-small is 1536-dim, local MiniLM is 384-dim.
        model_name = "unknown"
        dims = len(vectors[0])
        if dims >= 1000:
            model_name = "text-embedding-3-small"
        elif dims > 0:
            model_name = "all-MiniLM-L6-v2"

        now = _now_iso()
        count = 0
        for entity_id, text, vec in zip(entity_ids, texts, vectors):
            if not vec:
                continue  # this particular text failed to embed
            self._conn.execute(
                "INSERT OR REPLACE INTO embeddings "
                "(entity_id, vector, text_hash, model, dimensions, created_at) "
                "VALUES (?, ?, ?, ?, ?, ?)",
                (entity_id, _vec_to_blob(vec), _text_hash(text), model_name, len(vec), now),
            )
            count += 1
        self._conn.commit()
        return count

    # ----- Vector search -----

    def vector_search(
        self,
        query_vec: list[float],
        top_k: int = 10,
        entity_types: list[str] | None = None,
    ) -> list[tuple[str, float]]:
        """Brute-force cosine similarity search.

        Args:
            query_vec: Query embedding; stored rows with a different dimension
                (mixed models) are silently skipped.
            top_k: Maximum number of results to return.
            entity_types: Optional entity-id prefixes (e.g. "pattern") used to
                filter rows via ``entity_id LIKE '<type>:%'``.

        Returns [(entity_id, score)] sorted by score descending.
        """
        if not query_vec:
            return []

        # Load all embeddings (optionally filtered by entity_id prefix)
        if entity_types:
            conditions = " OR ".join("entity_id LIKE ?" for _ in entity_types)
            params = [f"{t}:%" for t in entity_types]
            rows = self._conn.execute(
                f"SELECT entity_id, vector FROM embeddings WHERE {conditions}",
                params,
            ).fetchall()
        else:
            rows = self._conn.execute(
                "SELECT entity_id, vector FROM embeddings"
            ).fetchall()

        if not rows:
            return []

        # Compute cosine similarity
        results: list[tuple[str, float]] = []
        q_norm = _norm(query_vec)
        if q_norm == 0:
            return []

        for row in rows:
            vec = _blob_to_vec(row["vector"])
            if len(vec) != len(query_vec):
                continue  # dimension mismatch (mixed models)
            v_norm = _norm(vec)
            if v_norm == 0:
                continue  # degenerate all-zero vector — avoid ZeroDivisionError
            score = _dot(query_vec, vec) / (q_norm * v_norm)
            results.append((row["entity_id"], score))

        results.sort(key=lambda x: x[1], reverse=True)
        return results[:top_k]
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _dot(a: list[float], b: list[float]) -> float:
|
|
262
|
+
"""Dot product of two vectors."""
|
|
263
|
+
return sum(x * y for x, y in zip(a, b))
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _norm(v: list[float]) -> float:
|
|
267
|
+
"""L2 norm of a vector."""
|
|
268
|
+
return sum(x * x for x in v) ** 0.5
|
|
File without changes
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""Behavioral assertion library for sinain-koog tick evaluation.
|
|
2
|
+
|
|
3
|
+
Each assertion function validates a runtime invariant of the pipeline.
|
|
4
|
+
Returns ``{"name": str, "passed": bool, "detail": str}``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _result(name: str, passed: bool, detail: str) -> dict:
|
|
11
|
+
return {"name": name, "passed": passed, "detail": detail}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Playbook curator assertions
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
def assert_playbook_under_limit(curator_result: dict, limit: int = 50) -> dict:
    """Verify playbook body stays under the line limit."""
    lines = curator_result.get("playbookLines", 0)
    passed = lines <= limit
    if passed:
        detail = f"body has {lines} lines (limit {limit})"
    else:
        detail = f"body has {lines} lines, exceeds limit of {limit}"
    return {"name": "playbook_under_limit", "passed": passed, "detail": detail}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def assert_curator_respected_directive(curator_result: dict, directive: str) -> dict:
    """Check that curator changes align with the curate directive."""
    name = "curator_respected_directive"
    changes = curator_result.get("changes", {})
    n_added = len(changes.get("added", []))
    n_pruned = len(changes.get("pruned", []))

    if directive == "aggressive_prune":
        # Either the curator pruned something, or there was nothing to prune.
        if n_pruned > 0:
            return {"name": name, "passed": True,
                    "detail": f"aggressive_prune: pruned {n_pruned} items"}
        if n_added == 0 and n_pruned == 0:
            return {"name": name, "passed": True,
                    "detail": "aggressive_prune: no changes (acceptable if playbook already lean)"}
        return {"name": name, "passed": False,
                "detail": f"aggressive_prune: added {n_added} but pruned {n_pruned} — expected pruning"}

    if directive == "stability":
        # In stability mode, heavy pruning relative to additions is a violation.
        if n_pruned > n_added + 2:
            return {"name": name, "passed": False,
                    "detail": f"stability: pruned {n_pruned} items (only added {n_added}) — too aggressive for stability mode"}
        return {"name": name, "passed": True,
                "detail": f"stability: added {n_added}, pruned {n_pruned} — conservative"}

    # normal / insufficient_data — any reasonable mix is fine
    return {"name": name, "passed": True,
            "detail": f"{directive}: added {n_added}, pruned {n_pruned}"}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Signal analyzer assertions
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
def assert_no_repeat_action(signal_result: dict, recent_logs: list[dict], window: int = 3) -> dict:
    """Verify recommendedAction doesn't repeat the last N ticks' actions."""
    name = "no_repeat_action"
    action = signal_result.get("recommendedAction")
    if action is None or action.get("action") == "skip":
        return {"name": name, "passed": True, "detail": "no action recommended (skip/null)"}

    task = (action.get("task") or "").lower().strip()
    if not task:
        return {"name": name, "passed": True, "detail": "no task description to compare"}

    # Gather the chosen-action descriptions from the last `window` ticks.
    previous_tasks = [
        (a.get("reason") or a.get("task") or "").lower().strip()
        for log in recent_logs[:window]
        for a in log.get("actionsConsidered", [])
        if a.get("chosen")
    ]

    # Check for near-duplicate: >60% word overlap counts as a repeat
    # (catches rephrasings of the same task).
    task_words = set(task.split())
    for prev_task in previous_tasks:
        if not prev_task:
            continue
        prev_words = set(prev_task.split())
        if not task_words or not prev_words:
            continue
        overlap = len(task_words & prev_words) / max(len(task_words), len(prev_words))
        if overlap > 0.6:
            return {"name": name, "passed": False,
                    "detail": f"action task '{task[:60]}' overlaps with recent '{prev_task[:60]}' ({overlap:.0%} word overlap)"}

    return {"name": name, "passed": True,
            "detail": f"action task is distinct from last {window} ticks"}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def assert_signal_confidence_threshold(signal_result: dict, threshold: float = 0.5) -> dict:
    """Verify actions are only recommended above the confidence threshold."""
    name = "signal_confidence_threshold"
    action = signal_result.get("recommendedAction")
    if action is None or action.get("action") == "skip":
        return {"name": name, "passed": True, "detail": "no action recommended"}

    confidence = action.get("confidence")
    if confidence is None:
        # An action without a confidence value is itself a violation.
        return {"name": name, "passed": False,
                "detail": "action recommended but no confidence value provided"}

    ok = confidence >= threshold
    relation = ">=" if ok else "<"
    return {"name": name, "passed": ok,
            "detail": f"confidence {confidence:.2f} {relation} threshold {threshold}"}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# Insight synthesizer assertions
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
def assert_insight_char_limit(synth_result: dict, limit: int = 500) -> dict:
    """Verify suggestion+insight stays under the character limit."""
    name = "insight_char_limit"
    if synth_result.get("skip", False):
        return {"name": name, "passed": True, "detail": "output skipped"}

    total = len(synth_result.get("suggestion", "")) + len(synth_result.get("insight", ""))
    if total > limit:
        return {"name": name, "passed": False,
                "detail": f"total {total} chars exceeds limit of {limit}"}
    return {"name": name, "passed": True, "detail": f"total {total} chars (limit {limit})"}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def assert_skip_reason_specific(synth_result: dict) -> dict:
    """If skip=true, verify the reason is specific (not generic boilerplate)."""
    name = "skip_reason_specific"
    if not synth_result.get("skip", False):
        return {"name": name, "passed": True, "detail": "output not skipped"}

    reason = (synth_result.get("skipReason") or "").strip()
    if not reason:
        return {"name": name, "passed": False, "detail": "skip=true but no skipReason provided"}

    # A short reason that merely contains (or equals) one of these stock
    # phrases is considered boilerplate, not a real explanation.
    generic_phrases = (
        "no new data",
        "nothing new",
        "no updates",
        "insufficient data",
        "not enough information",
        "no changes",
    )
    reason_lower = reason.lower()
    is_generic = any(
        reason_lower == phrase or (len(reason_lower) < 30 and phrase in reason_lower)
        for phrase in generic_phrases
    )
    if is_generic:
        return {"name": name, "passed": False,
                "detail": f"skipReason is too generic: '{reason}'"}

    return {"name": name, "passed": True, "detail": f"skipReason is specific ({len(reason)} chars)"}
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Memory miner assertions
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
def assert_miner_references_sources(miner_result: dict, daily_files: list[str]) -> dict:
    """Verify mining findings reference actual source files that were provided."""
    name = "miner_references_sources"
    mined = miner_result.get("minedSources", [])
    if not mined:
        return {"name": name, "passed": True, "detail": "no sources mined (early return)"}

    # daily_files contains basenames like "2026-02-21.md"
    known = set(daily_files)
    unknown = [src for src in mined if src not in known]
    if unknown:
        return {"name": name, "passed": False,
                "detail": f"minedSources references unknown files: {unknown}"}
    return {"name": name, "passed": True,
            "detail": f"all {len(mined)} mined sources are valid"}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# ---------------------------------------------------------------------------
|
|
181
|
+
# Cross-script / structural assertions
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
def assert_schema_valid(script_name: str, output: dict, schema_errors: list[str]) -> dict:
    """Wrap schema validation result as an assertion.

    ``output`` is accepted for signature symmetry with the other assertions
    but is not inspected; the verdict comes entirely from ``schema_errors``.
    """
    name = f"schema_valid_{script_name}"
    if schema_errors:
        preview = "; ".join(schema_errors[:3])  # cap detail at 3 errors
        return {"name": name, "passed": False,
                "detail": f"{len(schema_errors)} schema errors: {preview}"}
    return {"name": name, "passed": True, "detail": "output matches schema"}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def assert_playbook_header_footer_intact(playbook_text: str) -> dict:
    """Verify the playbook still has its mining-index header and effectiveness footer."""
    name = "playbook_header_footer_intact"
    # Marker comment -> human-readable label, checked in this fixed order.
    markers = (
        ("<!-- mining-index:", "mining-index"),
        ("<!-- effectiveness:", "effectiveness"),
    )
    missing = [label for marker, label in markers if marker not in playbook_text]
    if not missing:
        return {"name": name, "passed": True,
                "detail": "both mining-index and effectiveness comments present"}
    return {"name": name, "passed": False,
            "detail": f"missing playbook comments: {', '.join(missing)}"}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
# Trait voice assertions (sinain-core wiring verification)
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
def assert_situation_has_active_voice(
|
|
214
|
+
situation_content: str, expected_trait: str | None = None
|
|
215
|
+
) -> dict:
|
|
216
|
+
"""Check SITUATION.md contains an Active Voice section (after trait wiring).
|
|
217
|
+
|
|
218
|
+
Called by tick_evaluator.py when processing live ticks that have SITUATION.md
|
|
219
|
+
content and a trait was selected for that tick.
|
|
220
|
+
"""
|
|
221
|
+
has_section = "## Active Voice" in situation_content
|
|
222
|
+
if not has_section:
|
|
223
|
+
return _result("situation_has_active_voice", False, "no '## Active Voice' section")
|
|
224
|
+
if expected_trait and expected_trait not in situation_content:
|
|
225
|
+
return _result("situation_has_active_voice", False,
|
|
226
|
+
f"section present but '{expected_trait}' not found")
|
|
227
|
+
return _result("situation_has_active_voice", True, "Active Voice section present")
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
# Runner: execute all applicable assertions for a tick
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
|
|
234
|
+
def run_tick_assertions(
    log_entry: dict,
    recent_logs: list[dict],
    playbook_text: str,
    daily_files: list[str],
) -> list[dict]:
    """Run all applicable assertions against a single tick's log entry.

    Returns a list of assertion result dicts.
    """
    results: list[dict] = []

    # Signal analyzer assertions
    signals = log_entry.get("signals")
    if signals is not None:
        signal_payload = {
            "signals": signals,
            "recommendedAction": log_entry.get("recommendedAction"),
        }
        results.append(assert_signal_confidence_threshold(signal_payload))
        results.append(assert_no_repeat_action(signal_payload, recent_logs))

    # Curator assertions — playbookChanges can be {"note": "skipped"} or full output
    curator = log_entry.get("playbookChanges")
    if isinstance(curator, dict) and "changes" in curator:
        curator_payload = dict(curator)
        curator_payload.setdefault("playbookLines", 0)
        results.append(assert_playbook_under_limit(curator_payload))
        results.append(assert_curator_respected_directive(
            curator_payload, log_entry.get("curateDirective", "normal")))

    # Insight synthesizer assertions — output can be null (pipeline-level skip)
    output = log_entry.get("output")
    if isinstance(output, dict):
        results.append(assert_insight_char_limit(output))
        results.append(assert_skip_reason_specific(output))

    # Mining assertions — log uses miningFindings (str) and minedSources (list)
    mining = log_entry.get("miningResult")
    if mining is not None:
        results.append(assert_miner_references_sources(mining, daily_files))
    elif log_entry.get("minedSources"):
        # Reconstruct mining result from flat log fields
        results.append(assert_miner_references_sources(
            {"minedSources": log_entry.get("minedSources", [])}, daily_files
        ))

    # Playbook health (if we have playbook text)
    if playbook_text:
        results.append(assert_playbook_header_footer_intact(playbook_text))

    return results
|
|
File without changes
|