@geravant/sinain 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -0
- package/index.ts +2096 -0
- package/install.js +155 -0
- package/openclaw.plugin.json +59 -0
- package/package.json +21 -0
- package/sinain-memory/common.py +403 -0
- package/sinain-memory/demo_knowledge_transfer.sh +85 -0
- package/sinain-memory/embedder.py +268 -0
- package/sinain-memory/eval/__init__.py +0 -0
- package/sinain-memory/eval/assertions.py +288 -0
- package/sinain-memory/eval/judges/__init__.py +0 -0
- package/sinain-memory/eval/judges/base_judge.py +61 -0
- package/sinain-memory/eval/judges/curation_judge.py +46 -0
- package/sinain-memory/eval/judges/insight_judge.py +48 -0
- package/sinain-memory/eval/judges/mining_judge.py +42 -0
- package/sinain-memory/eval/judges/signal_judge.py +45 -0
- package/sinain-memory/eval/schemas.py +247 -0
- package/sinain-memory/eval_delta.py +109 -0
- package/sinain-memory/eval_reporter.py +642 -0
- package/sinain-memory/feedback_analyzer.py +221 -0
- package/sinain-memory/git_backup.sh +19 -0
- package/sinain-memory/insight_synthesizer.py +181 -0
- package/sinain-memory/memory/2026-03-01.md +11 -0
- package/sinain-memory/memory/playbook-archive/sinain-playbook-2026-03-01-1418.md +15 -0
- package/sinain-memory/memory/playbook-logs/2026-03-01.jsonl +1 -0
- package/sinain-memory/memory/sinain-playbook.md +21 -0
- package/sinain-memory/memory-config.json +39 -0
- package/sinain-memory/memory_miner.py +183 -0
- package/sinain-memory/module_manager.py +695 -0
- package/sinain-memory/playbook_curator.py +225 -0
- package/sinain-memory/requirements.txt +3 -0
- package/sinain-memory/signal_analyzer.py +141 -0
- package/sinain-memory/test_local.py +402 -0
- package/sinain-memory/tests/__init__.py +0 -0
- package/sinain-memory/tests/conftest.py +189 -0
- package/sinain-memory/tests/test_curator_helpers.py +94 -0
- package/sinain-memory/tests/test_embedder.py +210 -0
- package/sinain-memory/tests/test_extract_json.py +124 -0
- package/sinain-memory/tests/test_feedback_computation.py +121 -0
- package/sinain-memory/tests/test_miner_helpers.py +71 -0
- package/sinain-memory/tests/test_module_management.py +458 -0
- package/sinain-memory/tests/test_parsers.py +96 -0
- package/sinain-memory/tests/test_tick_evaluator.py +430 -0
- package/sinain-memory/tests/test_triple_extractor.py +255 -0
- package/sinain-memory/tests/test_triple_ingest.py +191 -0
- package/sinain-memory/tests/test_triple_migrate.py +138 -0
- package/sinain-memory/tests/test_triplestore.py +248 -0
- package/sinain-memory/tick_evaluator.py +392 -0
- package/sinain-memory/triple_extractor.py +402 -0
- package/sinain-memory/triple_ingest.py +290 -0
- package/sinain-memory/triple_migrate.py +275 -0
- package/sinain-memory/triple_query.py +184 -0
- package/sinain-memory/triplestore.py +498 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Local integration test for sinain-koog heartbeat scripts.
|
|
3
|
+
|
|
4
|
+
Runs all scripts in pipeline order using real memory/ data (or synthetic data
|
|
5
|
+
if memory/ is sparse). Requires OPENROUTER_API_KEY env var.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
OPENROUTER_API_KEY=... python3 sinain-koog/test_local.py [--memory-dir memory/]
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
import shutil
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
SCRIPT_DIR = Path(__file__).parent.resolve()
|
|
22
|
+
SESSION_SUMMARY = "User worked on sinain-hud wearable camera pipeline. Debugged OCR backpressure issues. Explored Flutter overlay options for macOS."
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run_script(name: str, args: list[str], label: str) -> dict | None:
    """Run a Python script, capture stdout JSON and stderr logs.

    Args:
        name: Script filename, resolved relative to SCRIPT_DIR.
        args: Extra command-line arguments appended to the invocation.
        label: Human-readable phase label printed in the banner.

    Returns:
        The parsed JSON object from the script's stdout, or None when the
        script times out, exits non-zero, prints nothing, or prints
        something that is not valid JSON.
    """
    cmd = [sys.executable, str(SCRIPT_DIR / name)] + args
    print(f"\n{'='*60}")
    print(f" {label}")
    print(f" cmd: {' '.join(cmd)}")
    print(f"{'='*60}")

    # A hung script previously raised TimeoutExpired out of this helper and
    # aborted the whole harness; treat a timeout like any other phase failure.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        print(" TIMEOUT: script exceeded 120s")
        return None

    if result.stderr:
        for line in result.stderr.strip().splitlines():
            print(f" stderr: {line}")

    if result.returncode != 0:
        print(f" EXIT CODE: {result.returncode}")
        print(f" stdout: {result.stdout[:500]}")
        return None

    stdout = result.stdout.strip()
    if not stdout:
        print(" (no stdout)")
        return None

    try:
        data = json.loads(stdout)
        # Truncate the echo so huge payloads don't flood the console.
        print(f" OUTPUT: {json.dumps(data, indent=2)[:1000]}")
        return data
    except json.JSONDecodeError:
        print(f" RAW OUTPUT: {stdout[:500]}")
        return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def setup_synthetic_memory(memory_dir: str) -> None:
    """Create minimal synthetic memory data for testing.

    Populates *memory_dir* with a starter playbook, a daily-notes file for
    today (UTC), and a single playbook-log JSONL entry. Files that already
    exist are never overwritten, so repeated calls are harmless.
    """
    root = Path(memory_dir)
    root.mkdir(parents=True, exist_ok=True)
    for subdir in ("playbook-logs", "playbook-archive"):
        (root / subdir).mkdir(exist_ok=True)

    # Seed a playbook only when one is not already present.
    playbook_file = root / "sinain-playbook.md"
    if not playbook_file.exists():
        playbook_text = "\n".join([
            "<!-- mining-index: -->",
            "# Sinain Playbook",
            "",
            "## Established Patterns",
            "- When OCR pipeline stalls, check camera frame queue depth (score: 0.8)",
            "- When user explores new framework, spawn research agent proactively (score: 0.6)",
            "",
            "## Observed",
            "- User prefers concise Telegram messages over detailed ones",
            "- Late evening sessions tend to be exploratory/research-heavy",
            "",
            "## Stale",
            "- Flutter overlay rendering glitch on macOS 15 [since: 2026-02-18]",
            "",
            "<!-- effectiveness: outputs=8, positive=5, negative=1, neutral=2, rate=0.63, updated=2026-02-21 -->",
            "",
        ])
        playbook_file.write_text(playbook_text, encoding="utf-8")

    # Seed today's daily notes file when absent.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    daily_file = root / f"{today}.md"
    if not daily_file.exists():
        daily_text = "\n".join([
            f"# {today} Session Notes",
            "",
            "## OCR Pipeline",
            "- Switched from Tesseract to OpenRouter vision API",
            "- Backpressure issue: camera produces frames faster than API can process",
            "- Solution: frame dropping with scene-gate (skip similar consecutive frames)",
            "",
            "## Wearable HUD",
            "- Testing 3-panel debug interface",
            "- Camera feed, OCR overlay, and pipeline stats side-by-side",
            "- Found that JPEG quality 70 is good balance of speed vs readability",
            "",
        ])
        daily_file.write_text(daily_text, encoding="utf-8")

    # Seed one playbook-log entry for today when absent.
    log_file = root / "playbook-logs" / f"{today}.jsonl"
    if not log_file.exists():
        record = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "idle": False,
            "sessionHistorySummary": "User debugging OCR pipeline",
            "feedbackScores": {"avg": 0.35, "high": ["OCR fix suggestion"], "low": []},
            "actionsConsidered": [
                {"action": "spawn research", "reason": "Flutter overlay perf", "chosen": False, "skipReason": "not urgent"}
            ],
            "effectivenessRate": 0.63,
            "effectivenessAlert": False,
            "playbookChanges": {"added": [], "pruned": [], "promoted": []},
            "output": {"suggestion": "Consider frame batching for OCR pipeline", "insight": "Evening sessions correlate with exploratory work"},
            "skipped": False,
        }
        log_file.write_text(json.dumps(record) + "\n", encoding="utf-8")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_extract_json():
    """Unit tests for extract_json() — validates every extraction stage.

    Exercises clean JSON, markdown-fenced JSON, prose-embedded JSON found
    by the balanced-brace scanner, and truncated-JSON repair, plus inputs
    that must raise ValueError.

    Returns:
        True when every check passed, False otherwise.
    """
    # Import here so the test file can still run standalone
    sys.path.insert(0, str(SCRIPT_DIR))
    from common import extract_json

    passed = 0
    failed = 0

    def check(label: str, input_text: str, expected_key: str | None = None, expect_fail: bool = False):
        # Run one extraction case and tally the outcome.
        #   label: human-readable case name printed with the verdict
        #   input_text: raw LLM-style text handed to extract_json()
        #   expected_key: when given, must be present in the parsed result
        #   expect_fail: when True, a ValueError is the success condition
        nonlocal passed, failed
        try:
            result = extract_json(input_text)
            if expect_fail:
                print(f" FAIL: {label} — expected ValueError but got: {result}")
                failed += 1
                return
            if expected_key and expected_key not in result:
                print(f" FAIL: {label} — missing key '{expected_key}' in {result}")
                failed += 1
                return
            print(f" OK: {label}")
            passed += 1
        except ValueError:
            if expect_fail:
                print(f" OK: {label} (correctly raised ValueError)")
                passed += 1
            else:
                print(f" FAIL: {label} — unexpected ValueError")
                failed += 1

    print(f"\n{'='*60}")
    print(" Unit Tests: extract_json()")
    print(f"{'='*60}")

    # Stage 1: clean JSON
    check("clean object", '{"signals": [], "idle": true}', "signals")
    check("clean array", '[{"a": 1}, {"b": 2}]')

    # Stage 2: markdown fences
    check("fenced json", '```json\n{"signals": ["x"], "idle": false}\n```', "signals")
    check("fenced no lang tag", '```\n{"findings": "test"}\n```', "findings")
    check("text before fence", 'Here is the result:\n```json\n{"skip": true}\n```', "skip")
    check("text after fence", '```json\n{"skip": false}\n```\nHope this helps!', "skip")
    check("text before and after fence",
          'I analyzed it.\n```json\n{"curateDirective": "normal"}\n```\nLet me know.',
          "curateDirective")

    # Stage 3: balanced-brace scanner (prose-embedded JSON)
    check("prose then JSON", 'The analysis result is: {"signals": ["a"], "idle": true}', "signals")
    check("JSON then prose", '{"findings": "test"} That is all.', "findings")
    check("nested braces", '{"outer": {"inner": {"deep": 1}}, "key": "val"}', "outer")
    check("strings with braces",
          '{"msg": "use {braces} like this", "ok": true}', "msg")
    check("prose-embedded array", 'Result: [{"a": 1}, {"b": 2}]')

    # Edge cases
    check("whitespace padded", ' \n {"key": "value"} \n ', "key")
    check("no JSON at all", "This is just plain text with no JSON.", expect_fail=True)
    check("empty string", "", expect_fail=True)
    # Stage 4: truncated JSON repair
    check("truncated object — missing closing brace",
          '{"signals": ["a", "b"], "idle": true, "extra": "val',
          "signals")
    check("truncated nested — missing two closing braces",
          '{"outer": {"inner": "val"',
          "outer")
    check("truncated array in object",
          '{"items": [1, 2, 3',
          "items")
    check("truncated with trailing comma",
          '{"a": 1, "b": 2,',
          "a")
    check("truncated mid-key (Strategy C strips back)", '{"valid": 1, "partial_ke', "valid")
    check("prose + truncated JSON",
          'Here is the result: {"findings": "some text", "patterns": ["p1"',
          "findings")

    # Previously malformed — Stage 4 can now repair this (unclosed string + brace)
    check("truncated simple object", '{"unclosed": "brace"', "unclosed")

    # Truly unrecoverable
    check("no JSON at all v2", "just some random text without any brackets", expect_fail=True)

    print(f"\n Results: {passed} passed, {failed} failed")
    if failed > 0:
        print(" SOME TESTS FAILED")
        return False
    print(" All tests passed!")
    return True
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_llm_error():
    """Unit tests for LLMError — verifies call_llm raises LLMError on request failures.

    Mocks common.requests.post to simulate a timeout, a connection error,
    and an HTTP 500 response, and checks each surfaces as an LLMError whose
    message names the underlying exception type.

    Returns:
        True when every check passed, False otherwise.
    """
    from unittest.mock import patch, MagicMock
    sys.path.insert(0, str(SCRIPT_DIR))
    from common import call_llm, LLMError

    passed = 0
    failed = 0

    print(f"\n{'='*60}")
    print(" Unit Tests: LLMError from call_llm()")
    print(f"{'='*60}")

    # Test 1: Timeout raises LLMError
    import requests as req_mod
    # Patch the env so call_llm does not bail out on a missing API key.
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
        with patch("common.requests.post", side_effect=req_mod.exceptions.Timeout("Connection timed out")):
            try:
                call_llm("system", "user")
                print(" FAIL: timeout — expected LLMError but call succeeded")
                failed += 1
            except LLMError as e:
                # The error message should carry the original exception class name.
                if "Timeout" in str(e):
                    print(" OK: timeout raises LLMError with Timeout info")
                    passed += 1
                else:
                    print(f" FAIL: timeout — LLMError message missing 'Timeout': {e}")
                    failed += 1
            except Exception as e:
                print(f" FAIL: timeout — expected LLMError but got {type(e).__name__}: {e}")
                failed += 1

    # Test 2: ConnectionError raises LLMError
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
        with patch("common.requests.post", side_effect=req_mod.exceptions.ConnectionError("DNS failed")):
            try:
                call_llm("system", "user")
                print(" FAIL: connection error — expected LLMError")
                failed += 1
            except LLMError as e:
                if "ConnectionError" in str(e):
                    print(" OK: ConnectionError raises LLMError")
                    passed += 1
                else:
                    print(f" FAIL: connection error — message missing type: {e}")
                    failed += 1
            except Exception as e:
                print(f" FAIL: connection error — got {type(e).__name__}: {e}")
                failed += 1

    # Test 3: HTTP 500 raises LLMError
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
        # The post succeeds but raise_for_status() throws, as requests does
        # for non-2xx responses.
        mock_resp = MagicMock()
        mock_resp.raise_for_status.side_effect = req_mod.exceptions.HTTPError("500 Server Error")
        with patch("common.requests.post", return_value=mock_resp):
            try:
                call_llm("system", "user")
                print(" FAIL: HTTP 500 — expected LLMError")
                failed += 1
            except LLMError as e:
                if "HTTPError" in str(e):
                    print(" OK: HTTP 500 raises LLMError")
                    passed += 1
                else:
                    print(f" FAIL: HTTP 500 — message missing type: {e}")
                    failed += 1
            except Exception as e:
                print(f" FAIL: HTTP 500 — got {type(e).__name__}: {e}")
                failed += 1

    print(f"\n Results: {passed} passed, {failed} failed")
    if failed > 0:
        print(" SOME TESTS FAILED")
        return False
    print(" All tests passed!")
    return True
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _run_pipeline(memory_dir: str) -> bool:
    """Run every heartbeat script in pipeline order against *memory_dir*.

    Outputs of earlier phases (mining findings, curate directive, curator
    changes) are threaded into later phases, mirroring the production flow.

    Returns:
        True when a required phase failed, False otherwise.
    """
    results = {}
    failed = False

    # Phase 2: Signal Analyzer
    r = run_script("signal_analyzer.py", [
        "--memory-dir", memory_dir,
        "--session-summary", SESSION_SUMMARY,
    ], "Phase 2: Signal Analyzer")
    results["signal_analyzer"] = r
    if r is None:
        failed = True

    # Phase 3.1: Memory Miner (idle only — run it for testing)
    r = run_script("memory_miner.py", [
        "--memory-dir", memory_dir,
    ], "Phase 3.1: Memory Miner (idle)")
    results["memory_miner"] = r
    mining_findings = ""
    if r:
        mining_findings = r.get("findings", "")

    # Phase 3.2: Feedback Analyzer
    r = run_script("feedback_analyzer.py", [
        "--memory-dir", memory_dir,
        "--session-summary", SESSION_SUMMARY,
    ], "Phase 3.2: Feedback Analyzer")
    results["feedback_analyzer"] = r
    curate_directive = "normal"
    if r:
        curate_directive = r.get("curateDirective", "normal")

    # Phase 3.3: Playbook Curator — consumes miner findings when present
    curator_args = [
        "--memory-dir", memory_dir,
        "--session-summary", SESSION_SUMMARY,
        "--curate-directive", curate_directive,
    ]
    if mining_findings:
        curator_args += ["--mining-findings", mining_findings]
    r = run_script("playbook_curator.py", curator_args, "Phase 3.3: Playbook Curator")
    results["playbook_curator"] = r
    curator_changes = ""
    if r:
        curator_changes = json.dumps(r.get("changes", {}))

    # Phase 3.4: JSONL log — skipped (main agent responsibility)
    print(f"\n{'='*60}")
    print(" Phase 3.4: JSONL Log (SKIPPED — main agent responsibility)")
    print(f"{'='*60}")

    # Phase 3.5: Insight Synthesizer
    synth_args = [
        "--memory-dir", memory_dir,
        "--session-summary", SESSION_SUMMARY,
    ]
    if curator_changes:
        synth_args += ["--curator-changes", curator_changes]
    r = run_script("insight_synthesizer.py", synth_args, "Phase 3.5: Insight Synthesizer")
    results["insight_synthesizer"] = r

    # Summary
    print(f"\n{'='*60}")
    print(" SUMMARY")
    print(f"{'='*60}")
    for name, data in results.items():
        status = "OK" if data is not None else "FAILED"
        print(f" {name}: {status}")

    return failed


def main():
    """CLI entry point: run unit tests, then the full scripted pipeline.

    Exits non-zero when a unit test fails, no API key is configured, or
    any required pipeline phase fails.
    """
    parser = argparse.ArgumentParser(description="sinain-koog integration test")
    parser.add_argument("--memory-dir", default="memory/", help="Path to memory/ directory")
    parser.add_argument("--use-synthetic", action="store_true", help="Create synthetic test data")
    args = parser.parse_args()

    # Run unit tests first (no API key needed)
    if not test_extract_json():
        sys.exit(1)
    if not test_llm_error():
        sys.exit(1)

    if not (os.environ.get("OPENROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY_REFLECTION")):
        print("ERROR: OPENROUTER_API_KEY or OPENROUTER_API_KEY_REFLECTION env var is required")
        sys.exit(1)

    memory_dir = args.memory_dir
    cleanup_synthetic = False

    # If no real memory data, use synthetic. With --use-synthetic the data is
    # written into the given memory dir; otherwise a throwaway temp dir is used.
    if args.use_synthetic or not Path(memory_dir, "sinain-playbook.md").exists():
        if args.use_synthetic:
            print("Using synthetic memory data...")
        else:
            print(f"No playbook found at {memory_dir}/sinain-playbook.md — using synthetic data")
        if not args.use_synthetic:
            memory_dir = tempfile.mkdtemp(prefix="sinain-test-memory-")
            cleanup_synthetic = True
        setup_synthetic_memory(memory_dir)

    print(f"Memory dir: {memory_dir}")
    print(f"Session summary: {SESSION_SUMMARY[:80]}...")

    # The cleanup previously sat after sys.exit(1), so a failing phase leaked
    # the synthetic temp dir — try/finally guarantees it is removed.
    try:
        failed = _run_pipeline(memory_dir)
    finally:
        if cleanup_synthetic:
            shutil.rmtree(memory_dir, ignore_errors=True)

    if failed:
        print("\nSome scripts failed — check output above.")
        sys.exit(1)
    else:
        print("\nAll scripts completed successfully!")


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Shared fixtures for sinain-koog pytest test suite."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
# Ensure sinain-koog source is importable
|
|
11
|
+
KOOG_DIR = Path(__file__).resolve().parent.parent
|
|
12
|
+
if str(KOOG_DIR) not in sys.path:
|
|
13
|
+
sys.path.insert(0, str(KOOG_DIR))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
def tmp_memory_dir(tmp_path):
    """Create a temporary memory directory with sample data.

    Builds ``tmp_path/memory`` containing a sample playbook (with
    mining-index header and effectiveness footer comments), three dated
    daily-notes files, empty archive/eval directories, and a
    playbook-logs JSONL file for today holding one active-session entry
    and one idle entry carrying a mining result.

    Returns:
        The populated memory directory path.
    """
    memory = tmp_path / "memory"
    memory.mkdir()
    (memory / "playbook-logs").mkdir()
    (memory / "playbook-archive").mkdir()
    (memory / "eval-logs").mkdir()
    (memory / "eval-reports").mkdir()

    # Sample playbook
    playbook = (
        "<!-- mining-index: 2026-02-21,2026-02-20 -->\n"
        "# Sinain Playbook\n\n"
        "## Established Patterns\n"
        "- When OCR pipeline stalls, check camera frame queue depth (score: 0.8)\n"
        "- When user explores new framework, spawn research agent proactively (score: 0.6)\n\n"
        "## Observed\n"
        "- User prefers concise Telegram messages over detailed ones\n"
        "- Late evening sessions tend to be exploratory/research-heavy\n\n"
        "## Stale\n"
        "- Flutter overlay rendering glitch on macOS 15 [since: 2026-02-18]\n\n"
        "<!-- effectiveness: outputs=8,positive=5,negative=1,neutral=2,rate=0.63,updated=2026-02-21 -->\n"
    )
    (memory / "sinain-playbook.md").write_text(playbook, encoding="utf-8")

    # Sample daily memory files
    for date in ["2026-02-21", "2026-02-20", "2026-02-19"]:
        (memory / f"{date}.md").write_text(
            f"# {date} Session Notes\n\n- Worked on OCR pipeline\n- Explored Flutter overlays\n",
            encoding="utf-8",
        )

    # Sample playbook-log entries: one active tick, then one idle/skipped
    # tick whose miningResult exercises the idle-mining code paths.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    entries = [
        {
            "ts": "2026-02-28T10:00:00Z",
            "idle": False,
            "sessionSummary": "Debugging OCR pipeline",
            "signals": [{"description": "OCR pipeline backpressure detected", "priority": "high"}],
            "recommendedAction": {"action": "sessions_spawn", "task": "Debug OCR backpressure", "confidence": 0.8},
            "feedbackScores": {"avg": 0.35, "high": ["OCR fix"], "low": []},
            "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
            "curateDirective": "normal",
            "playbookChanges": {
                "changes": {"added": ["new pattern"], "pruned": [], "promoted": []},
                "staleItemActions": [],
                "playbookLines": 12,
            },
            "output": {
                "skip": False,
                "suggestion": "Consider frame batching for OCR pipeline",
                "insight": "Evening sessions correlate with exploratory work patterns",
                "totalChars": 95,
            },
            "skipped": False,
            "actionsConsidered": [
                {"action": "sessions_spawn", "reason": "Debug OCR backpressure", "chosen": True}
            ],
        },
        {
            "ts": "2026-02-28T10:30:00Z",
            "idle": True,
            "sessionSummary": "User idle",
            "signals": [],
            "recommendedAction": None,
            "feedbackScores": {"avg": 0, "high": [], "low": []},
            "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
            "curateDirective": "normal",
            "playbookChanges": {
                "changes": {"added": [], "pruned": [], "promoted": []},
                "staleItemActions": [],
                "playbookLines": 12,
            },
            "output": {
                "skip": True,
                "skipReason": "User is idle and no new patterns detected in playbook since last analysis",
            },
            "skipped": True,
            "miningResult": {
                "findings": "Found cross-day OCR pattern",
                "newPatterns": ["frame dropping improves OCR accuracy"],
                "contradictions": [],
                "preferences": ["user prefers minimal configs"],
                "minedSources": ["2026-02-21.md"],
            },
            "actionsConsidered": [],
        },
    ]

    # One JSON object per line (JSONL), trailing newline included.
    log_file = memory / "playbook-logs" / f"{today}.jsonl"
    log_file.write_text(
        "\n".join(json.dumps(e) for e in entries) + "\n",
        encoding="utf-8",
    )

    return memory
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@pytest.fixture
def tmp_modules_dir(tmp_path):
    """Create a temporary modules directory with sample module.

    Builds ``tmp_path/modules`` with a ``module-registry.json`` describing
    one active module ("react-native-dev") and one suspended module
    ("ocr-pipeline"), plus a manifest.json and patterns.md for the active
    module only.

    Returns:
        The populated modules directory path.
    """
    modules = tmp_path / "modules"
    modules.mkdir()

    # Registry
    registry = {
        "version": 1,
        "modules": {
            "react-native-dev": {
                "status": "active",
                "priority": 85,
                "activatedAt": "2026-02-20T10:00:00Z",
                "lastTriggered": None,
                "locked": False,
            },
            "ocr-pipeline": {
                "status": "suspended",
                "priority": 70,
                "activatedAt": None,
                "lastTriggered": None,
                "locked": False,
            },
        },
    }
    (modules / "module-registry.json").write_text(
        json.dumps(registry, indent=2), encoding="utf-8"
    )

    # Module directories — only the active module gets on-disk content.
    rn_dir = modules / "react-native-dev"
    rn_dir.mkdir()
    (rn_dir / "manifest.json").write_text(json.dumps({
        "id": "react-native-dev",
        "name": "React Native Development",
        "description": "Patterns for RN development",
        "version": "1.0.0",
        "priority": {"default": 85, "range": [50, 100]},
        "triggers": {},
        "locked": False,
    }, indent=2), encoding="utf-8")
    (rn_dir / "patterns.md").write_text(
        "# React Native Development\n\n## Established Patterns\n- Use Hermes engine\n",
        encoding="utf-8",
    )

    return modules
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@pytest.fixture
def sample_log_entry():
    """A sample playbook-log entry for testing.

    Returns:
        dict: one non-idle tick record with a high-priority signal, a
        recommended action, feedback scores, effectiveness counters,
        playbook changes, and a non-skipped output block.
    """
    return {
        "ts": "2026-02-28T10:00:00Z",
        "idle": False,
        "signals": [{"description": "OCR pipeline backpressure detected", "priority": "high"}],
        "recommendedAction": {"action": "sessions_spawn", "task": "Debug OCR backpressure", "confidence": 0.8},
        "feedbackScores": {"avg": 0.35, "high": ["OCR fix"], "low": []},
        "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
        "curateDirective": "normal",
        "interpretation": "",
        "playbookChanges": {
            "changes": {"added": ["new pattern"], "pruned": [], "promoted": []},
            "staleItemActions": [],
            "playbookLines": 12,
        },
        "output": {
            "skip": False,
            "suggestion": "Consider frame batching for OCR pipeline",
            "insight": "Evening sessions correlate with exploratory work patterns",
            "totalChars": 95,
        },
    }
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Tests for playbook_curator.py: extract_header_footer() and reassemble_playbook()."""
|
|
2
|
+
|
|
3
|
+
from playbook_curator import extract_header_footer, reassemble_playbook
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestExtractHeaderFooter:
    """extract_header_footer() splits a playbook into (header, body, footer)."""

    def test_standard_playbook(self):
        doc = "\n".join([
            "<!-- mining-index: 2026-02-21 -->",
            "# Playbook",
            "- Pattern 1",
            "- Pattern 2",
            "<!-- effectiveness: rate=0.63 -->",
            "",
        ])
        header, body, footer = extract_header_footer(doc)
        assert "mining-index" in header
        assert "# Playbook" in body
        assert "- Pattern 1" in body
        assert "effectiveness" in footer

    def test_no_header(self):
        header, body, footer = extract_header_footer(
            "# Playbook\n- Pattern 1\n<!-- effectiveness: rate=0.5 -->\n"
        )
        assert header == ""
        assert "# Playbook" in body
        assert "effectiveness" in footer

    def test_no_footer(self):
        header, body, footer = extract_header_footer(
            "<!-- mining-index: 2026-02-21 -->\n# Playbook\n- Pattern 1\n"
        )
        assert "mining-index" in header
        assert "# Playbook" in body
        assert footer == ""

    def test_empty_playbook(self):
        header, body, footer = extract_header_footer("")
        assert (header, body, footer) == ("", "", "")

    def test_body_lines_exclude_comments(self):
        doc = "\n".join([
            "<!-- mining-index: 2026-02-21 -->",
            "line1",
            "line2",
            "line3",
            "<!-- effectiveness: rate=0.5 -->",
            "",
        ])
        _, body, _ = extract_header_footer(doc)
        # Only the three content lines belong to the body.
        content = [ln for ln in body.strip().splitlines() if ln.strip()]
        assert len(content) == 3
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class TestReassemblePlaybook:
    """reassemble_playbook() rejoins the parts and caps the body at 50 lines."""

    def test_standard_reassembly(self):
        out = reassemble_playbook(
            "<!-- mining-index: 2026-02-21 -->",
            "# Playbook\n- Pattern 1",
            "<!-- effectiveness: rate=0.5 -->",
        )
        for fragment in ("mining-index", "# Playbook", "effectiveness"):
            assert fragment in out
        assert out.endswith("\n")

    def test_body_limit_enforced(self):
        oversized = "\n".join(f"- Pattern {i}" for i in range(60))
        out = reassemble_playbook("", oversized, "")
        # Count non-empty lines in body section
        kept = [ln for ln in out.strip().splitlines() if ln.strip()]
        assert len(kept) <= 50

    def test_empty_parts_handled(self):
        out = reassemble_playbook("", "body content", "")
        assert "body content" in out
        assert out.endswith("\n")

    def test_all_parts_empty(self):
        assert reassemble_playbook("", "", "") == "\n"

    def test_50_lines_exactly(self):
        body = "\n".join(f"- Pattern {i}" for i in range(50))
        out = reassemble_playbook("<!-- header -->", body, "<!-- footer -->")
        # Exactly at the limit — nothing should be dropped.
        assert "Pattern 49" in out

    def test_51_lines_truncated(self):
        body = "\n".join(f"- Pattern {i}" for i in range(51))
        out = reassemble_playbook("<!-- header -->", body, "<!-- footer -->")
        # The 51st line (Pattern 50) is trimmed; the 50th survives.
        assert "Pattern 50" not in out
        assert "Pattern 49" in out