@geravant/sinain 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/.env.example +4 -2
  2. package/config-shared.js +1 -0
  3. package/package.json +4 -1
  4. package/sinain-agent/run.sh +36 -4
  5. package/sinain-core/package-lock.json +963 -0
  6. package/sinain-core/package.json +1 -0
  7. package/sinain-core/src/buffers/feed-buffer.ts +34 -0
  8. package/sinain-core/src/embedding/service.ts +66 -0
  9. package/sinain-core/src/index.ts +65 -17
  10. package/sinain-core/src/learning/local-curation.ts +137 -7
  11. package/sinain-core/src/server.ts +31 -0
  12. package/sinain-memory/README.md +105 -0
  13. package/sinain-memory/embed_client.py +117 -0
  14. package/sinain-memory/graph_query.py +269 -18
  15. package/sinain-memory/knowledge_integrator.py +551 -74
  16. package/sinain-memory/memory-config.json +1 -1
  17. package/sinain-memory/session_distiller.py +43 -19
  18. package/sinain-memory/triplestore.py +60 -0
  19. package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
  20. package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
  21. package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
  22. package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
  23. package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
  24. package/sinain-memory/eval/__init__.py +0 -0
  25. package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
  26. package/sinain-memory/eval/assertions.py +0 -267
  27. package/sinain-memory/eval/benchmarks/__init__.py +0 -0
  28. package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
  29. package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
  30. package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
  31. package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
  32. package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
  33. package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
  34. package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
  35. package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
  36. package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
  37. package/sinain-memory/eval/benchmarks/base_adapter.py +0 -43
  38. package/sinain-memory/eval/benchmarks/config.py +0 -23
  39. package/sinain-memory/eval/benchmarks/evaluate.py +0 -146
  40. package/sinain-memory/eval/benchmarks/ingest.py +0 -152
  41. package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
  42. package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
  43. package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
  44. package/sinain-memory/eval/benchmarks/judges/qa_judge.py +0 -81
  45. package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +0 -177
  46. package/sinain-memory/eval/benchmarks/query.py +0 -172
  47. package/sinain-memory/eval/benchmarks/report.py +0 -87
  48. package/sinain-memory/eval/benchmarks/runner.py +0 -276
  49. package/sinain-memory/eval/judges/__init__.py +0 -0
  50. package/sinain-memory/eval/judges/base_judge.py +0 -61
  51. package/sinain-memory/eval/judges/curation_judge.py +0 -46
  52. package/sinain-memory/eval/judges/insight_judge.py +0 -48
  53. package/sinain-memory/eval/judges/mining_judge.py +0 -42
  54. package/sinain-memory/eval/judges/signal_judge.py +0 -45
  55. package/sinain-memory/eval/retrieval_benchmark.jsonl +0 -12
  56. package/sinain-memory/eval/retrieval_evaluator.py +0 -186
  57. package/sinain-memory/eval/schemas.py +0 -247
  58. package/sinain-memory/tests/__init__.py +0 -0
  59. package/sinain-memory/tests/conftest.py +0 -189
  60. package/sinain-memory/tests/test_curator_helpers.py +0 -94
  61. package/sinain-memory/tests/test_embedder.py +0 -210
  62. package/sinain-memory/tests/test_extract_json.py +0 -124
  63. package/sinain-memory/tests/test_feedback_computation.py +0 -121
  64. package/sinain-memory/tests/test_miner_helpers.py +0 -71
  65. package/sinain-memory/tests/test_module_management.py +0 -458
  66. package/sinain-memory/tests/test_parsers.py +0 -96
  67. package/sinain-memory/tests/test_tick_evaluator.py +0 -430
  68. package/sinain-memory/tests/test_triple_extractor.py +0 -255
  69. package/sinain-memory/tests/test_triple_ingest.py +0 -191
  70. package/sinain-memory/tests/test_triple_migrate.py +0 -138
  71. package/sinain-memory/tests/test_triplestore.py +0 -248
@@ -1,255 +0,0 @@
1
- """Tests for triple_extractor.py — 3-tier extraction."""
2
-
3
- import pytest
4
- from triplestore import TripleStore
5
- from triple_extractor import TripleExtractor, Triple, _make_slug
6
-
7
-
8
- @pytest.fixture
9
- def store(tmp_path):
10
- s = TripleStore(str(tmp_path / "test.db"))
11
- yield s
12
- s.close()
13
-
14
-
15
- @pytest.fixture
16
- def extractor(store):
17
- return TripleExtractor(store)
18
-
19
-
20
- @pytest.fixture
21
- def store_with_vocab(store):
22
- """Store pre-populated with concept vocabulary."""
23
- tx = store.begin_tx("setup")
24
- store.assert_triple(tx, "concept:ocr", "name", "OCR")
25
- store.assert_triple(tx, "concept:flutter", "name", "Flutter")
26
- store.assert_triple(tx, "concept:react-native", "name", "React Native")
27
- return store
28
-
29
-
30
- # ----- Slug generation -----
31
-
32
- class TestMakeSlug:
33
- def test_basic(self):
34
- assert _make_slug("Frame Batching") == "frame-batching"
35
-
36
- def test_special_chars(self):
37
- assert _make_slug("OCR pipeline (v2.0)") == "ocr-pipeline-v2-0"
38
-
39
- def test_leading_trailing(self):
40
- assert _make_slug(" --hello-- ") == "hello"
41
-
42
- def test_max_length(self):
43
- long = "a" * 100
44
- assert len(_make_slug(long)) <= 80
45
-
46
- def test_empty(self):
47
- assert _make_slug("") == ""
48
- assert _make_slug("---") == ""
49
-
50
-
51
- # ----- Tier 1: Signal extraction -----
52
-
53
- class TestExtractSignal:
54
- def test_basic_signal(self, extractor):
55
- data = {
56
- "sessionSummary": "Debugging OCR pipeline",
57
- "idle": False,
58
- "signals": [
59
- {"description": "OCR pipeline backpressure", "priority": "high"}
60
- ],
61
- "recommendedAction": {"action": "sessions_spawn", "task": "Debug", "confidence": 0.8},
62
- "output": {"suggestion": "Try frame batching", "insight": "Evening pattern"},
63
- }
64
- triples = extractor.extract_signal(data, "2026-03-01T10:00:00Z")
65
-
66
- # Check entity creation
67
- entity_attrs = {t.attribute for t in triples if t.entity_id == "signal:2026-03-01T10:00:00Z"}
68
- assert "summary" in entity_attrs
69
- assert "description" in entity_attrs
70
- assert "priority" in entity_attrs
71
- assert "action" in entity_attrs
72
- assert "suggestion" in entity_attrs
73
-
74
- def test_signal_with_playbook_changes(self, extractor):
75
- data = {
76
- "signals": [],
77
- "playbookChanges": {
78
- "changes": {"added": ["Use frame batching for OCR"], "pruned": [], "promoted": []}
79
- },
80
- }
81
- triples = extractor.extract_signal(data, "2026-03-01")
82
- pattern_triples = [t for t in triples if t.entity_id.startswith("pattern:")]
83
- assert len(pattern_triples) > 0
84
- assert any(t.attribute == "text" and "frame batching" in t.value.lower() for t in pattern_triples)
85
-
86
- def test_signal_empty(self, extractor):
87
- triples = extractor.extract_signal({"signals": []}, "2026-03-01")
88
- assert isinstance(triples, list)
89
-
90
- def test_signal_concepts_extracted(self, extractor):
91
- data = {"signals": [{"description": "OCR Pipeline stall detected", "priority": "high"}]}
92
- triples = extractor.extract_signal(data, "2026-03-01")
93
- concept_triples = [t for t in triples if t.entity_id.startswith("concept:")]
94
- assert len(concept_triples) > 0 # "OCR" or "Pipeline" should be extracted
95
-
96
- def test_signal_concept_refs(self, extractor):
97
- data = {"signals": [{"description": "React Native bridge crash", "priority": "high"}]}
98
- triples = extractor.extract_signal(data, "2026-03-01")
99
- ref_triples = [t for t in triples if t.value_type == "ref" and t.attribute == "related_to"]
100
- assert len(ref_triples) > 0
101
-
102
-
103
- # ----- Tier 1: Session extraction -----
104
-
105
- class TestExtractSession:
106
- def test_basic_session(self, extractor):
107
- data = {
108
- "ts": "2026-03-01T09:00:00Z",
109
- "summary": "Implemented OCR batch processing",
110
- "toolsUsed": ["Read", "Edit", "Bash"],
111
- "durationMs": 120000,
112
- }
113
- triples = extractor.extract_session(data)
114
-
115
- # Check session entity
116
- session_triples = [t for t in triples if t.entity_id.startswith("session:")]
117
- assert any(t.attribute == "summary" for t in session_triples)
118
- assert any(t.attribute == "duration_ms" for t in session_triples)
119
-
120
- # Check tool refs
121
- tool_triples = [t for t in triples if t.entity_id.startswith("tool:")]
122
- assert len(tool_triples) >= 3
123
-
124
- def test_session_tool_refs(self, extractor):
125
- data = {
126
- "ts": "2026-03-01",
127
- "toolsUsed": ["Bash"],
128
- }
129
- triples = extractor.extract_session(data)
130
- ref_triples = [t for t in triples if t.value_type == "ref" and t.attribute == "used_tool"]
131
- assert len(ref_triples) == 1
132
-
133
-
134
- # ----- Tier 1: Mining extraction -----
135
-
136
- class TestExtractMining:
137
- def test_new_patterns(self, extractor):
138
- data = {
139
- "newPatterns": ["Frame dropping improves OCR accuracy", "Use batch processing"],
140
- "preferences": ["User prefers minimal configs"],
141
- "contradictions": [],
142
- }
143
- triples = extractor.extract_mining(data)
144
- pattern_triples = [t for t in triples if t.attribute == "text"]
145
- assert len(pattern_triples) >= 2
146
-
147
- def test_mining_preferences(self, extractor):
148
- data = {"newPatterns": [], "preferences": ["User likes concise output"]}
149
- triples = extractor.extract_mining(data)
150
- pref_triples = [t for t in triples if t.attribute == "pattern_type" and t.value == "preference"]
151
- assert len(pref_triples) == 1
152
-
153
- def test_mining_contradictions(self, extractor):
154
- data = {"newPatterns": [], "contradictions": ["Playbook says X but observation shows Y"]}
155
- triples = extractor.extract_mining(data)
156
- contra = [t for t in triples if t.attribute == "pattern_type" and t.value == "contradiction"]
157
- assert len(contra) == 1
158
-
159
- def test_mining_empty(self, extractor):
160
- triples = extractor.extract_mining({"newPatterns": [], "preferences": [], "contradictions": []})
161
- assert triples == []
162
-
163
-
164
- # ----- Tier 2: Playbook extraction (regex) -----
165
-
166
- class TestExtractPlaybook:
167
- def test_patterns_with_scores(self, extractor):
168
- text = (
169
- "## Established Patterns\n"
170
- "- OCR pipeline stalls when queue depth > 10 (score: 0.8)\n"
171
- "- Use frame batching for throughput (score: 0.6)\n"
172
- "- Spawn research agent for new frameworks\n"
173
- )
174
- triples = extractor.extract_playbook(text)
175
- text_triples = [t for t in triples if t.attribute == "text"]
176
- assert len(text_triples) >= 3
177
-
178
- score_triples = [t for t in triples if t.attribute == "score"]
179
- assert len(score_triples) == 2
180
- assert any(t.value == "0.8" for t in score_triples)
181
-
182
- def test_skips_comments_and_metadata(self, extractor):
183
- text = "- <!-- mining-index: 2026-02-21 -->\n- [since: 2026-02-18] stale entry\n- Real pattern here\n"
184
- triples = extractor.extract_playbook(text)
185
- text_triples = [t for t in triples if t.attribute == "text"]
186
- # Should only get "Real pattern here"
187
- assert any("Real pattern" in t.value for t in text_triples)
188
- assert not any("mining-index" in t.value for t in text_triples)
189
-
190
- def test_playbook_source_tagged(self, extractor):
191
- text = "- Use batch processing\n"
192
- triples = extractor.extract_playbook(text)
193
- source_triples = [t for t in triples if t.attribute == "source"]
194
- assert any(t.value == "playbook" for t in source_triples)
195
-
196
-
197
- # ----- Tier 2: Module extraction -----
198
-
199
- class TestExtractModule:
200
- def test_module_manifest_and_patterns(self, extractor):
201
- manifest = {
202
- "name": "React Native Dev",
203
- "description": "RN development patterns",
204
- "version": "1.0.0",
205
- }
206
- patterns_text = "## Established\n- Use Hermes engine for Android\n- Enable Fast Refresh\n"
207
- triples = extractor.extract_module("react-native-dev", manifest, patterns_text)
208
-
209
- module_triples = [t for t in triples if t.entity_id == "module:react-native-dev"]
210
- assert any(t.attribute == "name" and t.value == "React Native Dev" for t in module_triples)
211
- assert any(t.attribute == "description" for t in module_triples)
212
-
213
- # Patterns should link back to module
214
- belongs_triples = [t for t in triples if t.attribute == "belongs_to" and t.value == "module:react-native-dev"]
215
- assert len(belongs_triples) >= 2
216
-
217
-
218
- # ----- Concept extraction -----
219
-
220
- class TestExtractConcepts:
221
- def test_vocab_cache_match(self, store_with_vocab):
222
- extractor = TripleExtractor(store_with_vocab)
223
- triples = extractor.extract_concepts("Working on OCR pipeline improvements")
224
- concept_ids = {t.entity_id for t in triples}
225
- assert "concept:ocr" in concept_ids
226
-
227
- def test_regex_capitalized_phrases(self, extractor):
228
- # Multi-word capitalized phrases get captured (including leading caps)
229
- triples = extractor.extract_concepts("Debugging React Native bridge issues")
230
- concept_ids = {t.entity_id for t in triples}
231
- # "Debugging React Native" is captured as one phrase since all words are capitalized
232
- assert any("react-native" in cid for cid in concept_ids)
233
-
234
- def test_regex_acronyms(self, extractor):
235
- triples = extractor.extract_concepts("The API uses JSON over HTTP")
236
- concept_ids = {t.entity_id for t in triples}
237
- assert any("api" in cid for cid in concept_ids)
238
- assert any("json" in cid for cid in concept_ids)
239
- assert any("http" in cid for cid in concept_ids)
240
-
241
- def test_regex_technical_terms(self, extractor):
242
- triples = extractor.extract_concepts("Check the frame-batching pipeline and error-handling logic")
243
- concept_ids = {t.entity_id for t in triples}
244
- assert "concept:frame-batching" in concept_ids
245
- assert "concept:error-handling" in concept_ids
246
-
247
- def test_short_text_no_llm(self, extractor):
248
- # Short text should not trigger LLM fallback
249
- triples = extractor.extract_concepts("Hello")
250
- # Should return empty or just regex matches, no LLM call
251
- assert isinstance(triples, list)
252
-
253
- def test_empty_text(self, extractor):
254
- triples = extractor.extract_concepts("")
255
- assert triples == []
@@ -1,191 +0,0 @@
1
- """Tests for triple_ingest.py — CLI entry point."""
2
-
3
- import json
4
- import subprocess
5
- import sys
6
- from pathlib import Path
7
-
8
- import pytest
9
- from triplestore import TripleStore
10
-
11
-
12
- KOOG_DIR = Path(__file__).resolve().parent.parent
13
-
14
-
15
- @pytest.fixture
16
- def memory_dir(tmp_path):
17
- """Create a temporary memory directory with playbook."""
18
- mem = tmp_path / "memory"
19
- mem.mkdir()
20
- (mem / "sinain-playbook.md").write_text(
21
- "## Established Patterns\n"
22
- "- OCR pipeline stalls when queue depth > 10 (score: 0.8)\n"
23
- "- Use frame batching for throughput (score: 0.6)\n"
24
- "- Spawn research agent proactively\n",
25
- encoding="utf-8",
26
- )
27
- return str(mem)
28
-
29
-
30
- @pytest.fixture
31
- def modules_dir(tmp_path):
32
- """Create a temporary modules directory with a test module."""
33
- modules = tmp_path / "modules"
34
- modules.mkdir()
35
- mod_dir = modules / "test-mod"
36
- mod_dir.mkdir()
37
- (mod_dir / "manifest.json").write_text(json.dumps({
38
- "name": "Test Module",
39
- "description": "Testing patterns",
40
- "version": "1.0.0",
41
- }))
42
- (mod_dir / "patterns.md").write_text("## Patterns\n- Test pattern one\n- Test pattern two\n")
43
- return str(modules)
44
-
45
-
46
- class TestSignalIngest:
47
- def test_signal_ingest_creates_db(self, memory_dir):
48
- signal = json.dumps({
49
- "signals": [{"description": "OCR stall", "priority": "high"}],
50
- "output": {"suggestion": "Try batching"},
51
- })
52
- result = subprocess.run(
53
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
54
- "--memory-dir", memory_dir,
55
- "--signal-result", signal,
56
- "--tick-ts", "2026-03-01T10:00:00Z"],
57
- capture_output=True, text=True, timeout=30,
58
- )
59
- assert result.returncode == 0, f"stderr: {result.stderr}"
60
- data = json.loads(result.stdout.strip())
61
- assert data["ingested"] > 0
62
- assert data["source"] == "signal"
63
- assert "txId" in data
64
- # DB should exist
65
- assert Path(memory_dir, "triplestore.db").exists()
66
-
67
- def test_signal_ingest_requires_tick_ts(self, memory_dir):
68
- result = subprocess.run(
69
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
70
- "--memory-dir", memory_dir,
71
- "--signal-result", '{"signals":[]}'],
72
- capture_output=True, text=True, timeout=10,
73
- )
74
- assert result.returncode != 0
75
-
76
-
77
- class TestPlaybookIngest:
78
- def test_playbook_ingest(self, memory_dir):
79
- result = subprocess.run(
80
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
81
- "--memory-dir", memory_dir,
82
- "--ingest-playbook"],
83
- capture_output=True, text=True, timeout=30,
84
- )
85
- assert result.returncode == 0, f"stderr: {result.stderr}"
86
- data = json.loads(result.stdout.strip())
87
- assert data["ingested"] > 0
88
- assert data["source"] == "playbook"
89
-
90
-
91
- class TestSessionIngest:
92
- def test_session_ingest(self, memory_dir):
93
- session = json.dumps({
94
- "ts": "2026-03-01T09:00:00Z",
95
- "summary": "Debugging OCR pipeline issues",
96
- "toolsUsed": ["Read", "Edit"],
97
- })
98
- result = subprocess.run(
99
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
100
- "--memory-dir", memory_dir,
101
- "--ingest-session", session],
102
- capture_output=True, text=True, timeout=30,
103
- )
104
- assert result.returncode == 0, f"stderr: {result.stderr}"
105
- data = json.loads(result.stdout.strip())
106
- assert data["ingested"] > 0
107
- assert data["source"] == "session"
108
-
109
-
110
- class TestMiningIngest:
111
- def test_mining_ingest(self, memory_dir):
112
- mining = json.dumps({
113
- "newPatterns": ["Frame dropping improves OCR"],
114
- "preferences": ["User prefers minimal output"],
115
- "contradictions": [],
116
- })
117
- result = subprocess.run(
118
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
119
- "--memory-dir", memory_dir,
120
- "--ingest-mining", mining],
121
- capture_output=True, text=True, timeout=30,
122
- )
123
- assert result.returncode == 0, f"stderr: {result.stderr}"
124
- data = json.loads(result.stdout.strip())
125
- assert data["ingested"] > 0
126
- assert data["source"] == "mining"
127
-
128
-
129
- class TestModuleIngest:
130
- def test_module_ingest(self, memory_dir, modules_dir):
131
- result = subprocess.run(
132
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
133
- "--memory-dir", memory_dir,
134
- "--ingest-module", "test-mod",
135
- "--modules-dir", modules_dir],
136
- capture_output=True, text=True, timeout=30,
137
- )
138
- assert result.returncode == 0, f"stderr: {result.stderr}"
139
- data = json.loads(result.stdout.strip())
140
- assert data["ingested"] > 0
141
- assert data["source"] == "module"
142
- assert data["module"] == "test-mod"
143
-
144
- def test_module_requires_modules_dir(self, memory_dir):
145
- result = subprocess.run(
146
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
147
- "--memory-dir", memory_dir,
148
- "--ingest-module", "test-mod"],
149
- capture_output=True, text=True, timeout=10,
150
- )
151
- assert result.returncode != 0
152
-
153
-
154
- class TestRetractModule:
155
- def test_retract_module(self, memory_dir, modules_dir):
156
- # First ingest
157
- subprocess.run(
158
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
159
- "--memory-dir", memory_dir,
160
- "--ingest-module", "test-mod",
161
- "--modules-dir", modules_dir],
162
- capture_output=True, text=True, timeout=30,
163
- )
164
- # Then retract
165
- result = subprocess.run(
166
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
167
- "--memory-dir", memory_dir,
168
- "--retract-module", "test-mod"],
169
- capture_output=True, text=True, timeout=30,
170
- )
171
- assert result.returncode == 0, f"stderr: {result.stderr}"
172
- data = json.loads(result.stdout.strip())
173
- assert data["source"] == "module"
174
- assert data["module"] == "test-mod"
175
-
176
-
177
- class TestOutputFormat:
178
- def test_output_is_valid_json(self, memory_dir):
179
- signal = json.dumps({"signals": []})
180
- result = subprocess.run(
181
- [sys.executable, str(KOOG_DIR / "triple_ingest.py"),
182
- "--memory-dir", memory_dir,
183
- "--signal-result", signal,
184
- "--tick-ts", "2026-03-01"],
185
- capture_output=True, text=True, timeout=30,
186
- )
187
- assert result.returncode == 0
188
- data = json.loads(result.stdout.strip())
189
- assert isinstance(data, dict)
190
- assert "ingested" in data
191
- assert "source" in data
@@ -1,138 +0,0 @@
1
- """Tests for triple_migrate.py — historical data migration to triple store."""
2
-
3
- import json
4
- import sys
5
- from pathlib import Path
6
-
7
- import pytest
8
-
9
- # Ensure sinain-koog source is importable
10
- KOOG_DIR = Path(__file__).resolve().parent.parent
11
- if str(KOOG_DIR) not in sys.path:
12
- sys.path.insert(0, str(KOOG_DIR))
13
-
14
- from triple_extractor import TripleExtractor
15
- from triple_migrate import (
16
- MIGRATION_ENTITY,
17
- migrate_daily_memories,
18
- migrate_modules,
19
- migrate_playbook,
20
- migrate_playbook_logs,
21
- )
22
- from triplestore import TripleStore
23
-
24
-
25
- @pytest.fixture
26
- def store(tmp_path):
27
- db = tmp_path / "triplestore.db"
28
- return TripleStore(str(db))
29
-
30
-
31
- @pytest.fixture
32
- def extractor(store):
33
- return TripleExtractor(store)
34
-
35
-
36
- class TestMigratePlaybook:
37
- def test_extracts_patterns(self, extractor, store, tmp_memory_dir):
38
- count = migrate_playbook(extractor, store, str(tmp_memory_dir))
39
- assert count > 0
40
- # Should find at least the OCR and research-agent patterns
41
- patterns = store.entities_with_attr("text")
42
- pattern_ids = [eid for eid, _ in patterns if eid.startswith("pattern:")]
43
- assert len(pattern_ids) >= 2
44
-
45
- def test_empty_playbook_returns_zero(self, extractor, store, tmp_path):
46
- memory = tmp_path / "empty_memory"
47
- memory.mkdir()
48
- count = migrate_playbook(extractor, store, str(memory))
49
- assert count == 0
50
-
51
-
52
- class TestMigrateModules:
53
- def test_active_only(self, extractor, store, tmp_modules_dir):
54
- mod_count, triple_count = migrate_modules(extractor, store, str(tmp_modules_dir))
55
- # Only react-native-dev is active; ocr-pipeline is suspended
56
- assert mod_count == 1
57
- assert triple_count > 0
58
- # Module entity should exist
59
- ent = store.entity("module:react-native-dev")
60
- assert "name" in ent
61
- assert ent["name"] == ["React Native Development"]
62
-
63
- def test_no_registry(self, extractor, store, tmp_path):
64
- mod_count, triple_count = migrate_modules(extractor, store, str(tmp_path / "nope"))
65
- assert mod_count == 0
66
- assert triple_count == 0
67
-
68
-
69
- class TestMigratePlaybookLogs:
70
- def test_skips_idle_no_signals(self, extractor, store, tmp_memory_dir):
71
- """Idle entries with empty signals should be skipped."""
72
- file_count, triple_count = migrate_playbook_logs(
73
- extractor, store, str(tmp_memory_dir)
74
- )
75
- assert file_count >= 1
76
- assert triple_count > 0
77
- # The second entry in conftest is idle=True + signals=[], should be skipped
78
- # First entry has ts=2026-02-28T10:00:00Z — should be ingested
79
- ent = store.entity("signal:2026-02-28T10:00:00Z")
80
- assert ent # non-idle entry should exist
81
- # Idle entry should NOT exist
82
- idle_ent = store.entity("signal:2026-02-28T10:30:00Z")
83
- assert not idle_ent
84
-
85
- def test_no_log_dir(self, extractor, store, tmp_path):
86
- memory = tmp_path / "empty"
87
- memory.mkdir()
88
- file_count, triple_count = migrate_playbook_logs(extractor, store, str(memory))
89
- assert file_count == 0
90
-
91
-
92
- class TestMigrateDailyMemories:
93
- def test_creates_observation_entities(self, extractor, store, tmp_memory_dir):
94
- file_count, triple_count = migrate_daily_memories(
95
- extractor, store, str(tmp_memory_dir)
96
- )
97
- # conftest creates 3 daily memory files
98
- assert file_count == 3
99
- assert triple_count > 0
100
- # Check one observation entity
101
- ent = store.entity("observation:2026-02-21")
102
- assert "text" in ent
103
- assert "source" in ent
104
- assert ent["source"] == ["daily_memory"]
105
-
106
- def test_truncates_long_text(self, extractor, store, tmp_path):
107
- memory = tmp_path / "memory"
108
- memory.mkdir()
109
- (memory / "2026-01-01.md").write_text("x" * 5000, encoding="utf-8")
110
- migrate_daily_memories(extractor, store, str(memory))
111
- ent = store.entity("observation:2026-01-01")
112
- assert len(ent["text"][0]) == 2000
113
-
114
-
115
- class TestIdempotency:
116
- def test_stamp_prevents_remigration(self, store, tmp_path):
117
- """Once migration:v1 exists, the script should be a no-op."""
118
- tx = store.begin_tx("test")
119
- store.assert_triple(tx, MIGRATION_ENTITY, "completed_at", "2026-03-05T00:00:00Z")
120
- existing = store.entity(MIGRATION_ENTITY)
121
- assert existing # guard entity exists
122
-
123
- def test_full_migration_stamps(self, extractor, store, tmp_memory_dir, tmp_modules_dir):
124
- """Full migration should create the stamp entity."""
125
- migrate_playbook(extractor, store, str(tmp_memory_dir))
126
- migrate_modules(extractor, store, str(tmp_modules_dir))
127
- migrate_playbook_logs(extractor, store, str(tmp_memory_dir))
128
- migrate_daily_memories(extractor, store, str(tmp_memory_dir))
129
-
130
- # Simulate stamping
131
- stats = store.stats()
132
- stamp_tx = store.begin_tx("migration:stamp")
133
- store.assert_triple(stamp_tx, MIGRATION_ENTITY, "completed_at", "2026-03-05T00:00:00Z")
134
- store.assert_triple(stamp_tx, MIGRATION_ENTITY, "total_triples", str(stats["triples"]))
135
-
136
- ent = store.entity(MIGRATION_ENTITY)
137
- assert "completed_at" in ent
138
- assert int(ent["total_triples"][0]) > 0