@geravant/sinain 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/.env.example +4 -2
  2. package/config-shared.js +1 -0
  3. package/package.json +4 -1
  4. package/sinain-agent/run.sh +36 -4
  5. package/sinain-core/package-lock.json +963 -0
  6. package/sinain-core/package.json +1 -0
  7. package/sinain-core/src/buffers/feed-buffer.ts +34 -0
  8. package/sinain-core/src/embedding/service.ts +66 -0
  9. package/sinain-core/src/index.ts +65 -17
  10. package/sinain-core/src/learning/local-curation.ts +137 -7
  11. package/sinain-core/src/server.ts +31 -0
  12. package/sinain-memory/README.md +105 -0
  13. package/sinain-memory/embed_client.py +117 -0
  14. package/sinain-memory/graph_query.py +269 -18
  15. package/sinain-memory/knowledge_integrator.py +551 -74
  16. package/sinain-memory/memory-config.json +1 -1
  17. package/sinain-memory/session_distiller.py +43 -19
  18. package/sinain-memory/triplestore.py +60 -0
  19. package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
  20. package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
  21. package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
  22. package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
  23. package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
  24. package/sinain-memory/eval/__init__.py +0 -0
  25. package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
  26. package/sinain-memory/eval/assertions.py +0 -267
  27. package/sinain-memory/eval/benchmarks/__init__.py +0 -0
  28. package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
  29. package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
  30. package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
  31. package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
  32. package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
  33. package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
  34. package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
  35. package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
  36. package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
  37. package/sinain-memory/eval/benchmarks/base_adapter.py +0 -43
  38. package/sinain-memory/eval/benchmarks/config.py +0 -23
  39. package/sinain-memory/eval/benchmarks/evaluate.py +0 -146
  40. package/sinain-memory/eval/benchmarks/ingest.py +0 -152
  41. package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
  42. package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
  43. package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
  44. package/sinain-memory/eval/benchmarks/judges/qa_judge.py +0 -81
  45. package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +0 -177
  46. package/sinain-memory/eval/benchmarks/query.py +0 -172
  47. package/sinain-memory/eval/benchmarks/report.py +0 -87
  48. package/sinain-memory/eval/benchmarks/runner.py +0 -276
  49. package/sinain-memory/eval/judges/__init__.py +0 -0
  50. package/sinain-memory/eval/judges/base_judge.py +0 -61
  51. package/sinain-memory/eval/judges/curation_judge.py +0 -46
  52. package/sinain-memory/eval/judges/insight_judge.py +0 -48
  53. package/sinain-memory/eval/judges/mining_judge.py +0 -42
  54. package/sinain-memory/eval/judges/signal_judge.py +0 -45
  55. package/sinain-memory/eval/retrieval_benchmark.jsonl +0 -12
  56. package/sinain-memory/eval/retrieval_evaluator.py +0 -186
  57. package/sinain-memory/eval/schemas.py +0 -247
  58. package/sinain-memory/tests/__init__.py +0 -0
  59. package/sinain-memory/tests/conftest.py +0 -189
  60. package/sinain-memory/tests/test_curator_helpers.py +0 -94
  61. package/sinain-memory/tests/test_embedder.py +0 -210
  62. package/sinain-memory/tests/test_extract_json.py +0 -124
  63. package/sinain-memory/tests/test_feedback_computation.py +0 -121
  64. package/sinain-memory/tests/test_miner_helpers.py +0 -71
  65. package/sinain-memory/tests/test_module_management.py +0 -458
  66. package/sinain-memory/tests/test_parsers.py +0 -96
  67. package/sinain-memory/tests/test_tick_evaluator.py +0 -430
  68. package/sinain-memory/tests/test_triple_extractor.py +0 -255
  69. package/sinain-memory/tests/test_triple_ingest.py +0 -191
  70. package/sinain-memory/tests/test_triple_migrate.py +0 -138
  71. package/sinain-memory/tests/test_triplestore.py +0 -248
@@ -1,189 +0,0 @@
1
- """Shared fixtures for sinain-koog pytest test suite."""
2
-
3
- import json
4
- import sys
5
- from datetime import datetime, timezone
6
- from pathlib import Path
7
-
8
- import pytest
9
-
10
- # Ensure sinain-koog source is importable
11
- KOOG_DIR = Path(__file__).resolve().parent.parent
12
- if str(KOOG_DIR) not in sys.path:
13
- sys.path.insert(0, str(KOOG_DIR))
14
-
15
-
16
- @pytest.fixture
17
- def tmp_memory_dir(tmp_path):
18
- """Create a temporary memory directory with sample data."""
19
- memory = tmp_path / "memory"
20
- memory.mkdir()
21
- (memory / "playbook-logs").mkdir()
22
- (memory / "playbook-archive").mkdir()
23
- (memory / "eval-logs").mkdir()
24
- (memory / "eval-reports").mkdir()
25
-
26
- # Sample playbook
27
- playbook = (
28
- "<!-- mining-index: 2026-02-21,2026-02-20 -->\n"
29
- "# Sinain Playbook\n\n"
30
- "## Established Patterns\n"
31
- "- When OCR pipeline stalls, check camera frame queue depth (score: 0.8)\n"
32
- "- When user explores new framework, spawn research agent proactively (score: 0.6)\n\n"
33
- "## Observed\n"
34
- "- User prefers concise Telegram messages over detailed ones\n"
35
- "- Late evening sessions tend to be exploratory/research-heavy\n\n"
36
- "## Stale\n"
37
- "- Flutter overlay rendering glitch on macOS 15 [since: 2026-02-18]\n\n"
38
- "<!-- effectiveness: outputs=8,positive=5,negative=1,neutral=2,rate=0.63,updated=2026-02-21 -->\n"
39
- )
40
- (memory / "sinain-playbook.md").write_text(playbook, encoding="utf-8")
41
-
42
- # Sample daily memory files
43
- for date in ["2026-02-21", "2026-02-20", "2026-02-19"]:
44
- (memory / f"{date}.md").write_text(
45
- f"# {date} Session Notes\n\n- Worked on OCR pipeline\n- Explored Flutter overlays\n",
46
- encoding="utf-8",
47
- )
48
-
49
- # Sample playbook-log entries
50
- today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
51
- entries = [
52
- {
53
- "ts": "2026-02-28T10:00:00Z",
54
- "idle": False,
55
- "sessionSummary": "Debugging OCR pipeline",
56
- "signals": [{"description": "OCR pipeline backpressure detected", "priority": "high"}],
57
- "recommendedAction": {"action": "sessions_spawn", "task": "Debug OCR backpressure", "confidence": 0.8},
58
- "feedbackScores": {"avg": 0.35, "high": ["OCR fix"], "low": []},
59
- "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
60
- "curateDirective": "normal",
61
- "playbookChanges": {
62
- "changes": {"added": ["new pattern"], "pruned": [], "promoted": []},
63
- "staleItemActions": [],
64
- "playbookLines": 12,
65
- },
66
- "output": {
67
- "skip": False,
68
- "suggestion": "Consider frame batching for OCR pipeline",
69
- "insight": "Evening sessions correlate with exploratory work patterns",
70
- "totalChars": 95,
71
- },
72
- "skipped": False,
73
- "actionsConsidered": [
74
- {"action": "sessions_spawn", "reason": "Debug OCR backpressure", "chosen": True}
75
- ],
76
- },
77
- {
78
- "ts": "2026-02-28T10:30:00Z",
79
- "idle": True,
80
- "sessionSummary": "User idle",
81
- "signals": [],
82
- "recommendedAction": None,
83
- "feedbackScores": {"avg": 0, "high": [], "low": []},
84
- "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
85
- "curateDirective": "normal",
86
- "playbookChanges": {
87
- "changes": {"added": [], "pruned": [], "promoted": []},
88
- "staleItemActions": [],
89
- "playbookLines": 12,
90
- },
91
- "output": {
92
- "skip": True,
93
- "skipReason": "User is idle and no new patterns detected in playbook since last analysis",
94
- },
95
- "skipped": True,
96
- "miningResult": {
97
- "findings": "Found cross-day OCR pattern",
98
- "newPatterns": ["frame dropping improves OCR accuracy"],
99
- "contradictions": [],
100
- "preferences": ["user prefers minimal configs"],
101
- "minedSources": ["2026-02-21.md"],
102
- },
103
- "actionsConsidered": [],
104
- },
105
- ]
106
-
107
- log_file = memory / "playbook-logs" / f"{today}.jsonl"
108
- log_file.write_text(
109
- "\n".join(json.dumps(e) for e in entries) + "\n",
110
- encoding="utf-8",
111
- )
112
-
113
- return memory
114
-
115
-
116
- @pytest.fixture
117
- def tmp_modules_dir(tmp_path):
118
- """Create a temporary modules directory with sample module."""
119
- modules = tmp_path / "modules"
120
- modules.mkdir()
121
-
122
- # Registry
123
- registry = {
124
- "version": 1,
125
- "modules": {
126
- "react-native-dev": {
127
- "status": "active",
128
- "priority": 85,
129
- "activatedAt": "2026-02-20T10:00:00Z",
130
- "lastTriggered": None,
131
- "locked": False,
132
- },
133
- "ocr-pipeline": {
134
- "status": "suspended",
135
- "priority": 70,
136
- "activatedAt": None,
137
- "lastTriggered": None,
138
- "locked": False,
139
- },
140
- },
141
- }
142
- (modules / "module-registry.json").write_text(
143
- json.dumps(registry, indent=2), encoding="utf-8"
144
- )
145
-
146
- # Module directories
147
- rn_dir = modules / "react-native-dev"
148
- rn_dir.mkdir()
149
- (rn_dir / "manifest.json").write_text(json.dumps({
150
- "id": "react-native-dev",
151
- "name": "React Native Development",
152
- "description": "Patterns for RN development",
153
- "version": "1.0.0",
154
- "priority": {"default": 85, "range": [50, 100]},
155
- "triggers": {},
156
- "locked": False,
157
- }, indent=2), encoding="utf-8")
158
- (rn_dir / "patterns.md").write_text(
159
- "# React Native Development\n\n## Established Patterns\n- Use Hermes engine\n",
160
- encoding="utf-8",
161
- )
162
-
163
- return modules
164
-
165
-
166
- @pytest.fixture
167
- def sample_log_entry():
168
- """A sample playbook-log entry for testing."""
169
- return {
170
- "ts": "2026-02-28T10:00:00Z",
171
- "idle": False,
172
- "signals": [{"description": "OCR pipeline backpressure detected", "priority": "high"}],
173
- "recommendedAction": {"action": "sessions_spawn", "task": "Debug OCR backpressure", "confidence": 0.8},
174
- "feedbackScores": {"avg": 0.35, "high": ["OCR fix"], "low": []},
175
- "effectiveness": {"outputs": 8, "positive": 5, "negative": 1, "neutral": 2, "rate": 0.63},
176
- "curateDirective": "normal",
177
- "interpretation": "",
178
- "playbookChanges": {
179
- "changes": {"added": ["new pattern"], "pruned": [], "promoted": []},
180
- "staleItemActions": [],
181
- "playbookLines": 12,
182
- },
183
- "output": {
184
- "skip": False,
185
- "suggestion": "Consider frame batching for OCR pipeline",
186
- "insight": "Evening sessions correlate with exploratory work patterns",
187
- "totalChars": 95,
188
- },
189
- }
@@ -1,94 +0,0 @@
1
- """Tests for playbook_curator.py: extract_header_footer() and reassemble_playbook()."""
2
-
3
- from playbook_curator import extract_header_footer, reassemble_playbook
4
-
5
-
6
- class TestExtractHeaderFooter:
7
- def test_standard_playbook(self):
8
- playbook = (
9
- "<!-- mining-index: 2026-02-21 -->\n"
10
- "# Playbook\n"
11
- "- Pattern 1\n"
12
- "- Pattern 2\n"
13
- "<!-- effectiveness: rate=0.63 -->\n"
14
- )
15
- header, body, footer = extract_header_footer(playbook)
16
- assert "mining-index" in header
17
- assert "# Playbook" in body
18
- assert "- Pattern 1" in body
19
- assert "effectiveness" in footer
20
-
21
- def test_no_header(self):
22
- playbook = "# Playbook\n- Pattern 1\n<!-- effectiveness: rate=0.5 -->\n"
23
- header, body, footer = extract_header_footer(playbook)
24
- assert header == ""
25
- assert "# Playbook" in body
26
- assert "effectiveness" in footer
27
-
28
- def test_no_footer(self):
29
- playbook = "<!-- mining-index: 2026-02-21 -->\n# Playbook\n- Pattern 1\n"
30
- header, body, footer = extract_header_footer(playbook)
31
- assert "mining-index" in header
32
- assert "# Playbook" in body
33
- assert footer == ""
34
-
35
- def test_empty_playbook(self):
36
- header, body, footer = extract_header_footer("")
37
- assert header == ""
38
- assert body == ""
39
- assert footer == ""
40
-
41
- def test_body_lines_exclude_comments(self):
42
- playbook = (
43
- "<!-- mining-index: 2026-02-21 -->\n"
44
- "line1\nline2\nline3\n"
45
- "<!-- effectiveness: rate=0.5 -->\n"
46
- )
47
- header, body, footer = extract_header_footer(playbook)
48
- body_lines = [l for l in body.strip().splitlines() if l.strip()]
49
- assert len(body_lines) == 3
50
-
51
-
52
- class TestReassemblePlaybook:
53
- def test_standard_reassembly(self):
54
- result = reassemble_playbook(
55
- "<!-- mining-index: 2026-02-21 -->",
56
- "# Playbook\n- Pattern 1",
57
- "<!-- effectiveness: rate=0.5 -->",
58
- )
59
- assert "mining-index" in result
60
- assert "# Playbook" in result
61
- assert "effectiveness" in result
62
- assert result.endswith("\n")
63
-
64
- def test_body_limit_enforced(self):
65
- body_lines = [f"- Pattern {i}" for i in range(60)]
66
- body = "\n".join(body_lines)
67
- result = reassemble_playbook("", body, "")
68
- # Count non-empty lines in body section
69
- result_body_lines = [l for l in result.strip().splitlines() if l.strip()]
70
- assert len(result_body_lines) <= 50
71
-
72
- def test_empty_parts_handled(self):
73
- result = reassemble_playbook("", "body content", "")
74
- assert "body content" in result
75
- assert result.endswith("\n")
76
-
77
- def test_all_parts_empty(self):
78
- result = reassemble_playbook("", "", "")
79
- assert result == "\n"
80
-
81
- def test_50_lines_exactly(self):
82
- body_lines = [f"- Pattern {i}" for i in range(50)]
83
- body = "\n".join(body_lines)
84
- result = reassemble_playbook("<!-- header -->", body, "<!-- footer -->")
85
- # Should not truncate — 50 is exactly the limit
86
- assert "Pattern 49" in result
87
-
88
- def test_51_lines_truncated(self):
89
- body_lines = [f"- Pattern {i}" for i in range(51)]
90
- body = "\n".join(body_lines)
91
- result = reassemble_playbook("<!-- header -->", body, "<!-- footer -->")
92
- # Line 51 (Pattern 50) should be cut
93
- assert "Pattern 50" not in result
94
- assert "Pattern 49" in result
@@ -1,210 +0,0 @@
1
- """Tests for embedder.py — dual-strategy embeddings + vector search."""
2
-
3
- import math
4
- import struct
5
- import pytest
6
- from unittest.mock import patch, MagicMock
7
-
8
- from triplestore import TripleStore
9
- from embedder import Embedder, _vec_to_blob, _blob_to_vec, _text_hash, _dot, _norm
10
-
11
-
12
- @pytest.fixture
13
- def db_path(tmp_path):
14
- return str(tmp_path / "test.db")
15
-
16
-
17
- @pytest.fixture
18
- def store(db_path):
19
- s = TripleStore(db_path)
20
- yield s
21
- s.close()
22
-
23
-
24
- @pytest.fixture
25
- def embedder(db_path, store):
26
- """Embedder with a pre-initialized store."""
27
- e = Embedder(db_path)
28
- yield e
29
- e.close()
30
-
31
-
32
- # ----- Utility functions -----
33
-
34
- class TestVecConversion:
35
- def test_roundtrip(self):
36
- vec = [0.1, 0.2, 0.3, -0.5, 1.0]
37
- blob = _vec_to_blob(vec)
38
- recovered = _blob_to_vec(blob)
39
- for a, b in zip(vec, recovered):
40
- assert abs(a - b) < 1e-6
41
-
42
- def test_empty_vec(self):
43
- assert _vec_to_blob([]) == b""
44
- assert _blob_to_vec(b"") == []
45
-
46
-
47
- class TestTextHash:
48
- def test_deterministic(self):
49
- assert _text_hash("hello") == _text_hash("hello")
50
-
51
- def test_different_texts(self):
52
- assert _text_hash("hello") != _text_hash("world")
53
-
54
- def test_length(self):
55
- assert len(_text_hash("test")) == 16
56
-
57
-
58
- class TestDotAndNorm:
59
- def test_dot_product(self):
60
- assert _dot([1, 2, 3], [4, 5, 6]) == 32
61
-
62
- def test_norm(self):
63
- assert abs(_norm([3, 4]) - 5.0) < 1e-6
64
-
65
- def test_unit_vector_norm(self):
66
- assert abs(_norm([1, 0, 0]) - 1.0) < 1e-6
67
-
68
-
69
- # ----- Embedder with mocked API -----
70
-
71
- def _mock_openrouter_response(texts):
72
- """Create a mock OpenRouter embedding response."""
73
- # Generate deterministic fake embeddings (10-dim for testing)
74
- embeddings = []
75
- for i, text in enumerate(texts):
76
- vec = [(hash(text + str(j)) % 1000) / 1000.0 for j in range(10)]
77
- embeddings.append({"index": i, "embedding": vec})
78
- return MagicMock(
79
- status_code=200,
80
- json=lambda: {"data": embeddings},
81
- raise_for_status=lambda: None,
82
- )
83
-
84
-
85
- class TestEmbedOpenRouter:
86
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
87
- @patch("requests.post")
88
- def test_embed_calls_api(self, mock_post, embedder):
89
- mock_post.return_value = _mock_openrouter_response(["hello"])
90
- result = embedder.embed(["hello"])
91
- assert len(result) == 1
92
- assert len(result[0]) == 10
93
- mock_post.assert_called_once()
94
-
95
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
96
- @patch("requests.post")
97
- def test_embed_multiple(self, mock_post, embedder):
98
- texts = ["hello", "world", "test"]
99
- mock_post.return_value = _mock_openrouter_response(texts)
100
- result = embedder.embed(texts)
101
- assert len(result) == 3
102
-
103
- def test_embed_empty(self, embedder):
104
- assert embedder.embed([]) == []
105
-
106
-
107
- class TestEmbedFallback:
108
- @patch.dict("os.environ", {}, clear=True)
109
- def test_no_api_key_tries_local(self, embedder):
110
- """Without API key, should try local model (which may not be installed)."""
111
- result = embedder.embed(["test"])
112
- # Either local model works or returns empty vectors
113
- assert isinstance(result, list)
114
-
115
-
116
- # ----- Store embeddings -----
117
-
118
- class TestStoreEmbeddings:
119
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
120
- @patch("requests.post")
121
- def test_store_and_dedup(self, mock_post, embedder):
122
- mock_post.return_value = _mock_openrouter_response(["pattern text"])
123
- count1 = embedder.store_embeddings({"pattern:test": "pattern text"})
124
- assert count1 == 1
125
-
126
- # Same text → should skip
127
- count2 = embedder.store_embeddings({"pattern:test": "pattern text"})
128
- assert count2 == 0
129
-
130
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
131
- @patch("requests.post")
132
- def test_store_update_on_text_change(self, mock_post, embedder):
133
- mock_post.return_value = _mock_openrouter_response(["v1"])
134
- embedder.store_embeddings({"pattern:test": "v1"})
135
-
136
- mock_post.return_value = _mock_openrouter_response(["v2"])
137
- count = embedder.store_embeddings({"pattern:test": "v2"})
138
- assert count == 1 # re-embedded because text changed
139
-
140
- def test_store_empty(self, embedder):
141
- assert embedder.store_embeddings({}) == 0
142
-
143
-
144
- # ----- Vector search -----
145
-
146
- class TestVectorSearch:
147
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
148
- @patch("requests.post")
149
- def test_search_returns_sorted(self, mock_post, embedder):
150
- # Store some embeddings with known vectors
151
- # We'll bypass embed() and insert directly
152
- from embedder import _vec_to_blob, _now_iso
153
- vecs = {
154
- "pattern:a": [1.0, 0.0, 0.0],
155
- "pattern:b": [0.0, 1.0, 0.0],
156
- "pattern:c": [0.7, 0.7, 0.0], # closest to query [1,0,0] after normalization
157
- }
158
- for eid, vec in vecs.items():
159
- embedder._conn.execute(
160
- "INSERT INTO embeddings (entity_id, vector, text_hash, model, dimensions, created_at) "
161
- "VALUES (?, ?, ?, ?, ?, ?)",
162
- (eid, _vec_to_blob(vec), "hash", "test", len(vec), _now_iso()),
163
- )
164
- embedder._conn.commit()
165
-
166
- results = embedder.vector_search([1.0, 0.0, 0.0], top_k=3)
167
- assert len(results) == 3
168
- # pattern:a should be first (exact match, cosine=1.0)
169
- assert results[0][0] == "pattern:a"
170
- assert abs(results[0][1] - 1.0) < 1e-6
171
-
172
- def test_search_empty_db(self, embedder):
173
- results = embedder.vector_search([1.0, 0.0], top_k=5)
174
- assert results == []
175
-
176
- def test_search_empty_query(self, embedder):
177
- assert embedder.vector_search([], top_k=5) == []
178
-
179
- @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test-key"})
180
- def test_search_with_type_filter(self, embedder):
181
- from embedder import _vec_to_blob, _now_iso
182
- for eid, vec in [
183
- ("pattern:x", [1.0, 0.0]),
184
- ("concept:y", [0.9, 0.1]),
185
- ]:
186
- embedder._conn.execute(
187
- "INSERT INTO embeddings (entity_id, vector, text_hash, model, dimensions, created_at) "
188
- "VALUES (?, ?, ?, ?, ?, ?)",
189
- (eid, _vec_to_blob(vec), "hash", "test", 2, _now_iso()),
190
- )
191
- embedder._conn.commit()
192
-
193
- results = embedder.vector_search([1.0, 0.0], top_k=5, entity_types=["pattern"])
194
- assert len(results) == 1
195
- assert results[0][0] == "pattern:x"
196
-
197
-
198
- # ----- Schema -----
199
-
200
- class TestEmbeddingsSchema:
201
- def test_embeddings_table_exists(self, embedder):
202
- rows = embedder._conn.execute(
203
- "SELECT name FROM sqlite_master WHERE type='table' AND name='embeddings'"
204
- ).fetchall()
205
- assert len(rows) == 1
206
-
207
- def test_embeddings_columns(self, embedder):
208
- info = embedder._conn.execute("PRAGMA table_info(embeddings)").fetchall()
209
- col_names = {row["name"] for row in info}
210
- assert col_names == {"entity_id", "vector", "text_hash", "model", "dimensions", "created_at"}
@@ -1,124 +0,0 @@
1
- """Tests for common.extract_json() — all three extraction stages + truncation repair."""
2
-
3
- import pytest
4
- from common import extract_json
5
-
6
-
7
- class TestStage1DirectParse:
8
- def test_clean_object(self):
9
- result = extract_json('{"signals": [], "idle": true}')
10
- assert result["signals"] == []
11
- assert result["idle"] is True
12
-
13
- def test_clean_array(self):
14
- result = extract_json('[{"a": 1}, {"b": 2}]')
15
- assert len(result) == 2
16
-
17
- def test_whitespace_padded(self):
18
- result = extract_json(' \n {"key": "value"} \n ')
19
- assert result["key"] == "value"
20
-
21
- def test_unicode(self):
22
- result = extract_json('{"msg": "привет мир"}')
23
- assert result["msg"] == "привет мир"
24
-
25
-
26
- class TestStage2MarkdownFences:
27
- def test_fenced_json(self):
28
- result = extract_json('```json\n{"signals": ["x"], "idle": false}\n```')
29
- assert result["signals"] == ["x"]
30
-
31
- def test_fenced_no_lang_tag(self):
32
- result = extract_json('```\n{"findings": "test"}\n```')
33
- assert result["findings"] == "test"
34
-
35
- def test_text_before_fence(self):
36
- result = extract_json('Here is the result:\n```json\n{"skip": true}\n```')
37
- assert result["skip"] is True
38
-
39
- def test_text_after_fence(self):
40
- result = extract_json('```json\n{"skip": false}\n```\nHope this helps!')
41
- assert result["skip"] is False
42
-
43
- def test_text_before_and_after_fence(self):
44
- result = extract_json(
45
- 'I analyzed it.\n```json\n{"curateDirective": "normal"}\n```\nLet me know.'
46
- )
47
- assert result["curateDirective"] == "normal"
48
-
49
-
50
- class TestStage3BalancedBrace:
51
- def test_prose_then_json(self):
52
- result = extract_json('The analysis result is: {"signals": ["a"], "idle": true}')
53
- assert result["signals"] == ["a"]
54
-
55
- def test_json_then_prose(self):
56
- result = extract_json('{"findings": "test"} That is all.')
57
- assert result["findings"] == "test"
58
-
59
- def test_nested_braces(self):
60
- result = extract_json('{"outer": {"inner": {"deep": 1}}, "key": "val"}')
61
- assert result["outer"]["inner"]["deep"] == 1
62
-
63
- def test_strings_with_braces(self):
64
- result = extract_json('{"msg": "use {braces} like this", "ok": true}')
65
- assert result["msg"] == "use {braces} like this"
66
-
67
- def test_prose_embedded_array(self):
68
- # Balanced-brace scanner tries {} before [], so it finds the first object
69
- result = extract_json('Result: [{"a": 1}, {"b": 2}]')
70
- assert isinstance(result, (dict, list))
71
-
72
- def test_escaped_quotes_in_strings(self):
73
- result = extract_json(r'{"msg": "he said \"hello\"", "ok": true}')
74
- assert result["ok"] is True
75
-
76
-
77
- class TestStage4TruncationRepair:
78
- def test_missing_closing_brace(self):
79
- result = extract_json('{"signals": ["a", "b"], "idle": true, "extra": "val')
80
- assert result["signals"] == ["a", "b"]
81
-
82
- def test_missing_two_closing_braces(self):
83
- result = extract_json('{"outer": {"inner": "val"')
84
- assert result["outer"]["inner"] == "val"
85
-
86
- def test_truncated_array_in_object(self):
87
- result = extract_json('{"items": [1, 2, 3')
88
- assert result["items"] == [1, 2, 3]
89
-
90
- def test_trailing_comma(self):
91
- result = extract_json('{"a": 1, "b": 2,')
92
- assert result["a"] == 1
93
-
94
- def test_mid_key_truncation(self):
95
- result = extract_json('{"valid": 1, "partial_ke')
96
- assert result["valid"] == 1
97
-
98
- def test_prose_plus_truncated(self):
99
- result = extract_json(
100
- 'Here is the result: {"findings": "some text", "patterns": ["p1"'
101
- )
102
- assert result["findings"] == "some text"
103
-
104
- def test_truncated_simple_object(self):
105
- result = extract_json('{"unclosed": "brace"')
106
- assert result["unclosed"] == "brace"
107
-
108
-
109
- class TestFailureCases:
110
- def test_no_json_at_all(self):
111
- with pytest.raises(ValueError):
112
- extract_json("This is just plain text with no JSON.")
113
-
114
- def test_empty_string(self):
115
- with pytest.raises(ValueError):
116
- extract_json("")
117
-
118
- def test_no_brackets(self):
119
- with pytest.raises(ValueError):
120
- extract_json("just some random text without any brackets")
121
-
122
- def test_only_whitespace(self):
123
- with pytest.raises(ValueError):
124
- extract_json(" \n\n ")