@geravant/sinain 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +183 -0
- package/index.ts +2096 -0
- package/install.js +155 -0
- package/openclaw.plugin.json +59 -0
- package/package.json +21 -0
- package/sinain-memory/common.py +403 -0
- package/sinain-memory/demo_knowledge_transfer.sh +85 -0
- package/sinain-memory/embedder.py +268 -0
- package/sinain-memory/eval/__init__.py +0 -0
- package/sinain-memory/eval/assertions.py +288 -0
- package/sinain-memory/eval/judges/__init__.py +0 -0
- package/sinain-memory/eval/judges/base_judge.py +61 -0
- package/sinain-memory/eval/judges/curation_judge.py +46 -0
- package/sinain-memory/eval/judges/insight_judge.py +48 -0
- package/sinain-memory/eval/judges/mining_judge.py +42 -0
- package/sinain-memory/eval/judges/signal_judge.py +45 -0
- package/sinain-memory/eval/schemas.py +247 -0
- package/sinain-memory/eval_delta.py +109 -0
- package/sinain-memory/eval_reporter.py +642 -0
- package/sinain-memory/feedback_analyzer.py +221 -0
- package/sinain-memory/git_backup.sh +19 -0
- package/sinain-memory/insight_synthesizer.py +181 -0
- package/sinain-memory/memory/2026-03-01.md +11 -0
- package/sinain-memory/memory/playbook-archive/sinain-playbook-2026-03-01-1418.md +15 -0
- package/sinain-memory/memory/playbook-logs/2026-03-01.jsonl +1 -0
- package/sinain-memory/memory/sinain-playbook.md +21 -0
- package/sinain-memory/memory-config.json +39 -0
- package/sinain-memory/memory_miner.py +183 -0
- package/sinain-memory/module_manager.py +695 -0
- package/sinain-memory/playbook_curator.py +225 -0
- package/sinain-memory/requirements.txt +3 -0
- package/sinain-memory/signal_analyzer.py +141 -0
- package/sinain-memory/test_local.py +402 -0
- package/sinain-memory/tests/__init__.py +0 -0
- package/sinain-memory/tests/conftest.py +189 -0
- package/sinain-memory/tests/test_curator_helpers.py +94 -0
- package/sinain-memory/tests/test_embedder.py +210 -0
- package/sinain-memory/tests/test_extract_json.py +124 -0
- package/sinain-memory/tests/test_feedback_computation.py +121 -0
- package/sinain-memory/tests/test_miner_helpers.py +71 -0
- package/sinain-memory/tests/test_module_management.py +458 -0
- package/sinain-memory/tests/test_parsers.py +96 -0
- package/sinain-memory/tests/test_tick_evaluator.py +430 -0
- package/sinain-memory/tests/test_triple_extractor.py +255 -0
- package/sinain-memory/tests/test_triple_ingest.py +191 -0
- package/sinain-memory/tests/test_triple_migrate.py +138 -0
- package/sinain-memory/tests/test_triplestore.py +248 -0
- package/sinain-memory/tick_evaluator.py +392 -0
- package/sinain-memory/triple_extractor.py +402 -0
- package/sinain-memory/triple_ingest.py +290 -0
- package/sinain-memory/triple_migrate.py +275 -0
- package/sinain-memory/triple_query.py +184 -0
- package/sinain-memory/triplestore.py +498 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""One-shot historical data migration into the EAV triple store.
|
|
3
|
+
|
|
4
|
+
Migrates ~3 weeks of pre-triplestore data (playbook logs, daily memories,
|
|
5
|
+
playbook patterns, active modules) into the triple store so that triple_query
|
|
6
|
+
can surface historical context.
|
|
7
|
+
|
|
8
|
+
Idempotent: checks for a `migration:v1` stamp entity before running.
|
|
9
|
+
No embeddings: those accumulate organically at runtime.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python3 triple_migrate.py --memory-dir memory/ --modules-dir modules/ [--dry-run]
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import sys
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Ensure sibling imports work when invoked from workspace root
|
|
22
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
23
|
+
|
|
24
|
+
from common import (
|
|
25
|
+
_read_jsonl,
|
|
26
|
+
list_daily_memory_files,
|
|
27
|
+
read_effective_playbook,
|
|
28
|
+
read_file_safe,
|
|
29
|
+
output_json,
|
|
30
|
+
)
|
|
31
|
+
from triple_extractor import TripleExtractor
|
|
32
|
+
from triplestore import TripleStore
|
|
33
|
+
|
|
34
|
+
MIGRATION_ENTITY = "migration:v1"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _assert_triples(store: TripleStore, tx_id: int, triples: list) -> int:
    """Write a batch of Triple objects into *store* under transaction *tx_id*.

    Triples whose ``entity_id`` or ``value`` is None are silently skipped
    (tolerance for legacy data).  Returns the number actually asserted.
    """
    asserted = 0
    for triple in triples:
        # Legacy rows may lack an entity or a value — skip, don't fail.
        if triple.entity_id is None or triple.value is None:
            continue
        store.assert_triple(
            tx_id,
            triple.entity_id,
            triple.attribute,
            str(triple.value),
            triple.value_type,
        )
        asserted += 1
    return asserted
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def migrate_playbook(extractor: TripleExtractor, store: TripleStore, memory_dir: str) -> int:
    """Extract the effective playbook into pattern triples.

    Returns the number of triples asserted — 0 when no playbook exists or
    nothing could be extracted from it.
    """
    playbook_text = read_effective_playbook(memory_dir)
    if not playbook_text.strip():
        print("[migrate] no playbook found, skipping", file=sys.stderr)
        return 0

    extracted = extractor.extract_playbook(playbook_text)
    if not extracted:
        return 0

    return _assert_triples(store, store.begin_tx("migration:playbook"), extracted)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def migrate_modules(
    extractor: "TripleExtractor", store: "TripleStore", modules_dir: str
) -> tuple[int, int]:
    """Migrate active modules into the triple store.

    Reads ``module-registry.json`` from *modules_dir*; for each module whose
    status is ``"active"``, extracts triples from its ``manifest.json`` and
    ``patterns.md`` and asserts them in a per-module transaction.  A module
    with a malformed manifest is migrated with an empty manifest.

    Returns:
        (module_count, triple_count) — modules migrated and triples actually
        asserted (triples skipped by ``_assert_triples`` are not counted).
    """
    registry_path = Path(modules_dir) / "module-registry.json"
    if not registry_path.exists():
        print("[migrate] no module-registry.json, skipping modules", file=sys.stderr)
        return 0, 0

    registry = json.loads(registry_path.read_text(encoding="utf-8"))
    modules = registry.get("modules", {})

    module_count = 0
    triple_count = 0

    for mod_id, entry in modules.items():
        if entry.get("status") != "active":
            continue

        manifest_path = Path(modules_dir) / mod_id / "manifest.json"
        patterns_path = Path(modules_dir) / mod_id / "patterns.md"

        manifest = {}
        if manifest_path.exists():
            try:
                manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                # Best-effort: a corrupt manifest shouldn't block migration.
                print(f"[migrate] bad manifest for {mod_id}, using empty", file=sys.stderr)

        patterns_text = read_file_safe(str(patterns_path))

        triples = extractor.extract_module(mod_id, manifest, patterns_text)
        if triples:
            tx = store.begin_tx(f"migration:module:{mod_id}")
            # Count what was actually asserted — _assert_triples may skip
            # triples with a None entity_id/value, so len(triples) overcounts.
            triple_count += _assert_triples(store, tx, triples)
            module_count += 1

    return module_count, triple_count
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def migrate_playbook_logs(
    extractor: "TripleExtractor", store: "TripleStore", memory_dir: str
) -> tuple[int, int]:
    """Migrate all playbook-log JSONL files under ``memory_dir/playbook-logs``.

    Idle entries with no signals, and entries missing a ``ts`` timestamp, are
    skipped.  Legacy entries whose ``signals`` are bare strings are normalized
    to ``{"description": ..., "priority": "medium"}`` dicts before extraction.

    Returns:
        (file_count, triple_count) — files that produced triples, and triples
        actually asserted (skipped triples are not counted).
    """
    log_dir = Path(memory_dir) / "playbook-logs"
    if not log_dir.is_dir():
        print("[migrate] no playbook-logs/ directory, skipping", file=sys.stderr)
        return 0, 0

    file_count = 0
    triple_count = 0

    for jsonl_file in sorted(log_dir.glob("*.jsonl")):
        entries = _read_jsonl(jsonl_file)
        if not entries:
            continue

        day_triples = []
        for entry in entries:
            # Skip idle entries with no signals
            if entry.get("idle", False) and not entry.get("signals"):
                continue

            ts = entry.get("ts", "")
            if not ts:
                continue

            # Normalize legacy string signals → dict format
            raw_signals = entry.get("signals", [])
            if raw_signals and isinstance(raw_signals[0], str):
                entry["signals"] = [
                    {"description": s, "priority": "medium"} for s in raw_signals
                ]

            day_triples.extend(extractor.extract_signal(entry, ts))

        if day_triples:
            tx = store.begin_tx(f"migration:logs:{jsonl_file.stem}")
            # Report the actual asserted count — _assert_triples skips triples
            # with a None entity_id/value, so len(day_triples) can overcount.
            asserted = _assert_triples(store, tx, day_triples)
            triple_count += asserted
            file_count += 1
            print(f" logs/{jsonl_file.name}: {asserted} triples", file=sys.stderr)

    return file_count, triple_count
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def migrate_daily_memories(
    extractor: TripleExtractor, store: TripleStore, memory_dir: str
) -> tuple[int, int]:
    """Migrate ``YYYY-MM-DD.md`` daily memory files into observation entities.

    Each non-empty file becomes an ``observation:<date>`` entity holding the
    (truncated) text plus provenance, with extracted concepts linked back via
    ``related_to``.

    Returns:
        (file_count, triple_count)
    """
    daily_files = list_daily_memory_files(memory_dir)
    if not daily_files:
        print("[migrate] no daily memory files, skipping", file=sys.stderr)
        return 0, 0

    migrated_files = 0
    asserted_total = 0

    for path_str in daily_files:
        body = read_file_safe(path_str)
        if not body.strip():
            continue

        day = Path(path_str).stem  # filename is YYYY-MM-DD
        obs_id = f"observation:{day}"
        snippet = body[:2000]  # cap stored text per observation

        tx = store.begin_tx(f"migration:memory:{day}")

        # Core observation entity: raw text + provenance.
        store.assert_triple(tx, obs_id, "text", snippet)
        store.assert_triple(tx, obs_id, "source", "daily_memory")
        asserted = 2

        # Concepts mentioned in the text, each linked back to the observation.
        for concept in extractor.extract_concepts(snippet):
            store.assert_triple(tx, concept.entity_id, concept.attribute, concept.value, concept.value_type)
            asserted += 1
            if concept.entity_id.startswith("concept:"):
                store.assert_triple(tx, obs_id, "related_to", concept.entity_id, "ref")
                asserted += 1

        asserted_total += asserted
        migrated_files += 1
        print(f" memory/{day}.md: {asserted} triples", file=sys.stderr)

    return migrated_files, asserted_total
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main() -> None:
    """CLI entry point: one-shot historical migration into the triple store.

    Parses --memory-dir / --modules-dir / --dry-run, then either prints a
    dry-run plan or runs the four migration passes (playbook, modules,
    playbook logs, daily memories), stamps `migration:v1` so reruns become
    no-ops, and emits a JSON summary on stdout.
    """
    parser = argparse.ArgumentParser(description="Migrate historical data to triple store")
    parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    parser.add_argument("--modules-dir", default=None, help="Path to modules/ directory")
    parser.add_argument("--dry-run", action="store_true", help="Print plan without writing")
    args = parser.parse_args()

    memory_dir = args.memory_dir
    # Default modules dir: sibling "modules/" next to the memory dir.
    modules_dir = args.modules_dir or str(Path(memory_dir).parent / "modules")

    db_path = str(Path(memory_dir) / "triplestore.db")
    if args.dry_run:
        # Dry run: report what would be migrated, write nothing.
        print(f"[dry-run] would migrate into {db_path}", file=sys.stderr)
        print(f"  playbook: {read_effective_playbook(memory_dir)[:80]}...", file=sys.stderr)
        log_dir = Path(memory_dir) / "playbook-logs"
        if log_dir.is_dir():
            jsonl_files = list(log_dir.glob("*.jsonl"))
            print(f"  log files: {len(jsonl_files)}", file=sys.stderr)
        mem_files = list_daily_memory_files(memory_dir)
        print(f"  daily memories: {len(mem_files)}", file=sys.stderr)
        output_json({"dryRun": True, "dbPath": db_path})
        return

    store = TripleStore(db_path)
    extractor = TripleExtractor(store)

    # 1. Idempotency guard
    existing = store.entity(MIGRATION_ENTITY)
    if existing:
        # Stamp entity present: a previous run completed — report and exit.
        print("[migrate] already migrated — migration:v1 entity exists", file=sys.stderr)
        output_json({"alreadyMigrated": True, **existing})
        store.close()
        return

    print("[migrate] starting historical data migration...", file=sys.stderr)

    # 2. Playbook
    pb_triples = migrate_playbook(extractor, store, memory_dir)
    print(f"[migrate] playbook: {pb_triples} triples", file=sys.stderr)

    # 3. Modules
    mod_count, mod_triples = migrate_modules(extractor, store, modules_dir)
    print(f"[migrate] modules: {mod_count} modules, {mod_triples} triples", file=sys.stderr)

    # 4. Playbook logs
    log_files, log_triples = migrate_playbook_logs(extractor, store, memory_dir)
    print(f"[migrate] logs: {log_files} files, {log_triples} triples", file=sys.stderr)

    # 5. Daily memories
    mem_files, mem_triples = migrate_daily_memories(extractor, store, memory_dir)
    print(f"[migrate] memories: {mem_files} files, {mem_triples} triples", file=sys.stderr)

    # 6. Stamp — record migration metadata under MIGRATION_ENTITY so the
    # idempotency guard above short-circuits any future run.
    total_triples = pb_triples + mod_triples + log_triples + mem_triples
    # NOTE(review): stats are taken before the stamp transaction, so the
    # reported totals exclude the stamp triples themselves.
    stats = store.stats()
    stamp_tx = store.begin_tx("migration:stamp")
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "completed_at", now)
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "playbook_triples", str(pb_triples))
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "module_count", str(mod_count))
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "log_files", str(log_files))
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "memory_files", str(mem_files))
    store.assert_triple(stamp_tx, MIGRATION_ENTITY, "total_triples", str(total_triples))

    store.close()

    # 7. Output — machine-readable summary for the caller.
    output_json({
        "migrated": {
            "playbook": pb_triples,
            "modules": mod_count,
            "logs": log_files,
            "dailyMemory": mem_files,
        },
        "totalTriples": stats["triples"],
        "totalEntities": stats["entities"],
    })
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# Allow running as a standalone CLI script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Triple Query — read-only utilities for querying the triple store.
|
|
3
|
+
|
|
4
|
+
Importable module + CLI for generating context from the knowledge graph.
|
|
5
|
+
|
|
6
|
+
Usage (CLI):
|
|
7
|
+
python3 triple_query.py --memory-dir memory/ --context "OCR pipeline" --max-chars 1500
|
|
8
|
+
|
|
9
|
+
Usage (import):
|
|
10
|
+
from triple_query import get_related_context
|
|
11
|
+
context = get_related_context("memory/", ["OCR pipeline"], max_chars=1500)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
20
|
+
|
|
21
|
+
from triplestore import TripleStore
|
|
22
|
+
from common import output_json
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _db_path(memory_dir: str) -> str:
|
|
26
|
+
return str(Path(memory_dir) / "triplestore.db")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_entity_text(store: TripleStore, entity_id: str) -> str:
    """Render an entity's attributes as an indented, human-readable block.

    Returns "" for unknown entities.  ``related_to``/``belongs_to`` references
    get friendlier labels ("links"/"module"); every other attribute prints as
    ``attr: value``, comma-joining multi-valued attributes.
    """
    attrs = store.entity(entity_id)
    if not attrs:
        return ""

    # Entity ids are "<type>:<rest>"; fall back when there is no prefix.
    kind = entity_id.split(":")[0] if ":" in entity_id else "unknown"
    lines = [f"[{kind}] {entity_id}"]

    relabel = {"related_to": "links", "belongs_to": "module"}
    for attr, vals in sorted(attrs.items()):
        label = relabel.get(attr, attr)
        if attr not in relabel and len(vals) == 1:
            rendered = vals[0]
        else:
            rendered = ", ".join(vals)
        lines.append(f"  {label}: {rendered}")

    return "\n".join(lines)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_related_concepts(
    memory_dir: str, keywords: list[str]
) -> str:
    """Find concepts matching keywords, then follow backrefs to related entities.

    For each keyword, concept entities whose ``name`` contains it
    (case-insensitive) are located via the name index; entities referencing
    each matched concept are rendered under a ``### <name>`` heading.

    Returns formatted markdown suitable for injection into LLM context, or ""
    when the store does not exist or nothing matches.
    """
    db = _db_path(memory_dir)
    if not Path(db).exists():
        return ""

    store = TripleStore(db)
    try:
        parts: list[str] = []
        seen_entities: set[str] = set()

        # AVET: the name index is keyword-independent — fetch it once here
        # instead of once per keyword (was inside the loop).
        all_concepts = store.entities_with_attr("name")

        for keyword in keywords:
            kw_lower = keyword.lower().strip()
            if not kw_lower:
                continue

            for eid, name in all_concepts:
                if not eid.startswith("concept:"):
                    continue
                if kw_lower in name.lower():
                    if eid in seen_entities:
                        continue
                    seen_entities.add(eid)

                    # VAET: find what references this concept
                    refs = store.backrefs(eid)
                    if refs:
                        ref_parts = []
                        for ref_eid, ref_attr in refs[:10]:  # cap fan-out per concept
                            if ref_eid in seen_entities:
                                continue
                            seen_entities.add(ref_eid)
                            text = build_entity_text(store, ref_eid)
                            if text:
                                ref_parts.append(text)
                        if ref_parts:
                            parts.append(f"### {name}\n" + "\n".join(ref_parts))

        return "\n\n".join(parts) if parts else ""
    finally:
        store.close()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_related_context(
    memory_dir: str,
    seed_texts: list[str],
    max_chars: int = 1500,
) -> str:
    """Build a context block from the knowledge graph for the given seed texts.

    Phase 2 (vector search via the optional embedder) runs first when
    available; Phase 1 keyword matching tops up the result whenever less than
    half of *max_chars* has been filled.

    Args:
        memory_dir: Directory containing ``triplestore.db``.
        seed_texts: Texts to derive embeddings/keywords from.
        max_chars: Soft cap on the returned context size.

    Returns:
        Joined entity blocks, or "" when the store is absent or nothing matches.
    """
    db = _db_path(memory_dir)
    if not Path(db).exists():
        return ""

    store = TripleStore(db)
    try:
        context_parts: list[str] = []
        total_chars = 0
        seen: set[str] = set()

        # Phase 2: try vector search first
        try:
            from embedder import Embedder

            embedder = Embedder(db)
            for text in seed_texts:
                vecs = embedder.embed([text])
                if vecs and vecs[0]:
                    results = embedder.vector_search(vecs[0], top_k=5)
                    for eid, score in results:
                        if eid in seen:
                            continue
                        seen.add(eid)
                        ent_text = build_entity_text(store, eid)
                        if ent_text and total_chars + len(ent_text) < max_chars:
                            context_parts.append(f"{ent_text} (relevance: {score:.2f})")
                            # +20 approximates the " (relevance: x.xx)" suffix.
                            total_chars += len(ent_text) + 20
        except Exception:
            # Phase 2 is strictly best-effort: any failure (missing embedder,
            # model error, ...) falls through to keyword matching.  The old
            # `except (ImportError, Exception)` was redundant — Exception
            # already covers ImportError.
            pass

        # Phase 1: keyword matching
        if total_chars < max_chars // 2:
            # Extract crude keywords (words longer than 3 chars) from seeds
            keywords: set[str] = set()
            for text in seed_texts:
                for word in text.lower().split():
                    word = word.strip(".,!?;:'\"()[]{}").strip()
                    if len(word) > 3:
                        keywords.add(word)

            # Search patterns and concepts by keyword
            all_text_triples = store.entities_with_attr("text")
            all_name_triples = store.entities_with_attr("name")

            for eid, val in all_text_triples + all_name_triples:
                if eid in seen:
                    continue
                val_lower = val.lower()
                if any(kw in val_lower for kw in keywords):
                    seen.add(eid)
                    ent_text = build_entity_text(store, eid)
                    if ent_text and total_chars + len(ent_text) < max_chars:
                        context_parts.append(ent_text)
                        total_chars += len(ent_text)
                    if total_chars >= max_chars:
                        break

        return "\n\n".join(context_parts) if context_parts else ""
    finally:
        store.close()
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def main() -> None:
    """CLI entry point: print graph context for a query as JSON."""
    arg_parser = argparse.ArgumentParser(description="Triple Store Query CLI")
    arg_parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
    arg_parser.add_argument("--context", required=True, help="Query text for context generation")
    arg_parser.add_argument("--max-chars", type=int, default=1500, help="Maximum context chars")
    opts = arg_parser.parse_args()

    ctx = get_related_context(opts.memory_dir, [opts.context], max_chars=opts.max_chars)
    output_json({"context": ctx})
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Allow running as a standalone CLI script.
if __name__ == "__main__":
    main()
|