@geravant/sinain 1.15.5 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +0 -171
- package/launcher.js +0 -298
- package/package.json +4 -1
- package/sinain-agent/.claude/settings.json +16 -0
- package/sinain-agent/hooks/approve-tool.sh +46 -0
- package/sinain-agent/openrouter-proxy.mjs +266 -0
- package/sinain-core/src/agent/analyzer.ts +5 -1
- package/sinain-core/src/agent/loop.ts +11 -0
- package/sinain-core/src/index.ts +56 -0
- package/sinain-core/src/learning/entity-cache.ts +180 -0
- package/sinain-core/src/server.ts +23 -0
- package/sinain-core/src/types.ts +2 -0
- package/sinain-memory/graph_query.py +132 -2
|
@@ -246,6 +246,88 @@ def query_facts_by_entity_graph(
|
|
|
246
246
|
return []
|
|
247
247
|
|
|
248
248
|
|
|
249
|
+
def expand_entity_community(
    store,
    entity_name: str,
    max_related: int = 3,
    max_facts_per_entity: int = 30,
) -> list[tuple[str, int]]:
    """Find related entities by following entity → facts → mentioned entities.

    Facts that point at the entity through an ``about`` or ``mentions``
    attribute are collected first; every *other* ``entity:`` node those facts
    reference is then counted.

    Args:
        store: Object exposing ``entity(entity_id)`` (attribute mapping, falsy
            when nothing is stored) and ``backrefs(entity_id, attribute=...)``
            (sliceable sequence of ``(entity_id, _)`` pairs).
        entity_name: Entity name; lower-cased and with spaces replaced by
            dashes to build the ``entity:<slug>`` node id.
        max_related: Maximum number of related entities to return.
        max_facts_per_entity: Cap on back-references read per attribute.

    Returns:
        ``[(entity_slug, co_mention_count), ...]`` sorted by descending count,
        ties broken alphabetically. Empty when the entity is unknown or
        ``max_related`` is not positive.
    """
    if max_related <= 0:
        return []

    entity_node_id = f"entity:{entity_name.lower().replace(' ', '-')}"
    if not store.entity(entity_node_id):
        return []

    ref_attrs = ("about", "mentions")
    # A negative cap would turn the slice below into "all but the last N".
    fact_cap = max(max_facts_per_entity, 0)

    # Collect facts linked to this entity (both about and mentions).
    fact_ids: set[str] = set()
    for ref_attr in ref_attrs:
        linked = store.backrefs(entity_node_id, attribute=ref_attr)[:fact_cap]
        for fact_eid, _ in linked:
            if fact_eid.startswith("fact:"):
                fact_ids.add(fact_eid)

    # Follow each fact's outgoing refs to find other entity nodes.
    related_counts: dict[str, int] = {}
    prefix = "entity:"
    for fact_eid in fact_ids:
        # A back-reference may point at a fact with no stored attributes.
        attrs = store.entity(fact_eid) or {}
        for ref_attr in ref_attrs:
            targets = attrs.get(ref_attr, [])
            if not isinstance(targets, list):
                targets = [targets]
            for target in targets:
                if (
                    isinstance(target, str)
                    and target.startswith(prefix)
                    and target != entity_node_id
                ):
                    name = target[len(prefix):]
                    related_counts[name] = related_counts.get(name, 0) + 1

    # Sort by frequency; the name tie-breaker keeps the output independent of
    # the (hash-dependent) iteration order of ``fact_ids``.
    ranked = sorted(related_counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:max_related]
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _cooccurring_entities(
|
|
291
|
+
store,
|
|
292
|
+
fact_ids: set[str],
|
|
293
|
+
max_entities: int = 3,
|
|
294
|
+
) -> list[str]:
|
|
295
|
+
"""Find entities that co-occur in the same distillation pass (shared first_seen timestamp)."""
|
|
296
|
+
if not fact_ids:
|
|
297
|
+
return []
|
|
298
|
+
|
|
299
|
+
# Get first_seen timestamps for the input facts
|
|
300
|
+
timestamps = set()
|
|
301
|
+
for fid in list(fact_ids)[:20]: # cap to avoid huge queries
|
|
302
|
+
attrs = store.entity(fid)
|
|
303
|
+
fs = attrs.get("first_seen", [])
|
|
304
|
+
if isinstance(fs, list) and fs:
|
|
305
|
+
timestamps.add(fs[0])
|
|
306
|
+
elif isinstance(fs, str):
|
|
307
|
+
timestamps.add(fs)
|
|
308
|
+
|
|
309
|
+
if not timestamps:
|
|
310
|
+
return []
|
|
311
|
+
|
|
312
|
+
# Find other facts with same timestamps and extract their entity names
|
|
313
|
+
placeholders = ",".join("?" for _ in timestamps)
|
|
314
|
+
rows = store._conn.execute(
|
|
315
|
+
f"SELECT DISTINCT t2.value FROM triples t1 "
|
|
316
|
+
f"JOIN triples t2 ON t2.entity_id = t1.entity_id AND t2.attribute = 'entity' AND t2.retracted = 0 "
|
|
317
|
+
f"WHERE t1.attribute = 'first_seen' AND t1.value IN ({placeholders}) "
|
|
318
|
+
f"AND t1.retracted = 0 AND t1.entity_id LIKE 'fact:%' "
|
|
319
|
+
f"AND t1.entity_id NOT IN ({','.join('?' for _ in fact_ids)})",
|
|
320
|
+
list(timestamps) + list(fact_ids),
|
|
321
|
+
).fetchall()
|
|
322
|
+
|
|
323
|
+
# Count co-occurrence per entity name
|
|
324
|
+
counts: dict[str, int] = {}
|
|
325
|
+
for (name,) in rows:
|
|
326
|
+
counts[name] = counts.get(name, 0) + 1
|
|
327
|
+
ranked = sorted(counts, key=lambda x: -counts[x])
|
|
328
|
+
return ranked[:max_entities]
|
|
329
|
+
|
|
330
|
+
|
|
249
331
|
def query_facts_hybrid(
|
|
250
332
|
db_path: str,
|
|
251
333
|
query: str,
|
|
@@ -257,17 +339,45 @@ def query_facts_hybrid(
|
|
|
257
339
|
expands top results with 1-hop graph neighbors.
|
|
258
340
|
"""
|
|
259
341
|
import re
|
|
342
|
+
import time
|
|
260
343
|
keywords = [w.lower() for w in re.findall(r"[a-zA-Z][a-zA-Z0-9-]+", query) if len(w) > 2]
|
|
261
344
|
|
|
262
345
|
# Entity graph pre-filter: find facts linked to mentioned entities via backrefs.
|
|
263
346
|
# Used to BOOST relevant facts in RRF, not as a separate tier (avoids dilution).
|
|
264
347
|
graph_fact_ids: set[str] = set()
|
|
348
|
+
community_fact_ids: set[str] = set()
|
|
265
349
|
for kw in keywords:
|
|
266
350
|
for f in query_facts_by_entity_graph(db_path, kw, max_facts=50):
|
|
267
351
|
eid = f.get("entity_id", "")
|
|
268
352
|
if eid:
|
|
269
353
|
graph_fact_ids.add(eid)
|
|
270
354
|
|
|
355
|
+
# Community expansion: follow mentions edges to find related entities
|
|
356
|
+
t0 = time.monotonic()
|
|
357
|
+
try:
|
|
358
|
+
from triplestore import TripleStore
|
|
359
|
+
store = TripleStore(db_path)
|
|
360
|
+
|
|
361
|
+
matched_entities = set()
|
|
362
|
+
for kw in keywords:
|
|
363
|
+
node_id = f"entity:{kw}"
|
|
364
|
+
if store.entity(node_id):
|
|
365
|
+
matched_entities.add(kw)
|
|
366
|
+
|
|
367
|
+
for ent in matched_entities:
|
|
368
|
+
if time.monotonic() - t0 > 0.5:
|
|
369
|
+
break # timing guard
|
|
370
|
+
community = expand_entity_community(store, ent, max_related=3)
|
|
371
|
+
for related_name, _count in community:
|
|
372
|
+
for f in query_facts_by_entity_graph(db_path, related_name, max_facts=20):
|
|
373
|
+
eid = f.get("entity_id", "")
|
|
374
|
+
if eid and eid not in graph_fact_ids:
|
|
375
|
+
community_fact_ids.add(eid)
|
|
376
|
+
|
|
377
|
+
store.close()
|
|
378
|
+
except Exception:
|
|
379
|
+
pass
|
|
380
|
+
|
|
271
381
|
# Run three retrieval methods independently
|
|
272
382
|
candidate_limit = max_facts * 3
|
|
273
383
|
fts_results = query_facts_fts(db_path, query, max_facts=candidate_limit)
|
|
@@ -296,11 +406,31 @@ def query_facts_hybrid(
|
|
|
296
406
|
for rank, eid in enumerate(ranked_list):
|
|
297
407
|
rrf_scores[eid] = rrf_scores.get(eid, 0.0) + 1.0 / (K + rank)
|
|
298
408
|
|
|
409
|
+
# Co-occurrence boost: use FTS/tag results to find temporally related entities
|
|
410
|
+
import time as _time
|
|
411
|
+
_t_cooccur = _time.monotonic()
|
|
412
|
+
query_matched_ids = {f.get("entity_id", "") for f in fts_results + tag_results if f.get("entity_id")}
|
|
413
|
+
if query_matched_ids and _time.monotonic() - _t_cooccur < 0.3:
|
|
414
|
+
try:
|
|
415
|
+
from triplestore import TripleStore
|
|
416
|
+
_store = TripleStore(db_path)
|
|
417
|
+
cooccur = _cooccurring_entities(_store, query_matched_ids, max_entities=5)
|
|
418
|
+
for ent_name in cooccur:
|
|
419
|
+
for f in query_facts_by_entity_graph(db_path, ent_name, max_facts=10):
|
|
420
|
+
eid = f.get("entity_id", "")
|
|
421
|
+
if eid and eid not in graph_fact_ids:
|
|
422
|
+
community_fact_ids.add(eid)
|
|
423
|
+
_store.close()
|
|
424
|
+
except Exception:
|
|
425
|
+
pass
|
|
426
|
+
|
|
299
427
|
# Graph boost: facts linked to mentioned entities via backrefs get priority
|
|
300
|
-
if graph_fact_ids:
|
|
428
|
+
if graph_fact_ids or community_fact_ids:
|
|
301
429
|
for eid in rrf_scores:
|
|
302
430
|
if eid in graph_fact_ids:
|
|
303
|
-
rrf_scores[eid] += 0.02 #
|
|
431
|
+
rrf_scores[eid] += 0.02 # direct graph-linked facts
|
|
432
|
+
elif eid in community_fact_ids:
|
|
433
|
+
rrf_scores[eid] += 0.01 # community-expanded facts (half weight)
|
|
304
434
|
|
|
305
435
|
# Apply confidence decay as secondary signal (fresh facts rank above stale ones)
|
|
306
436
|
from triplestore import decayed_confidence
|