npcpy-1.3.19-py3-none-any.whl → npcpy-1.3.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/data/web.py +113 -8
- npcpy/llm_funcs.py +3 -4
- npcpy/memory/command_history.py +85 -9
- npcpy/memory/knowledge_graph.py +554 -33
- npcpy/memory/memory_processor.py +269 -53
- npcpy/npc_compiler.py +6 -0
- npcpy/npc_sysenv.py +24 -2
- npcpy/serve.py +25 -1
- {npcpy-1.3.19.dist-info → npcpy-1.3.21.dist-info}/METADATA +3 -1
- {npcpy-1.3.19.dist-info → npcpy-1.3.21.dist-info}/RECORD +13 -13
- {npcpy-1.3.19.dist-info → npcpy-1.3.21.dist-info}/WHEEL +1 -1
- {npcpy-1.3.19.dist-info → npcpy-1.3.21.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.3.19.dist-info → npcpy-1.3.21.dist-info}/top_level.txt +0 -0
npcpy/memory/knowledge_graph.py
CHANGED
@@ -1179,24 +1179,45 @@ def kg_add_fact(
 
 def kg_search_facts(
     engine,
-    query: str,
-    npc=None,
+    query: str,
+    npc=None,
     team=None,
     model=None,
-    provider=None
+    provider=None,
+    search_all_scopes=True
 ):
-    """Search facts in the knowledge graph
+    """Search facts in the knowledge graph.
+
+    If search_all_scopes is True and no npc/team is provided,
+    searches across all scopes in the database.
+    """
+    from sqlalchemy import text
+
     directory_path = os.getcwd()
-    team_name = getattr(team, 'name',
-    npc_name = npc
-
-    kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)
-
+    team_name = getattr(team, 'name', None) if team else None
+    npc_name = getattr(npc, 'name', None) if npc else None
+
     matching_facts = []
-
-
-
-
+
+    if search_all_scopes and (not team_name or not npc_name):
+        # Search across all scopes directly in DB
+        with engine.connect() as conn:
+            result = conn.execute(text("""
+                SELECT DISTINCT statement FROM kg_facts
+                WHERE LOWER(statement) LIKE LOWER(:query)
+            """), {"query": f"%{query}%"})
+            matching_facts = [row.statement for row in result]
+    else:
+        # Scope-specific search
+        if not team_name:
+            team_name = 'global_team'
+        if not npc_name:
+            npc_name = 'default_npc'
+        kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)
+        for fact in kg_data.get('facts', []):
+            if query.lower() in fact['statement'].lower():
+                matching_facts.append(fact['statement'])
 
     return matching_facts
 
 def kg_remove_fact(
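
A minimal usage sketch of the updated kg_search_facts (the SQLite URL and query string below are illustrative; any SQLAlchemy engine pointed at an npcpy history database with a kg_facts table should behave the same way):

from sqlalchemy import create_engine

from npcpy.memory.knowledge_graph import kg_search_facts

engine = create_engine("sqlite:///npcsh_history.db")  # hypothetical DB path

# Default search_all_scopes=True: with no npc/team, a single case-insensitive
# LIKE query runs over every scope's rows in kg_facts.
all_scope_hits = kg_search_facts(engine, "postgres migration")

# search_all_scopes=False restricts the search to one scope, falling back to
# 'global_team'/'default_npc' when npc and team are omitted.
scoped_hits = kg_search_facts(engine, "postgres migration", search_all_scopes=False)

Note that the cross-scope branch is plain substring matching (LOWER ... LIKE), not semantic search; the embedding-based variants are added further down in this diff.
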
@@ -1226,20 +1247,30 @@ def kg_remove_fact(
 
 def kg_list_concepts(
     engine,
-    npc=None,
+    npc=None,
     team=None,
     model=None,
-    provider=None
+    provider=None,
+    search_all_scopes=True
 ):
     """List all concepts in the knowledge graph"""
+    from sqlalchemy import text
+
     directory_path = os.getcwd()
-    team_name = getattr(team, 'name',
-    npc_name = npc
-
-
-
-
-
+    team_name = getattr(team, 'name', None) if team else None
+    npc_name = getattr(npc, 'name', None) if npc else None
+
+    if search_all_scopes and (not team_name or not npc_name):
+        with engine.connect() as conn:
+            result = conn.execute(text("SELECT DISTINCT name FROM kg_concepts"))
+            return [row.name for row in result]
+    else:
+        if not team_name:
+            team_name = 'global_team'
+        if not npc_name:
+            npc_name = 'default_npc'
+        kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)
+        return [c['name'] for c in kg_data.get('concepts', [])]
 
 def kg_get_facts_for_concept(
     engine,
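
kg_list_concepts picks up the same search_all_scopes switch; a short sketch under the same assumptions (hypothetical engine URL):

from sqlalchemy import create_engine

from npcpy.memory.knowledge_graph import kg_list_concepts

engine = create_engine("sqlite:///npcsh_history.db")  # hypothetical DB path

concepts = kg_list_concepts(engine)                         # one SELECT DISTINCT over kg_concepts
scoped = kg_list_concepts(engine, search_all_scopes=False)  # 'global_team'/'default_npc' fallback
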
@@ -1348,20 +1379,30 @@ def kg_link_fact_to_concept(
 
 def kg_get_all_facts(
     engine,
-    npc=None,
+    npc=None,
     team=None,
     model=None,
-    provider=None
+    provider=None,
+    search_all_scopes=True
 ):
     """Get all facts from the knowledge graph"""
+    from sqlalchemy import text
+
     directory_path = os.getcwd()
-    team_name = getattr(team, 'name',
-    npc_name = npc
-
-
-
-
-
+    team_name = getattr(team, 'name', None) if team else None
+    npc_name = getattr(npc, 'name', None) if npc else None
+
+    if search_all_scopes and (not team_name or not npc_name):
+        with engine.connect() as conn:
+            result = conn.execute(text("SELECT DISTINCT statement FROM kg_facts"))
+            return [row.statement for row in result]
+    else:
+        if not team_name:
+            team_name = 'global_team'
+        if not npc_name:
+            npc_name = 'default_npc'
+        kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)
+        return [f['statement'] for f in kg_data.get('facts', [])]
 
 def kg_get_stats(
     engine,
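
kg_get_all_facts receives the identical treatment; a sketch under the same assumptions:

from sqlalchemy import create_engine

from npcpy.memory.knowledge_graph import kg_get_all_facts

engine = create_engine("sqlite:///npcsh_history.db")  # hypothetical DB path

every_fact = kg_get_all_facts(engine)                             # SELECT DISTINCT statement FROM kg_facts
scoped_facts = kg_get_all_facts(engine, search_all_scopes=False)  # loads one scope via load_kg_from_db
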
@@ -1412,5 +1453,485 @@ def kg_evolve_knowledge(
     )
 
     save_kg_to_db(engine, evolved_kg, team_name, npc_name, directory_path)
-
-    return "Knowledge graph evolved with new content"
+
+    return "Knowledge graph evolved with new content"
+
+
+# =============================================================================
+# ADVANCED SEARCH FUNCTIONS
+# =============================================================================
+
+def kg_link_search(
+    engine,
+    query: str,
+    npc=None,
+    team=None,
+    max_depth: int = 2,
+    breadth_per_step: int = 5,
+    max_results: int = 20,
+    strategy: str = 'bfs',
+    search_all_scopes: bool = True
+):
+    """
+    Search KG by traversing links from keyword-matched seeds.
+
+    Args:
+        engine: SQLAlchemy engine
+        query: Search query to find initial seeds
+        max_depth: How many hops to traverse from seeds
+        breadth_per_step: Max items to expand per hop
+        max_results: Max total results
+        strategy: 'bfs' (breadth-first) or 'dfs' (depth-first)
+        search_all_scopes: Search across all npc/team scopes
+
+    Returns:
+        List of dicts with 'content', 'type', 'depth', 'path', 'score'
+    """
+    from sqlalchemy import text
+    from collections import deque
+
+    # Phase 1: Find seed facts/concepts via keyword search
+    seeds = kg_search_facts(engine, query, npc=npc, team=team,
+                            search_all_scopes=search_all_scopes)
+
+    if not seeds:
+        return []
+
+    visited = set(seeds[:breadth_per_step])
+    results = [{'content': s, 'type': 'fact', 'depth': 0, 'path': [s], 'score': 1.0}
+               for s in seeds[:breadth_per_step]]
+
+    # Phase 2: Traverse links
+    if strategy == 'bfs':
+        queue = deque()
+        for seed in seeds[:breadth_per_step]:
+            queue.append((seed, 'fact', 0, [seed], 1.0))
+    else:
+        queue = []  # Use as stack for DFS
+        for seed in seeds[:breadth_per_step]:
+            queue.append((seed, 'fact', 0, [seed], 1.0))
+
+    with engine.connect() as conn:
+        while queue and len(results) < max_results:
+            if strategy == 'bfs':
+                current, curr_type, depth, path, score = queue.popleft()
+            else:
+                current, curr_type, depth, path, score = queue.pop()
+
+            if depth >= max_depth:
+                continue
+
+            # Find linked items (both directions)
+            linked = []
+
+            # Links where current is source
+            result = conn.execute(text("""
+                SELECT target, type FROM kg_links WHERE source = :src
+            """), {"src": current})
+            for row in result:
+                target_type = 'concept' if 'concept' in row.type else 'fact'
+                linked.append((row.target, target_type, row.type))
+
+            # Links where current is target
+            result = conn.execute(text("""
+                SELECT source, type FROM kg_links WHERE target = :tgt
+            """), {"tgt": current})
+            for row in result:
+                source_type = 'fact' if 'fact_to' in row.type else 'concept'
+                linked.append((row.source, source_type, f"rev_{row.type}"))
+
+            # Expand to linked items
+            added = 0
+            for item_content, item_type, link_type in linked:
+                if item_content in visited or added >= breadth_per_step:
+                    continue
+
+                visited.add(item_content)
+                new_path = path + [item_content]
+                new_score = score * 0.8  # Decay with depth
+
+                results.append({
+                    'content': item_content,
+                    'type': item_type,
+                    'depth': depth + 1,
+                    'path': new_path,
+                    'score': new_score,
+                    'link_type': link_type
+                })
+
+                queue.append((item_content, item_type, depth + 1, new_path, new_score))
+                added += 1
+
+    # Sort by score then depth
+    results.sort(key=lambda x: (-x['score'], x['depth']))
+    return results[:max_results]
+
+
+def kg_embedding_search(
+    engine,
+    query: str,
+    npc=None,
+    team=None,
+    embedding_model: str = None,
+    embedding_provider: str = None,
+    similarity_threshold: float = 0.6,
+    max_results: int = 20,
+    include_concepts: bool = True,
+    search_all_scopes: bool = True
+):
+    """
+    Semantic search using embeddings.
+
+    Args:
+        engine: SQLAlchemy engine
+        query: Search query
+        embedding_model: Model for embeddings (default: nomic-embed-text)
+        embedding_provider: Provider (default: ollama)
+        similarity_threshold: Min cosine similarity to include
+        max_results: Max results to return
+        include_concepts: Also search concepts, not just facts
+        search_all_scopes: Search across all npc/team scopes
+
+    Returns:
+        List of dicts with 'content', 'type', 'score'
+    """
+    from sqlalchemy import text
+    import numpy as np
+
+    try:
+        from npcpy.gen.embeddings import get_embeddings
+    except ImportError:
+        print("Embeddings not available, falling back to keyword search")
+        facts = kg_search_facts(engine, query, npc=npc, team=team,
+                                search_all_scopes=search_all_scopes)
+        return [{'content': f, 'type': 'fact', 'score': 0.5} for f in facts[:max_results]]
+
+    model = embedding_model or 'nomic-embed-text'
+    provider = embedding_provider or 'ollama'
+
+    # Get query embedding
+    query_embedding = np.array(get_embeddings([query], model, provider)[0])
+
+    results = []
+
+    with engine.connect() as conn:
+        # Search facts
+        if search_all_scopes:
+            fact_rows = conn.execute(text(
+                "SELECT DISTINCT statement FROM kg_facts"
+            )).fetchall()
+        else:
+            team_name = getattr(team, 'name', 'global_team') if team else 'global_team'
+            npc_name = getattr(npc, 'name', 'default_npc') if npc else 'default_npc'
+            fact_rows = conn.execute(text("""
+                SELECT statement FROM kg_facts
+                WHERE team_name = :team AND npc_name = :npc
+            """), {"team": team_name, "npc": npc_name}).fetchall()
+
+        if fact_rows:
+            statements = [r.statement for r in fact_rows]
+            embeddings = get_embeddings(statements, model, provider)
+
+            for i, stmt in enumerate(statements):
+                emb = np.array(embeddings[i])
+                sim = float(np.dot(query_embedding, emb) /
+                            (np.linalg.norm(query_embedding) * np.linalg.norm(emb)))
+                if sim >= similarity_threshold:
+                    results.append({'content': stmt, 'type': 'fact', 'score': sim})
+
+        # Search concepts
+        if include_concepts:
+            if search_all_scopes:
+                concept_rows = conn.execute(text(
+                    "SELECT DISTINCT name FROM kg_concepts"
+                )).fetchall()
+            else:
+                concept_rows = conn.execute(text("""
+                    SELECT name FROM kg_concepts
+                    WHERE team_name = :team AND npc_name = :npc
+                """), {"team": team_name, "npc": npc_name}).fetchall()
+
+            if concept_rows:
+                names = [r.name for r in concept_rows]
+                embeddings = get_embeddings(names, model, provider)
+
+                for i, name in enumerate(names):
+                    emb = np.array(embeddings[i])
+                    sim = float(np.dot(query_embedding, emb) /
+                                (np.linalg.norm(query_embedding) * np.linalg.norm(emb)))
+                    if sim >= similarity_threshold:
+                        results.append({'content': name, 'type': 'concept', 'score': sim})
+
+    results.sort(key=lambda x: -x['score'])
+    return results[:max_results]
+
+
+def kg_hybrid_search(
+    engine,
+    query: str,
+    npc=None,
+    team=None,
+    mode: str = 'keyword+link',
+    max_depth: int = 2,
+    breadth_per_step: int = 5,
+    max_results: int = 20,
+    embedding_model: str = None,
+    embedding_provider: str = None,
+    similarity_threshold: float = 0.6,
+    search_all_scopes: bool = True
+):
+    """
+    Hybrid search combining multiple methods.
+
+    Args:
+        engine: SQLAlchemy engine
+        query: Search query
+        mode: Search mode - 'keyword', 'embedding', 'link',
+              'keyword+link', 'keyword+embedding', 'all'
+        max_depth: Link traversal depth
+        breadth_per_step: Items per traversal hop
+        max_results: Max results
+        embedding_model/provider: For embedding search
+        similarity_threshold: For embedding search
+        search_all_scopes: Search all npc/team scopes
+
+    Returns:
+        List of dicts with 'content', 'type', 'score', 'source'
+    """
+    all_results = {}  # content -> result dict
+
+    # Keyword search (always fast, always run unless embedding-only)
+    if 'keyword' in mode or mode == 'link' or mode == 'all':
+        keyword_facts = kg_search_facts(engine, query, npc=npc, team=team,
+                                        search_all_scopes=search_all_scopes)
+        for f in keyword_facts:
+            all_results[f] = {'content': f, 'type': 'fact', 'score': 0.7, 'source': 'keyword'}
+
+    # Embedding search
+    if 'embedding' in mode or mode == 'all':
+        try:
+            emb_results = kg_embedding_search(
+                engine, query, npc=npc, team=team,
+                embedding_model=embedding_model,
+                embedding_provider=embedding_provider,
+                similarity_threshold=similarity_threshold,
+                max_results=max_results,
+                search_all_scopes=search_all_scopes
+            )
+            for r in emb_results:
+                if r['content'] in all_results:
+                    # Boost if found by multiple methods
+                    all_results[r['content']]['score'] = max(
+                        all_results[r['content']]['score'], r['score']
+                    ) * 1.1
+                    all_results[r['content']]['source'] += '+embedding'
+                else:
+                    r['source'] = 'embedding'
+                    all_results[r['content']] = r
+        except Exception as e:
+            print(f"Embedding search failed: {e}")
+
+    # Link traversal
+    if 'link' in mode or mode == 'all':
+        link_results = kg_link_search(
+            engine, query, npc=npc, team=team,
+            max_depth=max_depth,
+            breadth_per_step=breadth_per_step,
+            max_results=max_results,
+            search_all_scopes=search_all_scopes
+        )
+        for r in link_results:
+            if r['content'] in all_results:
+                # Boost linked results
+                all_results[r['content']]['score'] = max(
+                    all_results[r['content']]['score'], r['score']
+                ) * 1.05
+                all_results[r['content']]['source'] += '+link'
+                all_results[r['content']]['depth'] = r.get('depth', 0)
+                all_results[r['content']]['path'] = r.get('path', [])
+            else:
+                r['source'] = 'link'
+                all_results[r['content']] = r
+
+    # Sort and return
+    final = sorted(all_results.values(), key=lambda x: -x['score'])
+    return final[:max_results]
+
+
+def kg_backfill_from_memories(
+    engine,
+    model: str = None,
+    provider: str = None,
+    npc=None,
+    get_concepts: bool = True,
+    link_concepts_facts: bool = False,
+    link_concepts_concepts: bool = False,
+    link_facts_facts: bool = False,
+    dry_run: bool = False
+):
+    """
+    Backfill KG from approved memories that haven't been incorporated yet.
+
+    Args:
+        engine: SQLAlchemy engine
+        model: LLM model for concept generation
+        provider: LLM provider
+        npc: NPC object (optional)
+        get_concepts: Whether to generate concepts
+        link_concepts_facts: Whether to link facts to concepts
+        link_concepts_concepts: Whether to link concepts to concepts
+        link_facts_facts: Whether to link facts to facts
+        dry_run: If True, just report what would be done
+
+    Returns:
+        Dict with stats: scopes_processed, facts_added, concepts_added
+    """
+    from sqlalchemy import text
+
+    stats = {
+        'scopes_processed': 0,
+        'facts_before': 0,
+        'facts_after': 0,
+        'concepts_before': 0,
+        'concepts_after': 0,
+        'scopes': []
+    }
+
+    # Get current counts
+    with engine.connect() as conn:
+        stats['facts_before'] = conn.execute(text("SELECT COUNT(*) FROM kg_facts")).scalar() or 0
+        stats['concepts_before'] = conn.execute(text("SELECT COUNT(*) FROM kg_concepts")).scalar() or 0
+
+    # Get approved memories grouped by scope
+    with engine.connect() as conn:
+        result = conn.execute(text("""
+            SELECT npc, team, directory_path, initial_memory, final_memory
+            FROM memory_lifecycle
+            WHERE status IN ('human-approved', 'human-edited')
+            ORDER BY npc, team, directory_path
+        """))
+
+        from collections import defaultdict
+        memories_by_scope = defaultdict(list)
+        for row in result:
+            statement = row.final_memory or row.initial_memory
+            scope = (row.npc or 'default', row.team or 'global_team', row.directory_path or os.getcwd())
+            memories_by_scope[scope].append({
+                'statement': statement,
+                'source_text': '',
+                'type': 'explicit',
+                'generation': 0
+            })
+
+    if dry_run:
+        for scope, facts in memories_by_scope.items():
+            stats['scopes'].append({
+                'scope': scope,
+                'memory_count': len(facts)
+            })
+        stats['scopes_processed'] = len(memories_by_scope)
+        return stats
+
+    # Process each scope
+    for (npc_name, team_name, directory_path), facts in memories_by_scope.items():
+        existing_kg = load_kg_from_db(engine, team_name, npc_name, directory_path)
+
+        # Filter out facts already in KG
+        existing_statements = {f['statement'] for f in existing_kg.get('facts', [])}
+        new_facts = [f for f in facts if f['statement'] not in existing_statements]
+
+        if not new_facts:
+            continue
+
+        try:
+            evolved_kg, _ = kg_evolve_incremental(
+                existing_kg=existing_kg,
+                new_facts=new_facts,
+                model=model or (npc.model if npc else None),
+                provider=provider or (npc.provider if npc else None),
+                npc=npc,
+                get_concepts=get_concepts,
+                link_concepts_facts=link_concepts_facts,
+                link_concepts_concepts=link_concepts_concepts,
+                link_facts_facts=link_facts_facts
+            )
+            save_kg_to_db(engine, evolved_kg, team_name, npc_name, directory_path)
+
+            stats['scopes'].append({
+                'scope': (npc_name, team_name, directory_path),
+                'facts_added': len(new_facts),
+                'concepts_added': len(evolved_kg.get('concepts', [])) - len(existing_kg.get('concepts', []))
+            })
+            stats['scopes_processed'] += 1
+
+        except Exception as e:
+            print(f"Error processing scope {npc_name}/{team_name}: {e}")
+
+    # Get final counts
+    with engine.connect() as conn:
+        stats['facts_after'] = conn.execute(text("SELECT COUNT(*) FROM kg_facts")).scalar() or 0
+        stats['concepts_after'] = conn.execute(text("SELECT COUNT(*) FROM kg_concepts")).scalar() or 0
+
+    return stats
+
+
+def kg_explore_concept(
+    engine,
+    concept_name: str,
+    max_depth: int = 2,
+    breadth_per_step: int = 10,
+    search_all_scopes: bool = True
+):
+    """
+    Explore all facts and related concepts for a given concept.
+
+    Args:
+        engine: SQLAlchemy engine
+        concept_name: Concept to explore from
+        max_depth: How deep to traverse
+        breadth_per_step: Items per hop
+        search_all_scopes: Search all scopes
+
+    Returns:
+        Dict with 'direct_facts', 'related_concepts', 'extended_facts'
+    """
+    from sqlalchemy import text
+
+    result = {
+        'concept': concept_name,
+        'direct_facts': [],
+        'related_concepts': [],
+        'extended_facts': []
+    }
+
+    with engine.connect() as conn:
+        # Get facts directly linked to this concept
+        rows = conn.execute(text("""
+            SELECT source FROM kg_links
+            WHERE target = :concept AND type = 'fact_to_concept'
+        """), {"concept": concept_name})
+        result['direct_facts'] = [r.source for r in rows]
+
+        # Get related concepts (concept-to-concept links)
+        rows = conn.execute(text("""
+            SELECT target FROM kg_links
+            WHERE source = :concept AND type = 'concept_to_concept'
+            UNION
+            SELECT source FROM kg_links
+            WHERE target = :concept AND type = 'concept_to_concept'
+        """), {"concept": concept_name})
+        result['related_concepts'] = [r[0] for r in rows]
+
+        # Get facts from related concepts (1 hop)
+        if result['related_concepts'] and max_depth > 0:
+            placeholders = ','.join([f':c{i}' for i in range(len(result['related_concepts']))])
+            params = {f'c{i}': c for i, c in enumerate(result['related_concepts'])}
+
+            rows = conn.execute(text(f"""
+                SELECT DISTINCT source FROM kg_links
+                WHERE target IN ({placeholders}) AND type = 'fact_to_concept'
+            """), params)
+            result['extended_facts'] = [r.source for r in rows
+                                        if r.source not in result['direct_facts']]
+
+    return result