npcpy 1.1.28__py3-none-any.whl → 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. npcpy/data/audio.py +16 -38
  2. npcpy/data/image.py +29 -29
  3. npcpy/data/load.py +4 -3
  4. npcpy/data/text.py +28 -28
  5. npcpy/data/video.py +6 -6
  6. npcpy/data/web.py +49 -21
  7. npcpy/ft/__init__.py +0 -0
  8. npcpy/ft/diff.py +110 -0
  9. npcpy/ft/ge.py +115 -0
  10. npcpy/ft/memory_trainer.py +171 -0
  11. npcpy/ft/model_ensembler.py +357 -0
  12. npcpy/ft/rl.py +360 -0
  13. npcpy/ft/sft.py +248 -0
  14. npcpy/ft/usft.py +128 -0
  15. npcpy/gen/audio_gen.py +24 -0
  16. npcpy/gen/embeddings.py +13 -13
  17. npcpy/gen/image_gen.py +37 -15
  18. npcpy/gen/response.py +287 -111
  19. npcpy/gen/video_gen.py +10 -9
  20. npcpy/llm_funcs.py +447 -79
  21. npcpy/memory/command_history.py +201 -48
  22. npcpy/memory/kg_vis.py +74 -74
  23. npcpy/memory/knowledge_graph.py +482 -115
  24. npcpy/memory/memory_processor.py +81 -0
  25. npcpy/memory/search.py +70 -70
  26. npcpy/mix/debate.py +192 -3
  27. npcpy/npc_compiler.py +1541 -879
  28. npcpy/npc_sysenv.py +250 -78
  29. npcpy/serve.py +1036 -321
  30. npcpy/sql/ai_function_tools.py +257 -0
  31. npcpy/sql/database_ai_adapters.py +186 -0
  32. npcpy/sql/database_ai_functions.py +163 -0
  33. npcpy/sql/model_runner.py +19 -19
  34. npcpy/sql/npcsql.py +706 -507
  35. npcpy/sql/sql_model_compiler.py +156 -0
  36. npcpy/tools.py +20 -20
  37. npcpy/work/plan.py +8 -8
  38. npcpy/work/trigger.py +3 -3
  39. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/METADATA +169 -9
  40. npcpy-1.2.32.dist-info/RECORD +54 -0
  41. npcpy-1.1.28.dist-info/RECORD +0 -40
  42. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
  43. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
  44. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,8 @@ from npcpy.llm_funcs import (
22
22
  remove_idempotent_groups,
23
23
  zoom_in,
24
24
  )
25
- from npcpy.npc_compiler import NPC
25
+
26
+ from npcpy.memory.command_history import load_kg_from_db, save_kg_to_db
26
27
 
27
28
  def safe_kuzu_execute(conn, query, error_message="Kuzu query failed"):
28
29
  """Execute a Kuzu query with proper error handling"""
@@ -42,7 +43,7 @@ def create_group(conn, name: str, metadata: str = ""):
42
43
  return False
43
44
 
44
45
  try:
45
- # Properly escape quotes in strings
46
+
46
47
  escaped_name = name.replace('"', '\\"')
47
48
  escaped_metadata = metadata.replace('"', '\\"')
48
49
 
@@ -77,13 +78,13 @@ def init_db(db_path: str, drop=False):
77
78
  print("Database connection established successfully")
78
79
 
79
80
  if drop:
80
- # Drop tables in reverse order of dependency
81
+
81
82
  safe_kuzu_execute(conn, "DROP TABLE IF EXISTS Contains")
82
- safe_kuzu_execute(conn, "DROP TABLE IF EXISTS EvolvedFrom") # New
83
+ safe_kuzu_execute(conn, "DROP TABLE IF EXISTS EvolvedFrom")
83
84
  safe_kuzu_execute(conn, "DROP TABLE IF EXISTS Fact")
84
85
  safe_kuzu_execute(conn, "DROP TABLE IF EXISTS Groups")
85
86
 
86
- # Fact table remains the same
87
+
87
88
  safe_kuzu_execute(
88
89
  conn,
89
90
  """
@@ -97,7 +98,7 @@ def init_db(db_path: str, drop=False):
97
98
  "Failed to create Fact table",
98
99
  )
99
100
 
100
- # UPDATED Groups table with generational properties
101
+
101
102
  safe_kuzu_execute(
102
103
  conn,
103
104
  """
@@ -113,14 +114,14 @@ def init_db(db_path: str, drop=False):
113
114
  )
114
115
  print("Groups table (with generation tracking) created or already exists.")
115
116
 
116
- # Contains relationship remains the same
117
+
117
118
  safe_kuzu_execute(
118
119
  conn,
119
120
  "CREATE REL TABLE IF NOT EXISTS Contains(FROM Groups TO Fact);",
120
121
  "Failed to create Contains relationship table",
121
122
  )
122
123
 
123
- # NEW EvolvedFrom relationship table
124
+
124
125
  safe_kuzu_execute(
125
126
  conn,
126
127
  """
@@ -153,12 +154,12 @@ def find_similar_groups(
153
154
  **kwargs: Any
154
155
  ) -> List[str]:
155
156
  """Find existing groups that might contain this fact"""
156
- response = conn.execute(f"MATCH (g:Groups) RETURN g.name;") # Execute query
157
- #print(response)
158
- #print(type(response))
159
- #print(dir(response))
157
+ response = conn.execute(f"MATCH (g:Groups) RETURN g.name;")
158
+
159
+
160
+
160
161
  groups = response.fetch_as_df()
161
- #print(f"Groups: {groups}")
162
+
162
163
  if not groups:
163
164
  return []
164
165
 
@@ -189,13 +190,14 @@ def find_similar_groups(
189
190
  return response["group_list"]
190
191
 
191
192
 
192
- def kg_initial(content_text=None,
193
+ def kg_initial(content,
193
194
  model=None,
194
195
  provider=None,
195
196
  npc=None,
196
197
  context='',
197
198
  facts=None,
198
- generation=None):
199
+ generation=None,
200
+ verbose=True,):
199
201
 
200
202
  if generation is None:
201
203
  CURRENT_GENERATION = 0
@@ -205,34 +207,99 @@ def kg_initial(content_text=None,
205
207
  print(f"--- Running KG Structuring Process (Generation: {CURRENT_GENERATION}) ---")
206
208
 
207
209
  if facts is None:
208
- if not content_text:
210
+ if not content:
209
211
  raise ValueError("kg_initial requires either content_text or a list of facts.")
210
212
  print(" - Mode: Deriving new facts from text content...")
211
- facts = get_facts(content_text, model=model, provider=provider, npc=npc, context=context)
212
- for fact in facts:
213
+ all_facts = []
214
+ print(len(content))
215
+ if len(content)>10000:
216
+ # randomly sub sample 10000 characters
217
+ starting_point = random.randint(0, len(content)-10000)
218
+
219
+ content_to_sample = content[starting_point:starting_point+10000]
220
+
221
+ for n in range(len(content)//10000):
222
+ print(n)
223
+ print(starting_point)
224
+ print(content_to_sample[0:1000])
225
+ facts = get_facts(content_to_sample,
226
+ model=model,
227
+ provider=provider,
228
+ npc=npc,
229
+ context=context)
230
+ if verbose:
231
+ print(f" - Extracted {len(facts)} facts from segment {n+1}")
232
+ print(facts)
233
+ all_facts.extend(facts)
234
+ else:
235
+ print(content[0:1000] )
236
+ all_facts = get_facts(content,
237
+ model=model,
238
+ provider=provider,
239
+ npc=npc,
240
+ context=context)
241
+ if verbose:
242
+ print(f" - Extracted {len(all_facts)} facts from content")
243
+ print(all_facts)
244
+ for fact in all_facts:
245
+
213
246
  fact['generation'] = CURRENT_GENERATION
214
247
  else:
215
248
  print(f" - Mode: Building structure from {len(facts)} pre-existing facts...")
216
249
 
217
250
  print(" - Inferring implied facts (zooming in)...")
218
- implied_facts = zoom_in(facts, model=model, provider=provider, npc=npc, context=context)
219
- for fact in implied_facts:
251
+ all_implied_facts = []
252
+ if len(all_facts) > 20:
253
+ # sub sample facts randomly to generate zoomed in facts
254
+ sampled_facts = random.sample(all_facts, k=20)
255
+ for n in range(len(all_facts) // 20):
256
+ implied_facts = zoom_in(sampled_facts,
257
+ model=model,
258
+ provider=provider,
259
+ npc=npc,
260
+ context=context)
261
+ all_implied_facts.extend(implied_facts)
262
+ if verbose:
263
+ print(f" - Inferred {len(implied_facts)} implied facts from sample {n+1}")
264
+ print(implied_facts)
265
+ else:
266
+ implied_facts = zoom_in(all_facts,
267
+ model=model,
268
+ provider=provider,
269
+ npc=npc,
270
+ context=context)
271
+ print(implied_facts)
272
+
273
+ all_implied_facts.extend(implied_facts)
274
+
275
+ if verbose:
276
+ print(f" - Inferred {len(implied_facts)} implied facts from all facts")
277
+ print(implied_facts)
278
+ for fact in all_implied_facts:
220
279
  fact['generation'] = CURRENT_GENERATION
221
-
222
- all_facts = facts + implied_facts
223
-
280
+
281
+ all_facts = all_facts + all_implied_facts
282
+
224
283
  print(" - Generating concepts from all facts...")
225
- concepts = generate_groups(all_facts, model=model, provider=provider, npc=npc, context=context)
284
+ concepts = generate_groups(all_facts,
285
+ model=model,
286
+ provider=provider,
287
+ npc=npc,
288
+ context=context)
226
289
  for concept in concepts:
227
290
  concept['generation'] = CURRENT_GENERATION
228
-
291
+
292
+ if verbose:
293
+ print(f" - Generated {len(concepts)} concepts")
294
+ print(concepts)
229
295
  print(" - Linking facts to concepts...")
230
296
  fact_to_concept_links = defaultdict(list)
231
297
  concept_names = [c['name'] for c in concepts if c and 'name' in c]
232
298
  for fact in all_facts:
233
299
 
234
300
  fact_to_concept_links[fact['statement']] = get_related_concepts_multi(fact['statement'], "fact", concept_names, model, provider, npc, context)
235
- print(fact_to_concept_links[fact['statement']])
301
+ if verbose:
302
+ print(fact_to_concept_links[fact['statement']])
236
303
  print(" - Linking facts to other facts...")
237
304
  fact_to_fact_links = []
238
305
  fact_statements = [f['statement'] for f in all_facts]
@@ -249,7 +316,8 @@ def kg_initial(content_text=None,
249
316
  for related_stmt in related_fact_stmts:
250
317
 
251
318
  fact_to_fact_links.append((fact['statement'], related_stmt))
252
- print(fact['statement'], related_stmt)
319
+ if verbose:
320
+ print(fact['statement'], related_stmt)
253
321
 
254
322
  return {
255
323
  "generation": CURRENT_GENERATION,
@@ -261,9 +329,9 @@ def kg_initial(content_text=None,
261
329
  }
262
330
 
263
331
 
264
-
265
332
  def kg_evolve_incremental(existing_kg,
266
- new_content_text,
333
+ new_content_text=None,
334
+ new_facts=None,
267
335
  model = None,
268
336
  provider=None,
269
337
  npc=None,
@@ -272,18 +340,16 @@ def kg_evolve_incremental(existing_kg,
272
340
  link_concepts_facts = False,
273
341
  link_concepts_concepts=False,
274
342
  link_facts_facts = False,
275
-
276
343
  ):
277
344
 
278
345
  current_gen = existing_kg.get('generation', 0)
279
346
  next_gen = current_gen + 1
280
347
  print(f"\n--- ABSORBING INFO: Gen {current_gen} -> Gen {next_gen} ---")
281
348
 
282
- print('extracting facts...')
283
-
284
349
  newly_added_concepts = []
285
350
  concept_links = list(existing_kg.get('concept_links', []))
286
- fact_to_concept_links = defaultdict(list, existing_kg.get('fact_to_concept_links', {}))
351
+ fact_to_concept_links = defaultdict(list,
352
+ existing_kg.get('fact_to_concept_links', {}))
287
353
  fact_to_fact_links = list(existing_kg.get('fact_to_fact_links', []))
288
354
 
289
355
  existing_facts = existing_kg.get('facts', [])
@@ -291,27 +357,51 @@ def kg_evolve_incremental(existing_kg,
291
357
  existing_concept_names = {c['name'] for c in existing_concepts}
292
358
  existing_fact_statements = [f['statement'] for f in existing_facts]
293
359
  all_concept_names = list(existing_concept_names)
360
+
361
+ all_new_facts = []
362
+ print(npc, npc.model, npc.provider)
363
+
364
+ if new_facts:
365
+ all_new_facts = new_facts
366
+ print(f'using pre-approved facts: {len(all_new_facts)}')
367
+ elif new_content_text:
368
+ print('extracting facts from content...')
369
+ if len(new_content_text) > 10000:
370
+ starting_point = random.randint(0, len(new_content_text)-10000)
371
+ for n in range(len(new_content_text)//10000):
372
+ content_to_sample = new_content_text[n*10000:(n+1)*10000]
373
+ facts = get_facts(content_to_sample,
374
+ model=model,
375
+ provider=provider,
376
+ npc = npc,
377
+ context=context)
378
+ all_new_facts.extend(facts)
379
+ print(facts)
380
+ else:
381
+ all_new_facts = get_facts(new_content_text,
382
+ model=model,
383
+ provider=provider,
384
+ npc = npc,
385
+ context=context)
386
+ print(all_new_facts)
387
+ else:
388
+ print("No new content or facts provided")
389
+ return existing_kg, {}
294
390
 
391
+ for fact in all_new_facts:
392
+ fact['generation'] = next_gen
295
393
 
296
- new_facts = get_facts(new_content_text,
297
- model=model,
298
- provider=provider,
299
- npc = npc,
300
- context=context)
394
+ final_facts = existing_facts + all_new_facts
301
395
 
302
- for fact in new_facts:
303
- fact['generation'] = next_gen
304
-
305
- final_facts = existing_facts + new_facts
306
-
307
396
  if get_concepts:
308
397
  print('generating groups...')
309
398
 
310
- candidate_concepts = generate_groups(new_facts,
399
+ candidate_concepts = generate_groups(all_new_facts,
311
400
  model = model,
312
401
  provider = provider,
313
402
  npc=npc,
314
403
  context=context)
404
+ print(candidate_concepts)
315
405
  print('checking group uniqueness')
316
406
  for cand_concept in candidate_concepts:
317
407
  cand_name = cand_concept['name']
@@ -331,7 +421,6 @@ def kg_evolve_incremental(existing_kg,
331
421
  context)
332
422
  for related_name in related_concepts:
333
423
  if related_name != cand_name:
334
- # denying self
335
424
  concept_links.append((cand_name, related_name))
336
425
  all_concept_names.append(cand_name)
337
426
 
@@ -339,21 +428,26 @@ def kg_evolve_incremental(existing_kg,
339
428
 
340
429
  if link_concepts_facts:
341
430
  print('linking facts and concepts...')
342
- for fact in new_facts:
431
+ for fact in all_new_facts:
343
432
  fact_to_concept_links[fact['statement']] = get_related_concepts_multi(fact['statement'],
344
433
  "fact",
345
434
  all_concept_names,
346
- model,
347
- provider,
348
- npc,
349
- context)
435
+ model = model,
436
+ provider=provider,
437
+ npc = npc,
438
+ context= context)
350
439
  else:
351
440
  final_concepts = existing_concepts
352
441
  if link_facts_facts:
353
442
  print('linking facts and facts...')
354
443
 
355
- for new_fact in new_facts:
356
- related_fact_stmts = get_related_facts_llm(new_fact['statement'], existing_fact_statements, model, provider, context)
444
+ for new_fact in all_new_facts:
445
+ related_fact_stmts = get_related_facts_llm(new_fact['statement'],
446
+ existing_fact_statements,
447
+ model = model,
448
+ provider = provider,
449
+ npc = npc,
450
+ context=context)
357
451
  for related_stmt in related_fact_stmts:
358
452
  fact_to_fact_links.append((new_fact['statement'], related_stmt))
359
453
 
@@ -370,20 +464,25 @@ def kg_evolve_incremental(existing_kg,
370
464
 
371
465
 
372
466
 
373
- # UPGRADED KG_SLEEP_PROCESS - Now correctly uses the kg_initial framework.
374
- def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='', operations_config=None):
467
+
468
+ def kg_sleep_process(existing_kg,
469
+ model=None,
470
+ provider=None,
471
+ npc=None,
472
+ context='',
473
+ operations_config=None):
375
474
  current_gen = existing_kg.get('generation', 0)
376
475
  next_gen = current_gen + 1
377
476
  print(f"\n--- SLEEPING (Evolving Knowledge): Gen {current_gen} -> Gen {next_gen} ---")
378
477
 
379
- # Load KG components into mutable structures
478
+
380
479
  facts_map = {f['statement']: f for f in existing_kg.get('facts', [])}
381
480
  concepts_map = {c['name']: c for c in existing_kg.get('concepts', [])}
382
481
  fact_links = defaultdict(list, {k: list(v) for k, v in existing_kg.get('fact_to_concept_links', {}).items()})
383
482
  concept_links = set(tuple(sorted(link)) for link in existing_kg.get('concept_links', []))
384
483
  fact_to_fact_links = set(tuple(sorted(link)) for link in existing_kg.get('fact_to_fact_links', []))
385
484
 
386
- # --- PHASE 1: BOOTSTRAP using kg_initial ---
485
+
387
486
  print(" - Phase 1: Checking for unstructured facts...")
388
487
  facts_with_concepts = set(fact_links.keys())
389
488
  orphaned_fact_statements = list(set(facts_map.keys()) - facts_with_concepts)
@@ -392,7 +491,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
392
491
  print(f" - Found {len(orphaned_fact_statements)} orphaned facts. Applying full KG structuring process...")
393
492
  orphaned_facts_as_dicts = [facts_map[s] for s in orphaned_fact_statements]
394
493
 
395
- # USE THE REFACTORED KG_INITIAL AS THE STRUCTURING ENGINE
494
+
396
495
  new_structure = kg_initial(
397
496
  facts=orphaned_facts_as_dicts,
398
497
  model=model,
@@ -402,7 +501,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
402
501
  generation=next_gen
403
502
  )
404
503
 
405
- # Merge the newly generated structure back into the main KG
504
+
406
505
  print(" - Merging new structure into main KG...")
407
506
  for concept in new_structure.get("concepts", []):
408
507
  if concept['name'] not in concepts_map:
@@ -418,7 +517,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
418
517
  else:
419
518
  print(" - Knowledge graph is sufficiently structured. Proceeding to refinement.")
420
519
 
421
- # --- PHASE 2: REFINE ---
520
+
422
521
  if operations_config is None:
423
522
  possible_ops = ['prune', 'deepen', 'abstract_link']
424
523
  ops_to_run = random.sample(possible_ops, k=random.randint(1, 2))
@@ -428,7 +527,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
428
527
  print(f" - Phase 2: Executing refinement operations: {ops_to_run}")
429
528
 
430
529
  for op in ops_to_run:
431
- # Prune Operation
530
+
432
531
  if op == 'prune' and (len(facts_map) > 10 or len(concepts_map) > 5):
433
532
  print(" - Running 'prune' operation using consolidate_facts_llm...")
434
533
  fact_to_check = random.choice(list(facts_map.values()))
@@ -438,7 +537,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
438
537
  print(f" - Pruning redundant fact: '{fact_to_check['statement'][:80]}...'")
439
538
  del facts_map[fact_to_check['statement']]
440
539
 
441
- # Deepen Operation
540
+
442
541
  elif op == 'deepen' and facts_map:
443
542
  print(" - Running 'deepen' operation using zoom_in...")
444
543
  fact_to_deepen = random.choice(list(facts_map.values()))
@@ -454,7 +553,7 @@ def kg_sleep_process(existing_kg, model=None, provider=None, npc=None, context='
454
553
  else:
455
554
  print(f" - SKIPPED: Operation '{op}' did not run (conditions not met).")
456
555
 
457
- # Reassemble the final KG
556
+
458
557
  new_kg = {
459
558
  "generation": next_gen,
460
559
  "facts": list(facts_map.values()),
@@ -545,15 +644,15 @@ def store_fact_and_group(conn, fact: str,
545
644
  return False
546
645
 
547
646
  print(f"store_fact_and_group: Storing fact: {fact}, with groups:"
548
- f" {groups}") # DEBUG
647
+ f" {groups}")
549
648
  try:
550
- # Insert the fact
551
- insert_success = insert_fact(conn, fact, path) # Capture return
649
+
650
+ insert_success = insert_fact(conn, fact, path)
552
651
  if not insert_success:
553
652
  print(f"store_fact_and_group: Failed to insert fact: {fact}")
554
653
  return False
555
654
 
556
- # Assign fact to groups
655
+
557
656
  for group in groups:
558
657
  assign_success = assign_fact_to_group_graph(conn, fact, group)
559
658
  if not assign_success:
@@ -573,19 +672,19 @@ def insert_fact(conn, fact: str, path: str) -> bool:
573
672
  " database connection is None")
574
673
  return False
575
674
  try:
576
- # Properly escape quotes in strings
675
+
577
676
  escaped_fact = fact.replace('"', '\\"')
578
677
  escaped_path = os.path.expanduser(path).replace('"', '\\"')
579
678
 
580
- # Generate timestamp
679
+
581
680
  timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
582
681
 
583
- print(f"insert_fact: Attempting to insert fact: {fact}") #DEBUG
682
+ print(f"insert_fact: Attempting to insert fact: {fact}")
584
683
 
585
- # Begin transaction
684
+
586
685
  safe_kuzu_execute(conn, "BEGIN TRANSACTION")
587
686
 
588
- # Check if fact already exists
687
+
589
688
  check_query = f"""
590
689
  MATCH (f:Fact {{content: "{escaped_fact}"}})
591
690
  RETURN f
@@ -599,7 +698,7 @@ def insert_fact(conn, fact: str, path: str) -> bool:
599
698
  print(f"insert_fact: Error checking if fact exists: {error}")
600
699
  return False
601
700
 
602
- # Insert fact if it doesn't exist
701
+
603
702
  if not result.has_next():
604
703
  insert_query = f"""
605
704
  CREATE (f:Fact {{
@@ -617,7 +716,7 @@ def insert_fact(conn, fact: str, path: str) -> bool:
617
716
  print(f"insert_fact: Error inserting fact: {error}")
618
717
  return False
619
718
 
620
- # Commit transaction
719
+
621
720
  safe_kuzu_execute(conn, "COMMIT")
622
721
  print(f"insert_fact: Successfully inserted/found fact: {fact}")
623
722
  return True
@@ -636,14 +735,14 @@ def assign_fact_to_group_graph(conn, fact: str, group: str) -> bool:
636
735
  return False
637
736
 
638
737
  try:
639
- # Properly escape quotes in strings
738
+
640
739
  escaped_fact = fact.replace('"', '\\"')
641
740
  escaped_group = group.replace('"', '\\"')
642
741
 
643
742
  print(f"assign_fact_to_group_graph: Assigning fact: {fact} to group:"
644
- f" {group}") #DEBUG
743
+ f" {group}")
645
744
 
646
- # Check if both fact and group exist before creating relationship
745
+
647
746
  check_query = f"""
648
747
  MATCH (f:Fact {{content: "{escaped_fact}"}})
649
748
  RETURN f
@@ -670,7 +769,7 @@ def assign_fact_to_group_graph(conn, fact: str, group: str) -> bool:
670
769
  print(f"assign_fact_to_group_graph: Group not found: {group}")
671
770
  return False
672
771
 
673
- # Create relationship
772
+
674
773
  query = f"""
675
774
  MATCH (f:Fact), (g:Groups)
676
775
  WHERE f.content = "{escaped_fact}" AND g.name = "{escaped_group}"
@@ -695,26 +794,26 @@ def assign_fact_to_group_graph(conn, fact: str, group: str) -> bool:
695
794
  traceback.print_exc()
696
795
  return False
697
796
 
698
- #--- Kuzu Database integration ---
797
+
699
798
  def store_fact_and_group(conn, fact: str, groups: List[str], path: str) -> bool:
700
799
  """Insert a fact into the database along with its groups"""
701
800
  if not conn:
702
801
  print("store_fact_and_group: Database connection is None")
703
802
  return False
704
803
 
705
- print(f"store_fact_and_group: Storing fact: {fact}, with groups: {groups}") # DEBUG
804
+ print(f"store_fact_and_group: Storing fact: {fact}, with groups: {groups}")
706
805
  try:
707
- # Insert the fact
708
- insert_success = insert_fact(conn, fact, path) # Capture return value
806
+
807
+ insert_success = insert_fact(conn, fact, path)
709
808
  if not insert_success:
710
- print(f"store_fact_and_group: Failed to insert fact: {fact}") #DEBUG
809
+ print(f"store_fact_and_group: Failed to insert fact: {fact}")
711
810
  return False
712
811
 
713
- # Assign fact to groups
812
+
714
813
  for group in groups:
715
814
  assign_success = assign_fact_to_group_graph(conn, fact, group)
716
815
  if not assign_success:
717
- print(f"store_fact_and_group: Failed to assign fact {fact} to group {group}") #DEBUG
816
+ print(f"store_fact_and_group: Failed to assign fact {fact} to group {group}")
718
817
  return False
719
818
 
720
819
  return True
@@ -724,7 +823,7 @@ def store_fact_and_group(conn, fact: str, groups: List[str], path: str) -> bool:
724
823
  return False
725
824
 
726
825
 
727
- # ---Database and other helper methods---
826
+
728
827
  def safe_kuzu_execute(conn, query, error_message="Kuzu query failed"):
729
828
  """Execute a Kuzu query with proper error handling"""
730
829
  try:
@@ -763,7 +862,7 @@ def process_text_with_chroma(
763
862
  Returns:
764
863
  List of extracted facts
765
864
  """
766
- # Initialize databases
865
+
767
866
  kuzu_conn = init_db(kuzu_db_path, drop=False)
768
867
  chroma_client, chroma_collection = setup_chroma_db(
769
868
  "knowledge_graph",
@@ -771,15 +870,15 @@ def process_text_with_chroma(
771
870
  chroma_db_path
772
871
  )
773
872
 
774
- # Extract facts
873
+
775
874
  facts = extract_facts(text, model=model, provider=provider, npc=npc)
776
875
 
777
- # Process extracted facts
876
+
778
877
  for i in range(0, len(facts), batch_size):
779
878
  batch = facts[i : i + batch_size]
780
879
  print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")
781
880
 
782
- # Generate embeddings for the batch using npcpy.llm_funcs.get_embeddings
881
+
783
882
  from npcpy.llm_funcs import get_embeddings
784
883
 
785
884
  batch_embeddings = get_embeddings(
@@ -790,7 +889,7 @@ def process_text_with_chroma(
790
889
  print(f"Processing fact: {fact}")
791
890
  embedding = batch_embeddings[j]
792
891
 
793
- # Check for similar facts in Chroma before inserting
892
+
794
893
  similar_facts = find_similar_facts_chroma(
795
894
  chroma_collection, fact, query_embedding=embedding, n_results=3
796
895
  )
@@ -799,9 +898,9 @@ def process_text_with_chroma(
799
898
  print(f"Similar facts found:")
800
899
  for result in similar_facts:
801
900
  print(f" - {result['fact']} (distance: {result['distance']})")
802
- # Note: Could implement a similarity threshold here to skip highly similar facts
901
+
803
902
 
804
- # Prepare metadata
903
+
805
904
  metadata = {
806
905
  "path": path,
807
906
  "timestamp": datetime.now().isoformat(),
@@ -809,10 +908,10 @@ def process_text_with_chroma(
809
908
  "source_provider": provider,
810
909
  }
811
910
 
812
- # Insert into Kuzu graph DB
911
+
813
912
  kuzu_success = insert_fact(kuzu_conn, fact, path)
814
913
 
815
- # Insert into Chroma vector DB if Kuzu insert was successful
914
+
816
915
  if kuzu_success:
817
916
  chroma_id = store_fact_with_embedding(
818
917
  chroma_collection, fact, metadata, embedding
@@ -824,7 +923,7 @@ def process_text_with_chroma(
824
923
  else:
825
924
  print(f"Failed to save fact to Kuzu graph")
826
925
 
827
- # Close Kuzu connection
926
+
828
927
  kuzu_conn.close()
829
928
 
830
929
  return facts
@@ -853,12 +952,12 @@ def hybrid_search_with_chroma(
853
952
  Returns:
854
953
  List of dictionaries with combined results
855
954
  """
856
- # Get embedding for query using npcpy.llm_funcs.get_embeddings
955
+
857
956
  from npcpy.llm_funcs import get_embeddings
858
957
 
859
958
  query_embedding = get_embeddings([query])[0]
860
959
 
861
- # Step 1: Find similar facts using Chroma vector search
960
+
862
961
  vector_results = find_similar_facts_chroma(
863
962
  chroma_collection,
864
963
  query,
@@ -867,13 +966,13 @@ def hybrid_search_with_chroma(
867
966
  metadata_filter=metadata_filter,
868
967
  )
869
968
 
870
- # Extract just the fact texts from vector results
969
+
871
970
  vector_facts = [result["fact"] for result in vector_results]
872
971
 
873
- # Step 2: Expand context using graph relationships
972
+
874
973
  expanded_results = []
875
974
 
876
- # Add vector search results
975
+
877
976
  for result in vector_results:
878
977
  expanded_results.append(
879
978
  {
@@ -885,13 +984,13 @@ def hybrid_search_with_chroma(
885
984
  }
886
985
  )
887
986
 
888
- # For each vector-matched fact, find related facts in the graph
987
+
889
988
  for fact in vector_facts:
890
989
  try:
891
- # Safely escape fact text for Kuzu query
990
+
892
991
  escaped_fact = fact.replace('"', '\\"')
893
992
 
894
- # Find groups containing this fact
993
+
895
994
  group_result = kuzu_conn.execute(
896
995
  f"""
897
996
  MATCH (g:Groups)-[:Contains]->(f:Fact)
@@ -900,18 +999,18 @@ def hybrid_search_with_chroma(
900
999
  """
901
1000
  ).get_as_df()
902
1001
 
903
- # Extract group names
1002
+
904
1003
  fact_groups = [row["g.name"] for _, row in group_result.iterrows()]
905
1004
 
906
- # Apply group filter if provided
1005
+
907
1006
  if group_filter:
908
1007
  fact_groups = [g for g in fact_groups if g in group_filter]
909
1008
 
910
- # For each group, find other related facts
1009
+
911
1010
  for group in fact_groups:
912
1011
  escaped_group = group.replace('"', '\\"')
913
1012
 
914
- # Find facts in the same group
1013
+
915
1014
  related_facts_result = kuzu_conn.execute(
916
1015
  f"""
917
1016
  MATCH (g:Groups)-[:Contains]->(f:Fact)
@@ -921,7 +1020,7 @@ def hybrid_search_with_chroma(
921
1020
  """
922
1021
  ).get_as_df()
923
1022
 
924
- # Add these related facts to results
1023
+
925
1024
  for _, row in related_facts_result.iterrows():
926
1025
  related_fact = {
927
1026
  "fact": row["f.content"],
@@ -931,7 +1030,7 @@ def hybrid_search_with_chroma(
931
1030
  "recorded_at": row["f.recorded_at"],
932
1031
  }
933
1032
 
934
- # Avoid duplicates
1033
+
935
1034
  if not any(
936
1035
  r.get("fact") == related_fact["fact"] for r in expanded_results
937
1036
  ):
@@ -940,7 +1039,7 @@ def hybrid_search_with_chroma(
940
1039
  except Exception as e:
941
1040
  print(f"Error expanding results via graph: {e}")
942
1041
 
943
- # Return results, limiting to top_k if needed
1042
+
944
1043
  return expanded_results[:top_k]
945
1044
 
946
1045
 
@@ -964,14 +1063,14 @@ def find_similar_facts_chroma(
964
1063
  List of dictionaries with results
965
1064
  """
966
1065
  try:
967
- # Perform query with optional metadata filtering
1066
+
968
1067
  results = collection.query(
969
1068
  query_embeddings=[query_embedding],
970
1069
  n_results=n_results,
971
1070
  where=metadata_filter,
972
1071
  )
973
1072
 
974
- # Format results
1073
+
975
1074
  formatted_results = []
976
1075
  for i, doc in enumerate(results["documents"][0]):
977
1076
  formatted_results.append(
@@ -1007,12 +1106,12 @@ def store_fact_with_embedding(
1007
1106
  ID of the stored fact
1008
1107
  """
1009
1108
  try:
1010
- # Generate a deterministic ID from the fact content
1109
+
1011
1110
  import hashlib
1012
1111
 
1013
1112
  fact_id = hashlib.md5(fact.encode()).hexdigest()
1014
1113
 
1015
- # Store document with pre-generated embedding
1114
+
1016
1115
  collection.add(
1017
1116
  documents=[fact],
1018
1117
  embeddings=[embedding],
@@ -1033,7 +1132,7 @@ def save_facts_to_graph_db(
1033
1132
  batch = facts[i : i + batch_size]
1034
1133
  print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")
1035
1134
 
1036
- # Process each fact in the batch
1135
+
1037
1136
  for fact in batch:
1038
1137
  try:
1039
1138
  print(f"Inserting fact: {fact}")
@@ -1049,3 +1148,271 @@ def save_facts_to_graph_db(
1049
1148
  continue
1050
1149
 
1051
1150
  print(f"Completed batch {i//batch_size + 1}")
1151
+
1152
+
1153
+
1154
def kg_add_fact(
    engine,
    fact_text: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Add a new fact to the knowledge graph.

    Args:
        engine: Database engine handed to the load/save KG helpers.
        fact_text: The fact statement to record; also stored as its own
            source text and tagged as a manual addition.
        npc: Optional NPC whose name scopes the graph (falls back to
            'default_npc').
        team: Optional team whose name scopes the graph (falls back to
            'default_team').
        model: Unused here; kept so all kg_* tools share one signature.
        provider: Unused here; kept so all kg_* tools share one signature.

    Returns:
        Confirmation message string.
    """
    directory_path = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)

    new_fact = {
        "statement": fact_text,
        "source_text": fact_text,
        "type": "manual",
        "generation": kg_data.get('generation', 0),
        "origin": "manual_add"
    }

    # setdefault guards against a loaded KG dict that lacks a 'facts' key,
    # which would otherwise raise KeyError on append.
    kg_data.setdefault('facts', []).append(new_fact)
    save_kg_to_db(engine, kg_data, team_name, npc_name, directory_path)

    return f"Added fact: {fact_text}"
1181
+
1182
def kg_search_facts(
    engine,
    query: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Search facts in the knowledge graph.

    Performs a case-insensitive substring match of `query` against every
    stored fact statement and returns the matching statements.
    """
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    needle = query.lower()
    return [
        fact['statement']
        for fact in kg_data.get('facts', [])
        if needle in fact['statement'].lower()
    ]
1203
+
1204
def kg_remove_fact(
    engine,
    fact_text: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Remove a fact from the knowledge graph.

    Drops every stored fact whose statement equals `fact_text` exactly and
    persists the graph only when at least one fact was removed.
    """
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    existing = kg_data.get('facts', [])
    kept = [fact for fact in existing if fact['statement'] != fact_text]
    removed = len(existing) - len(kept)
    kg_data['facts'] = kept

    if removed == 0:
        return "No matching facts found"

    save_kg_to_db(engine, kg_data, team_name, npc_name, cwd)
    return f"Removed {removed} matching fact(s)"
1228
+
1229
def kg_list_concepts(
    engine,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """List the names of all concepts stored in the knowledge graph."""
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    return [concept['name'] for concept in kg_data.get('concepts', [])]
1245
+
1246
def kg_get_facts_for_concept(
    engine,
    concept_name: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Get all facts linked to a specific concept.

    Scans the fact->concept link table and returns the statement of every
    fact whose linked-concept list contains `concept_name`.
    """
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    links = kg_data.get('fact_to_concept_links', {})
    return [
        statement
        for statement, concepts in links.items()
        if concept_name in concepts
    ]
1269
+
1270
def kg_add_concept(
    engine,
    concept_name: str,
    concept_description: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Add a new concept to the knowledge graph.

    Args:
        engine: Database engine handed to the load/save KG helpers.
        concept_name: Name of the concept to store.
        concept_description: Free-text description of the concept.
        npc: Optional NPC whose name scopes the graph (falls back to
            'default_npc').
        team: Optional team whose name scopes the graph (falls back to
            'default_team').
        model: Unused here; kept so all kg_* tools share one signature.
        provider: Unused here; kept so all kg_* tools share one signature.

    Returns:
        Confirmation message string.
    """
    directory_path = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)

    new_concept = {
        "name": concept_name,
        "description": concept_description,
        "generation": kg_data.get('generation', 0)
    }

    # setdefault guards against a loaded KG dict that lacks a 'concepts'
    # key, which would otherwise raise KeyError on append.
    kg_data.setdefault('concepts', []).append(new_concept)
    save_kg_to_db(engine, kg_data, team_name, npc_name, directory_path)

    return f"Added concept: {concept_name}"
1296
+
1297
def kg_remove_concept(
    engine,
    concept_name: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Remove a concept from the knowledge graph.

    Drops every stored concept whose name equals `concept_name` exactly and
    persists the graph only when something was actually removed.
    """
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    existing = kg_data.get('concepts', [])
    kept = [concept for concept in existing if concept['name'] != concept_name]
    removed = len(existing) - len(kept)
    kg_data['concepts'] = kept

    if removed == 0:
        return "Concept not found"

    save_kg_to_db(engine, kg_data, team_name, npc_name, cwd)
    return f"Removed concept: {concept_name}"
1321
+
1322
def kg_link_fact_to_concept(
    engine,
    fact_text: str,
    concept_name: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Link a fact to a concept in the knowledge graph.

    Adds `concept_name` to the list of concepts linked to `fact_text`,
    creating the link entry if necessary; saves only when a new link was
    actually added.
    """
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    links = kg_data.get('fact_to_concept_links', {})
    concepts_for_fact = links.setdefault(fact_text, [])

    if concept_name in concepts_for_fact:
        return "Fact already linked to concept"

    concepts_for_fact.append(concept_name)
    kg_data['fact_to_concept_links'] = links
    save_kg_to_db(engine, kg_data, team_name, npc_name, cwd)
    return f"Linked fact '{fact_text}' to concept '{concept_name}'"
1350
+
1351
def kg_get_all_facts(
    engine,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Get the statements of every fact stored in the knowledge graph."""
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    return [entry['statement'] for entry in kg_data.get('facts', [])]
1367
+
1368
def kg_get_stats(
    engine,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Get summary statistics (counts and generation) for the knowledge graph."""
    cwd = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, cwd)

    facts = kg_data.get('facts', [])
    concepts = kg_data.get('concepts', [])
    links = kg_data.get('fact_to_concept_links', {})

    return {
        "total_facts": len(facts),
        "total_concepts": len(concepts),
        "total_fact_concept_links": len(links),
        "generation": kg_data.get('generation', 0)
    }
1388
+
1389
def kg_evolve_knowledge(
    engine,
    content_text: str,
    npc=None,
    team=None,
    model=None,
    provider=None
):
    """Evolve the knowledge graph with new content.

    Loads the current KG, runs an incremental evolution pass over
    `content_text`, and persists the evolved graph.

    Args:
        engine: Database engine handed to the load/save KG helpers.
        content_text: New text to fold into the knowledge graph.
        npc: Optional NPC; its name scopes the graph, and its model/provider
            are preferred for the evolution pass when set.
        team: Optional team whose name scopes the graph.
        model: Fallback LLM model when the NPC does not supply one.
        provider: Fallback LLM provider when the NPC does not supply one.

    Returns:
        Confirmation message string.
    """
    directory_path = os.getcwd()
    team_name = getattr(team, 'name', 'default_team') if team else 'default_team'
    npc_name = npc.name if npc else 'default_npc'

    kg_data = load_kg_from_db(engine, team_name, npc_name, directory_path)

    # Prefer the NPC's model/provider, but fall back to the explicit
    # arguments when the NPC has none set — previously an NPC with
    # model=None silently discarded a caller-supplied model.
    resolved_model = npc.model if npc and npc.model else model
    resolved_provider = npc.provider if npc and npc.provider else provider

    evolved_kg, _ = kg_evolve_incremental(
        existing_kg=kg_data,
        new_content_text=content_text,
        model=resolved_model,
        provider=resolved_provider,
        npc=npc,
        get_concepts=True,
        link_concepts_facts=False,
        link_concepts_concepts=False,
        link_facts_facts=False
    )

    save_kg_to_db(engine, evolved_kg, team_name, npc_name, directory_path)

    return "Knowledge graph evolved with new content"