npcsh 0.3.32__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. npcsh/_state.py +942 -0
  2. npcsh/alicanto.py +1074 -0
  3. npcsh/guac.py +785 -0
  4. npcsh/mcp_helpers.py +357 -0
  5. npcsh/mcp_npcsh.py +822 -0
  6. npcsh/mcp_server.py +184 -0
  7. npcsh/npc.py +218 -0
  8. npcsh/npcsh.py +1161 -0
  9. npcsh/plonk.py +387 -269
  10. npcsh/pti.py +234 -0
  11. npcsh/routes.py +958 -0
  12. npcsh/spool.py +315 -0
  13. npcsh/wander.py +550 -0
  14. npcsh/yap.py +573 -0
  15. npcsh-1.0.1.dist-info/METADATA +596 -0
  16. npcsh-1.0.1.dist-info/RECORD +21 -0
  17. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/WHEEL +1 -1
  18. npcsh-1.0.1.dist-info/entry_points.txt +9 -0
  19. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/licenses/LICENSE +1 -1
  20. npcsh/audio.py +0 -569
  21. npcsh/audio_gen.py +0 -1
  22. npcsh/cli.py +0 -543
  23. npcsh/command_history.py +0 -566
  24. npcsh/conversation.py +0 -54
  25. npcsh/data_models.py +0 -46
  26. npcsh/dataframes.py +0 -171
  27. npcsh/embeddings.py +0 -168
  28. npcsh/helpers.py +0 -646
  29. npcsh/image.py +0 -298
  30. npcsh/image_gen.py +0 -79
  31. npcsh/knowledge_graph.py +0 -1006
  32. npcsh/llm_funcs.py +0 -2195
  33. npcsh/load_data.py +0 -83
  34. npcsh/main.py +0 -5
  35. npcsh/model_runner.py +0 -189
  36. npcsh/npc_compiler.py +0 -2879
  37. npcsh/npc_sysenv.py +0 -388
  38. npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
  39. npcsh/npc_team/corca.npc +0 -13
  40. npcsh/npc_team/foreman.npc +0 -7
  41. npcsh/npc_team/npcsh.ctx +0 -11
  42. npcsh/npc_team/sibiji.npc +0 -4
  43. npcsh/npc_team/templates/analytics/celona.npc +0 -0
  44. npcsh/npc_team/templates/hr_support/raone.npc +0 -0
  45. npcsh/npc_team/templates/humanities/eriane.npc +0 -4
  46. npcsh/npc_team/templates/it_support/lineru.npc +0 -0
  47. npcsh/npc_team/templates/marketing/slean.npc +0 -4
  48. npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
  49. npcsh/npc_team/templates/sales/turnic.npc +0 -4
  50. npcsh/npc_team/templates/software/welxor.npc +0 -0
  51. npcsh/npc_team/tools/bash_executer.tool +0 -32
  52. npcsh/npc_team/tools/calculator.tool +0 -8
  53. npcsh/npc_team/tools/code_executor.tool +0 -16
  54. npcsh/npc_team/tools/generic_search.tool +0 -27
  55. npcsh/npc_team/tools/image_generation.tool +0 -25
  56. npcsh/npc_team/tools/local_search.tool +0 -149
  57. npcsh/npc_team/tools/npcsh_executor.tool +0 -9
  58. npcsh/npc_team/tools/screen_cap.tool +0 -27
  59. npcsh/npc_team/tools/sql_executor.tool +0 -26
  60. npcsh/response.py +0 -272
  61. npcsh/search.py +0 -252
  62. npcsh/serve.py +0 -1467
  63. npcsh/shell.py +0 -524
  64. npcsh/shell_helpers.py +0 -3919
  65. npcsh/stream.py +0 -233
  66. npcsh/video.py +0 -52
  67. npcsh/video_gen.py +0 -69
  68. npcsh-0.3.32.data/data/npcsh/npc_team/bash_executer.tool +0 -32
  69. npcsh-0.3.32.data/data/npcsh/npc_team/calculator.tool +0 -8
  70. npcsh-0.3.32.data/data/npcsh/npc_team/celona.npc +0 -0
  71. npcsh-0.3.32.data/data/npcsh/npc_team/code_executor.tool +0 -16
  72. npcsh-0.3.32.data/data/npcsh/npc_team/corca.npc +0 -13
  73. npcsh-0.3.32.data/data/npcsh/npc_team/eriane.npc +0 -4
  74. npcsh-0.3.32.data/data/npcsh/npc_team/foreman.npc +0 -7
  75. npcsh-0.3.32.data/data/npcsh/npc_team/generic_search.tool +0 -27
  76. npcsh-0.3.32.data/data/npcsh/npc_team/image_generation.tool +0 -25
  77. npcsh-0.3.32.data/data/npcsh/npc_team/lineru.npc +0 -0
  78. npcsh-0.3.32.data/data/npcsh/npc_team/local_search.tool +0 -149
  79. npcsh-0.3.32.data/data/npcsh/npc_team/maurawa.npc +0 -0
  80. npcsh-0.3.32.data/data/npcsh/npc_team/npcsh.ctx +0 -11
  81. npcsh-0.3.32.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
  82. npcsh-0.3.32.data/data/npcsh/npc_team/raone.npc +0 -0
  83. npcsh-0.3.32.data/data/npcsh/npc_team/screen_cap.tool +0 -27
  84. npcsh-0.3.32.data/data/npcsh/npc_team/sibiji.npc +0 -4
  85. npcsh-0.3.32.data/data/npcsh/npc_team/slean.npc +0 -4
  86. npcsh-0.3.32.data/data/npcsh/npc_team/sql_executor.tool +0 -26
  87. npcsh-0.3.32.data/data/npcsh/npc_team/test_pipeline.py +0 -181
  88. npcsh-0.3.32.data/data/npcsh/npc_team/turnic.npc +0 -4
  89. npcsh-0.3.32.data/data/npcsh/npc_team/welxor.npc +0 -0
  90. npcsh-0.3.32.dist-info/METADATA +0 -779
  91. npcsh-0.3.32.dist-info/RECORD +0 -78
  92. npcsh-0.3.32.dist-info/entry_points.txt +0 -3
  93. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/top_level.txt +0 -0
npcsh/knowledge_graph.py DELETED
@@ -1,1006 +0,0 @@
1
- import json
2
- import os
3
- import datetime
4
-
5
- import numpy as np
6
-
7
- try:
8
- import kuzu
9
- except ModuleNotFoundError:
10
- print("kuzu not installed")
11
- from typing import Optional, Dict, List, Union, Tuple
12
-
13
-
14
- from npcsh.llm_funcs import get_llm_response, get_embeddings
15
- from npcsh.npc_compiler import NPC
16
- import sqlite3
17
-
18
-
19
def safe_kuzu_execute(conn, query, error_message="Kuzu query failed"):
    """Run *query* on *conn* without letting exceptions escape.

    Returns a ``(result, error)`` pair: ``(result, None)`` on success,
    ``(None, message)`` on failure. The error message is also printed so
    failures are visible even when callers ignore the second element.
    """
    try:
        return conn.execute(query), None
    except Exception as exc:
        failure = f"{error_message}: {str(exc)}"
        print(failure)
        return None, failure
28
-
29
-
30
def create_group(conn: kuzu.Connection, name: str, metadata: str = ""):
    """Create a new group in the database with robust error handling.

    Returns True when the Groups node was created, False on any failure
    (including a missing connection). Quotes in the inputs are escaped
    before being interpolated into the Cypher statement.
    """
    if conn is None:
        print("Cannot create group: database connection is None")
        return False

    try:
        # Escape embedded double quotes so the interpolated query stays valid.
        quoted_name = name.replace('"', '\\"')
        quoted_meta = metadata.replace('"', '\\"')

        create_stmt = f"""
        CREATE (g:Groups {{
            name: "{quoted_name}",
            metadata: "{quoted_meta}"
        }});
        """

        _, failure = safe_kuzu_execute(
            conn, create_stmt, f"Failed to create group: {name}"
        )
        if failure:
            return False

        print(f"Created group: {name}")
        return True
    except Exception as exc:
        print(f"Error creating group {name}: {str(exc)}")
        traceback.print_exc()
        return False
60
-
61
-
62
- import traceback
63
-
64
-
65
def init_db(db_path: str, drop=False):
    """Initialize Kùzu database and create schema with robust error handling.

    Opens (creating if necessary) the database at *db_path*, optionally
    dropping existing tables first, then ensures the Fact / Groups node
    tables and the Contains relationship table exist.

    Returns the live ``kuzu.Connection`` on success, ``None`` on failure.
    """
    try:
        # Make sure the parent directory exists before Kuzu touches the path.
        os.makedirs(os.path.dirname(os.path.abspath(db_path)), exist_ok=True)

        try:
            database = kuzu.Database(db_path)
            connection = kuzu.Connection(database)
            print("Database connection established successfully")
        except Exception as exc:
            print(f"Failed to connect to database: {str(exc)}")
            traceback.print_exc()
            return None

        # Drop tables if requested; relationship table first, since it
        # references the node tables.
        if drop:
            for drop_stmt in (
                "DROP REL TABLE IF EXISTS Contains",
                "DROP NODE TABLE IF EXISTS Fact",
                "DROP NODE TABLE IF EXISTS Groups",
            ):
                safe_kuzu_execute(connection, drop_stmt)

        # (DDL, error message, success message) triples — executed in order.
        schema_statements = (
            (
                """
                CREATE NODE TABLE IF NOT EXISTS Fact(
                    content STRING,
                    path STRING,
                    recorded_at STRING,
                    PRIMARY KEY (content)
                );
                """,
                "Failed to create Fact table",
                "Fact table created or already exists.",
            ),
            (
                """
                CREATE NODE TABLE IF NOT EXISTS Groups(
                    name STRING,
                    metadata STRING,
                    PRIMARY KEY (name)
                );
                """,
                "Failed to create Groups table",
                "Groups table created or already exists.",
            ),
            (
                """
                CREATE REL TABLE IF NOT EXISTS Contains(
                    FROM Groups TO Fact
                );
                """,
                "Failed to create Contains relationship table",
                "Contains relationship table created or already exists.",
            ),
        )
        for ddl, failure_msg, success_msg in schema_statements:
            safe_kuzu_execute(connection, ddl, failure_msg)
            print(success_msg)

        return connection
    except Exception as exc:
        print(f"Fatal error initializing database: {str(exc)}")
        traceback.print_exc()
        return None
129
-
130
-
131
def extract_facts(
    text: str, model: str = "llama3.2", provider: str = "ollama", npc: NPC = None
) -> List:
    """Extract facts from text using LLM.

    Args:
        text: Raw input text to mine for personality-related facts.
        model: LLM model name.
        provider: LLM provider name.
        npc: Optional NPC instance forwarded to the LLM call.

    Returns:
        A list of fact strings parsed from the model's JSON response
        (key ``"fact_list"``).
    """
    prompt = """Extract facts from this text.
        A fact is a piece of information that makes a statement about the world.
        A fact is typically a sentence that is true or false.
        Facts may be simple or complex. They can also be conflicting with each other, usually
        because there is some hidden context that is not mentioned in the text.
        In any case, it is simply your job to extract a list of facts that could pertain to
        an individual's personality.
        For example, if a user says :
        "since I am a doctor I am often trying to think up new ways to help people.
        Can you help me set up a new kind of software to help with that?"
        You might extract the following facts:
        - The user is a doctor
        - The user is helpful

        Another example:
        "I am a software engineer who loves to play video games. I am also a huge fan of the
        Star Wars franchise and I am a member of the 501st Legion."
        You might extract the following facts:
        - The user is a software engineer
        - The user loves to play video games
        - The user is a huge fan of the Star Wars franchise
        - The user is a member of the 501st Legion

        Thus, it is your mission to reliably extract lists of facts.


        Return a JSON object with the following structure:

        {{
            "fact_list": "a list containing the facts where each fact is a string",
        }}


        Return only the JSON object.
        Do not include any additional markdown formatting.

    """

    response = get_llm_response(
        prompt + f"\n\nText: {text}",
        model=model,
        provider=provider,
        format="json",
        # BUG FIX: `npc` was accepted but never forwarded; every sibling
        # extraction helper in this module passes it through.
        npc=npc,
    )
    response = response["response"]
    print(response)
    return response["fact_list"]
182
-
183
-
184
def find_similar_groups(
    conn: kuzu.Connection,
    fact: str,  # Ensure fact is passed as a string
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> List[str]:
    """Find existing groups that might contain this fact.

    Queries all Groups node names from the graph, then asks the LLM which
    of them the fact belongs to.

    Returns:
        List of matching group names (possibly empty).
    """
    response = conn.execute(f"MATCH (g:Groups) RETURN g.name;")
    # BUG FIX: fetch_as_df() returns a pandas DataFrame. A DataFrame is
    # neither truth-testable (`if not groups:` raises ValueError) nor
    # JSON-serializable, so extract the plain list of names first.
    groups_df = response.fetch_as_df()
    groups = [row["g.name"] for _, row in groups_df.iterrows()]
    print(f"Groups: {groups}")
    if not groups:
        return []

    prompt = """Given a fact and a list of groups, determine which groups this fact belongs to.
    A fact should belong to a group if it is semantically related to the group's theme or purpose.
    For example, if a fact is "The user loves programming" and there's a group called "Technical_Interests",
    that would be a match.

    Return a JSON object with the following structure:
    {
        "group_list": "a list containing the names of matching groups"
    }

    Return only the JSON object.
    Do not include any additional markdown formatting.
    """

    response = get_llm_response(
        prompt + f"\n\nFact: {fact}\nGroups: {json.dumps(groups)}",
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    response = response["response"]
    return response["group_list"]
224
-
225
-
226
def identify_groups(
    facts: List[str],
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> List[str]:
    """Identify natural groups from a list of facts.

    Asks the LLM to propose plain-language group names that organize the
    given facts, and returns the list found under ``"groups"`` in the
    model's JSON response.
    """
    prompt = """What are the main groups these facts could be organized into?
    Express these groups in plain, natural language.

    For example, given:
    - User enjoys programming in Python
    - User works on machine learning projects
    - User likes to play piano
    - User practices meditation daily

    You might identify groups like:
    - Programming
    - Machine Learning
    - Musical Interests
    - Daily Practices

    Return a JSON object with the following structure:
    `{
        "groups": ["list of group names"]
    }`


    Return only the JSON object. Do not include any additional markdown formatting or
    leading json characters.
    """

    full_prompt = prompt + f"\n\nFacts: {json.dumps(facts)}"
    llm_out = get_llm_response(
        full_prompt,
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    return llm_out["response"]["groups"]
266
-
267
-
268
def assign_to_groups(
    fact: str,
    groups: List[str],
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> Dict[str, List[str]]:
    """Assign a single fact to any of the identified groups.

    The fact may be assigned to several groups at once. Returns the
    parsed JSON response (expected key: ``"groups"``).
    """
    # NOTE: the prompt text (including its "facT" casing) is reproduced
    # exactly as originally written — it is runtime model input.
    prompt = f"""Given this fact, assign it to any relevant groups.

    A fact can belong to multiple groups if it fits.

    Here is the facT: {fact}

    Here are the groups: {groups}

    Return a JSON object with the following structure:
    {{
        "groups": ["list of group names"]
    }}

    Do not include any additional markdown formatting or leading json characters.


    """

    llm_out = get_llm_response(
        prompt,
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    return llm_out["response"]
302
-
303
-
304
def insert_fact(conn: kuzu.Connection, fact: str, path: str) -> bool:
    """Insert a Fact node into the Kuzu graph, deduplicating on content.

    Runs inside an explicit transaction: the fact is created only when no
    Fact node with identical content already exists (content is the
    table's primary key). Returns True on success — including the
    already-present case — and False on any failure.

    Args:
        conn: Live Kuzu connection (may be None; handled gracefully).
        fact: Fact text; becomes the node's content / primary key.
        path: Source path; ``~`` is expanded before storage.
    """
    if conn is None:
        print("Cannot insert fact: database connection is None")
        return False

    try:
        # Properly escape quotes in strings
        escaped_fact = fact.replace('"', '\\"')
        escaped_path = os.path.expanduser(path).replace('"', '\\"')

        # Generate timestamp
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Begin transaction
        safe_kuzu_execute(conn, "BEGIN TRANSACTION")

        # Check if fact already exists
        check_query = f"""
        MATCH (f:Fact {{content: "{escaped_fact}"}})
        RETURN f
        """

        result, error = safe_kuzu_execute(
            conn, check_query, "Failed to check if fact exists"
        )
        if error:
            # Abandon the transaction on a failed existence check.
            safe_kuzu_execute(conn, "ROLLBACK")
            return False

        # Insert fact if it doesn't exist (has_next() == a matching node
        # was found by the check query).
        if not result.has_next():
            insert_query = f"""
            CREATE (f:Fact {{
                content: "{escaped_fact}",
                path: "{escaped_path}",
                recorded_at: "{timestamp}"
            }})
            """

            result, error = safe_kuzu_execute(
                conn, insert_query, "Failed to insert fact"
            )
            if error:
                safe_kuzu_execute(conn, "ROLLBACK")
                return False

        # Commit transaction
        safe_kuzu_execute(conn, "COMMIT")
        return True
    except Exception as e:
        print(f"Error inserting fact: {str(e)}")
        traceback.print_exc()
        # Best-effort rollback; safe_kuzu_execute swallows its own errors.
        safe_kuzu_execute(conn, "ROLLBACK")
        return False
359
-
360
-
361
def assign_fact_to_group(conn: kuzu.Connection, fact: str, group: str) -> bool:
    """Create a Contains relationship from *group* to *fact*.

    Both endpoints must already exist; each is verified with a lookup
    before the edge is created. Returns True on success, False when
    either node is missing or any query fails.
    """
    if conn is None:
        print("Cannot assign fact to group: database connection is None")
        return False

    try:
        # Escape embedded double quotes for safe query interpolation.
        fact_quoted = fact.replace('"', '\\"')
        group_quoted = group.replace('"', '\\"')

        # (lookup query, query-error message, not-found message) for each
        # endpoint, checked in order: fact first, then group.
        existence_checks = (
            (
                f"""
        MATCH (f:Fact {{content: "{fact_quoted}"}})
        RETURN f
        """,
                "Failed to check if fact exists",
                f"Fact not found: {fact}",
            ),
            (
                f"""
        MATCH (g:Groups {{name: "{group_quoted}"}})
        RETURN g
        """,
                "Failed to check if group exists",
                f"Group not found: {group}",
            ),
        )
        for lookup, err_msg, missing_msg in existence_checks:
            found, failure = safe_kuzu_execute(conn, lookup, err_msg)
            if failure or not found.has_next():
                print(missing_msg)
                return False

        # Both endpoints exist — create the edge.
        link_query = f"""
        MATCH (f:Fact), (g:Groups)
        WHERE f.content = "{fact_quoted}" AND g.name = "{group_quoted}"
        CREATE (g)-[:Contains]->(f)
        """

        _, failure = safe_kuzu_execute(
            conn, link_query, f"Failed to assign fact to group: {group}"
        )
        if failure:
            return False

        print(f"Assigned fact to group: {group}")
        return True
    except Exception as exc:
        print(f"Error assigning fact to group: {str(exc)}")
        traceback.print_exc()
        return False
416
-
417
-
418
def save_facts_to_db(
    conn: kuzu.Connection, facts: List[str], path: str, batch_size: int
):
    """Save a list of facts to the database in batches.

    Each fact is inserted individually via :func:`insert_fact`; failures
    are logged and skipped so one bad fact never aborts the batch.
    """
    for start in range(0, len(facts), batch_size):
        batch = facts[start : start + batch_size]
        batch_number = start // batch_size + 1
        print(f"\nProcessing batch {batch_number} ({len(batch)} facts)")

        for fact in batch:
            try:
                print(f"Inserting fact: {fact}")
                print(f"With path: {path}")
                stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print(f"With recorded_at: {stamp}")

                insert_fact(conn, fact, path)
                print("Success!")
            except Exception as err:
                print(f"Failed to insert fact: {fact}")
                print(f"Error: {err}")
                continue

        print(f"Completed batch {batch_number}")
442
-
443
-
444
def process_text(
    db_path: str,
    text: str,
    path: str,
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
    batch_size: int = 5,
    conn=None,
):
    """Process text and add extracted facts to the database.

    Extracts facts from *text* with the LLM and inserts them into the Kuzu
    graph in batches.

    Args:
        db_path: Kuzu database path (used only when *conn* is not given).
        text: Input text to mine for facts.
        path: Source path stored with each fact.
        model / provider: LLM selection for fact extraction.
        npc: Optional NPC instance forwarded to the LLM.
        batch_size: Facts per insertion batch.
        conn: Optional pre-opened Kuzu connection.

    Returns:
        The list of extracted facts ([] on any failure).
    """
    try:
        if conn is None:
            conn = init_db(db_path, drop=False)
            # BUG FIX: the original returned [] unconditionally right after
            # opening the connection, so no facts were ever processed when
            # no connection was supplied. Bail out only when init failed.
            if conn is None:
                print("Failed to initialize database")
                return []

        # Extract facts
        facts = extract_facts(text, model=model, provider=provider, npc=npc)
        if not facts:
            print("No facts extracted")
            return []

        print(f"Extracted {len(facts)} facts")
        for fact in facts:
            print(f"- {fact}")

        # Process facts in batches
        for i in range(0, len(facts), batch_size):
            batch = facts[i : i + batch_size]
            print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")

            for fact in batch:
                try:
                    print(f"Inserting fact: {fact}")
                    success = insert_fact(conn, fact, path)
                    if success:
                        print("Success!")
                    else:
                        print("Failed to insert fact")
                except Exception as e:
                    print(f"Error processing fact: {str(e)}")
                    traceback.print_exc()

            print(f"Completed batch {i//batch_size + 1}")

        return facts
    except Exception as e:
        print(f"Error processing text: {str(e)}")
        traceback.print_exc()
        return []
497
-
498
-
499
- import networkx as nx
500
- import matplotlib.pyplot as plt
501
-
502
-
503
def visualize_graph(conn):
    """Visualize the knowledge graph using networkx.

    Reads all Fact and Groups nodes plus their Contains relationships
    from the Kuzu connection, draws them with matplotlib (groups green,
    facts blue), prints summary statistics, and shows the figure.
    Blocking: ends with ``plt.show()``.
    """
    # Create a networkx graph
    G = nx.DiGraph()

    # Get all facts and groups with their relationships
    facts_result = conn.execute("MATCH (f:Fact) RETURN f.content;").get_as_df()
    facts = [row["f.content"] for index, row in facts_result.iterrows()]

    groups_result = conn.execute("MATCH (g:Groups) RETURN g.name;").get_as_df()
    groups = [row["g.name"] for index, row in groups_result.iterrows()]

    relationships_result = conn.execute(
        """
        MATCH (g:Groups)-[r:Contains]->(f:Fact)
        RETURN g.name, f.content;
        """
    ).get_as_df()

    # Add nodes with different colors for facts and groups
    for fact in facts:
        G.add_node(fact, node_type="fact")
    for group in groups:
        G.add_node(group, node_type="group")

    # Add edges from relationships
    for index, row in relationships_result.iterrows():
        G.add_edge(row["g.name"], row["f.content"])  # group name -> fact content

    # Set up the visualization
    plt.figure(figsize=(20, 12))
    pos = nx.spring_layout(G, k=2, iterations=50)

    # Draw groups (larger nodes, distinct color)
    group_nodes = [
        n for n, attr in G.nodes(data=True) if attr.get("node_type") == "group"
    ]
    nx.draw_networkx_nodes(
        G, pos, nodelist=group_nodes, node_color="lightgreen", node_size=3000, alpha=0.7
    )

    # Draw facts (smaller nodes, different color)
    fact_nodes = [
        n for n, attr in G.nodes(data=True) if attr.get("node_type") == "fact"
    ]
    nx.draw_networkx_nodes(
        G, pos, nodelist=fact_nodes, node_color="lightblue", node_size=2000, alpha=0.5
    )

    # Draw edges with arrows
    nx.draw_networkx_edges(G, pos, edge_color="gray", arrows=True, arrowsize=20)

    # Add labels with different sizes for groups and facts;
    # fact labels are truncated to 50 chars to keep the plot readable.
    group_labels = {node: node for node in group_nodes}
    fact_labels = {
        node: node[:50] + "..." if len(node) > 50 else node for node in fact_nodes
    }

    nx.draw_networkx_labels(G, pos, group_labels, font_size=10, font_weight="bold")
    nx.draw_networkx_labels(G, pos, fact_labels, font_size=8)

    plt.title("Knowledge Graph: Groups and Facts", pad=20, fontsize=16)
    plt.axis("off")
    plt.tight_layout()

    # Print statistics
    print("\nKnowledge Graph Statistics:")
    print(f"Number of facts: {len(facts)}")
    print(f"Number of groups: {len(groups)}")
    print(f"Number of relationships: {len(relationships_result)}")

    # Per-group breakdown, filtered from the relationship DataFrame.
    print("\nGroups:")
    for g in groups:
        related_facts = [
            row["f.content"]
            for index, row in relationships_result.iterrows()
            if row["g.name"] == g
        ]
        print(f"\n{g}:")
        for f in related_facts:
            print(f"  - {f}")

    plt.show()
586
-
587
-
588
- try:
589
- import chromadb
590
- except ModuleNotFoundError:
591
- print("chromadb not installed")
592
- import numpy as np
593
- import os
594
- import datetime
595
- from typing import Optional, Dict, List, Union, Tuple
596
-
597
-
598
def setup_chroma_db(db_path: str):
    """Open (or create) the persistent Chroma facts collection.

    No default embedding function is attached to the collection — callers
    always supply embeddings explicitly via get_embeddings.

    Returns:
        (client, collection) tuple.

    Raises:
        Re-raises any setup failure after logging it.
    """
    try:
        chroma_client = chromadb.PersistentClient(path=db_path)

        try:
            # Reuse the collection when it already exists.
            facts_collection = chroma_client.get_collection("facts_collection")
            print("Connected to existing facts collection")
        except ValueError:
            # First run: create the collection without an embedding function;
            # embeddings are provided manually on every add/query.
            facts_collection = chroma_client.create_collection(
                name="facts_collection",
                metadata={"description": "Facts extracted from various sources"},
            )
            print("Created new facts collection")

        return chroma_client, facts_collection
    except Exception as exc:
        print(f"Error setting up Chroma DB: {exc}")
        raise
621
-
622
-
623
def store_fact_with_embedding(
    collection, fact: str, metadata: dict, embedding: List[float]
) -> str:
    """Store a fact with its pre-generated embedding in Chroma DB.

    Args:
        collection: Chroma collection
        fact: The fact text
        metadata: Dictionary with metadata (path, source, timestamp, etc.)
        embedding: Pre-generated embedding vector from get_embeddings

    Returns:
        ID of the stored fact, or None on failure.
    """
    try:
        import hashlib

        # Deterministic ID: same fact text always maps to the same entry.
        doc_id = hashlib.md5(fact.encode()).hexdigest()

        collection.add(
            documents=[fact],
            embeddings=[embedding],
            metadatas=[metadata],
            ids=[doc_id],
        )
        return doc_id
    except Exception as exc:
        print(f"Error storing fact in Chroma: {exc}")
        return None
655
-
656
-
657
def find_similar_facts_chroma(
    collection,
    query: str,
    query_embedding: List[float],
    n_results: int = 5,
    metadata_filter: Optional[Dict] = None,
) -> List[Dict]:
    """Find facts similar to the query using a pre-generated embedding.

    Args:
        collection: Chroma collection
        query: Query text (for reference only)
        query_embedding: Pre-generated embedding from get_embeddings
        n_results: Number of results to return
        metadata_filter: Optional filter for metadata fields

    Returns:
        List of dicts with keys fact / metadata / id / distance
        ([] on any failure).
    """
    try:
        raw = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            where=metadata_filter,
        )

        # Flatten the first (only) query's result columns into dicts.
        has_distances = "distances" in raw
        hits = []
        for idx, doc in enumerate(raw["documents"][0]):
            hits.append(
                {
                    "fact": doc,
                    "metadata": raw["metadatas"][0][idx],
                    "id": raw["ids"][0][idx],
                    "distance": raw["distances"][0][idx] if has_distances else None,
                }
            )
        return hits
    except Exception as exc:
        print(f"Error searching in Chroma: {exc}")
        return []
702
-
703
-
704
def process_text_with_chroma(
    kuzu_db_path: str,
    chroma_db_path: str,
    text: str,
    path: str,
    npc: NPC = None,
    batch_size: int = 5,
    model: str = "llama3.2",
    provider: str = "ollama",
):
    """Process text and store facts in both Kuzu and Chroma DB.

    Args:
        kuzu_db_path: Path to Kuzu graph database
        chroma_db_path: Path to Chroma vector database
        text: Input text to process
        path: Source path or identifier
        npc: Optional NPC instance
        batch_size: Batch size for processing
        model: LLM model to use (BUG FIX: was referenced in the body but
            missing from the signature, causing a NameError at runtime)
        provider: LLM provider (BUG FIX: same as *model*)

    Returns:
        List of extracted facts
    """
    # Initialize databases
    kuzu_conn = init_db(kuzu_db_path, drop=False)
    chroma_client, chroma_collection = setup_chroma_db(chroma_db_path)

    # Extract facts
    facts = extract_facts(text, model=model, provider=provider, npc=npc)

    # Process extracted facts in batches
    for i in range(0, len(facts), batch_size):
        batch = facts[i : i + batch_size]
        print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")

        # Generate embeddings for the whole batch at once (get_embeddings
        # is already imported at module level).
        batch_embeddings = get_embeddings(batch)

        for j, fact in enumerate(batch):
            print(f"Processing fact: {fact}")
            embedding = batch_embeddings[j]

            # Check for similar facts in Chroma before inserting
            similar_facts = find_similar_facts_chroma(
                chroma_collection, fact, query_embedding=embedding, n_results=3
            )

            if similar_facts:
                print("Similar facts found:")
                for result in similar_facts:
                    print(f"  - {result['fact']} (distance: {result['distance']})")
                # NOTE: a similarity threshold could be applied here to
                # skip highly similar facts.

            # Prepare metadata
            metadata = {
                "path": path,
                "timestamp": datetime.datetime.now().isoformat(),
                "source_model": model,
                "source_provider": provider,
            }

            # Insert into Kuzu graph DB first; only mirror into Chroma
            # when the graph insert succeeded, keeping the stores in sync.
            kuzu_success = insert_fact(kuzu_conn, fact, path)

            if kuzu_success:
                chroma_id = store_fact_with_embedding(
                    chroma_collection, fact, metadata, embedding
                )
                if chroma_id:
                    print(f"Successfully saved fact with ID: {chroma_id}")
                else:
                    print("Failed to save fact to Chroma")
            else:
                print("Failed to save fact to Kuzu graph")

    # Close Kuzu connection
    kuzu_conn.close()

    return facts
789
-
790
-
791
def hybrid_search_with_chroma(
    kuzu_conn: kuzu.Connection,
    chroma_collection,
    query: str,
    group_filter: Optional[List[str]] = None,
    top_k: int = 5,
    metadata_filter: Optional[Dict] = None,
) -> List[Dict]:
    """Perform hybrid search using both Chroma vector search and Kuzu graph relationships.

    Two phases: (1) embed the query and pull the top_k nearest facts from
    Chroma; (2) for each hit, walk the Kuzu graph to its containing groups
    and pull up to 5 sibling facts per group as additional context.

    Args:
        kuzu_conn: Connection to Kuzu graph database
        chroma_collection: Chroma collection for vector search
        query: Search query text
        group_filter: Optional list of groups to filter by in graph
        top_k: Number of results to return
        metadata_filter: Optional metadata filter for Chroma search

    Returns:
        List of result dicts; direct vector hits carry
        source="vector_search", graph-expanded hits carry
        source="graph_relation_via_<group>". Truncated to top_k overall.
    """
    # Get embedding for query using npcsh.llm_funcs.get_embeddings
    from npcsh.llm_funcs import get_embeddings

    query_embedding = get_embeddings([query])[0]

    # Step 1: Find similar facts using Chroma vector search
    vector_results = find_similar_facts_chroma(
        chroma_collection,
        query,
        query_embedding=query_embedding,
        n_results=top_k,
        metadata_filter=metadata_filter,
    )

    # Extract just the fact texts from vector results
    vector_facts = [result["fact"] for result in vector_results]

    # Step 2: Expand context using graph relationships
    expanded_results = []

    # Add vector search results first (direct matches rank highest).
    for result in vector_results:
        expanded_results.append(
            {
                "fact": result["fact"],
                "source": "vector_search",
                "relevance": "direct_match",
                "distance": result["distance"],
                "metadata": result["metadata"],
            }
        )

    # For each vector-matched fact, find related facts in the graph
    for fact in vector_facts:
        try:
            # Safely escape fact text for Kuzu query
            escaped_fact = fact.replace('"', '\\"')

            # Find groups containing this fact
            group_result = kuzu_conn.execute(
                f"""
                MATCH (g:Groups)-[:Contains]->(f:Fact)
                WHERE f.content = "{escaped_fact}"
                RETURN g.name
                """
            ).get_as_df()

            # Extract group names
            fact_groups = [row["g.name"] for _, row in group_result.iterrows()]

            # Apply group filter if provided
            if group_filter:
                fact_groups = [g for g in fact_groups if g in group_filter]

            # For each group, find other related facts
            for group in fact_groups:
                escaped_group = group.replace('"', '\\"')

                # Find facts in the same group (excluding the seed fact).
                related_facts_result = kuzu_conn.execute(
                    f"""
                    MATCH (g:Groups)-[:Contains]->(f:Fact)
                    WHERE g.name = "{escaped_group}" AND f.content <> "{escaped_fact}"
                    RETURN f.content, f.path, f.recorded_at
                    LIMIT 5
                    """
                ).get_as_df()

                # Add these related facts to results
                for _, row in related_facts_result.iterrows():
                    related_fact = {
                        "fact": row["f.content"],
                        "source": f"graph_relation_via_{group}",
                        "relevance": "group_related",
                        "path": row["f.path"],
                        "recorded_at": row["f.recorded_at"],
                    }

                    # Avoid duplicates across vector hits and group siblings.
                    if not any(
                        r.get("fact") == related_fact["fact"] for r in expanded_results
                    ):
                        expanded_results.append(related_fact)

        except Exception as e:
            # Graph expansion is best-effort; a failure here still returns
            # the direct vector matches.
            print(f"Error expanding results via graph: {e}")

    # Return results, limiting to top_k if needed
    return expanded_results[:top_k]
903
-
904
-
905
def get_facts_for_rag(
    kuzu_db_path: str,
    chroma_db_path: str,
    query: str,
    group_filters: Optional[List[str]] = None,
    top_k: int = 10,
) -> str:
    """Get facts for RAG by combining vector and graph search.

    Args:
        kuzu_db_path: Path to Kuzu graph database
        chroma_db_path: Path to Chroma vector database
        query: Search query
        group_filters: Optional list of groups to filter by
        top_k: Number of results to return

    Returns:
        Formatted context string with retrieved facts
    """
    # Open both stores.
    graph_conn = init_db(kuzu_db_path)
    _, facts_collection = setup_chroma_db(chroma_db_path)

    hits = hybrid_search_with_chroma(
        kuzu_conn=graph_conn,
        chroma_collection=facts_collection,
        query=query,
        group_filter=group_filters,
        top_k=top_k,
    )

    # Split direct vector matches from graph-expanded context.
    direct_hits = [h for h in hits if h["source"] == "vector_search"]
    graph_hits = [h for h in hits if h["source"] != "vector_search"]

    # Assemble the context string: direct matches first, then related facts.
    parts = ["Related facts:\n\n", "Most relevant facts:\n"]
    for rank, hit in enumerate(direct_hits):
        parts.append(f"{rank+1}. {hit['fact']}\n")

    parts.append("\nRelated concepts:\n")
    for rank, hit in enumerate(graph_hits):
        via = hit["source"].replace("graph_relation_via_", "")
        parts.append(f"{rank+1}. {hit['fact']} (related via {via})\n")

    graph_conn.close()

    return "".join(parts)
959
-
960
-
961
- # Example usage in a RAG context
962
- def answer_with_rag(
963
- query: str,
964
- kuzu_db_path: str = os.path.expanduser("~/npcsh_graph.db"),
965
- chroma_db_path: str = os.path.expanduser("~/npcsh_chroma.db"),
966
- model: str = "ollama",
967
- provider: str = "llama3.2",
968
- embedding_model: str = "text-embedding-3-small",
969
- ) -> str:
970
- """Answer a query using RAG with facts from the knowledge base
971
-
972
- Args:
973
- query: User query
974
- kuzu_db_path: Path to Kuzu graph database
975
- chroma_db_path: Path to Chroma vector database
976
- model: LLM model to use
977
- provider: LLM provider
978
- embedding_model: Model to use for embeddings
979
-
980
- Returns:
981
- Answer from the model
982
- """
983
- # Get relevant facts using hybrid search
984
- context = get_facts_for_rag(
985
- kuzu_db_path,
986
- chroma_db_path,
987
- query,
988
- )
989
-
990
- # Craft prompt with retrieved context
991
- prompt = f"""
992
- Answer this question based on the retrieved information.
993
-
994
- Question: {query}
995
-
996
- {context}
997
-
998
- Please provide a comprehensive answer based on the facts above. If the information
999
- doesn't contain a direct answer, please indicate that clearly but try to synthesize
1000
- from the available facts.
1001
- """
1002
-
1003
- # Get response from LLM
1004
- response = get_llm_response(prompt, model=model, provider=provider)
1005
-
1006
- return response["response"]