npcsh 0.3.31__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/_state.py +942 -0
- npcsh/alicanto.py +1074 -0
- npcsh/guac.py +785 -0
- npcsh/mcp_helpers.py +357 -0
- npcsh/mcp_npcsh.py +822 -0
- npcsh/mcp_server.py +184 -0
- npcsh/npc.py +218 -0
- npcsh/npcsh.py +1161 -0
- npcsh/plonk.py +387 -269
- npcsh/pti.py +234 -0
- npcsh/routes.py +958 -0
- npcsh/spool.py +315 -0
- npcsh/wander.py +550 -0
- npcsh/yap.py +573 -0
- npcsh-1.0.0.dist-info/METADATA +596 -0
- npcsh-1.0.0.dist-info/RECORD +21 -0
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/WHEEL +1 -1
- npcsh-1.0.0.dist-info/entry_points.txt +9 -0
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/licenses/LICENSE +1 -1
- npcsh/audio.py +0 -210
- npcsh/cli.py +0 -545
- npcsh/command_history.py +0 -566
- npcsh/conversation.py +0 -291
- npcsh/data_models.py +0 -46
- npcsh/dataframes.py +0 -163
- npcsh/embeddings.py +0 -168
- npcsh/helpers.py +0 -641
- npcsh/image.py +0 -298
- npcsh/image_gen.py +0 -79
- npcsh/knowledge_graph.py +0 -1006
- npcsh/llm_funcs.py +0 -2027
- npcsh/load_data.py +0 -83
- npcsh/main.py +0 -5
- npcsh/model_runner.py +0 -189
- npcsh/npc_compiler.py +0 -2870
- npcsh/npc_sysenv.py +0 -383
- npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
- npcsh/npc_team/corca.npc +0 -13
- npcsh/npc_team/foreman.npc +0 -7
- npcsh/npc_team/npcsh.ctx +0 -11
- npcsh/npc_team/sibiji.npc +0 -4
- npcsh/npc_team/templates/analytics/celona.npc +0 -0
- npcsh/npc_team/templates/hr_support/raone.npc +0 -0
- npcsh/npc_team/templates/humanities/eriane.npc +0 -4
- npcsh/npc_team/templates/it_support/lineru.npc +0 -0
- npcsh/npc_team/templates/marketing/slean.npc +0 -4
- npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
- npcsh/npc_team/templates/sales/turnic.npc +0 -4
- npcsh/npc_team/templates/software/welxor.npc +0 -0
- npcsh/npc_team/tools/bash_executer.tool +0 -32
- npcsh/npc_team/tools/calculator.tool +0 -8
- npcsh/npc_team/tools/code_executor.tool +0 -16
- npcsh/npc_team/tools/generic_search.tool +0 -27
- npcsh/npc_team/tools/image_generation.tool +0 -25
- npcsh/npc_team/tools/local_search.tool +0 -149
- npcsh/npc_team/tools/npcsh_executor.tool +0 -9
- npcsh/npc_team/tools/screen_cap.tool +0 -27
- npcsh/npc_team/tools/sql_executor.tool +0 -26
- npcsh/response.py +0 -623
- npcsh/search.py +0 -248
- npcsh/serve.py +0 -1460
- npcsh/shell.py +0 -538
- npcsh/shell_helpers.py +0 -3529
- npcsh/stream.py +0 -700
- npcsh/video.py +0 -49
- npcsh-0.3.31.data/data/npcsh/npc_team/bash_executer.tool +0 -32
- npcsh-0.3.31.data/data/npcsh/npc_team/calculator.tool +0 -8
- npcsh-0.3.31.data/data/npcsh/npc_team/celona.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/code_executor.tool +0 -16
- npcsh-0.3.31.data/data/npcsh/npc_team/corca.npc +0 -13
- npcsh-0.3.31.data/data/npcsh/npc_team/eriane.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/foreman.npc +0 -7
- npcsh-0.3.31.data/data/npcsh/npc_team/generic_search.tool +0 -27
- npcsh-0.3.31.data/data/npcsh/npc_team/image_generation.tool +0 -25
- npcsh-0.3.31.data/data/npcsh/npc_team/lineru.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/local_search.tool +0 -149
- npcsh-0.3.31.data/data/npcsh/npc_team/maurawa.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/npcsh.ctx +0 -11
- npcsh-0.3.31.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
- npcsh-0.3.31.data/data/npcsh/npc_team/raone.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/screen_cap.tool +0 -27
- npcsh-0.3.31.data/data/npcsh/npc_team/sibiji.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/slean.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/sql_executor.tool +0 -26
- npcsh-0.3.31.data/data/npcsh/npc_team/test_pipeline.py +0 -181
- npcsh-0.3.31.data/data/npcsh/npc_team/turnic.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/welxor.npc +0 -0
- npcsh-0.3.31.dist-info/METADATA +0 -1853
- npcsh-0.3.31.dist-info/RECORD +0 -76
- npcsh-0.3.31.dist-info/entry_points.txt +0 -3
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/top_level.txt +0 -0
npcsh/knowledge_graph.py
DELETED
|
@@ -1,1006 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import datetime
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
|
|
7
|
-
try:
|
|
8
|
-
import kuzu
|
|
9
|
-
except ModuleNotFoundError:
|
|
10
|
-
print("kuzu not installed")
|
|
11
|
-
from typing import Optional, Dict, List, Union, Tuple
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from .llm_funcs import get_llm_response, get_embeddings
|
|
15
|
-
from .npc_compiler import NPC
|
|
16
|
-
import sqlite3
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def safe_kuzu_execute(conn, query, error_message="Kuzu query failed"):
|
|
20
|
-
"""Execute a Kuzu query with proper error handling"""
|
|
21
|
-
try:
|
|
22
|
-
result = conn.execute(query)
|
|
23
|
-
return result, None
|
|
24
|
-
except Exception as e:
|
|
25
|
-
error = f"{error_message}: {str(e)}"
|
|
26
|
-
print(error)
|
|
27
|
-
return None, error
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def create_group(conn: kuzu.Connection, name: str, metadata: str = ""):
    """Create a ``Groups`` node named *name*; return True on success.

    Double quotes inside *name*/*metadata* are backslash-escaped so they
    can be embedded directly in the Cypher literal.
    """
    if conn is None:
        print("Cannot create group: database connection is None")
        return False

    try:
        escaped_name = name.replace('"', '\\"')
        escaped_metadata = metadata.replace('"', '\\"')

        query = f"""
        CREATE (g:Groups {{
            name: "{escaped_name}",
            metadata: "{escaped_metadata}"
        }});
        """

        _, error = safe_kuzu_execute(
            conn, query, f"Failed to create group: {name}"
        )
        if error:
            return False
    except Exception as e:
        print(f"Error creating group {name}: {str(e)}")
        traceback.print_exc()
        return False

    print(f"Created group: {name}")
    return True
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
import traceback
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def init_db(db_path: str, drop=False):
    """Open (creating if needed) the Kùzu database at *db_path* and ensure
    the Fact/Groups/Contains schema exists.

    Returns a live ``kuzu.Connection``, or ``None`` when anything fails.
    When *drop* is true the three tables are removed before re-creation.
    """
    try:
        # The parent directory must exist before Kuzu can open the database.
        os.makedirs(os.path.dirname(os.path.abspath(db_path)), exist_ok=True)

        try:
            database = kuzu.Database(db_path)
            connection = kuzu.Connection(database)
            print("Database connection established successfully")
        except Exception as e:
            print(f"Failed to connect to database: {str(e)}")
            traceback.print_exc()
            return None

        if drop:
            # Relationship table first: it references the node tables.
            for drop_stmt in (
                "DROP REL TABLE IF EXISTS Contains",
                "DROP NODE TABLE IF EXISTS Fact",
                "DROP NODE TABLE IF EXISTS Groups",
            ):
                safe_kuzu_execute(connection, drop_stmt)

        schema_statements = (
            (
                """
            CREATE NODE TABLE IF NOT EXISTS Fact(
                content STRING,
                path STRING,
                recorded_at STRING,
                PRIMARY KEY (content)
            );
            """,
                "Failed to create Fact table",
                "Fact table created or already exists.",
            ),
            (
                """
            CREATE NODE TABLE IF NOT EXISTS Groups(
                name STRING,
                metadata STRING,
                PRIMARY KEY (name)
            );
            """,
                "Failed to create Groups table",
                "Groups table created or already exists.",
            ),
            (
                """
            CREATE REL TABLE IF NOT EXISTS Contains(
                FROM Groups TO Fact
            );
            """,
                "Failed to create Contains relationship table",
                "Contains relationship table created or already exists.",
            ),
        )
        for ddl, failure_message, success_message in schema_statements:
            safe_kuzu_execute(connection, ddl, failure_message)
            print(success_message)

        return connection
    except Exception as e:
        print(f"Fatal error initializing database: {str(e)}")
        traceback.print_exc()
        return None
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def extract_facts(
    text: str, model: str = "llama3.2", provider: str = "ollama", npc: NPC = None
) -> List:
    """Extract personality-relevant facts from *text* with an LLM.

    Args:
        text: Free-form input text to mine for facts.
        model: LLM model name.
        provider: LLM provider name.
        npc: Optional NPC instance forwarded to the LLM call.

    Returns:
        The ``fact_list`` field of the model's JSON response (a list of
        fact strings, assuming the provider parsed the JSON).
    """
    prompt = """Extract facts from this text.
    A fact is a piece of information that makes a statement about the world.
    A fact is typically a sentence that is true or false.
    Facts may be simple or complex. They can also be conflicting with each other, usually
    because there is some hidden context that is not mentioned in the text.
    In any case, it is simply your job to extract a list of facts that could pertain to
    an individual's personality.
    For example, if a user says :
    "since I am a doctor I am often trying to think up new ways to help people.
    Can you help me set up a new kind of software to help with that?"
    You might extract the following facts:
    - The user is a doctor
    - The user is helpful

    Another example:
    "I am a software engineer who loves to play video games. I am also a huge fan of the
    Star Wars franchise and I am a member of the 501st Legion."
    You might extract the following facts:
    - The user is a software engineer
    - The user loves to play video games
    - The user is a huge fan of the Star Wars franchise
    - The user is a member of the 501st Legion

    Thus, it is your mission to reliably extract lists of facts.


    Return a JSON object with the following structure:

    {{
        "fact_list": "a list containing the facts where each fact is a string",
    }}


    Return only the JSON object.
    Do not include any additional markdown formatting.

    """

    response = get_llm_response(
        prompt + f"\n\nText: {text}",
        model=model,
        provider=provider,
        format="json",
        # BUG FIX: `npc` was accepted but never forwarded, unlike every
        # sibling helper in this module (find_similar_groups, etc.).
        npc=npc,
    )
    response = response["response"]
    print(response)
    return response["fact_list"]
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
def find_similar_groups(
    conn: kuzu.Connection,
    fact: str,
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> List[str]:
    """Find existing groups that might contain this fact.

    Args:
        conn: Open Kuzu connection.
        fact: The fact to classify.
        model / provider / npc: LLM call configuration.

    Returns:
        The ``group_list`` field of the LLM's JSON response, or ``[]``
        when no groups exist yet.
    """
    result = conn.execute("MATCH (g:Groups) RETURN g.name;")
    # BUG FIX: the old code applied `not` and json.dumps to the raw
    # DataFrame (both raise on pandas DataFrames) and used the nonexistent
    # spelling `fetch_as_df` (`get_as_df` is used everywhere else in this
    # module). Extract plain group names first. Debug prints removed.
    groups_df = result.get_as_df()
    group_names = groups_df["g.name"].tolist()
    print(f"Groups: {group_names}")
    if not group_names:
        return []

    prompt = """Given a fact and a list of groups, determine which groups this fact belongs to.
    A fact should belong to a group if it is semantically related to the group's theme or purpose.
    For example, if a fact is "The user loves programming" and there's a group called "Technical_Interests",
    that would be a match.

    Return a JSON object with the following structure:
    {
        "group_list": "a list containing the names of matching groups"
    }

    Return only the JSON object.
    Do not include any additional markdown formatting.
    """

    response = get_llm_response(
        prompt + f"\n\nFact: {fact}\nGroups: {json.dumps(group_names)}",
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    response = response["response"]
    return response["group_list"]
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def identify_groups(
    facts: List[str],
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> List[str]:
    """Ask the LLM for natural group names that organize *facts*.

    Returns the ``groups`` list from the model's JSON response.
    """
    prompt = """What are the main groups these facts could be organized into?
    Express these groups in plain, natural language.

    For example, given:
    - User enjoys programming in Python
    - User works on machine learning projects
    - User likes to play piano
    - User practices meditation daily

    You might identify groups like:
    - Programming
    - Machine Learning
    - Musical Interests
    - Daily Practices

    Return a JSON object with the following structure:
    `{
        "groups": ["list of group names"]
    }`


    Return only the JSON object. Do not include any additional markdown formatting or
    leading json characters.
    """

    llm_reply = get_llm_response(
        prompt + f"\n\nFacts: {json.dumps(facts)}",
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    return llm_reply["response"]["groups"]
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
def assign_to_groups(
    fact: str,
    groups: List[str],
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
) -> Dict[str, List[str]]:
    """Assign one fact to every relevant group via an LLM call.

    Returns the parsed JSON response, expected to hold a ``groups`` list.
    """
    prompt = f"""Given this fact, assign it to any relevant groups.

    A fact can belong to multiple groups if it fits.

    Here is the facT: {fact}

    Here are the groups: {groups}

    Return a JSON object with the following structure:
    {{
        "groups": ["list of group names"]
    }}

    Do not include any additional markdown formatting or leading json characters.


    """

    llm_reply = get_llm_response(
        prompt,
        model=model,
        provider=provider,
        format="json",
        npc=npc,
    )
    return llm_reply["response"]
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
def insert_fact(conn: kuzu.Connection, fact: str, path: str) -> bool:
    """Insert a fact into the database with robust error handling.

    Runs inside an explicit transaction: the fact is only created when it
    does not already exist (content is the primary key), and any failure
    rolls the transaction back.

    Args:
        conn: Open Kuzu connection (None is tolerated and reported).
        fact: Fact text; becomes the Fact node's primary-key content.
        path: Source path; expanded with ``~`` resolution before storage.

    Returns:
        True when the fact exists after the call (inserted or already
        present), False on any failure.
    """
    if conn is None:
        print("Cannot insert fact: database connection is None")
        return False

    try:
        # Properly escape quotes in strings so they can be embedded in the
        # Cypher literals below.
        escaped_fact = fact.replace('"', '\\"')
        escaped_path = os.path.expanduser(path).replace('"', '\\"')

        # Generate timestamp recorded on the new node.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Begin transaction; everything up to COMMIT is atomic.
        safe_kuzu_execute(conn, "BEGIN TRANSACTION")

        # Check if fact already exists (content is the primary key).
        check_query = f"""
        MATCH (f:Fact {{content: "{escaped_fact}"}})
        RETURN f
        """

        result, error = safe_kuzu_execute(
            conn, check_query, "Failed to check if fact exists"
        )
        if error:
            safe_kuzu_execute(conn, "ROLLBACK")
            return False

        # Insert fact if it doesn't exist.
        if not result.has_next():
            insert_query = f"""
            CREATE (f:Fact {{
                content: "{escaped_fact}",
                path: "{escaped_path}",
                recorded_at: "{timestamp}"
            }})
            """

            result, error = safe_kuzu_execute(
                conn, insert_query, "Failed to insert fact"
            )
            if error:
                safe_kuzu_execute(conn, "ROLLBACK")
                return False

        # Commit transaction.
        safe_kuzu_execute(conn, "COMMIT")
        return True
    except Exception as e:
        print(f"Error inserting fact: {str(e)}")
        traceback.print_exc()
        safe_kuzu_execute(conn, "ROLLBACK")
        return False
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
def assign_fact_to_group(conn: kuzu.Connection, fact: str, group: str) -> bool:
    """Link an existing Groups node to an existing Fact node via Contains.

    Both endpoints must already be present; returns True only when the
    relationship was created.
    """
    if conn is None:
        print("Cannot assign fact to group: database connection is None")
        return False

    try:
        escaped_fact = fact.replace('"', '\\"')
        escaped_group = group.replace('"', '\\"')

        # Verify both endpoints exist before creating the edge.
        lookups = (
            (
                f"""
        MATCH (f:Fact {{content: "{escaped_fact}"}})
        RETURN f
        """,
                "Failed to check if fact exists",
                f"Fact not found: {fact}",
            ),
            (
                f"""
        MATCH (g:Groups {{name: "{escaped_group}"}})
        RETURN g
        """,
                "Failed to check if group exists",
                f"Group not found: {group}",
            ),
        )
        for lookup_query, failure_message, missing_message in lookups:
            found, error = safe_kuzu_execute(conn, lookup_query, failure_message)
            if error or not found.has_next():
                print(missing_message)
                return False

        link_query = f"""
        MATCH (f:Fact), (g:Groups)
        WHERE f.content = "{escaped_fact}" AND g.name = "{escaped_group}"
        CREATE (g)-[:Contains]->(f)
        """

        _, error = safe_kuzu_execute(
            conn, link_query, f"Failed to assign fact to group: {group}"
        )
        if error:
            return False

        print(f"Assigned fact to group: {group}")
        return True
    except Exception as e:
        print(f"Error assigning fact to group: {str(e)}")
        traceback.print_exc()
        return False
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
def save_facts_to_db(
    conn: kuzu.Connection, facts: List[str], path: str, batch_size: int = 5
):
    """Save a list of facts to the database in batches.

    Args:
        conn: Open Kuzu connection.
        facts: Fact strings to persist.
        path: Source path stored alongside each fact.
        batch_size: Facts per reported batch (default 5, matching the
            other helpers in this module — previously it had no default).
    """
    for i in range(0, len(facts), batch_size):
        batch = facts[i : i + batch_size]
        print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")

        for fact in batch:
            try:
                print(f"Inserting fact: {fact}")
                print(f"With path: {path}")
                # BUG FIX: the old code printed "Success!" unconditionally
                # and computed/printed a local timestamp that insert_fact
                # never used (it records its own). Report the real outcome.
                if insert_fact(conn, fact, path):
                    print("Success!")
                else:
                    print(f"Failed to insert fact: {fact}")
            except Exception as e:
                print(f"Failed to insert fact: {fact}")
                print(f"Error: {e}")
                continue

        print(f"Completed batch {i//batch_size + 1}")
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
def process_text(
    db_path: str,
    text: str,
    path: str,
    model: str = "llama3.2",
    provider: str = "ollama",
    npc: NPC = None,
    batch_size: int = 5,
    conn=None,
):
    """Process text and add extracted facts to the database.

    Args:
        db_path: Kuzu database path (used only when *conn* is None).
        text: Input text to mine for facts.
        path: Source path recorded with each fact.
        model / provider / npc: LLM call configuration.
        batch_size: Facts per processing batch.
        conn: Optional pre-opened connection; opened from *db_path* if None.

    Returns:
        The list of extracted facts, or [] on failure.
    """
    try:
        if conn is None:
            conn = init_db(db_path, drop=False)
            # BUG FIX: the original returned [] unconditionally right after
            # init_db, so facts were never processed on this path. Bail out
            # only when initialization actually failed.
            if conn is None:
                return []

        facts = extract_facts(text, model=model, provider=provider, npc=npc)
        if not facts:
            print("No facts extracted")
            return []

        print(f"Extracted {len(facts)} facts")
        for fact in facts:
            print(f"- {fact}")

        # Process facts in batches.
        for i in range(0, len(facts), batch_size):
            batch = facts[i : i + batch_size]
            print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")

            for fact in batch:
                try:
                    print(f"Inserting fact: {fact}")
                    success = insert_fact(conn, fact, path)
                    if success:
                        print("Success!")
                    else:
                        print("Failed to insert fact")
                except Exception as e:
                    print(f"Error processing fact: {str(e)}")
                    traceback.print_exc()

            print(f"Completed batch {i//batch_size + 1}")

        return facts
    except Exception as e:
        print(f"Error processing text: {str(e)}")
        traceback.print_exc()
        return []
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
import networkx as nx
|
|
500
|
-
import matplotlib.pyplot as plt
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def visualize_graph(conn):
    """Visualize the knowledge graph using networkx.

    Reads all Fact and Groups nodes plus their Contains edges from *conn*,
    draws a directed group->fact graph with matplotlib (blocking
    ``plt.show()``), and prints summary statistics to stdout.
    """
    # Create a networkx graph; edges point from group to fact.
    G = nx.DiGraph()

    # Get all facts and groups with their relationships.
    facts_result = conn.execute("MATCH (f:Fact) RETURN f.content;").get_as_df()
    facts = [row["f.content"] for index, row in facts_result.iterrows()]

    groups_result = conn.execute("MATCH (g:Groups) RETURN g.name;").get_as_df()
    groups = [row["g.name"] for index, row in groups_result.iterrows()]

    relationships_result = conn.execute(
        """
        MATCH (g:Groups)-[r:Contains]->(f:Fact)
        RETURN g.name, f.content;
    """
    ).get_as_df()

    # Add nodes, tagged so they can be styled differently below.
    for fact in facts:
        G.add_node(fact, node_type="fact")
    for group in groups:
        G.add_node(group, node_type="group")

    # Add edges from relationships.
    for index, row in relationships_result.iterrows():
        G.add_edge(row["g.name"], row["f.content"])  # group name -> fact content

    # Set up the visualization.
    plt.figure(figsize=(20, 12))
    pos = nx.spring_layout(G, k=2, iterations=50)

    # Draw groups (larger nodes, distinct color).
    group_nodes = [
        n for n, attr in G.nodes(data=True) if attr.get("node_type") == "group"
    ]
    nx.draw_networkx_nodes(
        G, pos, nodelist=group_nodes, node_color="lightgreen", node_size=3000, alpha=0.7
    )

    # Draw facts (smaller nodes, different color).
    fact_nodes = [
        n for n, attr in G.nodes(data=True) if attr.get("node_type") == "fact"
    ]
    nx.draw_networkx_nodes(
        G, pos, nodelist=fact_nodes, node_color="lightblue", node_size=2000, alpha=0.5
    )

    # Draw edges with arrows.
    nx.draw_networkx_edges(G, pos, edge_color="gray", arrows=True, arrowsize=20)

    # Add labels; long fact texts are truncated to keep the plot readable.
    group_labels = {node: node for node in group_nodes}
    fact_labels = {
        node: node[:50] + "..." if len(node) > 50 else node for node in fact_nodes
    }

    nx.draw_networkx_labels(G, pos, group_labels, font_size=10, font_weight="bold")
    nx.draw_networkx_labels(G, pos, fact_labels, font_size=8)

    plt.title("Knowledge Graph: Groups and Facts", pad=20, fontsize=16)
    plt.axis("off")
    plt.tight_layout()

    # Print statistics.
    print("\nKnowledge Graph Statistics:")
    print(f"Number of facts: {len(facts)}")
    print(f"Number of groups: {len(groups)}")
    print(f"Number of relationships: {len(relationships_result)}")

    print("\nGroups:")
    for g in groups:
        # NOTE: re-scans the relationship frame per group (O(groups*edges));
        # fine at the small scales this debug view targets.
        related_facts = [
            row["f.content"]
            for index, row in relationships_result.iterrows()
            if row["g.name"] == g
        ]
        print(f"\n{g}:")
        for f in related_facts:
            print(f"  - {f}")

    plt.show()
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
try:
|
|
589
|
-
import chromadb
|
|
590
|
-
except ModuleNotFoundError:
|
|
591
|
-
print("chromadb not installed")
|
|
592
|
-
import numpy as np
|
|
593
|
-
import os
|
|
594
|
-
import datetime
|
|
595
|
-
from typing import Optional, Dict, List, Union, Tuple
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
def setup_chroma_db(db_path: str):
    """Connect to (or create) the persistent Chroma facts collection.

    No embedding function is attached to the collection: callers supply
    embeddings explicitly (see ``store_fact_with_embedding``).

    Returns:
        A ``(client, collection)`` pair.

    Raises:
        Re-raises any setup failure after printing it.
    """
    try:
        chroma_client = chromadb.PersistentClient(path=db_path)

        try:
            facts_collection = chroma_client.get_collection("facts_collection")
            print("Connected to existing facts collection")
        except ValueError:
            # Collection absent: create it fresh, still with no default
            # embedding function — embeddings come from get_embeddings.
            facts_collection = chroma_client.create_collection(
                name="facts_collection",
                metadata={"description": "Facts extracted from various sources"},
            )
            print("Created new facts collection")
    except Exception as e:
        print(f"Error setting up Chroma DB: {e}")
        raise
    return chroma_client, facts_collection
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
def store_fact_with_embedding(
    collection, fact: str, metadata: dict, embedding: List[float]
) -> str:
    """Store a fact with its pre-generated embedding in Chroma DB.

    Args:
        collection: Chroma collection
        fact: The fact text
        metadata: Dictionary with metadata (path, source, timestamp, etc.)
        embedding: Pre-generated embedding vector from get_embeddings

    Returns:
        The fact's id (md5 of its content) on success, None on failure.
    """
    try:
        import hashlib

        # Deterministic id: the same fact text always maps to the same entry,
        # making repeated inserts idempotent at the id level.
        fact_id = hashlib.md5(fact.encode()).hexdigest()

        collection.add(
            documents=[fact],
            embeddings=[embedding],
            metadatas=[metadata],
            ids=[fact_id],
        )
    except Exception as exc:
        print(f"Error storing fact in Chroma: {exc}")
        return None
    return fact_id
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
def find_similar_facts_chroma(
    collection,
    query: str,
    query_embedding: List[float],
    n_results: int = 5,
    metadata_filter: Optional[Dict] = None,
) -> List[Dict]:
    """Find facts similar to the query using a pre-generated embedding.

    Args:
        collection: Chroma collection
        query: Query text (for reference only)
        query_embedding: Pre-generated embedding from get_embeddings
        n_results: Number of results to return
        metadata_filter: Optional filter for metadata fields

    Returns:
        A list of ``{"fact", "metadata", "id", "distance"}`` dicts;
        [] on any error.
    """
    try:
        hits = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            where=metadata_filter,
        )

        documents = hits["documents"][0]
        metadatas = hits["metadatas"][0]
        ids = hits["ids"][0]
        # "distances" is only present when the backend returns them.
        has_distances = "distances" in hits

        return [
            {
                "fact": doc,
                "metadata": metadatas[idx],
                "id": ids[idx],
                "distance": hits["distances"][0][idx] if has_distances else None,
            }
            for idx, doc in enumerate(documents)
        ]
    except Exception as exc:
        print(f"Error searching in Chroma: {exc}")
        return []
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
def process_text_with_chroma(
    kuzu_db_path: str,
    chroma_db_path: str,
    text: str,
    path: str,
    npc: NPC = None,
    batch_size: int = 5,
    model: str = "llama3.2",
    provider: str = "ollama",
):
    """Process text and store facts in both Kuzu and Chroma DB.

    Args:
        kuzu_db_path: Path to Kuzu graph database
        chroma_db_path: Path to Chroma vector database
        text: Input text to process
        path: Source path or identifier
        npc: Optional NPC instance
        batch_size: Batch size for processing
        model: LLM model to use (also recorded in fact metadata)
        provider: LLM provider (also recorded in fact metadata)

    Returns:
        List of extracted facts
    """
    # BUG FIX: `model` and `provider` were referenced throughout this body
    # but were not parameters, so every call raised NameError. They are now
    # defaulted parameters, appended after the original ones so existing
    # positional calls keep working.

    # Initialize databases.
    kuzu_conn = init_db(kuzu_db_path, drop=False)
    chroma_client, chroma_collection = setup_chroma_db(chroma_db_path)

    # Extract facts.
    facts = extract_facts(text, model=model, provider=provider, npc=npc)

    # Process extracted facts in batches.
    for i in range(0, len(facts), batch_size):
        batch = facts[i : i + batch_size]
        print(f"\nProcessing batch {i//batch_size + 1} ({len(batch)} facts)")

        # get_embeddings is imported at module level; the old per-batch
        # re-import was redundant and has been removed.
        batch_embeddings = get_embeddings(
            batch,
        )

        for j, fact in enumerate(batch):
            print(f"Processing fact: {fact}")
            embedding = batch_embeddings[j]

            # Check for similar facts in Chroma before inserting.
            similar_facts = find_similar_facts_chroma(
                chroma_collection, fact, query_embedding=embedding, n_results=3
            )

            if similar_facts:
                print(f"Similar facts found:")
                for result in similar_facts:
                    print(f"  - {result['fact']} (distance: {result['distance']})")
                # Note: Could implement a similarity threshold here to skip highly similar facts

            # Prepare metadata recorded alongside the vector.
            metadata = {
                "path": path,
                "timestamp": datetime.datetime.now().isoformat(),
                "source_model": model,
                "source_provider": provider,
            }

            # Insert into Kuzu graph DB first; only mirror to Chroma when
            # the graph insert succeeded, keeping the two stores in sync.
            kuzu_success = insert_fact(kuzu_conn, fact, path)

            if kuzu_success:
                chroma_id = store_fact_with_embedding(
                    chroma_collection, fact, metadata, embedding
                )
                if chroma_id:
                    print(f"Successfully saved fact with ID: {chroma_id}")
                else:
                    print(f"Failed to save fact to Chroma")
            else:
                print(f"Failed to save fact to Kuzu graph")

    # Close Kuzu connection.
    kuzu_conn.close()

    return facts
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
def hybrid_search_with_chroma(
    kuzu_conn: kuzu.Connection,
    chroma_collection,
    query: str,
    group_filter: Optional[List[str]] = None,
    top_k: int = 5,
    metadata_filter: Optional[Dict] = None,
) -> List[Dict]:
    """Perform hybrid search: Chroma vector similarity plus Kuzu graph expansion.

    Direct vector matches are returned first; each matched fact is then used to
    walk the graph (Groups -[:Contains]-> Fact) to pull in group-related facts.

    Args:
        kuzu_conn: Connection to Kuzu graph database
        chroma_collection: Chroma collection for vector search
        query: Search query text
        group_filter: Optional list of group names to restrict graph expansion
        top_k: Number of results to return (also used as n_results for Chroma)
        metadata_filter: Optional metadata filter passed through to Chroma

    Returns:
        List of dicts; vector hits carry "distance"/"metadata", graph hits
        carry "path"/"recorded_at" and a "source" of graph_relation_via_<group>.
    """
    # Deferred import to avoid a circular dependency at module load time.
    from .llm_funcs import get_embeddings

    query_embedding = get_embeddings([query])[0]

    # Step 1: Find similar facts using Chroma vector search
    vector_results = find_similar_facts_chroma(
        chroma_collection,
        query,
        query_embedding=query_embedding,
        n_results=top_k,
        metadata_filter=metadata_filter,
    )

    # Extract just the fact texts from vector results
    vector_facts = [result["fact"] for result in vector_results]

    # Step 2: Expand context using graph relationships
    expanded_results = []

    # Add vector search results first so direct matches rank ahead of
    # graph-derived neighbors after the final top_k truncation.
    for result in vector_results:
        expanded_results.append(
            {
                "fact": result["fact"],
                "source": "vector_search",
                "relevance": "direct_match",
                "distance": result["distance"],
                "metadata": result["metadata"],
            }
        )

    # For each vector-matched fact, find related facts in the graph
    for fact in vector_facts:
        try:
            # NOTE(review): queries are built via f-string interpolation with
            # only double-quote escaping — fact text containing backslashes or
            # quotes could still break or inject into the Cypher query.
            # Parameterized Kuzu queries would be safer; confirm before relying
            # on this with untrusted input.
            escaped_fact = fact.replace('"', '\\"')

            # Find groups containing this fact
            group_result = kuzu_conn.execute(
                f"""
                MATCH (g:Groups)-[:Contains]->(f:Fact)
                WHERE f.content = "{escaped_fact}"
                RETURN g.name
                """
            ).get_as_df()

            # Extract group names
            fact_groups = [row["g.name"] for _, row in group_result.iterrows()]

            # Apply group filter if provided
            if group_filter:
                fact_groups = [g for g in fact_groups if g in group_filter]

            # For each group, find other related facts
            for group in fact_groups:
                escaped_group = group.replace('"', '\\"')

                # Find up to 5 sibling facts in the same group (excluding the
                # seed fact itself).
                related_facts_result = kuzu_conn.execute(
                    f"""
                    MATCH (g:Groups)-[:Contains]->(f:Fact)
                    WHERE g.name = "{escaped_group}" AND f.content <> "{escaped_fact}"
                    RETURN f.content, f.path, f.recorded_at
                    LIMIT 5
                    """
                ).get_as_df()

                # Add these related facts to results
                for _, row in related_facts_result.iterrows():
                    related_fact = {
                        "fact": row["f.content"],
                        "source": f"graph_relation_via_{group}",
                        "relevance": "group_related",
                        "path": row["f.path"],
                        "recorded_at": row["f.recorded_at"],
                    }

                    # Avoid duplicates (linear scan; fine for small top_k)
                    if not any(
                        r.get("fact") == related_fact["fact"] for r in expanded_results
                    ):
                        expanded_results.append(related_fact)

        except Exception as e:
            # Best-effort expansion: a graph failure for one fact should not
            # discard the vector results already collected.
            print(f"Error expanding results via graph: {e}")

    # Return results, limiting to top_k if needed
    return expanded_results[:top_k]
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
def get_facts_for_rag(
    kuzu_db_path: str,
    chroma_db_path: str,
    query: str,
    group_filters: Optional[List[str]] = None,
    top_k: int = 10,
) -> str:
    """Get facts for RAG by combining vector and graph search.

    Args:
        kuzu_db_path: Path to Kuzu graph database
        chroma_db_path: Path to Chroma vector database
        query: Search query
        group_filters: Optional list of groups to filter by
        top_k: Number of results to return

    Returns:
        Formatted context string with retrieved facts
    """
    # Initialize connections
    kuzu_conn = init_db(kuzu_db_path)
    chroma_client, chroma_collection = setup_chroma_db(chroma_db_path)

    try:
        # Perform hybrid search
        results = hybrid_search_with_chroma(
            kuzu_conn=kuzu_conn,
            chroma_collection=chroma_collection,
            query=query,
            group_filter=group_filters,
            top_k=top_k,
        )
    finally:
        # Release the graph connection even when the search raises
        # (previously leaked on error).
        kuzu_conn.close()

    # Format results as context for RAG
    context = "Related facts:\n\n"

    # First include direct vector matches
    context += "Most relevant facts:\n"
    vector_matches = [r for r in results if r["source"] == "vector_search"]
    for i, item in enumerate(vector_matches):
        context += f"{i+1}. {item['fact']}\n"

    # Then include graph-related facts
    context += "\nRelated concepts:\n"
    graph_matches = [r for r in results if r["source"] != "vector_search"]
    for i, item in enumerate(graph_matches):
        group = item["source"].replace("graph_relation_via_", "")
        context += f"{i+1}. {item['fact']} (related via {group})\n"

    return context
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
# Example usage in a RAG context
|
|
962
|
-
def answer_with_rag(
    query: str,
    kuzu_db_path: str = os.path.expanduser("~/npcsh_graph.db"),
    chroma_db_path: str = os.path.expanduser("~/npcsh_chroma.db"),
    model: str = "llama3.2",
    provider: str = "ollama",
    embedding_model: str = "text-embedding-3-small",
) -> str:
    """Answer a query using RAG with facts from the knowledge base.

    Args:
        query: User query
        kuzu_db_path: Path to Kuzu graph database
        chroma_db_path: Path to Chroma vector database
        model: LLM model to use (default fixed: was "ollama", which is a
            provider name, while provider defaulted to the model name)
        provider: LLM provider
        embedding_model: Model to use for embeddings
            # NOTE(review): currently unused — get_facts_for_rag is called
            # without it; kept for backward compatibility.

    Returns:
        Answer from the model
    """
    # Get relevant facts using hybrid search
    context = get_facts_for_rag(
        kuzu_db_path,
        chroma_db_path,
        query,
    )

    # Craft prompt with retrieved context
    prompt = f"""
    Answer this question based on the retrieved information.

    Question: {query}

    {context}

    Please provide a comprehensive answer based on the facts above. If the information
    doesn't contain a direct answer, please indicate that clearly but try to synthesize
    from the available facts.
    """

    # Get response from LLM
    response = get_llm_response(prompt, model=model, provider=provider)

    return response["response"]
|