npcsh 0.3.32__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. npcsh/_state.py +942 -0
  2. npcsh/alicanto.py +1074 -0
  3. npcsh/guac.py +785 -0
  4. npcsh/mcp_helpers.py +357 -0
  5. npcsh/mcp_npcsh.py +822 -0
  6. npcsh/mcp_server.py +184 -0
  7. npcsh/npc.py +218 -0
  8. npcsh/npcsh.py +1161 -0
  9. npcsh/plonk.py +387 -269
  10. npcsh/pti.py +234 -0
  11. npcsh/routes.py +958 -0
  12. npcsh/spool.py +315 -0
  13. npcsh/wander.py +550 -0
  14. npcsh/yap.py +573 -0
  15. npcsh-1.0.1.dist-info/METADATA +596 -0
  16. npcsh-1.0.1.dist-info/RECORD +21 -0
  17. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/WHEEL +1 -1
  18. npcsh-1.0.1.dist-info/entry_points.txt +9 -0
  19. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/licenses/LICENSE +1 -1
  20. npcsh/audio.py +0 -569
  21. npcsh/audio_gen.py +0 -1
  22. npcsh/cli.py +0 -543
  23. npcsh/command_history.py +0 -566
  24. npcsh/conversation.py +0 -54
  25. npcsh/data_models.py +0 -46
  26. npcsh/dataframes.py +0 -171
  27. npcsh/embeddings.py +0 -168
  28. npcsh/helpers.py +0 -646
  29. npcsh/image.py +0 -298
  30. npcsh/image_gen.py +0 -79
  31. npcsh/knowledge_graph.py +0 -1006
  32. npcsh/llm_funcs.py +0 -2195
  33. npcsh/load_data.py +0 -83
  34. npcsh/main.py +0 -5
  35. npcsh/model_runner.py +0 -189
  36. npcsh/npc_compiler.py +0 -2879
  37. npcsh/npc_sysenv.py +0 -388
  38. npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
  39. npcsh/npc_team/corca.npc +0 -13
  40. npcsh/npc_team/foreman.npc +0 -7
  41. npcsh/npc_team/npcsh.ctx +0 -11
  42. npcsh/npc_team/sibiji.npc +0 -4
  43. npcsh/npc_team/templates/analytics/celona.npc +0 -0
  44. npcsh/npc_team/templates/hr_support/raone.npc +0 -0
  45. npcsh/npc_team/templates/humanities/eriane.npc +0 -4
  46. npcsh/npc_team/templates/it_support/lineru.npc +0 -0
  47. npcsh/npc_team/templates/marketing/slean.npc +0 -4
  48. npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
  49. npcsh/npc_team/templates/sales/turnic.npc +0 -4
  50. npcsh/npc_team/templates/software/welxor.npc +0 -0
  51. npcsh/npc_team/tools/bash_executer.tool +0 -32
  52. npcsh/npc_team/tools/calculator.tool +0 -8
  53. npcsh/npc_team/tools/code_executor.tool +0 -16
  54. npcsh/npc_team/tools/generic_search.tool +0 -27
  55. npcsh/npc_team/tools/image_generation.tool +0 -25
  56. npcsh/npc_team/tools/local_search.tool +0 -149
  57. npcsh/npc_team/tools/npcsh_executor.tool +0 -9
  58. npcsh/npc_team/tools/screen_cap.tool +0 -27
  59. npcsh/npc_team/tools/sql_executor.tool +0 -26
  60. npcsh/response.py +0 -272
  61. npcsh/search.py +0 -252
  62. npcsh/serve.py +0 -1467
  63. npcsh/shell.py +0 -524
  64. npcsh/shell_helpers.py +0 -3919
  65. npcsh/stream.py +0 -233
  66. npcsh/video.py +0 -52
  67. npcsh/video_gen.py +0 -69
  68. npcsh-0.3.32.data/data/npcsh/npc_team/bash_executer.tool +0 -32
  69. npcsh-0.3.32.data/data/npcsh/npc_team/calculator.tool +0 -8
  70. npcsh-0.3.32.data/data/npcsh/npc_team/celona.npc +0 -0
  71. npcsh-0.3.32.data/data/npcsh/npc_team/code_executor.tool +0 -16
  72. npcsh-0.3.32.data/data/npcsh/npc_team/corca.npc +0 -13
  73. npcsh-0.3.32.data/data/npcsh/npc_team/eriane.npc +0 -4
  74. npcsh-0.3.32.data/data/npcsh/npc_team/foreman.npc +0 -7
  75. npcsh-0.3.32.data/data/npcsh/npc_team/generic_search.tool +0 -27
  76. npcsh-0.3.32.data/data/npcsh/npc_team/image_generation.tool +0 -25
  77. npcsh-0.3.32.data/data/npcsh/npc_team/lineru.npc +0 -0
  78. npcsh-0.3.32.data/data/npcsh/npc_team/local_search.tool +0 -149
  79. npcsh-0.3.32.data/data/npcsh/npc_team/maurawa.npc +0 -0
  80. npcsh-0.3.32.data/data/npcsh/npc_team/npcsh.ctx +0 -11
  81. npcsh-0.3.32.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
  82. npcsh-0.3.32.data/data/npcsh/npc_team/raone.npc +0 -0
  83. npcsh-0.3.32.data/data/npcsh/npc_team/screen_cap.tool +0 -27
  84. npcsh-0.3.32.data/data/npcsh/npc_team/sibiji.npc +0 -4
  85. npcsh-0.3.32.data/data/npcsh/npc_team/slean.npc +0 -4
  86. npcsh-0.3.32.data/data/npcsh/npc_team/sql_executor.tool +0 -26
  87. npcsh-0.3.32.data/data/npcsh/npc_team/test_pipeline.py +0 -181
  88. npcsh-0.3.32.data/data/npcsh/npc_team/turnic.npc +0 -4
  89. npcsh-0.3.32.data/data/npcsh/npc_team/welxor.npc +0 -0
  90. npcsh-0.3.32.dist-info/METADATA +0 -779
  91. npcsh-0.3.32.dist-info/RECORD +0 -78
  92. npcsh-0.3.32.dist-info/entry_points.txt +0 -3
  93. {npcsh-0.3.32.dist-info → npcsh-1.0.1.dist-info}/top_level.txt +0 -0
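Before the per-file diff, here is a minimal sketch (not part of the published diff) of how this file-level comparison can be reproduced locally, assuming both wheels have already been downloaded (for example with pip download npcsh==0.3.32 --no-deps and pip download npcsh==1.0.1 --no-deps). The wheel file names below follow standard PyPI naming and stand in for wherever the downloaded files actually live on disk:

import zipfile

# Hypothetical local paths to the two published wheels (standard PyPI file names).
OLD_WHEEL = "npcsh-0.3.32-py3-none-any.whl"
NEW_WHEEL = "npcsh-1.0.1-py3-none-any.whl"

def wheel_files(path):
    """Return the set of file names inside a wheel (wheels are zip archives)."""
    with zipfile.ZipFile(path) as wheel:
        return set(wheel.namelist())

old_files = wheel_files(OLD_WHEEL)
new_files = wheel_files(NEW_WHEEL)

# Files present only in the new release (the "+N -0" entries above).
print("Added in 1.0.1:")
for name in sorted(new_files - old_files):
    print("  +", name)

# Files present only in the old release (the "+0 -N" entries above).
print("Removed since 0.3.32:")
for name in sorted(old_files - new_files):
    print("  -", name)

Running this prints the same added/removed file sets summarized in the table above; the per-line diffs that follow come from comparing the contents of matching files.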
npcsh/npc_compiler.py DELETED
@@ -1,2879 +0,0 @@
1
- import subprocess
2
- import sqlite3
3
- import numpy as np
4
- import os
5
- import yaml
6
- from jinja2 import Environment, FileSystemLoader, Template, Undefined
7
- import pandas as pd
8
- from typing import Dict, Any, Optional, Union, List, Set
9
- import matplotlib.pyplot as plt
10
- import json
11
- import pathlib
12
- import fnmatch
13
- import re
14
- import ast
15
- import random
16
- from datetime import datetime
17
- import hashlib
18
- from collections import defaultdict, deque
19
- import traceback
20
-
21
- # Importing functions
22
- from npcsh.llm_funcs import (
23
- get_llm_response,
24
- get_stream,
25
- process_data_output,
26
- get_data_response,
27
- generate_image,
28
- check_llm_command,
29
- handle_tool_call,
30
- execute_llm_command,
31
- )
32
- from npcsh.helpers import get_npc_path
33
- from npcsh.search import search_web, rag_search
34
- from npcsh.image import capture_screenshot, analyze_image_base
35
-
36
-
37
- def create_or_replace_table(db_path: str, table_name: str, data: pd.DataFrame):
38
- """
39
- Creates or replaces a table in the SQLite database.
40
-
41
- :param db_path: Path to the SQLite database.
42
- :param table_name: Name of the table to create/replace.
43
- :param data: Pandas DataFrame containing the data to insert.
44
- """
45
- conn = sqlite3.connect(db_path)
46
- try:
47
- data.to_sql(table_name, conn, if_exists="replace", index=False)
48
- print(f"Table '{table_name}' created/replaced successfully.")
49
- except Exception as e:
50
- print(f"Error creating/replacing table '{table_name}': {e}")
51
- finally:
52
- conn.close()
53
-
54
-
55
- def load_npc_team(template_path):
56
- """
57
- Load an NPC team from a template directory.
58
-
59
- Args:
60
- template_path: Path to the NPC team template directory
61
-
62
- Returns:
63
- A dictionary containing the NPC team definition with loaded NPCs and tools
64
- """
65
- template_path = os.path.expanduser(template_path)
66
-
67
- if not os.path.exists(template_path):
68
- raise FileNotFoundError(f"Template directory not found: {template_path}")
69
-
70
- # Initialize team structure
71
- npc_team = {
72
- "name": os.path.basename(template_path),
73
- "npcs": [],
74
- "tools": [],
75
- "assembly_lines": [],
76
- "sql_models": [],
77
- "jobs": [],
78
- }
79
-
80
- # Load NPCs
81
- npc_objects = {}
82
- db_conn = sqlite3.connect(os.path.expanduser("~/npcsh_history.db"))
83
-
84
- for filename in os.listdir(template_path):
85
- if filename.endswith(".npc"):
86
- npc_path = os.path.join(template_path, filename)
87
-
88
- with open(npc_path, "r") as f:
89
- npc_content = f.read()
90
- npc_data = yaml.safe_load(npc_content)
91
- npc_team["npcs"].append(npc_data)
92
-
93
- # Load as NPC object
94
-
95
- npc_obj = load_npc_from_file(npc_path, db_conn)
96
- npc_name = npc_data.get("name", os.path.splitext(filename)[0])
97
- npc_objects[npc_name] = npc_obj
98
-
99
- # Load tools
100
- tools_dir = os.path.join(template_path, "tools")
101
- tool_objects = {}
102
-
103
- if os.path.exists(tools_dir):
104
- for filename in os.listdir(tools_dir):
105
- if filename.endswith(".tool"):
106
- tool_path = os.path.join(tools_dir, filename)
107
- with open(tool_path, "r") as f:
108
- tool_content = f.read()
109
- tool_data = yaml.safe_load(tool_content)
110
- npc_team["tools"].append(tool_data)
111
-
112
- # Load as Tool object
113
- try:
114
- tool_obj = Tool(tool_data)
115
- tool_name = tool_data.get(
116
- "tool_name", os.path.splitext(filename)[0]
117
- )
118
- tool_objects[tool_name] = tool_obj
119
- except Exception as e:
120
- print(f"Warning: Could not load tool {filename}: {str(e)}")
121
-
122
- # Load assembly lines
123
- assembly_lines_dir = os.path.join(template_path, "assembly_lines")
124
- if os.path.exists(assembly_lines_dir):
125
- for filename in os.listdir(assembly_lines_dir):
126
- if filename.endswith(".pipe"):
127
- pipe_path = os.path.join(assembly_lines_dir, filename)
128
- with open(pipe_path, "r") as f:
129
- pipe_content = f.read()
130
- pipe_data = yaml.safe_load(pipe_content)
131
- npc_team["assembly_lines"].append(pipe_data)
132
-
133
- # Load SQL models
134
- sql_models_dir = os.path.join(template_path, "sql_models")
135
- if os.path.exists(sql_models_dir):
136
- for filename in os.listdir(sql_models_dir):
137
- if filename.endswith(".sql"):
138
- sql_path = os.path.join(sql_models_dir, filename)
139
- with open(sql_path, "r") as f:
140
- sql_content = f.read()
141
- npc_team["sql_models"].append(
142
- {"name": os.path.basename(sql_path), "content": sql_content}
143
- )
144
-
145
- # Load jobs
146
- jobs_dir = os.path.join(template_path, "jobs")
147
- if os.path.exists(jobs_dir):
148
- for filename in os.listdir(jobs_dir):
149
- if filename.endswith(".job"):
150
- job_path = os.path.join(jobs_dir, filename)
151
- with open(job_path, "r") as f:
152
- job_content = f.read()
153
- job_data = yaml.safe_load(job_content)
154
- npc_team["jobs"].append(job_data)
155
-
156
- # Add loaded objects to the team structure
157
- npc_team["npc_objects"] = npc_objects
158
- npc_team["tool_objects"] = tool_objects
159
- npc_team["template_path"] = template_path
160
-
161
- return npc_team
162
-
163
-
164
- def get_template_npc_team(template, template_dir="~/.npcsh/npc_team/templates/"):
165
-
166
- # get the working directory where the
167
-
168
- npc_team = load_npc_team(template_dir + template)
169
- return npc_team
170
-
171
-
172
- def generate_npcs_from_area_of_expertise(
173
- areas_of_expertise,
174
- context,
175
- templates: list = None,
176
- model=None,
177
- provider=None,
178
- npc=None,
179
- ):
180
-
181
- prompt = f"""
182
- Here are the areas of expertise that a user requires a team of agents to be developed for.
183
-
184
- {areas_of_expertise}
185
-
186
- Here is some additional context that may be useful:
187
- {context}
188
-
189
- """
190
- # print(templates)
191
- if templates is not None:
192
- prompt += "the user has also provided the following templates to use as a base for the NPC team:\n"
193
- for template in templates:
194
- prompt += f"{template}\n"
195
- prompt += "your output should use these templates and modify them accordingly. Your response must contain the specific named NPCs included in these templates, with their primary directives adjusted accordingly based on the context and the areas of expertise. any other new npcs should complement these template ones and should not overlap."
196
-
197
- prompt += """
198
- Now, generate a set of 2-5 NPCs that cover the required areas of expertise and adequately incorporate the context provided.
199
- according to the following framework and return a json response
200
- {"npc_team": [
201
- {
202
- "name":"name of npc1",
203
- "primary_directive": "a 2-3 sentence description of the NPCs duties and responsibilities in the second person"
204
- },
205
- {
206
- "name":"name of npc2",
207
- "primary_directive": "a 2-3 sentence description of the NPCs duties and responsibilities in the second person"
208
- }
209
- ]}
210
-
211
- Each npc's name should be one word.
212
- The npc's primary directive must be essentially an assistant system message, so ensure that when you
213
- write it, you are writing it in that way.
214
- For example, here is an npc named 'sibiji' with a primary directive:
215
- {
216
- "name":"sibiji",
217
- "primary_directive": "You are sibiji, the foreman of an NPC team. You are a foundational AI assistant. Your role is to provide basic support and information. Respond to queries concisely and accurately."
218
- }
219
- When writing out your response, you must ensure that the agents have distinct areas of
220
- expertise such that they are not redundant in their abilities. Keeping the agent team
221
- small is important and we do not wish to clutter the team with agents that have overlapping
222
- areas of expertise or responsibilities that make it difficult to know which agent should be
223
- called upon in a specific situation.
224
-
225
-
226
- do not include any additional markdown formatting or leading ```json tags.
227
- """
228
-
229
- response = get_llm_response(
230
- prompt, model=model, provider=provider, npc=npc, format="json"
231
- )
232
- response = response.get("response").get("npc_team")
233
- return response
234
-
235
-
236
- def edit_areas(areas):
237
- for i, area in enumerate(areas):
238
- print(f"{i+1}. {area}")
239
-
240
- index = input("Which area would you like to edit? (number or 'c' to continue): ")
241
- if index.lower() in ["c", "continue"]:
242
- return areas
243
- else:
244
- index = int(index)
245
- if 0 <= index < len(areas):
246
- new_value = input(f"Current value: {areas[index]}. Enter new value: ")
247
- areas[index] = new_value
248
- else:
249
- print("invalid index, please try again")
250
- return edit_areas(areas)
251
-
252
-
253
- def delete_areas(areas):
254
- for i, area in enumerate(areas):
255
- print(f"{i+1}. {area}")
256
-
257
- index = (
258
- int(input("Which area would you like to delete? (number or 'c' to continue): "))
259
- - 1
260
- )
261
-
262
- if index.lower() in ["c", "continue"]:
263
- return areas
264
- if 0 <= index < len(areas):
265
- del areas[index]
266
-
267
- return delete_areas(areas)
268
-
269
-
270
- def conjure_team(
271
- context,
272
- templates,
273
- npc=None,
274
- model=None,
275
- provider=None,
276
- ):
277
- """
278
- Function to generate an NPC team using existing templates and identifying additional areas of expertise.
279
-
280
- Args:
281
- templates: List of template names to use as a base
282
- context: Description of the project and what the team should do
283
- npc: The NPC to use for generating the areas (optional)
284
- model: The model to use for generation (optional)
285
- provider: The provider to use for generation (optional)
286
-
287
- Returns:
288
- Dictionary with identified areas of expertise
289
- """
290
- teams = []
291
- for team in templates:
292
- npc_team = get_template_npc_team(team)
293
- teams.append(npc_team)
294
-
295
- # Extract existing areas of expertise from templates
296
- prompt = f"""
297
- The user has provided the following context:
298
-
299
- {context}
300
- """
301
-
302
- if templates is not None:
303
- prompt += f"""
304
- The user has requested to generate an NPC team using the following templates:
305
-
306
- {templates}
307
-
308
- """
309
-
310
- prompt += """
311
- Now what is important in generating an NPC team is to ensure that the NPCs are balanced and distinctly necessary.
312
- Each NPC should essentially focus on a single area of expertise. This does not mean that they should only focus on a
313
- single function, but rather that they have a specific purview.
314
-
315
- To first figure out what NPCs would be necessary in addition to the templates given the combination of the templates
316
- and the user-provided context, we will need to generate a list of the abstract areas that the user requires in an NPC team.
317
- Now, given that information, consider whether other potential areas of expertise would complement the provided templates and the user context?
318
- Try to think carefully about this in a way to determine what other potential issues might arise for a team like this to anticipate whether it may be
319
- necessary to cover additional areas of expertise.
320
-
321
- Now, generate a list of 3-5 abstract areas explicitly required.
322
- It is actually quite important that you consolidate and abstract away various areas
323
- into general forms. Agents will be generated based on these descriptions, and an agentic team is more
324
- useful when it is as small as reasonably possible.
325
-
326
- Similarly, generate a list of 2-3 suggested areas of expertise that would complement the existing templates and the user context.
327
-
328
- This will be provided to the user for confirmation and adjustment before the NPC team is generated.
329
-
330
- Return a json response with two lists. It should be formatted like so:
331
-
332
- {
333
- "explicit_areas": ["area 1", "area 2"],
334
- "suggested_areas": ["area 3", "area 4"]
335
- }
336
-
337
- Do not include any additional markdown formatting or leading ```json tags.
338
-
339
- """
340
-
341
- response = get_llm_response(
342
- prompt, model=model, provider=provider, npc=npc, format="json"
343
- )
344
-
345
- response = response.get("response")
346
- explicit_areas = response.get("explicit_areas", [])
347
- suggested_areas = response.get("suggested_areas", [])
348
- combined_areas = explicit_areas + suggested_areas
349
- print("\nExplicit areas of expertise:")
350
- for i, area in enumerate(explicit_areas):
351
- print(f"{i+1}. {area}")
352
-
353
- print("\nSuggested areas of expertise:")
354
- for i, area in enumerate(suggested_areas):
355
- print(f"{i+1}. {area}")
356
-
357
- user_input = input(
358
- """\n\n
359
- Above is the generated list of areas of expertise.
360
-
361
- Would you like to edit the suggestions, delete any of them, or regenerate the team with revised context?
362
- Type '(e)dit', '(d)elete', or '(r)egenerate' or '(a)ccept': """
363
- )
364
- if user_input.lower() in ["e", "edit"]:
365
- revised_areas = edit_areas(combined_areas)
366
- elif user_input.lower() in ["d", "delete"]:
367
- revised_areas = delete_areas(combined_areas)
368
- elif user_input.lower() in ["r", "regenerate"]:
369
- updated_context = input(
370
- f"Here is the context you provided: {context}\nPlease provide a fully revised version: "
371
- )
372
- print("Beginning again with updated context")
373
- return conjure_team(
374
- updated_context,
375
- templates=templates,
376
- npc=npc,
377
- model=model,
378
- provider=provider,
379
- )
380
-
381
- elif user_input.lower() in ["a", "accept"]:
382
- # Return the finalized areas of expertise
383
- revised_areas = combined_areas
384
-
385
- # proceed now with generation of npc for each revised area
386
- npc_out = generate_npcs_from_area_of_expertise(
387
- revised_areas,
388
- context,
389
- templates=[team["npcs"] for team in teams],
390
- model=model,
391
- provider=provider,
392
- npc=npc,
393
- )
394
- # print(npc_out)
395
- # now save all of the npcs to the ./npc_team directory
396
-
397
- for npc in npc_out:
398
- # make the npc team dir if it does not exist
399
-
400
- if isinstance(npc, str):
401
- npc = ast.literal_eval(npc)
402
-
403
- npc_team_dir = os.path.join(os.getcwd(), "npc_team")
404
- os.makedirs(npc_team_dir, exist_ok=True)
405
- # print(npc, type(npc))
406
- npc_path = os.path.join(os.getcwd(), "npc_team", f"{npc['name']}.npc")
407
- with open(npc_path, "w") as f:
408
- f.write(yaml.dump(npc))
409
-
410
- return {
411
- "templates": templates,
412
- "context": context,
413
- "expertise_areas": response,
414
- "npcs": npc_out,
415
- }
416
-
417
-
418
- def initialize_npc_project(
419
- directory=None,
420
- templates=None,
421
- context=None,
422
- model=None,
423
- provider=None,
424
- ) -> str:
425
- """
426
- Function Description:
427
- This function initializes an NPC project in the current directory.
428
- Args:
429
- None
430
- Keyword Args:
431
- None
432
- Returns:
433
- A message indicating the success or failure of the operation.
434
- """
435
- if directory is None:
436
- directory = os.getcwd()
437
-
438
- # Create 'npc_team' folder in current directory
439
- npc_team_dir = os.path.join(directory, "npc_team")
440
- os.makedirs(npc_team_dir, exist_ok=True)
441
-
442
- # Create 'foreman.npc' file in 'npc_team' directory
443
- foreman_npc_path = os.path.join(npc_team_dir, "sibiji.npc")
444
- if context is not None:
445
- team = conjure_team(
446
- context, templates=templates, model=model, provider=provider
447
- )
448
-
449
- if not os.path.exists(foreman_npc_path):
450
- foreman_npc_content = """name: sibiji
451
- primary_directive: "You are sibiji, the foreman of an NPC team. You are a foundational AI assistant. Your role is to provide basic support and information. Respond to queries concisely and accurately."
452
- model: llama3.2
453
- provider: ollama
454
- """
455
- with open(foreman_npc_path, "w") as f:
456
- f.write(foreman_npc_content)
457
- else:
458
- print(f"{foreman_npc_path} already exists.")
459
-
460
- # Create 'tools' folder within 'npc_team' directory
461
- tools_dir = os.path.join(npc_team_dir, "tools")
462
- os.makedirs(tools_dir, exist_ok=True)
463
-
464
- # assembly_lines
465
- assembly_lines_dir = os.path.join(npc_team_dir, "assembly_lines")
466
- os.makedirs(assembly_lines_dir, exist_ok=True)
467
- # sql models
468
- sql_models_dir = os.path.join(npc_team_dir, "sql_models")
469
- os.makedirs(sql_models_dir, exist_ok=True)
470
- # jobs
471
- jobs_dir = os.path.join(npc_team_dir, "jobs")
472
- os.makedirs(jobs_dir, exist_ok=True)
473
-
474
- # just copy all the base npcsh tools and npcs.
475
- return f"NPC project initialized in {npc_team_dir}"
476
-
477
-
478
- def init_pipeline_runs(db_path: str = "~/npcsh_history.db"):
479
- """
480
- Initialize the pipeline runs table in the database.
481
- """
482
- with sqlite3.connect(os.path.expanduser(db_path)) as conn:
483
- cursor = conn.cursor()
484
- cursor.execute(
485
- """
486
- CREATE TABLE IF NOT EXISTS pipeline_runs (
487
- id INTEGER PRIMARY KEY AUTOINCREMENT,
488
- pipeline_name TEXT,
489
- step_name TEXT,
490
- output TEXT,
491
- timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
492
- )
493
- """
494
- )
495
- conn.commit()
496
-
497
-
498
- # SilentUndefined handles undefined behavior in Jinja2
499
- class SilentUndefined(Undefined):
500
- def _fail_with_undefined_error(self, *args, **kwargs):
501
- return ""
502
-
503
-
504
- class Context:
505
- def __init__(self, context=None, mcp_servers=None, databases=None, files=None):
506
- self.context = context
507
- self.mcp_servers = mcp_servers
508
- self.databases = databases
509
- self.files = files
510
-
511
- def load_context_file(self, path):
512
- with open(path, "r") as f:
513
- self.context = yaml.safe_load(f)
514
-
515
-
516
- class Tool:
517
- def __init__(self, tool_data: dict):
518
- if not tool_data or not isinstance(tool_data, dict):
519
- raise ValueError("Invalid tool data provided.")
520
- if "tool_name" not in tool_data:
521
- raise KeyError("Missing 'tool_name' in tool definition.")
522
-
523
- self.tool_name = tool_data.get("tool_name")
524
- self.inputs = tool_data.get("inputs", [])
525
- self.description = tool_data.get("description", "")
526
- self.steps = self.parse_steps(tool_data.get("steps", []))
527
-
528
- def parse_step(self, step: Union[dict, str]) -> dict:
529
- if isinstance(step, dict):
530
- return {
531
- "engine": step.get("engine", None),
532
- "code": step.get("code", ""),
533
- }
534
- else:
535
- raise ValueError("Invalid step format")
536
-
537
- def parse_steps(self, steps: list) -> list:
538
- return [self.parse_step(step) for step in steps]
539
-
540
- def execute(
541
- self,
542
- input_values: dict,
543
- tools_dict: dict,
544
- jinja_env: Environment,
545
- command: str,
546
- model: str = None,
547
- provider: str = None,
548
- npc=None,
549
- stream: bool = False,
550
- messages: List[Dict[str, str]] = None,
551
- ):
552
- # Create the context with input values at top level for Jinja access
553
- context = npc.shared_context.copy() if npc else {}
554
- context.update(input_values) # Spread input values directly in context
555
- context.update(
556
- {
557
- "tools": tools_dict,
558
- "llm_response": None,
559
- "output": None,
560
- "command": command,
561
- }
562
- )
563
-
564
- # Process Steps
565
- for i, step in enumerate(self.steps):
566
-
567
- context = self.execute_step(
568
- step,
569
- context,
570
- jinja_env,
571
- model=model,
572
- provider=provider,
573
- npc=npc,
574
- stream=stream,
575
- messages=messages,
576
- )
577
- # if i is the last step and the user has requested a streaming output
578
- # then we should return the stream
579
- if i == len(self.steps) - 1 and stream: # this was causing the big issue X:
580
- print("tool successful, passing output to stream")
581
- return context
582
- print("CONTEXT AFTER TOOL CALLS, ", context)
583
- if context.get("output") is not None:
584
- print("output from tool: ", context.get("output"))
585
- if not isinstance(context.get("output"), str):
586
- return str(context.get("output"))
587
- return context.get("output")
588
- elif context.get("llm_response") is not None:
589
- print("output from tool: ", context.get("llm_response"))
590
- return context.get("llm_response")
591
-
592
- def execute_step(
593
- self,
594
- step: dict,
595
- context: dict,
596
- jinja_env: Environment,
597
- npc: Any = None,
598
- model: str = None,
599
- provider: str = None,
600
- stream: bool = False,
601
- messages: List[Dict[str, str]] = None,
602
- ):
603
- engine = step.get("engine", "natural")
604
- code = step.get("code", "")
605
-
606
- # Render template with all context variables
607
- try:
608
- template = jinja_env.from_string(code)
609
- rendered_code = template.render(**context)
610
- except Exception as e:
611
- print(f"Error rendering template: {e}")
612
- rendered_code = code
613
- # render engine if necessary
614
- try:
615
- template = jinja_env.from_string(engine)
616
- rendered_engine = template.render(**context)
617
- except:
618
- print("error rendering engine")
619
- rendered_engine = engine
620
- print(f"proceeding with engine: {rendered_engine}")
621
- print("rendered code: ", rendered_code)
622
- if rendered_engine == "natural":
623
- if len(rendered_code.strip()) > 0:
624
- # print(f"Executing natural language step: {rendered_code}")
625
- if stream:
626
- messages = messages.copy() if messages else []
627
- messages.append({"role": "user", "content": rendered_code})
628
- return get_stream(messages, model=model, provider=provider, npc=npc)
629
-
630
- else:
631
- llm_response = get_llm_response(
632
- rendered_code, model=model, provider=provider, npc=npc
633
- )
634
- response_text = llm_response.get("response", "")
635
- # Store both in context for reference
636
- context["llm_response"] = response_text
637
- context["results"] = response_text
638
-
639
- elif rendered_engine == "python":
640
- exec_globals = {
641
- "__builtins__": __builtins__,
642
- "npc": npc,
643
- "context": context,
644
- "pd": pd,
645
- "plt": plt,
646
- "np": np,
647
- "os": os,
648
- "get_llm_response": get_llm_response,
649
- "generate_image": generate_image,
650
- "search_web": search_web,
651
- "json": json,
652
- "sklearn": __import__("sklearn"),
653
- "TfidfVectorizer": __import__(
654
- "sklearn.feature_extraction.text"
655
- ).feature_extraction.text.TfidfVectorizer,
656
- "cosine_similarity": __import__(
657
- "sklearn.metrics.pairwise"
658
- ).metrics.pairwise.cosine_similarity,
659
- "Path": __import__("pathlib").Path,
660
- "fnmatch": fnmatch,
661
- "pathlib": pathlib,
662
- "subprocess": subprocess,
663
- }
664
- new_locals = {}
665
- exec_env = context.copy()
666
- # try:
667
- exec(rendered_code, exec_globals, new_locals)
668
- exec_env.update(new_locals)
669
-
670
- context.update(exec_env)
671
-
672
- exec_env.update(new_locals)
673
- context.update(exec_env)
674
- # Add this line to explicitly copy the output
675
- if "output" in new_locals:
676
- context["output"] = new_locals["output"]
677
-
678
- # Then your existing code
679
- if "output" in exec_env:
680
- if exec_env["output"] is not None:
681
- context["results"] = exec_env["output"]
682
- print("result from code execution: ", exec_env["output"])
683
- # else:
684
- # context["output"] = str(exec_env)
685
-
686
- """
687
- except NameError as e:
688
- tb_lines = traceback.format_exc().splitlines()
689
- limited_tb = (
690
- "\n".join(tb_lines[:100])
691
- if len(tb_lines) > 100
692
- else "\n".join(tb_lines)
693
- )
694
- print(f"NameError: {e}")
695
- print(f"Limited traceback:\n{limited_tb}")
696
- print("Tool code:")
697
- print(rendered_code)
698
- return {
699
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
700
- }
701
- except SyntaxError as e:
702
- tb_lines = traceback.format_exc().splitlines()
703
- limited_tb = (
704
- "\n".join(tb_lines[:100])
705
- if len(tb_lines) > 100
706
- else "\n".join(tb_lines)
707
- )
708
- print(f"SyntaxError: {e}")
709
- print(f"Limited traceback:\n{limited_tb}")
710
- print("Tool code:")
711
- print(rendered_code)
712
- return {
713
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
714
- }
715
- except Exception as e:
716
- tb_lines = traceback.format_exc().splitlines()
717
- limited_tb = (
718
- "\n".join(tb_lines[:100])
719
- if len(tb_lines) > 100
720
- else "\n".join(tb_lines)
721
- )
722
- print(f"Error executing Python code:")
723
- print(f"Limited traceback:\n{limited_tb}")
724
- print("Tool code:")
725
- print(rendered_code)
726
- return {
727
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
728
- }
729
- """
730
- return context
731
-
732
- def to_dict(self):
733
- return {
734
- "tool_name": self.tool_name,
735
- "description": self.description,
736
- "inputs": self.inputs,
737
- "steps": [self.step_to_dict(step) for step in self.steps],
738
- }
739
-
740
- def step_to_dict(self, step):
741
- return {
742
- "engine": step.get("engine"),
743
- "code": step.get("code"),
744
- }
745
-
746
-
747
- def load_tools_from_directory(directory) -> list:
748
- tools = []
749
- if os.path.exists(directory):
750
- for filename in os.listdir(directory):
751
- if filename.endswith(".tool"):
752
- full_path = os.path.join(directory, filename)
753
- with open(full_path, "r") as f:
754
- tool_content = f.read()
755
- try:
756
- if not tool_content.strip():
757
- print(f"Tool file {filename} is empty. Skipping.")
758
- continue
759
- tool_data = yaml.safe_load(tool_content)
760
- if tool_data is None:
761
- print(
762
- f"Tool file {filename} is invalid or empty. Skipping."
763
- )
764
- continue
765
- tool = Tool(tool_data)
766
- tools.append(tool)
767
- except yaml.YAMLError as e:
768
- print(f"Error parsing tool {filename}: {e}")
769
- return tools
770
-
771
-
772
- class NPC:
773
- def __init__(
774
- self,
775
- name: str,
776
- primary_directive: str = None,
777
- tools: list = None, # from the npc profile
778
- model: str = None,
779
- provider: str = None,
780
- api_url: str = None,
781
- db_conn=None,
782
- all_tools: list = None, # all available tools in global and project, this is an anti pattern i need to solve eventually but for now it works
783
- use_global_tools: bool = False,
784
- use_npc_network: bool = False,
785
- global_npc_directory: str = None,
786
- project_npc_directory: str = None,
787
- global_tools_directory: str = None,
788
- project_tools_directory: str = None,
789
- ):
790
- # 2. Load global tools from ~/.npcsh/npc_team/tools
791
- if global_tools_directory is None:
792
- user_home = os.path.expanduser("~")
793
- self.global_tools_directory = os.path.join(
794
- user_home, ".npcsh", "npc_team", "tools"
795
- )
796
- else:
797
- self.global_tools_directory = global_tools_directory
798
-
799
- if project_tools_directory is None:
800
- self.project_tools_directory = os.path.abspath("./npc_team/tools")
801
- else:
802
- self.project_tools_directory = project_tools_directory
803
-
804
- if global_npc_directory is None:
805
- self.global_npc_directory = os.path.join(user_home, ".npcsh", "npc_team")
806
- else:
807
- self.global_npc_directory = global_npc_directory
808
-
809
- if project_npc_directory is None:
810
- self.project_npc_directory = os.path.abspath("./npc_team")
811
-
812
- self.jinja_env = Environment(
813
- loader=FileSystemLoader(
814
- [
815
- self.project_npc_directory,
816
- self.global_npc_directory,
817
- self.global_tools_directory,
818
- self.project_tools_directory,
819
- ]
820
- ),
821
- undefined=SilentUndefined,
822
- )
823
-
824
- self.name = name
825
- self.primary_directive = primary_directive
826
- self.tools = tools or []
827
-
828
- self.model = model
829
- self.db_conn = db_conn
830
- if self.db_conn is not None:
831
- # Determine database type
832
- if "psycopg2" in self.db_conn.__class__.__module__:
833
- # PostgreSQL connection
834
- cursor = self.db_conn.cursor()
835
- cursor.execute(
836
- """
837
- SELECT table_name, obj_description((quote_ident(table_name))::regclass, 'pg_class')
838
- FROM information_schema.tables
839
- WHERE table_schema='public';
840
- """
841
- )
842
- self.tables = cursor.fetchall()
843
- self.db_type = "postgres"
844
- elif "sqlite3" in self.db_conn.__class__.__module__:
845
- # SQLite connection
846
- self.tables = self.db_conn.execute(
847
- "SELECT name, sql FROM sqlite_master WHERE type='table';"
848
- ).fetchall()
849
- self.db_type = "sqlite"
850
- else:
851
- self.tables = None
852
- self.db_type = None
853
-
854
- self.provider = provider
855
- self.api_url = api_url
856
- self.all_tools = all_tools or []
857
- self.all_tools_dict = {tool.tool_name: tool for tool in self.all_tools}
858
- if self.tools:
859
- tools_to_load = []
860
-
861
- for tool in self.tools:
862
- if isinstance(tool, Tool):
863
- continue
864
- if isinstance(tool, str):
865
- tools_to_load.append(tool)
866
- if len(tools_to_load) > 0:
867
- self.tools = self.load_suggested_tools(
868
- tools,
869
- self.global_tools_directory,
870
- self.project_tools_directory,
871
- )
872
- self.tools_dict = {tool.tool_name: tool for tool in self.tools}
873
- else:
874
- self.tools_dict = {}
875
-
876
- self.shared_context = {
877
- "dataframes": {},
878
- "current_data": None,
879
- "computation_results": {},
880
- }
881
- self.use_global_tools = use_global_tools
882
- self.use_npc_network = use_npc_network
883
-
884
- # Load tools if flag is set
885
- if self.use_global_tools:
886
- self.default_tools = self.load_tools()
887
- else:
888
- self.default_tools = []
889
- self.npc_cache = {}
890
-
891
- self.resolved_npcs = {}
892
-
893
- # Load NPC dependencies if flag is set
894
- if self.use_npc_network:
895
- self.parsed_npcs = self.parse_all_npcs()
896
- self.resolved_npcs = self.resolve_all_npcs()
897
- else:
898
- self.parsed_npcs = []
899
-
900
- def execute_query(self, query, params=None):
901
- """Execute a query based on database type"""
902
- if self.db_type == "postgres":
903
- cursor = self.db_conn.cursor()
904
- cursor.execute(query, params or ())
905
- return cursor.fetchall()
906
- else: # sqlite
907
- cursor = self.db_conn.execute(query, params or ())
908
- return cursor.fetchall()
909
-
910
- def _determine_db_type(self):
911
- """Determine if the connection is PostgreSQL or SQLite"""
912
- # Check the connection object's class name
913
- conn_type = self.db_conn.__class__.__module__.lower()
914
-
915
- if "psycopg" in conn_type:
916
- return "postgres"
917
- elif "sqlite" in conn_type:
918
- return "sqlite"
919
- else:
920
- raise ValueError(f"Unsupported database type: {conn_type}")
921
-
922
- def _get_tables(self):
923
- """Get table information based on database type"""
924
- if self.db_type == "postgres":
925
- cursor = self.db_conn.cursor()
926
- cursor.execute(
927
- """
928
- SELECT table_name, obj_description((quote_ident(table_name))::regclass, 'pg_class') as description
929
- FROM information_schema.tables
930
- WHERE table_schema='public';
931
- """
932
- )
933
- return cursor.fetchall()
934
- else: # sqlite
935
- return self.db_conn.execute(
936
- "SELECT name, sql FROM sqlite_master WHERE type='table';"
937
- ).fetchall()
938
-
939
- def get_memory(self):
940
- return
941
-
942
- def to_dict(self):
943
- return {
944
- "name": self.name,
945
- "primary_directive": self.primary_directive,
946
- "model": self.model,
947
- "provider": self.provider,
948
- "tools": [tool.to_dict() for tool in self.tools],
949
- "use_global_tools": self.use_global_tools,
950
- "api_url": self.api_url,
951
- }
952
-
953
- def _check_llm_command(
954
- self,
955
- command,
956
- retrieved_docs=None,
957
- messages=None,
958
- n_docs=5,
959
- context=None,
960
- shared_context=None,
961
- ):
962
- if shared_context is not None:
963
- self.shared_context = shared_context
964
- return check_llm_command(
965
- command,
966
- model=self.model,
967
- provider=self.provider,
968
- npc=self,
969
- retrieved_docs=retrieved_docs,
970
- messages=messages,
971
- n_docs=n_docs,
972
- context=context,
973
- )
974
-
975
- def handle_agent_pass(
976
- self,
977
- npc_to_pass: Any,
978
- command: str,
979
- messages: List[Dict[str, str]] = None,
980
- retrieved_docs=None,
981
- n_docs: int = 5,
982
- context=None,
983
- shared_context=None,
984
- ) -> Union[str, Dict[str, Any]]:
985
- """
986
- Function Description:
987
- This function handles an agent pass.
988
- Args:
989
- command (str): The command.
990
-
991
- Keyword Args:
992
- model (str): The model to use for handling the agent pass.
993
- provider (str): The provider to use for handling the agent pass.
994
- messages (List[Dict[str, str]]): The list of messages.
995
- npc (Any): The NPC object.
996
- retrieved_docs (Any): The retrieved documents.
997
- n_docs (int): The number of documents.
998
- Returns:
999
- Union[str, Dict[str, Any]]: The result of handling the agent pass.
1000
- """
1001
- # print(npc_to_pass, command)
1002
-
1003
- if isinstance(npc_to_pass, NPC):
1004
- npc_to_pass_init = npc_to_pass
1005
- else:
1006
- # assume just a string name?
1007
- target_npc = self.get_npc(npc_to_pass)
1008
- if target_npc is None:
1009
- return "NPC not found."
1010
-
1011
- # initialize them as an actual NPC
1012
- npc_to_pass_init = NPC(self.db_conn, **target_npc)
1013
- # print(npc_to_pass_init, command)
1014
- print(npc_to_pass, npc_to_pass.tools)
1015
- if shared_context is not None:
1016
- self.shared_context = shared_context
1017
- updated_command = (
1018
- command
1019
- + "/n"
1020
- + f"""
1021
-
1022
- NOTE: THIS COMMAND HAS ALREADY BEEN PASSED FROM ANOTHER NPC
1023
- TO YOU, {npc_to_pass}.
1024
-
1025
- THUS YOU WILL LIKELY NOT NEED TO PASS IT AGAIN TO YOURSELF
1026
- OR TO ANOTHER NPC. PLEASE CHOOSE ONE OF THE OTHER OPTIONS WHEN
1027
- RESPONDING.
1028
-
1029
-
1030
- """
1031
- )
1032
- return npc_to_pass_init._check_llm_command(
1033
- updated_command,
1034
- retrieved_docs=retrieved_docs,
1035
- messages=messages,
1036
- n_docs=n_docs,
1037
- shared_context=self.shared_context,
1038
- )
1039
-
1040
- def get_npc(self, npc_name: str):
1041
- if npc_name + ".npc" in self.npc_cache:
1042
- return self.npc_cache[npc_name + ".npc"]
1043
-
1044
- def load_suggested_tools(
1045
- self,
1046
- tools: list,
1047
- global_tools_directory: str,
1048
- project_tools_directory: str,
1049
- ) -> List[Tool]:
1050
- suggested_tools = []
1051
- for tool_name in tools:
1052
- # load tool from file
1053
- if not tool_name.endswith(".tool"):
1054
- tool_name += ".tool"
1055
- if (
1056
- global_tools_directory not in tool_name
1057
- and project_tools_directory not in tool_name
1058
- ):
1059
- # try to load from global tools directory
1060
- try:
1061
- tool_data = self.load_tool_from_file(
1062
- os.path.join(global_tools_directory, tool_name)
1063
- )
1064
- if tool_data is None:
1065
- raise ValueError(f"Tool {tool_name} not found.")
1066
-
1067
- print(f"Tool {tool_name} loaded from global directory.")
1068
-
1069
- except ValueError as e:
1070
- print(f"Error loading tool from global directory: {e}")
1071
- # trying to load from project tools directory
1072
- try:
1073
- tool_data = self.load_tool_from_file(
1074
- os.path.join(project_tools_directory, tool_name)
1075
- )
1076
- if tool_data is None:
1077
- raise ValueError(f"Tool {tool_name} not found.")
1078
- print(f"Tool {tool_name} loaded from project directory.")
1079
- except ValueError as e:
1080
- print(f"Error loading tool from project directory: {e}")
1081
- continue
1082
-
1083
- # print(tool_name)
1084
- # print(tool_data)
1085
- tool = Tool(tool_data)
1086
- self.all_tools.append(tool)
1087
- self.all_tools_dict[tool.tool_name] = tool
1088
- suggested_tools.append(tool)
1089
- return suggested_tools
1090
-
1091
- def __str__(self):
1092
- return f"NPC: {self.name}\nDirective: {self.primary_directive}\nModel: {self.model}"
1093
-
1094
- def analyze_db_data(self, request: str):
1095
- if self.db_conn is None:
1096
- print("please specify a database connection when initiating the NPC")
1097
- raise Exception("No database connection found")
1098
- return get_data_response(
1099
- request,
1100
- self.db_conn,
1101
- self.tables,
1102
- model=self.model,
1103
- provider=self.provider,
1104
- npc=self,
1105
- )
1106
-
1107
- def get_llm_response(self, request: str, **kwargs):
1108
- return get_llm_response(
1109
- request, model=self.model, provider=self.provider, npc=self, **kwargs
1110
- )
1111
-
1112
- def load_tool_from_file(self, tool_path: str) -> Union[dict, None]:
1113
- try:
1114
- with open(tool_path, "r") as f:
1115
- tool_content = f.read()
1116
- if not tool_content.strip():
1117
- print(f"Tool file {tool_path} is empty. Skipping.")
1118
- return None
1119
- tool_data = yaml.safe_load(tool_content)
1120
- if tool_data is None:
1121
- print(f"Tool file {tool_path} is invalid or empty. Skipping.")
1122
- return None
1123
- return tool_data
1124
- except yaml.YAMLError as e:
1125
- print(f"Error parsing tool {tool_path}: {e}")
1126
- return None
1127
- except Exception as e:
1128
- print(f"Error loading tool {tool_path}: {e}")
1129
- return None
1130
-
1131
- def compile(self, npc_file: str):
1132
- self.npc_cache.clear() # Clear the cache
1133
- self.resolved_npcs.clear()
1134
-
1135
- if isinstance(npc_file, NPC):
1136
- npc_file = npc_file.name + ".npc"
1137
- if not npc_file.endswith(".npc"):
1138
- raise ValueError("File must have .npc extension")
1139
- # get the absolute path
1140
- npc_file = os.path.abspath(npc_file)
1141
-
1142
- try:
1143
- # Parse NPCs from both global and project directories
1144
- self.parse_all_npcs()
1145
-
1146
- # Resolve NPCs
1147
- self.resolve_all_npcs()
1148
-
1149
- # Finalize NPC profile
1150
- # print(npc_file)
1151
- parsed_content = self.finalize_npc_profile(npc_file)
1152
-
1153
- # Load tools from both global and project directories
1154
- tools = self.load_tools()
1155
- parsed_content["tools"] = [tool.to_dict() for tool in tools]
1156
-
1157
- self.update_compiled_npcs_table(npc_file, parsed_content)
1158
- return parsed_content
1159
- except Exception as e:
1160
- raise e # Re-raise exception for debugging
1161
-
1162
- def load_tools(self):
1163
- tools = []
1164
- # Load tools from global and project directories
1165
- tool_paths = []
1166
-
1167
- if os.path.exists(self.global_tools_directory):
1168
- for filename in os.listdir(self.global_tools_directory):
1169
- if filename.endswith(".tool"):
1170
- tool_paths.append(
1171
- os.path.join(self.global_tools_directory, filename)
1172
- )
1173
-
1174
- if os.path.exists(self.project_tools_directory):
1175
- for filename in os.listdir(self.project_tools_directory):
1176
- if filename.endswith(".tool"):
1177
- tool_paths.append(
1178
- os.path.join(self.project_tools_directory, filename)
1179
- )
1180
-
1181
- tool_dict = {}
1182
- for tool_path in tool_paths:
1183
- tool_data = self.load_tool_from_file(tool_path)
1184
- if tool_data:
1185
- tool = Tool(tool_data)
1186
- # Project tools override global tools
1187
- tool_dict[tool.tool_name] = tool
1188
-
1189
- return list(tool_dict.values())
1190
-
1191
- def parse_all_npcs(self) -> None:
1192
- directories = [self.global_npc_directory, self.project_npc_directory]
1193
- for directory in directories:
1194
- if os.path.exists(directory):
1195
- for filename in os.listdir(directory):
1196
- if filename.endswith(".npc"):
1197
- npc_path = os.path.join(directory, filename)
1198
- self.parse_npc_file(npc_path)
1199
-
1200
- def parse_npc_file(self, npc_file_path: str) -> dict:
1201
- npc_file = os.path.basename(npc_file_path)
1202
- if npc_file in self.npc_cache:
1203
- # Project NPCs override global NPCs
1204
- if npc_file_path.startswith(self.project_npc_directory):
1205
- print(f"Overriding NPC {npc_file} with project version.")
1206
- else:
1207
- # Skip if already loaded from project directory
1208
- return self.npc_cache[npc_file]
1209
-
1210
- try:
1211
- with open(npc_file_path, "r") as f:
1212
- npc_content = f.read()
1213
- # Parse YAML without resolving Jinja templates
1214
- profile = yaml.safe_load(npc_content)
1215
- self.npc_cache[npc_file] = profile
1216
- return profile
1217
- except yaml.YAMLError as e:
1218
- raise ValueError(f"Invalid YAML in NPC profile {npc_file}: {str(e)}")
1219
-
1220
- def resolve_all_npcs(self):
1221
- resolved_npcs = []
1222
- for npc_file in self.npc_cache:
1223
- npc = self.resolve_npc_profile(npc_file)
1224
- resolved_npcs.append(npc)
1225
- # print(npc)
1226
- return resolved_npcs
1227
-
1228
- def resolve_npc_profile(self, npc_file: str) -> dict:
1229
- if npc_file in self.resolved_npcs:
1230
- return self.resolved_npcs[npc_file]
1231
-
1232
- profile = self.npc_cache[npc_file].copy()
1233
-
1234
- # Resolve Jinja templates
1235
- for key, value in profile.items():
1236
- if isinstance(value, str):
1237
- template = self.jinja_env.from_string(value)
1238
- profile[key] = template.render(self.npc_cache)
1239
-
1240
- # Handle inheritance
1241
- if "inherits_from" in profile:
1242
- parent_profile = self.resolve_npc_profile(profile["inherits_from"] + ".npc")
1243
- profile = self.merge_profiles(parent_profile, profile)
1244
-
1245
- self.resolved_npcs[npc_file] = profile
1246
- return profile
1247
-
1248
- def finalize_npc_profile(self, npc_file: str) -> dict:
1249
- profile = self.resolved_npcs.get(os.path.basename(npc_file))
1250
- if not profile:
1251
- # try to resolve it with load_npc_from_file
1252
- profile = load_npc_from_file(npc_file, self.db_conn).to_dict()
1253
-
1254
- # raise ValueError(f"NPC {npc_file} has not been resolved.")
1255
-
1256
- # Resolve any remaining references
1257
- # Log the profile content before processing
1258
- # print(f"Initial profile for {npc_file}: {profile}")
1259
-
1260
- for key, value in profile.items():
1261
- if isinstance(value, str):
1262
- template = self.jinja_env.from_string(value)
1263
- profile[key] = template.render(self.resolved_npcs)
1264
-
1265
- required_keys = ["name", "primary_directive"]
1266
- for key in required_keys:
1267
- if key not in profile:
1268
- raise ValueError(f"Missing required key in NPC profile: {key}")
1269
-
1270
- return profile
1271
-
1272
-
1273
- class SilentUndefined(Undefined):
1274
- def _fail_with_undefined_error(self, *args, **kwargs):
1275
- return ""
1276
-
1277
-
1278
- class NPCTeam:
1279
- def __init__(self, npcs: list, foreman: NPC, db_conn=None, context: dict = None):
1280
- self.npcs = npcs
1281
- self.foreman = foreman
1282
- self.foreman.resolved_npcs = [{npc.name: npc} for npc in self.npcs]
1283
- self.db_conn = db_conn
1284
- self.context = context
1285
- self.shared_context = {
1286
- "intermediate_results": {}, # Store results each NPC produces
1287
- "data": {}, # Active data being analyzed
1288
- }
1289
-
1290
- def to_dict(self):
1291
- return {
1292
- "foreman": self.foreman.to_dict(),
1293
- "npcs": [npc.to_dict() for npc in self.npcs],
1294
- "context": self.context,
1295
- }
1296
-
1297
- def orchestrate(self, request: str):
1298
- # Initial check with foreman
1299
- result = self.foreman._check_llm_command(
1300
- request,
1301
- context=self.context,
1302
- shared_context=self.shared_context,
1303
- )
1304
- try:
1305
- while True:
1306
- try:
1307
- result = self.foreman._check_llm_command(
1308
- request,
1309
- context=self.context,
1310
- shared_context=self.shared_context,
1311
- )
1312
-
1313
- # Track execution history and init npc messages if needed
1314
- if "execution_history" not in self.shared_context:
1315
- self.shared_context["execution_history"] = []
1316
- if "npc_messages" not in self.shared_context:
1317
- self.shared_context["npc_messages"] = {}
1318
-
1319
- # Save result and maintain NPC message history
1320
- if isinstance(result, dict):
1321
- self.shared_context["execution_history"].append(result)
1322
- if result.get("messages") and result.get("npc_name"):
1323
- if (
1324
- result["npc_name"]
1325
- not in self.shared_context["npc_messages"]
1326
- ):
1327
- self.shared_context["npc_messages"][
1328
- result["npc_name"]
1329
- ] = []
1330
- self.shared_context["npc_messages"][
1331
- result["npc_name"]
1332
- ].extend(result["messages"])
1333
-
1334
- # Check if complete
1335
- follow_up = get_llm_response(
1336
- f"""Context: User request '{request}' returned:
1337
- {result}
1338
-
1339
- Instructions:
1340
- Analyze if this result fully addresses the request. In your evaluation you must not be
1341
- too harsh. While there may be numerous refinements that can be made to improve the output
1342
- to "fully address" the request, it will be typically better for the user to
1343
- have a higher rate of interactive feedback such that we will not lose track of the
1344
- real aim and get stuck in a rut hyper-fixating.
1345
- Thus it is better to consider results as complete if they satisfy the bare minimum
1346
- of the request and provide a good starting point for further refinement.
1347
-
1348
- Return a JSON object with two fields:
1349
- -'complete' with boolean value.
1350
- -'explanation' for incompleteness
1351
- Do not include markdown formatting or ```json tags.
1352
- Return only the JSON object.""",
1353
- model=self.foreman.model,
1354
- provider=self.foreman.provider,
1355
- npc=self.foreman,
1356
- format="json",
1357
- )
1358
-
1359
- if isinstance(follow_up, dict) and isinstance(
1360
- follow_up.get("response"), dict
1361
- ):
1362
- print(
1363
- "response finished? ",
1364
- follow_up.get("response", {}).get("complete", False),
1365
- )
1366
- print(
1367
- "explanation provided",
1368
- follow_up.get("response", {}).get("explanation", ""),
1369
- )
1370
-
1371
- if not follow_up["response"].get("complete", False):
1372
- return self.orchestrate(
1373
- request
1374
- + " /n The request has not yet been fully completed."
1375
- + follow_up["response"]["explanation"]
1376
- + " /n"
1377
- + "please ensure that you tackle only the remaining parts of the request"
1378
- )
1379
- else:
1380
- # Get final summary and recommendations
1381
- debrief = get_llm_response(
1382
- f"""Context:
1383
- Original request: {request}
1384
-
1385
- Execution history: {self.shared_context['execution_history']}
1386
-
1387
- Instructions:
1388
- Provide summary of actions taken and any recommendations.
1389
- Return a JSON object with fields:
1390
- - 'summary': Overview of what was accomplished
1391
- - 'recommendations': Suggested next steps
1392
- Do not include markdown formatting or ```json tags.
1393
- Return only the JSON object.""",
1394
- model=self.foreman.model,
1395
- provider=self.foreman.provider,
1396
- npc=self.foreman,
1397
- format="json",
1398
- )
1399
-
1400
- return {
1401
- "debrief": debrief.get("response"),
1402
- "execution_history": self.shared_context[
1403
- "execution_history"
1404
- ],
1405
- }
1406
-
1407
- return result
1408
-
1409
- except KeyboardInterrupt:
1410
- print("\nExecution interrupted. Options:")
1411
- print("1. Provide additional context")
1412
- print("2. Skip this step")
1413
- print("3. Resume execution")
1414
-
1415
- choice = input("Enter choice (1-3): ")
1416
-
1417
- if choice == "1":
1418
- new_context = input("Enter additional context: ")
1419
- self.context["additional_context"] = new_context
1420
- continue
1421
- elif choice == "2":
1422
- return {"response": "Step skipped by user"}
1423
- elif choice == "3":
1424
- continue
1425
- else:
1426
- print("Invalid choice, resuming...")
1427
- continue
1428
-
1429
- except Exception as e:
1430
- # Get the full traceback
1431
- tb_lines = traceback.format_exc().splitlines()
1432
-
1433
- # Keep first 2 lines and last 3 lines
1434
- if len(tb_lines) > 5:
1435
- limited_tb = "\n".join(tb_lines[:2] + ["..."] + tb_lines[-3:])
1436
- else:
1437
- limited_tb = "\n".join(tb_lines)
1438
-
1439
- print(f"Error in orchestration: {str(e)}")
1440
- print(f"Limited traceback:\n{limited_tb}")
1441
- return {"error": f"{str(e)}\n{limited_tb}"}
1442
-
1443
-
1444
- # perhaps the npc compiling is more than just for jinja reasons.
1445
- # we can turn each agent into a referenceable program executable.
1446
- # finish testing out a python based version rather than jinja only
1447
- class NPCCompiler:
1448
- def __init__(
1449
- self,
1450
- npc_directory,
1451
- db_path,
1452
- ):
1453
- self.npc_directory = npc_directory
1454
- self.dirs = [self.npc_directory]
1455
- # import pdb
1456
- self.is_global_dir = self.npc_directory == os.path.expanduser(
1457
- "~/.npcsh/npc_team/"
1458
- )
1459
-
1460
- # pdb.set_trace()
1461
- if self.is_global_dir:
1462
- self.project_npc_directory = None
1463
- self.project_tools_directory = None
1464
- else:
1465
- self.project_npc_directory = npc_directory
1466
- self.project_tools_directory = os.path.join(
1467
- self.project_npc_directory, "tools"
1468
- )
1469
- self.dirs.append(self.project_npc_directory)
1470
-
1471
- self.db_path = db_path
1472
- self.npc_cache = {}
1473
- self.resolved_npcs = {}
1474
- self.pipe_cache = {}
1475
-
1476
- # Set tools directories
1477
- self.global_tools_directory = os.path.join(
1478
- os.path.expanduser("~/.npcsh/npc_team/"), "tools"
1479
- )
1480
-
1481
- # Initialize Jinja environment with multiple loaders
1482
- self.jinja_env = Environment(
1483
- loader=FileSystemLoader(self.dirs),
1484
- undefined=SilentUndefined,
1485
- )
1486
-
1487
- self.all_tools_dict = self.load_tools()
1488
- self.all_tools = list(self.all_tools_dict.values())
1489
-
1490
- def generate_tool_script(self, tool: Tool):
1491
- script_content = f"""
1492
- # Auto-generated script for tool: {tool.tool_name}
1493
-
1494
- def {tool.tool_name}_execute(inputs):
1495
- # Preprocess steps
1496
- """
1497
- # Add preprocess steps
1498
- for step in tool.preprocess:
1499
- script_content += f" # Preprocess: {step}\n"
1500
-
1501
- # Add prompt rendering
1502
- script_content += f"""
1503
- # Render prompt
1504
- prompt = '''{tool.prompt}'''
1505
- # You might need to render the prompt with inputs
1506
-
1507
- # Call the LLM (this is simplified)
1508
- llm_response = get_llm_response(prompt)
1509
-
1510
- # Postprocess steps
1511
- """
1512
- for step in tool.postprocess:
1513
- script_content += f" # Postprocess: {step}\n"
1514
-
1515
- script_content += f" return llm_response\n"
1516
-
1517
- # Write the script to a file
1518
- script_filename = f"{tool.tool_name}_script.py"
1519
- with open(script_filename, "w") as script_file:
1520
- script_file.write(script_content)
1521
-
1522
- def compile(self, npc_file: str):
1523
- self.npc_cache.clear() # Clear the cache
1524
- self.resolved_npcs.clear()
1525
- if isinstance(npc_file, NPC):
1526
- npc_file = npc_file.name + ".npc"
1527
- if not npc_file.endswith(".npc"):
1528
- raise ValueError("File must have .npc extension")
1529
- # get the absolute path
1530
- npc_file = os.path.abspath(npc_file)
1531
-
1532
- self.parse_all_npcs()
1533
- # Resolve NPCs
1534
- self.resolve_all_npcs()
1535
-
1536
- # Finalize NPC profile
1537
- # print(npc_file)
1538
- # print(npc_file, "npc_file")
1539
- parsed_content = self.finalize_npc_profile(npc_file)
1540
-
1541
- # Load tools from both global and project directories
1542
- parsed_content["tools"] = [tool.to_dict() for tool in self.all_tools]
1543
-
1544
- self.update_compiled_npcs_table(npc_file, parsed_content)
1545
- return parsed_content
1546
-
1547
- def load_tools(self):
1548
- tools = []
1549
- # Load tools from global and project directories
1550
- tool_paths = []
1551
-
1552
- if os.path.exists(self.global_tools_directory):
1553
- for filename in os.listdir(self.global_tools_directory):
1554
- if filename.endswith(".tool"):
1555
- tool_paths.append(
1556
- os.path.join(self.global_tools_directory, filename)
1557
- )
1558
- if self.project_tools_directory is not None:
1559
- if os.path.exists(self.project_tools_directory):
1560
- for filename in os.listdir(self.project_tools_directory):
1561
- if filename.endswith(".tool"):
1562
- tool_paths.append(
1563
- os.path.join(self.project_tools_directory, filename)
1564
- )
1565
-
1566
- tool_dict = {}
1567
- for tool_path in tool_paths:
1568
- tool_data = self.load_tool_from_file(tool_path)
1569
- if tool_data:
1570
- tool = Tool(tool_data)
1571
- # Project tools override global tools
1572
- tool_dict[tool.tool_name] = tool
1573
-
1574
- return tool_dict
1575
-
1576
- def load_tool_from_file(self, tool_path: str) -> Union[dict, None]:
1577
- try:
1578
- with open(tool_path, "r") as f:
1579
- tool_content = f.read()
1580
- if not tool_content.strip():
1581
- print(f"Tool file {tool_path} is empty. Skipping.")
1582
- return None
1583
- tool_data = yaml.safe_load(tool_content)
1584
- if tool_data is None:
1585
- print(f"Tool file {tool_path} is invalid or empty. Skipping.")
1586
- return None
1587
- return tool_data
1588
- except yaml.YAMLError as e:
1589
- print(f"Error parsing tool {tool_path}: {e}")
1590
- return None
1591
- except Exception as e:
1592
- print(f"Error loading tool {tool_path}: {e}")
1593
- return None
1594
-
1595
- def parse_all_npcs(self) -> None:
1596
- # print(self.dirs)
1597
- for directory in self.dirs:
1598
- if os.path.exists(directory):
1599
-
1600
- for filename in os.listdir(directory):
1601
- if filename.endswith(".npc"):
1602
- npc_path = os.path.join(directory, filename)
1603
- self.parse_npc_file(npc_path)
1604
-
1605
- def parse_npc_file(self, npc_file_path: str) -> dict:
1606
- npc_file = os.path.basename(npc_file_path)
1607
- if npc_file in self.npc_cache:
1608
- # Project NPCs override global NPCs
1609
- if self.project_npc_directory is not None:
1610
- if npc_file_path.startswith(self.project_npc_directory):
1611
- print(f"Overriding NPC {npc_file} with project version.")
1612
- else:
1613
- # Skip if already loaded from project directory
1614
- return self.npc_cache[npc_file]
1615
-
1616
- try:
1617
- with open(npc_file_path, "r") as f:
1618
- npc_content = f.read()
1619
- # Parse YAML without resolving Jinja templates
1620
- profile = yaml.safe_load(npc_content)
1621
- self.npc_cache[npc_file] = profile
1622
- return profile
1623
- except yaml.YAMLError as e:
1624
- raise ValueError(f"Invalid YAML in NPC profile {npc_file}: {str(e)}")
1625
-
1626
- def resolve_all_npcs(self):
1627
- for npc_file in self.npc_cache:
1628
- npc = self.resolve_npc_profile(npc_file)
1629
- # print(npc)
1630
-
1631
- def resolve_npc_profile(self, npc_file: str) -> dict:
1632
- if npc_file in self.resolved_npcs:
1633
- return self.resolved_npcs[npc_file]
1634
-
1635
- profile = self.npc_cache[npc_file].copy()
1636
-
1637
- # Resolve Jinja templates
1638
- for key, value in profile.items():
1639
- if isinstance(value, str):
1640
- template = self.jinja_env.from_string(value)
1641
- profile[key] = template.render(self.npc_cache)
1642
-
1643
- # Handle inheritance
1644
- if "inherits_from" in profile:
1645
- parent_profile = self.resolve_npc_profile(profile["inherits_from"] + ".npc")
1646
- profile = self.merge_profiles(parent_profile, profile)
1647
-
1648
- self.resolved_npcs[npc_file] = profile
1649
- return profile
1650
-
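As an illustration of the inheritance resolved here, a child profile might look like the following sketch (the NPC names are illustrative; the keys are the ones this code reads, and merge_profiles concatenates lists, merges nested dicts, and lets child scalars override the parent's):

# hypothetical analyst.npc
name: analyst
inherits_from: foreman        # resolved against foreman.npc, then merged
primary_directive: "Analyze incoming data and flag anomalies."
model: llama3.2
provider: ollama
use_global_tools: true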
1651
- def finalize_npc_profile(self, npc_file: str) -> dict:
1652
- profile = self.resolved_npcs.get(os.path.basename(npc_file))
1653
- if not profile:
1654
- # try to resolve it with load_npc_from_file
1655
- profile = load_npc_from_file(
1656
- npc_file, sqlite3.connect(self.db_path)
1657
- ).to_dict()
1658
-
1659
- # Resolve any remaining references
1660
- # Log the profile content before processing
1661
- # print(f"Initial profile for {npc_file}: {profile}")
1662
-
1663
- for key, value in profile.items():
1664
- if isinstance(value, str):
1665
- template = self.jinja_env.from_string(value)
1666
- profile[key] = template.render(self.resolved_npcs)
1667
-
1668
- required_keys = ["name", "primary_directive"]
1669
- for key in required_keys:
1670
- if key not in profile:
1671
- raise ValueError(f"Missing required key in NPC profile: {key}")
1672
-
1673
- return profile
1674
-
1675
- def execute_stage(self, stage, context, jinja_env):
1676
- step_name = stage["step_name"]
1677
- npc_name = stage["npc"]
1678
- npc_name = jinja_env.from_string(npc_name).render(context)
1679
- # print("npc name: ", npc_name)
1680
- npc_path = get_npc_path(npc_name, self.db_path)
1681
- # print("npc path: ", npc_path)
1682
- prompt_template = stage["task"]
1683
- num_samples = stage.get("num_samples", 1)
1684
-
1685
- step_results = []
1686
- for sample_index in range(num_samples):
1687
- # Load the NPC
1688
- npc = load_npc_from_file(npc_path, sqlite3.connect(self.db_path))
1689
-
1690
- # Render the prompt using Jinja2
1691
-            # Use a separate variable so the original template string survives
-            # across samples (from_string expects a str, not a Template)
-            template = jinja_env.from_string(prompt_template)
-            prompt = template.render(context, sample_index=sample_index)
1693
-
1694
- response = npc.get_llm_response(prompt)
1695
- # print(response)
1696
- step_results.append({"npc": npc_name, "response": response["response"]})
1697
-
1698
- # Update context with the response for the next step
1699
- context[f"{step_name}_{sample_index}"] = response[
1700
- "response"
1701
- ] # Update context with step's response
1702
-
1703
- return step_results
1704
-
1705
- def aggregate_step_results(self, step_results, aggregation_strategy):
1706
- responses = [result["response"] for result in step_results]
1707
- if len(responses) == 1:
1708
- return responses[0]
1709
- if aggregation_strategy == "concat":
1710
- return "\n".join(responses)
1711
- elif aggregation_strategy == "summary":
1712
- # Use the LLM to generate a summary of the responses
1713
- response_text = "\n".join(responses)
1714
- summary_prompt = (
1715
- f"Please provide a concise summary of the following responses: "
1716
- + response_text
1717
- )
1718
-
1719
- summary = self.get_llm_response(summary_prompt)["response"]
1720
- return summary
1721
- elif aggregation_strategy == "pessimistic_critique":
1722
- # Use the LLM to provide a pessimistic critique of the responses
1723
- response_text = "\n".join(responses)
1724
- critique_prompt = f"Please provide a pessimistic critique of the following responses:\n\n{response_text}"
1725
-
1726
- critique = self.get_llm_response(critique_prompt)["response"]
1727
- return critique
1728
- elif aggregation_strategy == "optimistic_view":
1729
- # Use the LLM to provide an optimistic view of the responses
1730
- response_text = "\n".join(responses)
1731
- optimistic_prompt = f"Please provide an optimistic view of the following responses:\n\n{response_text}"
1732
- optimistic_view = self.get_llm_response(optimistic_prompt)["response"]
1733
- return optimistic_view
1734
- elif aggregation_strategy == "balanced_analysis":
1735
- # Use the LLM to provide a balanced analysis of the responses
1736
- response = "\n".join(responses)
1737
- analysis_prompt = f"Please provide a balanced analysis of the following responses:\n\n{response}"
1738
-
1739
- balanced_analysis = self.get_llm_response(analysis_prompt)["response"]
1740
- return balanced_analysis
1741
- elif aggregation_strategy == "first":
1742
- return responses[0]
1743
- elif aggregation_strategy == "last":
1744
- return responses[-1]
1745
- else:
1746
- raise ValueError(f"Invalid aggregation strategy: {aggregation_strategy}")
1747
-
1748
- def compile_pipe(self, pipe_file: str, initial_input=None) -> dict:
1749
- if pipe_file in self.pipe_cache:
1750
- return self.pipe_cache[pipe_file]
1751
-
1752
- if not pipe_file.endswith(".pipe"):
1753
- raise ValueError("Pipeline file must have .pipe extension")
1754
-
1755
- # print(pipe_file)
1756
-
1757
- with open(pipe_file, "r") as f:
1758
- pipeline_data = yaml.safe_load(f)
1759
-
1760
- final_output = {}
1761
- jinja_env = Environment(loader=FileSystemLoader("."), undefined=SilentUndefined)
1762
-
1763
- context = {"input": initial_input, **self.npc_cache}
1764
-
1765
- with sqlite3.connect(self.db_path) as conn:
1766
- cursor = conn.cursor()
1767
- pipeline_name = os.path.basename(pipe_file).replace(".pipe", "")
1768
-
1769
- for stage in pipeline_data["steps"]:
1770
- step_results = self.execute_stage(stage, context, jinja_env)
1771
- aggregated_result = self.aggregate_step_results(
1772
- step_results, stage.get("aggregation_strategy", "first")
1773
- )
1774
-
1775
- # Store in database
1776
- cursor.execute(
1777
- "INSERT INTO pipeline_runs (pipeline_name, step_name, output) VALUES (?, ?, ?)",
1778
- (pipeline_name, stage["step_name"], str(aggregated_result)),
1779
- )
1780
-
1781
- final_output[stage["step_name"]] = aggregated_result
1782
- context[stage["step_name"]] = aggregated_result
1783
-
1784
- conn.commit()
1785
-
1786
- self.pipe_cache[pipe_file] = final_output # Cache the results
1787
-
1788
- return final_output
1789
-
1790
- def merge_profiles(self, parent, child) -> dict:
1791
- merged = parent.copy()
1792
- for key, value in child.items():
1793
- if isinstance(value, list) and key in merged:
1794
- merged[key] = merged[key] + value
1795
- elif isinstance(value, dict) and key in merged:
1796
- merged[key] = self.merge_profiles(merged[key], value)
1797
- else:
1798
- merged[key] = value
1799
- return merged
1800
-
1801
- def update_compiled_npcs_table(self, npc_file, parsed_content) -> None:
1802
- try:
1803
- with sqlite3.connect(self.db_path) as conn:
1804
- cursor = conn.cursor()
1805
- npc_name = parsed_content["name"]
1806
- source_path = npc_file
1807
-
1808
- cursor.execute(
1809
- "INSERT OR REPLACE INTO compiled_npcs (name, source_path, compiled_content) VALUES (?, ?, ?)", # Correct column name
1810
- (npc_name, source_path, yaml.dump(parsed_content)),
1811
- )
1812
- conn.commit()
1813
- except Exception as e:
1814
- print(
1815
- f"Error updating compiled_npcs table: {str(e)}"
1816
- ) # Print the full error
1817
-
1818
-
1819
- def load_npc_from_file(npc_file: str, db_conn: sqlite3.Connection) -> NPC:
1820
- if not npc_file.endswith(".npc"):
1821
-        # append the extension just in case it was omitted
-        npc_file += ".npc"
1823
-
1824
- try:
1825
- if "~" in npc_file:
1826
- npc_file = os.path.expanduser(npc_file)
1827
- if not os.path.isabs(npc_file):
1828
- npc_file = os.path.abspath(npc_file)
1829
-
1830
- with open(npc_file, "r") as f:
1831
- npc_data = yaml.safe_load(f)
1832
-
1833
- # Extract fields from YAML
1834
- name = npc_data["name"]
1835
-
1836
- primary_directive = npc_data.get("primary_directive")
1837
- tools = npc_data.get("tools")
1838
- model = npc_data.get("model", os.environ.get("NPCSH_CHAT_MODEL", "llama3.2"))
1839
- provider = npc_data.get(
1840
- "provider", os.environ.get("NPCSH_CHAT_PROVIDER", "ollama")
1841
- )
1842
- api_url = npc_data.get("api_url", os.environ.get("NPCSH_API_URL", None))
1843
- use_global_tools = npc_data.get("use_global_tools", True)
1844
- # print(use_global_tools)
1845
- # Load tools from global and project-specific directories
1846
- all_tools = []
1847
-        # 1. Load tools defined within the NPC profile, converting each YAML
-        #    entry into a Tool object (build a new list instead of appending
-        #    to the list being iterated)
-        if "tools" in npc_data and npc_data["tools"]:
-            tools = [Tool(tool_data) for tool_data in npc_data["tools"]]
1852
- # 2. Load global tools from ~/.npcsh/npc_team/tools
1853
- user_home = os.path.expanduser("~")
1854
- global_tools_directory = os.path.join(user_home, ".npcsh", "npc_team", "tools")
1855
- all_tools.extend(load_tools_from_directory(global_tools_directory))
1856
- # 3. Load project-specific tools from ./npc_team/tools
1857
- project_tools_directory = os.path.abspath("./npc_team/tools")
1858
- all_tools.extend(load_tools_from_directory(project_tools_directory))
1859
-
1860
- # Remove duplicates, giving precedence to project-specific tools
1861
- tool_dict = {}
1862
- for tool in all_tools:
1863
- tool_dict[tool.tool_name] = tool # Project tools overwrite global tools
1864
-
1865
- all_tools = list(tool_dict.values())
1866
-
1867
- # Initialize and return the NPC object
1868
- return NPC(
1869
- name,
1870
- db_conn=db_conn,
1871
- primary_directive=primary_directive,
1872
- tools=tools,
1873
- use_global_tools=use_global_tools,
1874
- model=model,
1875
- provider=provider,
1876
- api_url=api_url,
1877
- all_tools=all_tools, # Pass the tools
1878
- )
1879
-
1880
- except FileNotFoundError:
1881
- raise ValueError(f"NPC file not found: {npc_file}")
1882
- except yaml.YAMLError as e:
1883
- raise ValueError(f"Error parsing YAML in NPC file {npc_file}: {str(e)}")
1884
- except KeyError as e:
1885
- raise ValueError(f"Missing required key in NPC file {npc_file}: {str(e)}")
1886
- except Exception as e:
1887
- raise ValueError(f"Error loading NPC from file {npc_file}: {str(e)}")
1888
-
1889
-
1890
- import os
- import yaml
- import hashlib
- import sqlite3
- import json
- import re
- import random  # used by PipelineRunner.conduct_voting below
- from datetime import datetime
- from sqlalchemy import create_engine
- import pandas as pd
- from jinja2 import Template
1900
-
1901
-
1902
- ###
1903
- ###
1904
- ###
1905
- ###
1906
- ### What is a pipeline file?
- """
- A pipeline file defines an ordered list of steps, e.g.:
-
- steps:
-   - step_name: "step_name"
-     npc: npc_name
-     task: "task"
-     tools: ['tool1', 'tool2']
-
- # Results within the pipeline need to be referenceable from the shared context
- # through the step name, so if the step name is review_email and a tool is
- # called, we can refer to the intermediate objects as
- # review_email['tool1']['{var_name_in_tool_definition}']
-
- So in step 2 the task can reference step 1's output, e.g.:
- task: "sort the emails by tone by reviewing the outputs from the email review tool: {{ review_email['email_review']['tone'] }}"
- """
1924
-
1925
-
1926
- """
1927
- adding in context and fabs
1928
- """
1929
-
1930
-
1931
- class PipelineRunner:
1932
- def __init__(
1933
- self,
1934
- pipeline_file: str,
1935
- db_path: str = "~/npcsh_history.db",
1936
- npc_root_dir: str = "../",
1937
- ):
1938
- self.pipeline_file = pipeline_file
1939
- self.pipeline_data = self.load_pipeline()
1940
- self.db_path = os.path.expanduser(db_path)
1941
- self.npc_root_dir = npc_root_dir
1942
- self.npc_cache = {}
1943
- self.db_engine = create_engine(f"sqlite:///{self.db_path}")
1944
-
1945
- def load_pipeline(self):
1946
- with open(self.pipeline_file, "r") as f:
1947
- return yaml.safe_load(f)
1948
-
1949
- def compute_pipeline_hash(self):
1950
- with open(self.pipeline_file, "r") as f:
1951
- content = f.read()
1952
- return hashlib.sha256(content.encode()).hexdigest()
1953
-
1954
- def execute_pipeline(self):
1955
- context = {
1956
- "npc": self.npc_ref,
1957
- "ref": lambda step_name: step_name, # Directly use step name
1958
- "source": self.fetch_data_from_source,
1959
- }
1960
-
1961
- pipeline_hash = self.compute_pipeline_hash()
1962
- pipeline_name = os.path.splitext(os.path.basename(self.pipeline_file))[0]
1963
- results_table_name = f"{pipeline_name}_results"
1964
- self.ensure_tables_exist(results_table_name)
1965
- run_id = self.create_run_entry(pipeline_hash)
1966
-
1967
- for step in self.pipeline_data["steps"]:
1968
- self.execute_step(step, context, run_id, results_table_name)
1969
-
1970
- def npc_ref(self, npc_name: str):
1971
- clean_name = npc_name.replace("MISSING_REF_", "")
1972
- try:
1973
- npc_path = self.find_npc_path(clean_name)
1974
- return clean_name if npc_path else f"MISSING_REF_{clean_name}"
1975
- except Exception:
1976
- return f"MISSING_REF_{clean_name}"
1977
-
1978
- def execute_step(
1979
- self, step: dict, context: dict, run_id: int, results_table_name: str
1980
- ):
1981
- """Execute pipeline step and store results in the database."""
1982
- print("\nStarting step execution...")
1983
-
1984
- mixa = step.get("mixa", False)
1985
- mixa_turns = step.get("mixa_turns", 5 if mixa else None)
1986
-
1987
- npc_name = Template(step.get("npc", "")).render(context)
1988
- npc = self.load_npc(npc_name)
1989
- model = step.get("model", npc.model)
1990
- provider = step.get("provider", npc.provider)
1991
-
1992
- response_text = ""
1993
-
1994
- if mixa:
1995
- print("Executing mixture of agents strategy...")
1996
- response_text = self.execute_mixture_of_agents(
1997
- step,
1998
- context,
1999
- run_id,
2000
- results_table_name,
2001
- npc,
2002
- model,
2003
- provider,
2004
- mixa_turns,
2005
- )
2006
- else:
2007
- source_matches = re.findall(
2008
- r"{{\s*source\('([^']+)'\)\s*}}", step.get("task", "")
2009
- )
2010
- print(f"Found source matches: {source_matches}")
2011
-
2012
- if not source_matches:
2013
- rendered_task = Template(step.get("task", "")).render(context)
2014
- response = get_llm_response(
2015
- rendered_task, model=model, provider=provider, npc=npc
2016
- )
2017
- response_text = response.get("response", "")
2018
- else:
2019
- table_name = source_matches[0]
2020
- df = pd.read_sql(f"SELECT * FROM {table_name}", self.db_engine)
2021
- print(f"\nQuerying table: {table_name}")
2022
- print(f"Found {len(df)} rows")
2023
-
2024
- if step.get("batch_mode", False):
2025
- data_str = df.to_json(orient="records")
2026
- rendered_task = step.get("task", "").replace(
2027
- f"{{{{ source('{table_name}') }}}}", data_str
2028
- )
2029
- rendered_task = Template(rendered_task).render(context)
2030
-
2031
- response = get_llm_response(
2032
- rendered_task, model=model, provider=provider, npc=npc
2033
- )
2034
- response_text = response.get("response", "")
2035
- else:
2036
- all_responses = []
2037
- for idx, row in df.iterrows():
2038
- row_data = json.dumps(row.to_dict())
2039
- row_task = step.get("task", "").replace(
2040
- f"{{{{ source('{table_name}') }}}}", row_data
2041
- )
2042
- rendered_task = Template(row_task).render(context)
2043
-
2044
- response = get_llm_response(
2045
- rendered_task, model=model, provider=provider, npc=npc
2046
- )
2047
- result = response.get("response", "")
2048
- all_responses.append(result)
2049
-
2050
- response_text = all_responses
2051
-
2052
- # Storing the final result in the database
2053
- self.store_result(
2054
- run_id,
2055
- step["step_name"],
2056
- npc_name,
2057
- model,
2058
- provider,
2059
- {"response": response_text},
2060
- response_text,
2061
- results_table_name,
2062
- )
2063
-
2064
- context[step["step_name"]] = response_text
2065
- print(f"\nStep complete. Response stored in context[{step['step_name']}]")
2066
- return response_text
2067
-
2068
- def store_result(
2069
- self,
2070
- run_id,
2071
- task_name,
2072
- npc_name,
2073
- model,
2074
- provider,
2075
- inputs,
2076
- outputs,
2077
- results_table_name,
2078
- ):
2079
- """Store results into the specified results table in the database."""
2080
- cleaned_inputs = self.clean_for_json(inputs)
2081
- conn = sqlite3.connect(self.db_path)
2082
- try:
2083
- conn.execute(
2084
- f"""
2085
- INSERT INTO {results_table_name} (run_id, task_name, npc_name,
2086
- model, provider, inputs, outputs) VALUES (?, ?, ?, ?, ?, ?, ?)
2087
- """,
2088
- (
2089
- run_id,
2090
- task_name,
2091
- npc_name,
2092
- model,
2093
- provider,
2094
- json.dumps(cleaned_inputs),
2095
- json.dumps(outputs),
2096
- ),
2097
- )
2098
- conn.commit()
2099
- except Exception as e:
2100
- print(f"Error storing result: {e}")
2101
- finally:
2102
- conn.close()
2103
-
2104
- def execute_mixture_of_agents(
2105
- self,
2106
- step,
2107
- context,
2108
- run_id,
2109
- results_table_name,
2110
- npc,
2111
- model,
2112
- provider,
2113
- mixa_turns,
2114
- ):
2115
- """Facilitates multi-agent decision-making with feedback for refinement."""
2116
-
2117
- # Read agent counts from the step configuration
2118
- num_generating_agents = len(step.get("mixa_agents", []))
2119
- num_voting_agents = len(step.get("mixa_voters", []))
2120
- num_voters = step.get("mixa_voter_count", num_voting_agents)
2121
-
2122
- # Step 1: Initial Response Generation
2123
- round_responses = []
2124
- print("\nInitial responses generation:")
2125
- for agent_index in range(num_generating_agents):
2126
- task_template = Template(step.get("task", "")).render(context)
2127
- response = get_llm_response(
2128
- task_template, model=model, provider=provider, npc=npc
2129
- )
2130
- round_responses.append(response.get("response", ""))
2131
- print(
2132
- f"Agent {agent_index + 1} generated: " f"{response.get('response', '')}"
2133
- )
2134
-
2135
- # Loop for each round of voting and refining
2136
- for turn in range(1, mixa_turns + 1):
2137
- print(f"\n--- Round {turn}/{mixa_turns} ---")
2138
-
2139
- # Step 2: Voting Logic by voting agents
2140
- votes = self.conduct_voting(round_responses, num_voters)
2141
-
2142
- # Step 3: Report results to generating agents
2143
- print("\nVoting Results:")
2144
- for idx, response in enumerate(round_responses):
2145
- print(f"Response {idx + 1} received {votes[idx]} votes.")
2146
-
2147
- # Provide feedback on the responses
2148
- feedback_message = "Responses and their votes:\n" + "\n".join(
2149
- f"Response {i + 1}: {resp} - Votes: {votes[i]} "
2150
- for i, resp in enumerate(round_responses)
2151
- )
2152
-
2153
- # Step 4: Refinement feedback to each agent
2154
- refined_responses = []
2155
- for agent_index in range(num_generating_agents):
2156
- refined_task = (
2157
- feedback_message
2158
- + f"\nRefine your response: {round_responses[agent_index]}"
2159
- )
2160
- response = get_llm_response(
2161
- refined_task, model=model, provider=provider, npc=npc
2162
- )
2163
- refined_responses.append(response.get("response", ""))
2164
- print(
2165
- f"Agent {agent_index + 1} refined response: "
2166
- f"{response.get('response', '')}"
2167
- )
2168
-
2169
- # Update responses for the next round
2170
- round_responses = refined_responses
2171
-
2172
- # Step 5: Final synthesis using the LLM
2173
- final_synthesis_input = (
2174
- "Synthesize the following refined responses into a coherent answer:\n"
2175
- + "\n".join(round_responses)
2176
- )
2177
- final_synthesis = get_llm_response(
2178
- final_synthesis_input, model=model, provider=provider, npc=npc
2179
- )
2180
-
2181
-        # Return the synthesized text so execute_step can store it like any
-        # other response (get_llm_response returns a dict)
-        return final_synthesis.get("response", "")
2182
-
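For the runner above, a hypothetical mixture-of-agents step might look like the sketch below. Note that in this implementation mixa_agents and mixa_voters are only counted (every generation and refinement call goes through the step's single npc), and conduct_voting assigns votes randomly, so the lists mainly control how many responses and votes are produced per round:

# hypothetical step entry in a .pipe consumed by PipelineRunner
- step_name: rank_proposals
  npc: analyst
  model: llama3.2
  provider: ollama
  task: "Propose a pricing strategy for the new plan."
  mixa: true
  mixa_turns: 3
  mixa_agents: [analyst, strategist]
  mixa_voters: [reviewer, critic]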
2183
- def conduct_voting(self, responses, num_voting_agents):
2184
- """Conducts voting among agents on the given responses."""
2185
- votes = [0] * len(responses)
2186
- for _ in range(num_voting_agents):
2187
- voted_index = random.choice(range(len(responses))) # Randomly vote
2188
- votes[voted_index] += 1
2189
- return votes
2190
-
2191
- def synthesize_responses(self, votes):
2192
- """Synthesizes the responses based on votes."""
2193
- # Example: Choose the highest voted response
2194
- max_votes = max(votes)
2195
- chosen_idx = votes.index(max_votes)
2196
- return f"Synthesized response based on votes from agents: " f"{chosen_idx + 1}"
2197
-
2198
- def resolve_sources_in_task(self, task: str, context: dict) -> str:
2199
- # Use Jinja2 template rendering directly for simplicity
2200
- template = Template(task)
2201
- return template.render(context)
2202
-
2203
- def fetch_data_from_source(self, table_name):
2204
- query = f"SELECT * FROM {table_name}"
2205
- try:
2206
- df = pd.read_sql(query, con=self.db_engine)
2207
- except Exception as e:
2208
- raise RuntimeError(f"Error fetching data from '{table_name}': {e}")
2209
- return self.format_data_as_string(df)
2210
-
2211
- def format_data_as_string(self, df):
2212
- return df.to_json(orient="records", lines=True, indent=2)
2213
-
2214
- def ensure_tables_exist(self, results_table_name):
2215
- conn = sqlite3.connect(self.db_path)
2216
- try:
2217
- conn.execute(
2218
- "CREATE TABLE IF NOT EXISTS pipeline_runs ("
2219
- "run_id INTEGER PRIMARY KEY AUTOINCREMENT, "
2220
- "pipeline_hash TEXT, timestamp DATETIME)"
2221
- )
2222
- conn.execute(
2223
- f"CREATE TABLE IF NOT EXISTS {results_table_name} ("
2224
- "result_id INTEGER PRIMARY KEY AUTOINCREMENT, "
2225
- "run_id INTEGER, task_name TEXT, npc_name TEXT, "
2226
- "model TEXT, provider TEXT, inputs JSON, "
2227
- "outputs JSON, FOREIGN KEY(run_id) "
2228
- "REFERENCES pipeline_runs(run_id))"
2229
- )
2230
- conn.commit()
2231
- finally:
2232
- conn.close()
2233
-
2234
- def create_run_entry(self, pipeline_hash):
2235
- conn = sqlite3.connect(self.db_path)
2236
- try:
2237
- conn.execute(
2238
- "INSERT INTO pipeline_runs (pipeline_hash, timestamp) VALUES (?, ?)",
2239
- (pipeline_hash, datetime.now()),
2240
- )
2241
- conn.commit()
2242
- return conn.execute("SELECT last_insert_rowid()").fetchone()[0]
2243
- finally:
2244
- conn.close()
2245
-
2246
- def clean_for_json(self, obj):
2247
- if isinstance(obj, dict):
2248
- return {
2249
- k: self.clean_for_json(v)
2250
- for k, v in obj.items()
2251
- if not k.startswith("_") and not callable(v)
2252
- }
2253
- elif isinstance(obj, list):
2254
- return [self.clean_for_json(i) for i in obj]
2255
- elif isinstance(obj, (str, int, float, bool, type(None))):
2256
- return obj
2257
- else:
2258
- return str(obj)
2259
-
2260
- def load_npc(self, npc_name: str):
2261
- if npc_name in self.npc_cache:
2262
- return self.npc_cache[npc_name]
2263
-
2264
- npc_path = self.find_npc_path(npc_name)
2265
- try:
2266
- if npc_path:
2267
- connection = sqlite3.connect(self.db_path)
2268
- npc = load_npc_from_file(npc_path, db_conn=connection)
2269
- self.npc_cache[npc_name] = npc
2270
- return npc
2271
- else:
2272
- raise FileNotFoundError(f"NPC file not found for {npc_name}")
2273
- except Exception as e:
2274
- raise RuntimeError(f"Error loading NPC {npc_name}: {e}")
2275
-
2276
- def find_npc_path(self, npc_name: str) -> str:
2277
- for root, _, files in os.walk(self.npc_root_dir):
2278
- print(f"Checking in directory: {root}") # Debug output
2279
- for file in files:
2280
- if file.startswith(npc_name) and file.endswith(".npc"):
2281
- print(f"Found NPC file: {file} at {root}") # Debug output
2282
- return os.path.join(root, file)
2283
- print(f"NPC file not found for: {npc_name}") # Debug output
2284
- return None
2285
-
2286
-
2287
- import pandas as pd
- import yaml
- from typing import List, Dict, Any, Union, Set
- # Path, defaultdict and deque are used by SQLModel/ModelCompiler below
- # (in case they are not already imported near the top of the module)
- from pathlib import Path
- from collections import defaultdict, deque
2290
-
2291
-
2292
- class NPCSQLOperations(NPCCompiler):
2293
- def __init__(self, npc_directory, db_path):
2294
- super().__init__(npc_directory, db_path)
2295
-
2296
- def _get_context(
2297
- self, df: pd.DataFrame, context: Union[str, Dict, List[str]]
2298
- ) -> str:
2299
- """Resolve context from different sources"""
2300
- if isinstance(context, str):
2301
- # Check if it's a column reference
2302
- if context in df.columns:
2303
- return df[context].to_string()
2304
- # Assume it's static text
2305
- return context
2306
- elif isinstance(context, list):
2307
- # List of column names to include
2308
- return " ".join(df[col].to_string() for col in context if col in df.columns)
2309
- elif isinstance(context, dict):
2310
- # YAML-style context
2311
- return yaml.dump(context)
2312
- return ""
2313
-
2314
- # SINGLE PROMPT OPERATIONS
2315
- def synthesize(
2316
- self,
2317
- query,
2318
- df: pd.DataFrame,
2319
- columns: List[str],
2320
- npc: str,
2321
- context: Union[str, Dict, List[str]],
2322
- framework: str,
2323
- ) -> pd.Series:
2324
- context_text = self._get_context(df, context)
2325
-
2326
- def apply_synthesis(row):
2327
- # we have f strings from the query, we want to fill those back in in the request
2328
- request = query.format(**row[columns])
2329
- prompt = f"""Framework: {framework}
2330
- Context: {context_text}
2331
- Text to synthesize: {request}
2332
- Synthesize the above text."""
2333
-
2334
- result = self.execute_stage(
2335
- {"step_name": "synthesize", "npc": npc, "task": prompt},
2336
- {},
2337
- self.jinja_env,
2338
- )
2339
-
2340
- return result[0]["response"]
2341
-
2342
- # columns a list
2343
- columns_str = "_".join(columns)
2344
- df_out = df[columns].apply(apply_synthesis, axis=1)
2345
- return df_out
2346
-
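A minimal usage sketch for the synthesize operation above, assuming an npc_team directory and history database already exist at the paths shown (the NPC name, context, and framework values are illustrative):

# hypothetical usage of NPCSQLOperations.synthesize
ops = NPCSQLOperations("./npc_team", os.path.expanduser("~/npcsh_history.db"))
df = pd.DataFrame({"feedback": ["Great product!", "Could be better"]})
df["analysis"] = ops.synthesize(
    query="Customer feedback: {feedback}",
    df=df,
    columns=["feedback"],
    npc="sibiji",
    context="retail customers",
    framework="satisfaction",
)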
2347
- # MULTI-PROMPT/PARALLEL OPERATIONS
2348
- def spread_and_sync(
2349
- self,
2350
- df: pd.DataFrame,
2351
- column: str,
2352
- npc: str,
2353
- variations: List[str],
2354
- sync_strategy: str,
2355
- context: Union[str, Dict, List[str]],
2356
- ) -> pd.Series:
2357
- context_text = self._get_context(df, context)
2358
-
2359
- def apply_spread_sync(text):
2360
- results = []
2361
- for variation in variations:
2362
- prompt = f"""Variation: {variation}
2363
- Context: {context_text}
2364
- Text to analyze: {text}
2365
- Analyze the above text with {variation} perspective."""
2366
-
2367
- result = self.execute_stage(
2368
- {"step_name": f"spread_{variation}", "npc": npc, "task": prompt},
2369
- {},
2370
- self.jinja_env,
2371
- )
2372
-
2373
- results.append(result[0]["response"])
2374
-
2375
- # Sync results
2376
- sync_result = self.aggregate_step_results(
2377
- [{"response": r} for r in results], sync_strategy
2378
- )
2379
-
2380
- return sync_result
2381
-
2382
- return df[column].apply(apply_spread_sync)
2383
- # COMPARISON OPERATIONS
2384
-
2385
- def contrast(
2386
- self,
2387
- df: pd.DataFrame,
2388
- col1: str,
2389
- col2: str,
2390
- npc: str,
2391
- context: Union[str, Dict, List[str]],
2392
- comparison_framework: str,
2393
- ) -> pd.Series:
2394
- context_text = self._get_context(df, context)
2395
-
2396
- def apply_contrast(row):
2397
- prompt = f"""Framework: {comparison_framework}
2398
- Context: {context_text}
2399
- Text 1: {row[col1]}
2400
- Text 2: {row[col2]}
2401
- Compare and contrast the above texts."""
2402
-
2403
- result = self.execute_stage(
2404
- {"step_name": "contrast", "npc": npc, "task": prompt},
2405
- {},
2406
- self.jinja_env,
2407
- )
2408
-
2409
- return result[0]["response"]
2410
-
2411
- return df.apply(apply_contrast, axis=1)
2412
-
2413
- def sql_operations(self, sql: str) -> pd.DataFrame:
2414
- # Execute the SQL query
2415
-
2416
- """
2417
- 1. delegate(COLUMN, npc, query, context, tools, reviewers)
2418
- 2. dilate(COLUMN, npc, query, context, scope, reviewers)
2419
- 3. erode(COLUMN, npc, query, context, scope, reviewers)
2420
- 4. strategize(COLUMN, npc, query, context, timeline, constraints)
2421
- 5. validate(COLUMN, npc, query, context, criteria)
2422
- 6. synthesize(COLUMN, npc, query, context, framework)
2423
- 7. decompose(COLUMN, npc, query, context, granularity)
2424
- 8. criticize(COLUMN, npc, query, context, framework)
2425
- 9. summarize(COLUMN, npc, query, context, style)
2426
- 10. advocate(COLUMN, npc, query, context, perspective)
2427
-
2428
- MULTI-PROMPT/PARALLEL OPERATIONS
2429
- 11. spread_and_sync(COLUMN, npc, query, variations, sync_strategy, context)
2430
- 12. bootstrap(COLUMN, npc, query, sample_params, sync_strategy, context)
2431
- 13. resample(COLUMN, npc, query, variation_strategy, sync_strategy, context)
2432
-
2433
- COMPARISON OPERATIONS
2434
- 14. mediate(COL1, COL2, npc, query, context, resolution_strategy)
2435
- 15. contrast(COL1, COL2, npc, query, context, comparison_framework)
2436
- 16. reconcile(COL1, COL2, npc, query, context, alignment_strategy)
2437
-
2438
- MULTI-COLUMN INTEGRATION
2439
- 17. integrate(COLS[], npc, query, context, integration_method)
2440
- 18. harmonize(COLS[], npc, query, context, harmony_rules)
2441
- 19. orchestrate(COLS[], npc, query, context, workflow)
2442
- """
2443
-
2444
- # Example usage in SQL-like syntax:
2445
- """
2446
- def execute_sql(self, sql: str) -> pd.DataFrame:
2447
- # This would be implemented to parse and execute SQL with our custom functions
2448
- # Example SQL:
2449
- '''
2450
- SELECT
2451
- customer_id,
2452
- synthesize(feedback_text,
2453
- npc='analyst',
2454
- context=customer_segment,
2455
- framework='satisfaction') as analysis,
2456
- spread_and_sync(price_sensitivity,
2457
- npc='pricing_agent',
2458
- variations=['conservative', 'aggressive'],
2459
- sync_strategy='balanced_analysis',
2460
- context=market_context) as price_strategy
2461
- FROM customer_data
2462
- '''
2463
- pass
2464
- """
2465
-
2466
-
2467
- class NPCDBTAdapter:
2468
- def __init__(self, npc_sql: NPCSQLOperations):
2469
- self.npc_sql = npc_sql
2470
- self.models = {}
2471
-
2472
- def ref(self, model_name: str) -> pd.DataFrame:
2473
- # Implementation for model referencing
2474
- return self.models.get(model_name)
2475
-
2476
- def parse_model(self, model_sql: str) -> pd.DataFrame:
2477
- # Parse the SQL model and execute with our custom functions
2478
- pass
2479
-
2480
-
2481
- class AIFunctionParser:
2482
- """Handles parsing and extraction of AI function calls from SQL"""
2483
-
2484
- @staticmethod
2485
- def extract_function_params(sql: str) -> Dict[str, Dict]:
2486
- """Extract AI function parameters from SQL"""
2487
- ai_functions = {}
2488
-
2489
- pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
2490
- matches = re.finditer(pattern, sql)
2491
-
2492
- for match in matches:
2493
- func_name = match.group(1)
2494
- if func_name in ["synthesize", "spread_and_sync"]:
2495
- params = match.group(2).split(",")
2496
- ai_functions[func_name] = {
2497
- "query": params[0].strip().strip("\"'"),
2498
- "npc": params[1].strip().strip("\"'"),
2499
- "context": params[2].strip().strip("\"'"),
2500
- }
2501
-
2502
- return ai_functions
2503
-
2504
-
2505
- class SQLModel:
2506
- def __init__(self, name: str, content: str, path: str, npc_directory: str):
2507
- self.name = name
2508
- self.content = content
2509
- self.path = path
2510
- self.npc_directory = npc_directory # This sets the npc_directory attribute
2511
-
2512
- self.dependencies = self._extract_dependencies()
2513
- self.has_ai_function = self._check_ai_functions()
2514
- self.ai_functions = self._extract_ai_functions()
2515
- print(f"Initializing SQLModel with NPC directory: {npc_directory}")
2516
-
2517
- def _extract_dependencies(self) -> Set[str]:
2518
- """Extract model dependencies using ref() calls"""
2519
- pattern = r"\{\{\s*ref\(['\"]([^'\"]+)['\"]\)\s*\}\}"
2520
- return set(re.findall(pattern, self.content))
2521
-
2522
- def _check_ai_functions(self) -> bool:
2523
- """Check if the model contains AI function calls"""
2524
- ai_functions = [
2525
- "synthesize",
2526
- "spread_and_sync",
2527
- "delegate",
2528
- "dilate",
2529
- "erode",
2530
- "strategize",
2531
- "validate",
2532
- "decompose",
2533
- "criticize",
2534
- "summarize",
2535
- "advocate",
2536
- "bootstrap",
2537
- "resample",
2538
- "mediate",
2539
- "contrast",
2540
- "reconcile",
2541
- "integrate",
2542
- "harmonize",
2543
- "orchestrate",
2544
- ]
2545
- return any(func in self.content for func in ai_functions)
2546
-
2547
- def _extract_ai_functions(self) -> Dict[str, Dict]:
2548
- """Extract all AI functions and their parameters from the SQL content."""
2549
- ai_functions = {}
2550
- pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
2551
- matches = re.finditer(pattern, self.content)
2552
-
2553
- for match in matches:
2554
- func_name = match.group(1)
2555
- if func_name in [
2556
- "synthesize",
2557
- "spread_and_sync",
2558
- "delegate",
2559
- "dilate",
2560
- "erode",
2561
- "strategize",
2562
- "validate",
2563
- "decompose",
2564
- "criticize",
2565
- "summarize",
2566
- "advocate",
2567
- "bootstrap",
2568
- "resample",
2569
- "mediate",
2570
- "contrast",
2571
- "reconcile",
2572
- "integrate",
2573
- "harmonize",
2574
- "orchestrate",
2575
- ]:
2576
- params = [
2577
- param.strip().strip("\"'") for param in match.group(2).split(",")
2578
- ]
2579
-                npc = params[1]
-                # Strip a trailing ".npc" extension and any leading directory prefix
-                if npc.endswith(".npc"):
-                    npc = npc.replace(".npc", "")
-                if self.npc_directory in npc:
-                    npc = npc.replace(self.npc_directory, "")
2584
-
2585
- # print(npc)
2586
- ai_functions[func_name] = {
2587
- "column": params[0],
2588
- "npc": npc,
2589
- "query": params[2],
2590
- "context": params[3] if len(params) > 3 else None,
2591
- }
2592
- return ai_functions
2593
-
2594
-
2595
- class ModelCompiler:
2596
- def __init__(self, models_dir: str, db_path: str, npc_directory: str):
2597
- self.models_dir = Path(models_dir)
2598
- self.db_path = db_path
2599
- self.models: Dict[str, SQLModel] = {}
2600
- self.npc_operations = NPCSQLOperations(npc_directory, db_path)
2601
- self.npc_directory = npc_directory
2602
-
2603
- def discover_models(self):
2604
- """Discover all SQL models in the models directory"""
2605
- self.models = {}
2606
- for sql_file in self.models_dir.glob("**/*.sql"):
2607
- model_name = sql_file.stem
2608
- with open(sql_file, "r") as f:
2609
- content = f.read()
2610
- self.models[model_name] = SQLModel(
2611
- model_name, content, str(sql_file), self.npc_directory
2612
- )
2613
- print(f"Discovered model: {model_name}")
2614
- return self.models
2615
-
2616
- def build_dag(self) -> Dict[str, Set[str]]:
2617
- """Build dependency graph"""
2618
- dag = {}
2619
- for model_name, model in self.models.items():
2620
- dag[model_name] = model.dependencies
2621
- print(f"Built DAG: {dag}")
2622
- return dag
2623
-
2624
- def topological_sort(self) -> List[str]:
2625
- """Generate execution order using topological sort"""
2626
- dag = self.build_dag()
2627
- in_degree = defaultdict(int)
2628
-
2629
- for node, deps in dag.items():
2630
- for dep in deps:
2631
- in_degree[dep] += 1
2632
- if dep not in dag:
2633
- dag[dep] = set()
2634
-
2635
- queue = deque([node for node in dag.keys() if len(dag[node]) == 0])
2636
- result = []
2637
-
2638
- while queue:
2639
- node = queue.popleft()
2640
- result.append(node)
2641
-
2642
- for dependent, deps in dag.items():
2643
- if node in deps:
2644
- deps.remove(node)
2645
- if len(deps) == 0:
2646
- queue.append(dependent)
2647
-
2648
- if len(result) != len(dag):
2649
- raise ValueError("Circular dependency detected")
2650
-
2651
- print(f"Execution order: {result}")
2652
- return result
2653
-
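As a concrete illustration of the ordering above: with the two example models defined later in this file, the DAG would be {'customer_insights': {'customer_feedback'}, 'customer_feedback': set()}, and topological_sort would return ['customer_feedback', 'customer_insights'], so the plain SQL model is materialized before the AI-augmented model that refs it.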
2654
- def _replace_model_references(self, sql: str) -> str:
2655
- ref_pattern = r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
2656
-
2657
- def replace_ref(match):
2658
- model_name = match.group(1)
2659
- if model_name not in self.models:
2660
- raise ValueError(
2661
- f"Model '{model_name}' not found during ref replacement."
2662
- )
2663
- return model_name
2664
-
2665
- replaced_sql = re.sub(ref_pattern, replace_ref, sql)
2666
- return replaced_sql
2667
-
2668
- def compile_model(self, model_name: str) -> str:
2669
- """Compile a single model, resolving refs."""
2670
- model = self.models[model_name]
2671
- compiled_sql = model.content
2672
- compiled_sql = self._replace_model_references(compiled_sql)
2673
- print(f"Compiled SQL for {model_name}:\n{compiled_sql}")
2674
- return compiled_sql
2675
-
2676
- def _extract_base_query(self, sql: str) -> str:
2677
- for dep in self.models[self.current_model].dependencies:
2678
- sql = sql.replace(f"{{{{ ref('{dep}') }}}}", dep)
2679
-
2680
- parts = sql.split("FROM", 1)
2681
- if len(parts) != 2:
2682
- raise ValueError("Invalid SQL syntax")
2683
-
2684
- select_part = parts[0].replace("SELECT", "").strip()
2685
- from_part = "FROM" + parts[1]
2686
-
2687
- columns = re.split(r",\s*(?![^()]*\))", select_part.strip())
2688
-
2689
- final_columns = []
2690
- for col in columns:
2691
- if "synthesize(" not in col:
2692
- final_columns.append(col)
2693
- else:
2694
- alias_match = re.search(r"as\s+(\w+)\s*$", col, re.IGNORECASE)
2695
- if alias_match:
2696
- final_columns.append(f"NULL as {alias_match.group(1)}")
2697
-
2698
- final_sql = f"SELECT {', '.join(final_columns)} {from_part}"
2699
- print(f"Extracted base query:\n{final_sql}")
2700
-
2701
- return final_sql
2702
-
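For the customer_insights example later in this file, this base-query extraction replaces the multi-line synthesize(...) as ai_analysis column with NULL as ai_analysis, yielding roughly SELECT customer_id, feedback, timestamp, NULL as ai_analysis FROM customer_feedback, so the raw rows can be fetched before the AI column is filled in.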
2703
- def execute_model(self, model_name: str) -> pd.DataFrame:
2704
- """Execute a model and materialize it to the database"""
2705
- self.current_model = model_name
2706
- model = self.models[model_name]
2707
- compiled_sql = self.compile_model(model_name)
2708
-
2709
- try:
2710
- if model.has_ai_function:
2711
- df = self._execute_ai_model(compiled_sql, model)
2712
- else:
2713
- df = self._execute_standard_sql(compiled_sql)
2714
-
2715
- self._materialize_to_db(model_name, df)
2716
- return df
2717
-
2718
- except Exception as e:
2719
- print(f"Error executing model {model_name}: {str(e)}")
2720
- raise
2721
-
2722
- def _execute_standard_sql(self, sql: str) -> pd.DataFrame:
2723
- with sqlite3.connect(self.db_path) as conn:
2724
- try:
2725
- sql = re.sub(r"--.*?\n", "\n", sql)
2726
- sql = re.sub(r"\s+", " ", sql).strip()
2727
- return pd.read_sql(sql, conn)
2728
- except Exception as e:
2729
- print(f"Failed to execute SQL: {sql}")
2730
- print(f"Error: {str(e)}")
2731
- raise
2732
-
2733
- def execute_ai_function(self, query, npc, column_value, context):
2734
- """Execute a specific AI function logic - placeholder"""
2735
- print(f"Executing AI function on value: {column_value}")
2736
- synthesized_value = (
2737
- f"Processed({query}): {column_value} in context {context} with npc {npc}"
2738
- )
2739
- return synthesized_value
2740
-
2741
- def _execute_ai_model(self, sql: str, model: SQLModel) -> pd.DataFrame:
2742
- try:
2743
- base_sql = self._extract_base_query(sql)
2744
- print(f"Executing base SQL:\n{base_sql}")
2745
- df = self._execute_standard_sql(base_sql)
2746
-
2747
- # extract the columns they are between {} pairs
2748
- columns = re.findall(r"\{([^}]+)\}", sql)
2749
-
2750
- # Handle AI function a
2751
- for func_name, params in model.ai_functions.items():
2752
- if func_name == "synthesize":
2753
- query_template = params["query"]
2754
-
2755
- npc = params["npc"]
2756
- # only take the after the split "/"
2757
- npc = npc.split("/")[-1]
2758
- context = params["context"]
2759
- # Call the synthesize method using DataFrame directly
2760
- synthesized_df = self.npc_operations.synthesize(
2761
- query=query_template, # The raw query to format
2762
- df=df, # The DataFrame containing the data
2763
- columns=columns, # The column(s) used to format the query
2764
- npc=npc, # NPC parameter
2765
- context=context, # Context parameter
2766
- framework="default_framework", # Adjust this as per your needs
2767
- )
2768
-
2769
- # Optionally pull the synthesized data into a new column
2770
- df["ai_analysis"] = (
2771
- synthesized_df # Adjust as per what synthesize returns
2772
- )
2773
-
2774
- return df
2775
-
2776
- except Exception as e:
2777
- print(f"Error in AI model execution: {str(e)}")
2778
- raise
2779
-
2780
- def _materialize_to_db(self, model_name: str, df: pd.DataFrame):
2781
- with sqlite3.connect(self.db_path) as conn:
2782
- conn.execute(f"DROP TABLE IF EXISTS {model_name}")
2783
- df.to_sql(model_name, conn, index=False)
2784
- print(f"Materialized model {model_name} to database")
2785
-
2786
- def _table_exists(self, table_name: str) -> bool:
2787
- with sqlite3.connect(self.db_path) as conn:
2788
- cursor = conn.cursor()
2789
- cursor.execute(
2790
- """
2791
- SELECT name FROM sqlite_master
2792
- WHERE type='table' AND name=?;
2793
- """,
2794
- (table_name,),
2795
- )
2796
- return cursor.fetchone() is not None
2797
-
2798
- def run_all_models(self):
2799
- """Execute all models in dependency order"""
2800
- self.discover_models()
2801
- execution_order = self.topological_sort()
2802
- print(f"Running models in order: {execution_order}")
2803
-
2804
- results = {}
2805
- for model_name in execution_order:
2806
- print(f"\nExecuting model: {model_name}")
2807
-
2808
- model = self.models[model_name]
2809
- for dep in model.dependencies:
2810
- if not self._table_exists(dep):
2811
- raise ValueError(
2812
- f"Dependency {dep} not found in database for model {model_name}"
2813
- )
2814
-
2815
- results[model_name] = self.execute_model(model_name)
2816
-
2817
- return results
2818
-
2819
-
2820
- def create_example_models(
2821
- models_dir: str = os.path.abspath("./npc_team/factory/models/"),
2822
- db_path: str = "~/npcsh_history.db",
2823
- npc_directory: str = "./npc_team/",
2824
- ):
2825
- """Create example SQL model files"""
2826
- os.makedirs(os.path.abspath("./npc_team/factory/"), exist_ok=True)
2827
- os.makedirs(models_dir, exist_ok=True)
2828
- db_path = os.path.expanduser(db_path)
2829
- conn = sqlite3.connect(db_path)
2830
- df = pd.DataFrame(
2831
- {
2832
- "feedback": ["Great product!", "Could be better", "Amazing service"],
2833
- "customer_id": [1, 2, 3],
2834
- "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
2835
- }
2836
- )
2837
-
2838
- df.to_sql("raw_customer_feedback", conn, index=False, if_exists="replace")
2839
- print("Created raw_customer_feedback table")
2840
-
2841
-     customer_feedback = """
-     SELECT
-         feedback,
-         customer_id,
-         timestamp
-     FROM raw_customer_feedback
-     WHERE LENGTH(feedback) > 10;
-     """
-
-     customer_insights = """
-     SELECT
-         customer_id,
-         feedback,
-         timestamp,
-         synthesize(
-             "feedback text: {feedback}",
-             "analyst",
-             "feedback_analysis"
-         ) as ai_analysis
-     FROM {{ ref('customer_feedback') }};
-     """
-
-     models = {
-         "customer_feedback.sql": customer_feedback,
-         "customer_insights.sql": customer_insights,
-     }
-
-     for name, content in models.items():
-         path = os.path.join(models_dir, name)
-         with open(path, "w") as f:
-             f.write(content)
-         print(f"Created model: {name}")
-
-     # Run the example models only after the model files exist on disk
-     compiler = ModelCompiler(models_dir, db_path, npc_directory)
-     results = compiler.run_all_models()
-
-     for model_name, df in results.items():
-         print(f"\nResults for {model_name}:")
-         print(df.head())