npcsh 0.3.31__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. npcsh/_state.py +942 -0
  2. npcsh/alicanto.py +1074 -0
  3. npcsh/guac.py +785 -0
  4. npcsh/mcp_helpers.py +357 -0
  5. npcsh/mcp_npcsh.py +822 -0
  6. npcsh/mcp_server.py +184 -0
  7. npcsh/npc.py +218 -0
  8. npcsh/npcsh.py +1161 -0
  9. npcsh/plonk.py +387 -269
  10. npcsh/pti.py +234 -0
  11. npcsh/routes.py +958 -0
  12. npcsh/spool.py +315 -0
  13. npcsh/wander.py +550 -0
  14. npcsh/yap.py +573 -0
  15. npcsh-1.0.0.dist-info/METADATA +596 -0
  16. npcsh-1.0.0.dist-info/RECORD +21 -0
  17. {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/WHEEL +1 -1
  18. npcsh-1.0.0.dist-info/entry_points.txt +9 -0
  19. {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/licenses/LICENSE +1 -1
  20. npcsh/audio.py +0 -210
  21. npcsh/cli.py +0 -545
  22. npcsh/command_history.py +0 -566
  23. npcsh/conversation.py +0 -291
  24. npcsh/data_models.py +0 -46
  25. npcsh/dataframes.py +0 -163
  26. npcsh/embeddings.py +0 -168
  27. npcsh/helpers.py +0 -641
  28. npcsh/image.py +0 -298
  29. npcsh/image_gen.py +0 -79
  30. npcsh/knowledge_graph.py +0 -1006
  31. npcsh/llm_funcs.py +0 -2027
  32. npcsh/load_data.py +0 -83
  33. npcsh/main.py +0 -5
  34. npcsh/model_runner.py +0 -189
  35. npcsh/npc_compiler.py +0 -2870
  36. npcsh/npc_sysenv.py +0 -383
  37. npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
  38. npcsh/npc_team/corca.npc +0 -13
  39. npcsh/npc_team/foreman.npc +0 -7
  40. npcsh/npc_team/npcsh.ctx +0 -11
  41. npcsh/npc_team/sibiji.npc +0 -4
  42. npcsh/npc_team/templates/analytics/celona.npc +0 -0
  43. npcsh/npc_team/templates/hr_support/raone.npc +0 -0
  44. npcsh/npc_team/templates/humanities/eriane.npc +0 -4
  45. npcsh/npc_team/templates/it_support/lineru.npc +0 -0
  46. npcsh/npc_team/templates/marketing/slean.npc +0 -4
  47. npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
  48. npcsh/npc_team/templates/sales/turnic.npc +0 -4
  49. npcsh/npc_team/templates/software/welxor.npc +0 -0
  50. npcsh/npc_team/tools/bash_executer.tool +0 -32
  51. npcsh/npc_team/tools/calculator.tool +0 -8
  52. npcsh/npc_team/tools/code_executor.tool +0 -16
  53. npcsh/npc_team/tools/generic_search.tool +0 -27
  54. npcsh/npc_team/tools/image_generation.tool +0 -25
  55. npcsh/npc_team/tools/local_search.tool +0 -149
  56. npcsh/npc_team/tools/npcsh_executor.tool +0 -9
  57. npcsh/npc_team/tools/screen_cap.tool +0 -27
  58. npcsh/npc_team/tools/sql_executor.tool +0 -26
  59. npcsh/response.py +0 -623
  60. npcsh/search.py +0 -248
  61. npcsh/serve.py +0 -1460
  62. npcsh/shell.py +0 -538
  63. npcsh/shell_helpers.py +0 -3529
  64. npcsh/stream.py +0 -700
  65. npcsh/video.py +0 -49
  66. npcsh-0.3.31.data/data/npcsh/npc_team/bash_executer.tool +0 -32
  67. npcsh-0.3.31.data/data/npcsh/npc_team/calculator.tool +0 -8
  68. npcsh-0.3.31.data/data/npcsh/npc_team/celona.npc +0 -0
  69. npcsh-0.3.31.data/data/npcsh/npc_team/code_executor.tool +0 -16
  70. npcsh-0.3.31.data/data/npcsh/npc_team/corca.npc +0 -13
  71. npcsh-0.3.31.data/data/npcsh/npc_team/eriane.npc +0 -4
  72. npcsh-0.3.31.data/data/npcsh/npc_team/foreman.npc +0 -7
  73. npcsh-0.3.31.data/data/npcsh/npc_team/generic_search.tool +0 -27
  74. npcsh-0.3.31.data/data/npcsh/npc_team/image_generation.tool +0 -25
  75. npcsh-0.3.31.data/data/npcsh/npc_team/lineru.npc +0 -0
  76. npcsh-0.3.31.data/data/npcsh/npc_team/local_search.tool +0 -149
  77. npcsh-0.3.31.data/data/npcsh/npc_team/maurawa.npc +0 -0
  78. npcsh-0.3.31.data/data/npcsh/npc_team/npcsh.ctx +0 -11
  79. npcsh-0.3.31.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
  80. npcsh-0.3.31.data/data/npcsh/npc_team/raone.npc +0 -0
  81. npcsh-0.3.31.data/data/npcsh/npc_team/screen_cap.tool +0 -27
  82. npcsh-0.3.31.data/data/npcsh/npc_team/sibiji.npc +0 -4
  83. npcsh-0.3.31.data/data/npcsh/npc_team/slean.npc +0 -4
  84. npcsh-0.3.31.data/data/npcsh/npc_team/sql_executor.tool +0 -26
  85. npcsh-0.3.31.data/data/npcsh/npc_team/test_pipeline.py +0 -181
  86. npcsh-0.3.31.data/data/npcsh/npc_team/turnic.npc +0 -4
  87. npcsh-0.3.31.data/data/npcsh/npc_team/welxor.npc +0 -0
  88. npcsh-0.3.31.dist-info/METADATA +0 -1853
  89. npcsh-0.3.31.dist-info/RECORD +0 -76
  90. npcsh-0.3.31.dist-info/entry_points.txt +0 -3
  91. {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/top_level.txt +0 -0
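For readers who want to reproduce a file-level comparison like the listing above, here is a minimal sketch using only the Python standard library. The wheel paths are assumptions: point them at locally downloaded copies of the two releases (for example via `pip download npcsh==0.3.31 --no-deps` and `pip download npcsh==1.0.0 --no-deps`).

    # Sketch: compare the file listings of two locally downloaded npcsh wheels.
    # Wheels are zip archives, so zipfile.ZipFile.namelist() is enough to see
    # which files were added or removed between the two releases.
    import zipfile

    OLD_WHEEL = "npcsh-0.3.31-py3-none-any.whl"  # assumed local path
    NEW_WHEEL = "npcsh-1.0.0-py3-none-any.whl"   # assumed local path

    def wheel_files(path: str) -> set:
        with zipfile.ZipFile(path) as wheel:
            return set(wheel.namelist())

    old_files = wheel_files(OLD_WHEEL)
    new_files = wheel_files(NEW_WHEEL)

    for name in sorted(new_files - old_files):
        print(f"added:   {name}")
    for name in sorted(old_files - new_files):
        print(f"removed: {name}")

This only compares file names; it does not diff file contents or count changed lines as the registry view does.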
npcsh/npc_compiler.py DELETED
@@ -1,2870 +0,0 @@
- import subprocess
- import sqlite3
- import numpy as np
- import os
- import yaml
- from jinja2 import Environment, FileSystemLoader, Template, Undefined
- import pandas as pd
- from typing import Dict, Any, Optional, Union, List, Set
- import matplotlib.pyplot as plt
- import json
- import pathlib
- import fnmatch
- import re
- import ast
- import random
- from datetime import datetime
- import hashlib
- from collections import defaultdict, deque
- import traceback
-
- # Importing functions
- from .llm_funcs import (
- get_llm_response,
- get_stream,
- process_data_output,
- get_data_response,
- generate_image,
- check_llm_command,
- handle_tool_call,
- execute_llm_command,
- )
- from .helpers import get_npc_path
- from .search import search_web, rag_search
- from .image import capture_screenshot, analyze_image_base
-
-
- def create_or_replace_table(db_path: str, table_name: str, data: pd.DataFrame):
- """
- Creates or replaces a table in the SQLite database.
-
- :param db_path: Path to the SQLite database.
- :param table_name: Name of the table to create/replace.
- :param data: Pandas DataFrame containing the data to insert.
- """
- conn = sqlite3.connect(db_path)
- try:
- data.to_sql(table_name, conn, if_exists="replace", index=False)
- print(f"Table '{table_name}' created/replaced successfully.")
- except Exception as e:
- print(f"Error creating/replacing table '{table_name}': {e}")
- finally:
- conn.close()
-
-
- def load_npc_team(template_path):
- """
- Load an NPC team from a template directory.
-
- Args:
- template_path: Path to the NPC team template directory
-
- Returns:
- A dictionary containing the NPC team definition with loaded NPCs and tools
- """
- template_path = os.path.expanduser(template_path)
-
- if not os.path.exists(template_path):
- raise FileNotFoundError(f"Template directory not found: {template_path}")
-
- # Initialize team structure
- npc_team = {
- "name": os.path.basename(template_path),
- "npcs": [],
- "tools": [],
- "assembly_lines": [],
- "sql_models": [],
- "jobs": [],
- }
-
- # Load NPCs
- npc_objects = {}
- db_conn = sqlite3.connect(os.path.expanduser("~/npcsh_history.db"))
-
- for filename in os.listdir(template_path):
- if filename.endswith(".npc"):
- npc_path = os.path.join(template_path, filename)
-
- with open(npc_path, "r") as f:
- npc_content = f.read()
- npc_data = yaml.safe_load(npc_content)
- npc_team["npcs"].append(npc_data)
-
- # Load as NPC object
-
- npc_obj = load_npc_from_file(npc_path, db_conn)
- npc_name = npc_data.get("name", os.path.splitext(filename)[0])
- npc_objects[npc_name] = npc_obj
-
- # Load tools
- tools_dir = os.path.join(template_path, "tools")
- tool_objects = {}
-
- if os.path.exists(tools_dir):
- for filename in os.listdir(tools_dir):
- if filename.endswith(".tool"):
- tool_path = os.path.join(tools_dir, filename)
- with open(tool_path, "r") as f:
- tool_content = f.read()
- tool_data = yaml.safe_load(tool_content)
- npc_team["tools"].append(tool_data)
-
- # Load as Tool object
- try:
- tool_obj = Tool(tool_data)
- tool_name = tool_data.get(
- "tool_name", os.path.splitext(filename)[0]
- )
- tool_objects[tool_name] = tool_obj
- except Exception as e:
- print(f"Warning: Could not load tool {filename}: {str(e)}")
-
- # Load assembly lines
- assembly_lines_dir = os.path.join(template_path, "assembly_lines")
- if os.path.exists(assembly_lines_dir):
- for filename in os.listdir(assembly_lines_dir):
- if filename.endswith(".pipe"):
- pipe_path = os.path.join(assembly_lines_dir, filename)
- with open(pipe_path, "r") as f:
- pipe_content = f.read()
- pipe_data = yaml.safe_load(pipe_content)
- npc_team["assembly_lines"].append(pipe_data)
-
- # Load SQL models
- sql_models_dir = os.path.join(template_path, "sql_models")
- if os.path.exists(sql_models_dir):
- for filename in os.listdir(sql_models_dir):
- if filename.endswith(".sql"):
- sql_path = os.path.join(sql_models_dir, filename)
- with open(sql_path, "r") as f:
- sql_content = f.read()
- npc_team["sql_models"].append(
- {"name": os.path.basename(sql_path), "content": sql_content}
- )
-
- # Load jobs
- jobs_dir = os.path.join(template_path, "jobs")
- if os.path.exists(jobs_dir):
- for filename in os.listdir(jobs_dir):
- if filename.endswith(".job"):
- job_path = os.path.join(jobs_dir, filename)
- with open(job_path, "r") as f:
- job_content = f.read()
- job_data = yaml.safe_load(job_content)
- npc_team["jobs"].append(job_data)
-
- # Add loaded objects to the team structure
- npc_team["npc_objects"] = npc_objects
- npc_team["tool_objects"] = tool_objects
- npc_team["template_path"] = template_path
-
- return npc_team
-
-
- def get_template_npc_team(template, template_dir="~/.npcsh/npc_team/templates/"):
-
- # get the working directory where the
-
- npc_team = load_npc_team(template_dir + template)
- return npc_team
-
-
- def generate_npcs_from_area_of_expertise(
- areas_of_expertise,
- context,
- templates: list = None,
- model=None,
- provider=None,
- npc=None,
- ):
-
- prompt = f"""
- Here are the areas of expertise that a user requires a team of agents to be developed for.
-
- {areas_of_expertise}
-
- Here is some additional context that may be useful:
- {context}
-
- """
- # print(templates)
- if templates is not None:
- prompt += "the user has also provided the following templates to use as a base for the NPC team:\n"
- for template in templates:
- prompt += f"{template}\n"
- prompt += "your output should use these templates and modify them accordingly. Your response must contain the specific named NPCs included in these templates, with their primary directives adjusted accordingly based on the context and the areas of expertise. any other new npcs should complement these template ones and should not overlap."
-
- prompt += """
- Now, generate a set of 2-5 NPCs that cover the required areas of expertise and adequatetly incorporate the context provided.
- according to the following framework and return a json response
- {"npc_team": [
- {
- "name":"name of npc1",
- "primary_directive": "a 2-3 sentence description of the NPCs duties and responsibilities in the second person"
- },
- {
- "name":"name of npc2",
- "primary_directive": "a 2-3 sentence description of the NPCs duties and responsibilities in the second person"
- }
- ]}
-
- Each npc's name should be one word.
- The npc's primary directive must be essentially an assistant system message, so ensure that when you
- write it, you are writing it in that way.
- For example, here is an npc named 'sibiji' with a primary directive:
- {
- "name":"sibiji",
- "primary_directive": "You are sibiji, the foreman of an NPC team. You are a foundational AI assistant. Your role is to provide basic support and information. Respond to queries concisely and accurately."
- }
- When writing out your response, you must ensure that the agents have distinct areas of
- expertise such that they are not redundant in their abilities. Keeping the agent team
- small is important and we do not wwish to clutter the team with agents that have overlapping
- areas of expertise or responsibilities that make it difficult to know which agent should be
- called upon in a specific situation.
-
-
- do not include any additional markdown formatting or leading ```json tags.
- """
-
- response = get_llm_response(
- prompt, model=model, provider=provider, npc=npc, format="json"
- )
- response = response.get("response").get("npc_team")
- return response
-
-
- def edit_areas(areas):
- for i, area in enumerate(areas):
- print(f"{i+1}. {area}")
-
- index = input("Which area would you like to edit? (number or 'c' to continue): ")
- if index.lower() in ["c", "continue"]:
- return areas
- else:
- index = int(index)
- if 0 <= index < len(areas):
- new_value = input(f"Current value: {areas[index]}. Enter new value: ")
- areas[index] = new_value
- else:
- print("invalid index, please try again")
- return edit_areas(areas)
-
-
- def delete_areas(areas):
- for i, area in enumerate(areas):
- print(f"{i+1}. {area}")
-
- index = (
- int(input("Which area would you like to delete? (number or 'c' to continue): "))
- - 1
- )
-
- if index.lower() in ["c", "continue"]:
- return areas
- if 0 <= index < len(areas):
- del areas[index]
-
- return delete_areas(areas)
-
-
- def conjure_team(
- context,
- templates,
- npc=None,
- model=None,
- provider=None,
- ):
- """
- Function to generate an NPC team using existing templates and identifying additional areas of expertise.
-
- Args:
- templates: List of template names to use as a base
- context: Description of the project and what the team should do
- npc: The NPC to use for generating the areas (optional)
- model: The model to use for generation (optional)
- provider: The provider to use for generation (optional)
-
- Returns:
- Dictionary with identified areas of expertise
- """
- teams = []
- for team in templates:
- npc_team = get_template_npc_team(team)
- teams.append(npc_team)
-
- # Extract existing areas of expertise from templates
- prompt = f"""
- The user has provided the following context:
-
- {context}
- """
-
- if templates is not None:
- prompt += f"""
- The user has requested to generate an NPC team using the following templates:
-
- {templates}
-
- """
-
- prompt += """
- Now what is important in generating an NPC team is to ensure that the NPCs are balanced and distinctly necessary.
- Each NPC should essentially focus on a single area of expertise. This does not mean that they should only focus on a
- single function, but rather that they have a specific purview.
-
- To first figure out what NPCs would be necessary in addition to the templates given the combination of the templates
- and the user-provided context, we will need to generate a list of the abstract areas that the user requires in an NPC team.
- Now, given that information, consider whether other potential areas of expertise would complement the provided templates and the user context?
- Try to think carefully about this in a way to determine what other potential issues might arise for a team like this to anticipate whether it may be
- necessary to cover additional areas of expertise.
-
- Now, generate a list of 3-5 abstract areas explicitly required.
- It is actually quite important that you consolidate and abstract away various areas
- into general forms. Agents will be generated based on these descriptions, and an agentic team is more
- useful when it is as small as reasonably possible.
-
- Similarly, generate a list of 2-3 suggested areas of expertise that would complement the existing templates and the user context.
-
- This will be provided to the user for confirmation and adjustment before the NPC team is generated.
-
- Return a json response with two lists. It should be formatted like so:
-
- {
- "explicit_areas": ["area 1", "area 2"],
- "suggested_areas": ["area 3", "area 4"]
- }
-
- Do not include any additional markdown formatting or leading ```json tags.
-
- """
-
- response = get_llm_response(
- prompt, model=model, provider=provider, npc=npc, format="json"
- )
-
- response = response.get("response")
- explicit_areas = response.get("explicit_areas", [])
- suggested_areas = response.get("suggested_areas", [])
- combined_areas = explicit_areas + suggested_areas
- print("\nExplicit areas of expertise:")
- for i, area in enumerate(explicit_areas):
- print(f"{i+1}. {area}")
-
- print("\nSuggested areas of expertise:")
- for i, area in enumerate(suggested_areas):
- print(f"{i+1}. {area}")
-
- user_input = input(
- """\n\n
- Above is the generated list of areas of expertise.
-
- Would you like to edit the suggestions, delete any of them, or regenerate the team with revised context?
- Type '(e)dit', '(d)elete', or '(r)egenerate' or '(a)ccept': """
- )
- if user_input.lower() in ["e", "edit"]:
- revised_areas = edit_areas(combined_areas)
- elif user_input.lower() in ["d", "delete"]:
- revised_areas = delete_areas(combined_areas)
- elif user_input.lower() in ["r", "regenerate"]:
- updated_context = input(
- f"Here is the context you provided: {context}\nPlease provide a fully revised version: "
- )
- print("Beginning again with updated context")
- return conjure_team(
- updated_context,
- templates=templates,
- npc=npc,
- model=model,
- provider=provider,
- )
-
- elif user_input.lower() in ["a", "accept"]:
- # Return the finalized areas of expertise
- revised_areas = combined_areas
-
- # proceed now with generation of npc for each revised area
- npc_out = generate_npcs_from_area_of_expertise(
- revised_areas,
- context,
- templates=[team["npcs"] for team in teams],
- model=model,
- provider=provider,
- npc=npc,
- )
- # print(npc_out)
- # now save all of the npcs to the ./npc_team directory
-
- for npc in npc_out:
- # make the npc team dir if not existst
-
- if isinstance(npc, str):
- npc = ast.literal_eval(npc)
-
- npc_team_dir = os.path.join(os.getcwd(), "npc_team")
- os.makedirs(npc_team_dir, exist_ok=True)
- # print(npc, type(npc))
- npc_path = os.path.join(os.getcwd(), "npc_team", f"{npc['name']}.npc")
- with open(npc_path, "w") as f:
- f.write(yaml.dump(npc))
-
- return {
- "templates": templates,
- "context": context,
- "expertise_areas": response,
- "npcs": npc_out,
- }
-
-
- def initialize_npc_project(
- directory=None,
- templates=None,
- context=None,
- model=None,
- provider=None,
- ) -> str:
- """
- Function Description:
- This function initializes an NPC project in the current directory.
- Args:
- None
- Keyword Args:
- None
- Returns:
- A message indicating the success or failure of the operation.
- """
- if directory is None:
- directory = os.getcwd()
-
- # Create 'npc_team' folder in current directory
- npc_team_dir = os.path.join(directory, "npc_team")
- os.makedirs(npc_team_dir, exist_ok=True)
-
- # Create 'foreman.npc' file in 'npc_team' directory
- foreman_npc_path = os.path.join(npc_team_dir, "sibiji.npc")
- if context is not None:
- team = conjure_team(
- context, templates=templates, model=model, provider=provider
- )
-
- if not os.path.exists(foreman_npc_path):
- foreman_npc_content = """name: sibiji
- primary_directive: "You are sibiji, the foreman of an NPC team. You are a foundational AI assistant. Your role is to provide basic support and information. Respond to queries concisely and accurately."
- model: llama3.2
- provider: ollama
- """
- with open(foreman_npc_path, "w") as f:
- f.write(foreman_npc_content)
- else:
- print(f"{foreman_npc_path} already exists.")
-
- # Create 'tools' folder within 'npc_team' directory
- tools_dir = os.path.join(npc_team_dir, "tools")
- os.makedirs(tools_dir, exist_ok=True)
-
- # assembly_lines
- assembly_lines_dir = os.path.join(npc_team_dir, "assembly_lines")
- os.makedirs(assembly_lines_dir, exist_ok=True)
- # sql models
- sql_models_dir = os.path.join(npc_team_dir, "sql_models")
- os.makedirs(sql_models_dir, exist_ok=True)
- # jobs
- jobs_dir = os.path.join(npc_team_dir, "jobs")
- os.makedirs(jobs_dir, exist_ok=True)
-
- # just copy all the base npcsh tools and npcs.
- return f"NPC project initialized in {npc_team_dir}"
-
-
- def init_pipeline_runs(db_path: str = "~/npcsh_history.db"):
- """
- Initialize the pipeline runs table in the database.
- """
- with sqlite3.connect(os.path.expanduser(db_path)) as conn:
- cursor = conn.cursor()
- cursor.execute(
- """
- CREATE TABLE IF NOT EXISTS pipeline_runs (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- pipeline_name TEXT,
- step_name TEXT,
- output TEXT,
- timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
- )
- """
- )
- conn.commit()
-
-
- # SilentUndefined handles undefined behavior in Jinja2
- class SilentUndefined(Undefined):
- def _fail_with_undefined_error(self, *args, **kwargs):
- return ""
-
-
- class Context:
- def __init__(self, context=None, mcp_servers=None, databases=None, files=None):
- self.context = context
- self.mcp_servers = mcp_servers
- self.databases = databases
- self.files = files
-
- def load_context_file(self, path):
- with open(path, "r") as f:
- self.context = yaml.safe_load(f)
-
-
- class Tool:
- def __init__(self, tool_data: dict):
- if not tool_data or not isinstance(tool_data, dict):
- raise ValueError("Invalid tool data provided.")
- if "tool_name" not in tool_data:
- raise KeyError("Missing 'tool_name' in tool definition.")
-
- self.tool_name = tool_data.get("tool_name")
- self.inputs = tool_data.get("inputs", [])
- self.description = tool_data.get("description", "")
- self.steps = self.parse_steps(tool_data.get("steps", []))
-
- def parse_step(self, step: Union[dict, str]) -> dict:
- if isinstance(step, dict):
- return {
- "engine": step.get("engine", None),
- "code": step.get("code", ""),
- }
- else:
- raise ValueError("Invalid step format")
-
- def parse_steps(self, steps: list) -> list:
- return [self.parse_step(step) for step in steps]
-
- def execute(
- self,
- input_values: dict,
- tools_dict: dict,
- jinja_env: Environment,
- command: str,
- model: str = None,
- provider: str = None,
- npc=None,
- stream: bool = False,
- messages: List[Dict[str, str]] = None,
- ):
- # Create the context with input values at top level for Jinja access
- context = npc.shared_context.copy() if npc else {}
- context.update(input_values)  # Spread input values directly in context
- context.update(
- {
- "tools": tools_dict,
- "llm_response": None,
- "output": None,
- "command": command,
- }
- )
-
- # Process Steps
- for i, step in enumerate(self.steps):
-
- context = self.execute_step(
- step,
- context,
- jinja_env,
- model=model,
- provider=provider,
- npc=npc,
- stream=stream,
- messages=messages,
- )
- # if i is the last step and the user has reuqested a streaming output
- # then we should return the stream
- if i == len(self.steps) - 1 and stream:  # this was causing the big issue X:
- print("tool successful, passing output to stream")
- return context
- # Return the final output
- if context.get("output") is not None:
- return context.get("output")
- elif context.get("llm_response") is not None:
- return context.get("llm_response")
-
-
589
- self,
590
- step: dict,
591
- context: dict,
592
- jinja_env: Environment,
593
- npc: Any = None,
594
- model: str = None,
595
- provider: str = None,
596
- stream: bool = False,
597
- messages: List[Dict[str, str]] = None,
598
- ):
599
- engine = step.get("engine", "natural")
600
- code = step.get("code", "")
601
-
602
- # Render template with all context variables
603
- try:
604
- template = jinja_env.from_string(code)
605
- rendered_code = template.render(**context)
606
- except Exception as e:
607
- print(f"Error rendering template: {e}")
608
- rendered_code = code
609
- # render engine if necessary
610
- try:
611
- template = jinja_env.from_string(engine)
612
- rendered_engine = template.render(**context)
613
- except:
614
- print("error rendering engine")
615
- rendered_engine = engine
616
- if rendered_engine == "natural":
617
- if len(rendered_code.strip()) > 0:
618
- # print(f"Executing natural language step: {rendered_code}")
619
- if stream:
620
- messages = messages.copy() if messages else []
621
- messages.append({"role": "user", "content": rendered_code})
622
- return get_stream(messages, model=model, provider=provider, npc=npc)
623
-
624
- else:
625
- llm_response = get_llm_response(
626
- rendered_code, model=model, provider=provider, npc=npc
627
- )
628
- response_text = llm_response.get("response", "")
629
- # Store both in context for reference
630
- context["llm_response"] = response_text
631
- context["results"] = response_text
632
-
633
- elif rendered_engine == "python":
634
- exec_globals = {
635
- "__builtins__": __builtins__,
636
- "npc": npc,
637
- "context": context,
638
- "pd": pd,
639
- "plt": plt,
640
- "np": np,
641
- "os": os,
642
- "get_llm_response": get_llm_response,
643
- "generate_image": generate_image,
644
- "search_web": search_web,
645
- "json": json,
646
- "sklearn": __import__("sklearn"),
647
- "TfidfVectorizer": __import__(
648
- "sklearn.feature_extraction.text"
649
- ).feature_extraction.text.TfidfVectorizer,
650
- "cosine_similarity": __import__(
651
- "sklearn.metrics.pairwise"
652
- ).metrics.pairwise.cosine_similarity,
653
- "Path": __import__("pathlib").Path,
654
- "fnmatch": fnmatch,
655
- "pathlib": pathlib,
656
- "subprocess": subprocess,
657
- }
658
- new_locals = {}
659
- exec_env = context.copy()
660
- try:
661
- exec(rendered_code, exec_globals, new_locals)
662
- exec_env.update(new_locals)
663
-
664
- context.update(exec_env)
665
-
666
- exec_env.update(new_locals)
667
- context.update(exec_env)
668
-
669
- # Add this line to explicitly copy the output
670
- if "output" in new_locals:
671
- context["output"] = new_locals["output"]
672
-
673
- # Then your existing code
674
- if "output" in exec_env:
675
- if exec_env["output"] is not None:
676
- context["results"] = exec_env["output"]
677
- print("result from code execution: ", exec_env["output"])
678
-
679
- except NameError as e:
680
- tb_lines = traceback.format_exc().splitlines()
681
- limited_tb = (
682
- "\n".join(tb_lines[:100])
683
- if len(tb_lines) > 100
684
- else "\n".join(tb_lines)
685
- )
686
- print(f"NameError: {e}")
687
- print(f"Limited traceback:\n{limited_tb}")
688
- print("Tool code:")
689
- print(rendered_code)
690
- return {
691
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
692
- }
693
- except SyntaxError as e:
694
- tb_lines = traceback.format_exc().splitlines()
695
- limited_tb = (
696
- "\n".join(tb_lines[:100])
697
- if len(tb_lines) > 100
698
- else "\n".join(tb_lines)
699
- )
700
- print(f"SyntaxError: {e}")
701
- print(f"Limited traceback:\n{limited_tb}")
702
- print("Tool code:")
703
- print(rendered_code)
704
- return {
705
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
706
- }
707
- except Exception as e:
708
- tb_lines = traceback.format_exc().splitlines()
709
- limited_tb = (
710
- "\n".join(tb_lines[:100])
711
- if len(tb_lines) > 100
712
- else "\n".join(tb_lines)
713
- )
714
- print(f"Error executing Python code:")
715
- print(f"Limited traceback:\n{limited_tb}")
716
- print("Tool code:")
717
- print(rendered_code)
718
- return {
719
- "output": f"Error executing Python code : {e} with traceback: {limited_tb}"
720
- }
721
- return context
722
-
723
- def to_dict(self):
724
- return {
725
- "tool_name": self.tool_name,
726
- "description": self.description,
727
- "inputs": self.inputs,
728
- "steps": [self.step_to_dict(step) for step in self.steps],
729
- }
730
-
731
- def step_to_dict(self, step):
732
- return {
733
- "engine": step.get("engine"),
734
- "code": step.get("code"),
735
- }
736
-
737
-
738
- def load_tools_from_directory(directory) -> list:
739
- tools = []
740
- if os.path.exists(directory):
741
- for filename in os.listdir(directory):
742
- if filename.endswith(".tool"):
743
- full_path = os.path.join(directory, filename)
744
- with open(full_path, "r") as f:
745
- tool_content = f.read()
746
- try:
747
- if not tool_content.strip():
748
- print(f"Tool file {filename} is empty. Skipping.")
749
- continue
750
- tool_data = yaml.safe_load(tool_content)
751
- if tool_data is None:
752
- print(
753
- f"Tool file {filename} is invalid or empty. Skipping."
754
- )
755
- continue
756
- tool = Tool(tool_data)
757
- tools.append(tool)
758
- except yaml.YAMLError as e:
759
- print(f"Error parsing tool {filename}: {e}")
760
- return tools
761
-
762
-
763
- class NPC:
764
- def __init__(
765
- self,
766
- name: str,
767
- primary_directive: str = None,
768
- tools: list = None, # from the npc profile
769
- model: str = None,
770
- provider: str = None,
771
- api_url: str = None,
772
- db_conn=None,
773
- all_tools: list = None, # all available tools in global and project, this is an anti pattern i need to solve eventually but for now it works
774
- use_global_tools: bool = False,
775
- use_npc_network: bool = False,
776
- global_npc_directory: str = None,
777
- project_npc_directory: str = None,
778
- global_tools_directory: str = None,
779
- project_tools_directory: str = None,
780
- ):
781
- # 2. Load global tools from ~/.npcsh/npc_team/tools
782
- if global_tools_directory is None:
783
- user_home = os.path.expanduser("~")
784
- self.global_tools_directory = os.path.join(
785
- user_home, ".npcsh", "npc_team", "tools"
786
- )
787
- else:
788
- self.global_tools_directory = global_tools_directory
789
-
790
- if project_tools_directory is None:
791
- self.project_tools_directory = os.path.abspath("./npc_team/tools")
792
- else:
793
- self.project_tools_directory = project_tools_directory
794
-
795
- if global_npc_directory is None:
796
- self.global_npc_directory = os.path.join(user_home, ".npcsh", "npc_team")
797
- else:
798
- self.global_npc_directory = global_npc_directory
799
-
800
- if project_npc_directory is None:
801
- self.project_npc_directory = os.path.abspath("./npc_team")
802
-
803
- self.jinja_env = Environment(
804
- loader=FileSystemLoader(
805
- [
806
- self.project_npc_directory,
807
- self.global_npc_directory,
808
- self.global_tools_directory,
809
- self.project_tools_directory,
810
- ]
811
- ),
812
- undefined=SilentUndefined,
813
- )
814
-
815
- self.name = name
816
- self.primary_directive = primary_directive
817
- self.tools = tools or []
818
-
819
- self.model = model
820
- self.db_conn = db_conn
821
- if self.db_conn is not None:
822
- # Determine database type
823
- if "psycopg2" in self.db_conn.__class__.__module__:
824
- # PostgreSQL connection
825
- cursor = self.db_conn.cursor()
826
- cursor.execute(
827
- """
828
- SELECT table_name, obj_description((quote_ident(table_name))::regclass, 'pg_class')
829
- FROM information_schema.tables
830
- WHERE table_schema='public';
831
- """
832
- )
833
- self.tables = cursor.fetchall()
834
- self.db_type = "postgres"
835
- elif "sqlite3" in self.db_conn.__class__.__module__:
836
- # SQLite connection
837
- self.tables = self.db_conn.execute(
838
- "SELECT name, sql FROM sqlite_master WHERE type='table';"
839
- ).fetchall()
840
- self.db_type = "sqlite"
841
- else:
842
- self.tables = None
843
- self.db_type = None
844
-
845
- self.provider = provider
846
- self.api_url = api_url
847
- self.all_tools = all_tools or []
848
- self.all_tools_dict = {tool.tool_name: tool for tool in self.all_tools}
849
- if self.tools:
850
- tools_to_load = []
851
-
852
- for tool in self.tools:
853
- if isinstance(tool, Tool):
854
- continue
855
- if isinstance(tool, str):
856
- tools_to_load.append(tool)
857
- if len(tools_to_load) > 0:
858
- self.tools = self.load_suggested_tools(
859
- tools,
860
- self.global_tools_directory,
861
- self.project_tools_directory,
862
- )
863
- self.tools_dict = {tool.tool_name: tool for tool in self.tools}
864
- else:
865
- self.tools_dict = {}
866
-
867
- self.shared_context = {
868
- "dataframes": {},
869
- "current_data": None,
870
- "computation_results": {},
871
- }
872
- self.use_global_tools = use_global_tools
873
- self.use_npc_network = use_npc_network
874
-
875
- # Load tools if flag is set
876
- if self.use_global_tools:
877
- self.default_tools = self.load_tools()
878
- else:
879
- self.default_tools = []
880
- self.npc_cache = {}
881
-
882
- self.resolved_npcs = {}
883
-
884
- # Load NPC dependencies if flag is set
885
- if self.use_npc_network:
886
- self.parsed_npcs = self.parse_all_npcs()
887
- self.resolved_npcs = self.resolve_all_npcs()
888
- else:
889
- self.parsed_npcs = []
890
-
891
- def execute_query(self, query, params=None):
892
- """Execute a query based on database type"""
893
- if self.db_type == "postgres":
894
- cursor = self.db_conn.cursor()
895
- cursor.execute(query, params or ())
896
- return cursor.fetchall()
897
- else: # sqlite
898
- cursor = self.db_conn.execute(query, params or ())
899
- return cursor.fetchall()
900
-
901
- def _determine_db_type(self):
902
- """Determine if the connection is PostgreSQL or SQLite"""
903
- # Check the connection object's class name
904
- conn_type = self.db_conn.__class__.__module__.lower()
905
-
906
- if "psycopg" in conn_type:
907
- return "postgres"
908
- elif "sqlite" in conn_type:
909
- return "sqlite"
910
- else:
911
- raise ValueError(f"Unsupported database type: {conn_type}")
912
-
913
- def _get_tables(self):
914
- """Get table information based on database type"""
915
- if self.db_type == "postgres":
916
- cursor = self.db_conn.cursor()
917
- cursor.execute(
918
- """
919
- SELECT table_name, obj_description((quote_ident(table_name))::regclass, 'pg_class') as description
920
- FROM information_schema.tables
921
- WHERE table_schema='public';
922
- """
923
- )
924
- return cursor.fetchall()
925
- else: # sqlite
926
- return self.db_conn.execute(
927
- "SELECT name, sql FROM sqlite_master WHERE type='table';"
928
- ).fetchall()
929
-
930
- def get_memory(self):
931
- return
932
-
933
- def to_dict(self):
934
- return {
935
- "name": self.name,
936
- "primary_directive": self.primary_directive,
937
- "model": self.model,
938
- "provider": self.provider,
939
- "tools": [tool.to_dict() for tool in self.tools],
940
- "use_global_tools": self.use_global_tools,
941
- "api_url": self.api_url,
942
- }
943
-
944
- def _check_llm_command(
945
- self,
946
- command,
947
- retrieved_docs=None,
948
- messages=None,
949
- n_docs=5,
950
- context=None,
951
- shared_context=None,
952
- ):
953
- if shared_context is not None:
954
- self.shared_context = shared_context
955
- return check_llm_command(
956
- command,
957
- model=self.model,
958
- provider=self.provider,
959
- npc=self,
960
- retrieved_docs=retrieved_docs,
961
- messages=messages,
962
- n_docs=n_docs,
963
- context=context,
964
- )
965
-
966
- def handle_agent_pass(
967
- self,
968
- npc_to_pass: Any,
969
- command: str,
970
- messages: List[Dict[str, str]] = None,
971
- retrieved_docs=None,
972
- n_docs: int = 5,
973
- context=None,
974
- shared_context=None,
975
- ) -> Union[str, Dict[str, Any]]:
976
- """
977
- Function Description:
978
- This function handles an agent pass.
979
- Args:
980
- command (str): The command.
981
-
982
- Keyword Args:
983
- model (str): The model to use for handling the agent pass.
984
- provider (str): The provider to use for handling the agent pass.
985
- messages (List[Dict[str, str]]): The list of messages.
986
- npc (Any): The NPC object.
987
- retrieved_docs (Any): The retrieved documents.
988
- n_docs (int): The number of documents.
989
- Returns:
990
- Union[str, Dict[str, Any]]: The result of handling the agent pass.
991
- """
992
- # print(npc_to_pass, command)
993
-
994
- if isinstance(npc_to_pass, NPC):
995
- npc_to_pass_init = npc_to_pass
996
- else:
997
- # assume just a string name?
998
- target_npc = self.get_npc(npc_to_pass)
999
- if target_npc is None:
1000
- return "NPC not found."
1001
-
1002
- # initialize them as an actual NPC
1003
- npc_to_pass_init = NPC(self.db_conn, **target_npc)
1004
- # print(npc_to_pass_init, command)
1005
- print(npc_to_pass, npc_to_pass.tools)
1006
- if shared_context is not None:
1007
- self.shared_context = shared_context
1008
- updated_command = (
1009
- command
1010
- + "/n"
1011
- + f"""
1012
-
1013
- NOTE: THIS COMMAND HAS ALREADY BEEN PASSED FROM ANOTHER NPC
1014
- TO YOU, {npc_to_pass}.
1015
-
1016
- THUS YOU WILL LIKELY NOT NEED TO PASS IT AGAIN TO YOURSELF
1017
- OR TO ANOTHER NPC. pLEASE CHOOSE ONE OF THE OTHER OPTIONS WHEN
1018
- RESPONDING.
1019
-
1020
-
1021
- """
1022
- )
1023
- return npc_to_pass_init._check_llm_command(
1024
- updated_command,
1025
- retrieved_docs=retrieved_docs,
1026
- messages=messages,
1027
- n_docs=n_docs,
1028
- shared_context=self.shared_context,
1029
- )
1030
-
1031
- def get_npc(self, npc_name: str):
1032
- if npc_name + ".npc" in self.npc_cache:
1033
- return self.npc_cache[npc_name + ".npc"]
1034
-
1035
- def load_suggested_tools(
1036
- self,
1037
- tools: list,
1038
- global_tools_directory: str,
1039
- project_tools_directory: str,
1040
- ) -> List[Tool]:
1041
- suggested_tools = []
1042
- for tool_name in tools:
1043
- # load tool from file
1044
- if not tool_name.endswith(".tool"):
1045
- tool_name += ".tool"
1046
- if (
1047
- global_tools_directory not in tool_name
1048
- and project_tools_directory not in tool_name
1049
- ):
1050
- # try to load from global tools directory
1051
- try:
1052
- tool_data = self.load_tool_from_file(
1053
- os.path.join(global_tools_directory, tool_name)
1054
- )
1055
- if tool_data is None:
1056
- raise ValueError(f"Tool {tool_name} not found.")
1057
-
1058
- print(f"Tool {tool_name} loaded from global directory.")
1059
-
1060
- except ValueError as e:
1061
- print(f"Error loading tool from global directory: {e}")
1062
- # trying to load from project tools directory
1063
- try:
1064
- tool_data = self.load_tool_from_file(
1065
- os.path.join(project_tools_directory, tool_name)
1066
- )
1067
- if tool_data is None:
1068
- raise ValueError(f"Tool {tool_name} not found.")
1069
- print(f"Tool {tool_name} loaded from project directory.")
1070
- except ValueError as e:
1071
- print(f"Error loading tool from project directory: {e}")
1072
- continue
1073
-
1074
- # print(tool_name)
1075
- # print(tool_data)
1076
- tool = Tool(tool_data)
1077
- self.all_tools.append(tool)
1078
- self.all_tools_dict[tool.tool_name] = tool
1079
- suggested_tools.append(tool)
1080
- return suggested_tools
1081
-
1082
- def __str__(self):
1083
- return f"NPC: {self.name}\nDirective: {self.primary_directive}\nModel: {self.model}"
1084
-
1085
- def analyze_db_data(self, request: str):
1086
- if self.db_conn is None:
1087
- print("please specify a database connection when initiating the NPC")
1088
- raise Exception("No database connection found")
1089
- return get_data_response(
1090
- request,
1091
- self.db_conn,
1092
- self.tables,
1093
- model=self.model,
1094
- provider=self.provider,
1095
- npc=self,
1096
- )
1097
-
1098
- def get_llm_response(self, request: str, **kwargs):
1099
- return get_llm_response(
1100
- request, model=self.model, provider=self.provider, npc=self, **kwargs
1101
- )
1102
-
1103
- def load_tool_from_file(self, tool_path: str) -> Union[dict, None]:
1104
- try:
1105
- with open(tool_path, "r") as f:
1106
- tool_content = f.read()
1107
- if not tool_content.strip():
1108
- print(f"Tool file {tool_path} is empty. Skipping.")
1109
- return None
1110
- tool_data = yaml.safe_load(tool_content)
1111
- if tool_data is None:
1112
- print(f"Tool file {tool_path} is invalid or empty. Skipping.")
1113
- return None
1114
- return tool_data
1115
- except yaml.YAMLError as e:
1116
- print(f"Error parsing tool {tool_path}: {e}")
1117
- return None
1118
- except Exception as e:
1119
- print(f"Error loading tool {tool_path}: {e}")
1120
- return None
1121
-
1122
- def compile(self, npc_file: str):
1123
- self.npc_cache.clear() # Clear the cache
1124
- self.resolved_npcs.clear()
1125
-
1126
- if isinstance(npc_file, NPC):
1127
- npc_file = npc_file.name + ".npc"
1128
- if not npc_file.endswith(".npc"):
1129
- raise ValueError("File must have .npc extension")
1130
- # get the absolute path
1131
- npc_file = os.path.abspath(npc_file)
1132
-
1133
- try:
1134
- # Parse NPCs from both global and project directories
1135
- self.parse_all_npcs()
1136
-
1137
- # Resolve NPCs
1138
- self.resolve_all_npcs()
1139
-
1140
- # Finalize NPC profile
1141
- # print(npc_file)
1142
- parsed_content = self.finalize_npc_profile(npc_file)
1143
-
1144
- # Load tools from both global and project directories
1145
- tools = self.load_tools()
1146
- parsed_content["tools"] = [tool.to_dict() for tool in tools]
1147
-
1148
- self.update_compiled_npcs_table(npc_file, parsed_content)
1149
- return parsed_content
1150
- except Exception as e:
1151
- raise e # Re-raise exception for debugging
1152
-
1153
- def load_tools(self):
1154
- tools = []
1155
- # Load tools from global and project directories
1156
- tool_paths = []
1157
-
1158
- if os.path.exists(self.global_tools_directory):
1159
- for filename in os.listdir(self.global_tools_directory):
1160
- if filename.endswith(".tool"):
1161
- tool_paths.append(
1162
- os.path.join(self.global_tools_directory, filename)
1163
- )
1164
-
1165
- if os.path.exists(self.project_tools_directory):
1166
- for filename in os.listdir(self.project_tools_directory):
1167
- if filename.endswith(".tool"):
1168
- tool_paths.append(
1169
- os.path.join(self.project_tools_directory, filename)
1170
- )
1171
-
1172
- tool_dict = {}
1173
- for tool_path in tool_paths:
1174
- tool_data = self.load_tool_from_file(tool_path)
1175
- if tool_data:
1176
- tool = Tool(tool_data)
1177
- # Project tools override global tools
1178
- tool_dict[tool.tool_name] = tool
1179
-
1180
- return list(tool_dict.values())
1181
-
1182
- def parse_all_npcs(self) -> None:
1183
- directories = [self.global_npc_directory, self.project_npc_directory]
1184
- for directory in directories:
1185
- if os.path.exists(directory):
1186
- for filename in os.listdir(directory):
1187
- if filename.endswith(".npc"):
1188
- npc_path = os.path.join(directory, filename)
1189
- self.parse_npc_file(npc_path)
1190
-
1191
- def parse_npc_file(self, npc_file_path: str) -> dict:
1192
- npc_file = os.path.basename(npc_file_path)
1193
- if npc_file in self.npc_cache:
1194
- # Project NPCs override global NPCs
1195
- if npc_file_path.startswith(self.project_npc_directory):
1196
- print(f"Overriding NPC {npc_file} with project version.")
1197
- else:
1198
- # Skip if already loaded from project directory
1199
- return self.npc_cache[npc_file]
1200
-
1201
- try:
1202
- with open(npc_file_path, "r") as f:
1203
- npc_content = f.read()
1204
- # Parse YAML without resolving Jinja templates
1205
- profile = yaml.safe_load(npc_content)
1206
- self.npc_cache[npc_file] = profile
1207
- return profile
1208
- except yaml.YAMLError as e:
1209
- raise ValueError(f"Invalid YAML in NPC profile {npc_file}: {str(e)}")
1210
-
1211
- def resolve_all_npcs(self):
1212
- resolved_npcs = []
1213
- for npc_file in self.npc_cache:
1214
- npc = self.resolve_npc_profile(npc_file)
1215
- resolved_npcs.append(npc)
1216
- # print(npc)
1217
- return resolved_npcs
1218
-
1219
- def resolve_npc_profile(self, npc_file: str) -> dict:
1220
- if npc_file in self.resolved_npcs:
1221
- return self.resolved_npcs[npc_file]
1222
-
1223
- profile = self.npc_cache[npc_file].copy()
1224
-
1225
- # Resolve Jinja templates
1226
- for key, value in profile.items():
1227
- if isinstance(value, str):
1228
- template = self.jinja_env.from_string(value)
1229
- profile[key] = template.render(self.npc_cache)
1230
-
1231
- # Handle inheritance
1232
- if "inherits_from" in profile:
1233
- parent_profile = self.resolve_npc_profile(profile["inherits_from"] + ".npc")
1234
- profile = self.merge_profiles(parent_profile, profile)
1235
-
1236
- self.resolved_npcs[npc_file] = profile
1237
- return profile
1238
-
1239
- def finalize_npc_profile(self, npc_file: str) -> dict:
1240
- profile = self.resolved_npcs.get(os.path.basename(npc_file))
1241
- if not profile:
1242
- # try to resolve it with load_npc_from_file
1243
- profile = load_npc_from_file(npc_file, self.db_conn).to_dict()
1244
-
1245
- # raise ValueError(f"NPC {npc_file} has not been resolved.")
1246
-
1247
- # Resolve any remaining references
1248
- # Log the profile content before processing
1249
- # print(f"Initial profile for {npc_file}: {profile}")
1250
-
1251
- for key, value in profile.items():
1252
- if isinstance(value, str):
1253
- template = self.jinja_env.from_string(value)
1254
- profile[key] = template.render(self.resolved_npcs)
1255
-
1256
- required_keys = ["name", "primary_directive"]
1257
- for key in required_keys:
1258
- if key not in profile:
1259
- raise ValueError(f"Missing required key in NPC profile: {key}")
1260
-
1261
- return profile
1262
-
1263
-
1264
- class SilentUndefined(Undefined):
1265
- def _fail_with_undefined_error(self, *args, **kwargs):
1266
- return ""
1267
-
1268
-
1269
- class NPCTeam:
1270
- def __init__(self, npcs: list, foreman: NPC, db_conn=None, context: dict = None):
1271
- self.npcs = npcs
1272
- self.foreman = foreman
1273
- self.foreman.resolved_npcs = [{npc.name: npc} for npc in self.npcs]
1274
- self.db_conn = db_conn
1275
- self.context = context
1276
- self.shared_context = {
1277
- "intermediate_results": {}, # Store results each NPC produces
1278
- "data": {}, # Active data being analyzed
1279
- }
1280
-
1281
- def to_dict(self):
1282
- return {
1283
- "foreman": self.foreman.to_dict(),
1284
- "npcs": [npc.to_dict() for npc in self.npcs],
1285
- "context": self.context,
1286
- }
1287
-
1288
- def orchestrate(self, request: str):
1289
- # Initial check with foreman
1290
- result = self.foreman._check_llm_command(
1291
- request,
1292
- context=self.context,
1293
- shared_context=self.shared_context,
1294
- )
1295
- try:
1296
- while True:
1297
- try:
1298
- result = self.foreman._check_llm_command(
1299
- request,
1300
- context=self.context,
1301
- shared_context=self.shared_context,
1302
- )
1303
-
1304
- # Track execution history and init npc messages if needed
1305
- if "execution_history" not in self.shared_context:
1306
- self.shared_context["execution_history"] = []
1307
- if "npc_messages" not in self.shared_context:
1308
- self.shared_context["npc_messages"] = {}
1309
-
1310
- # Save result and maintain NPC message history
1311
- if isinstance(result, dict):
1312
- self.shared_context["execution_history"].append(result)
1313
- if result.get("messages") and result.get("npc_name"):
1314
- if (
1315
- result["npc_name"]
1316
- not in self.shared_context["npc_messages"]
1317
- ):
1318
- self.shared_context["npc_messages"][
1319
- result["npc_name"]
1320
- ] = []
1321
- self.shared_context["npc_messages"][
1322
- result["npc_name"]
1323
- ].extend(result["messages"])
1324
-
1325
- # Check if complete
1326
- follow_up = get_llm_response(
1327
- f"""Context: User request '{request}' returned:
1328
- {result}
1329
-
1330
- Instructions:
1331
- Analyze if this result fully addresses the request. In your evaluation you must not be
1332
- too harsh. While there may be numerous refinements that can be made to improve the output
1333
- to "fully address" the request, it will be typically better for the user to
1334
- have a higher rate of interactive feedback such that we will not lose track of the
1335
- real aim and get stuck in a rut hyper-fixating.
1336
- Thus it is better to consider results as complete if they satisfy the bare minimum
1337
- of the request and provide a good starting point for further refinement.
1338
-
1339
- Return a JSON object with two fields:
1340
- -'complete' with boolean value.
1341
- -'explanation' for incompleteness
1342
- Do not include markdown formatting or ```json tags.
1343
- Return only the JSON object.""",
1344
- model=self.foreman.model,
1345
- provider=self.foreman.provider,
1346
- npc=self.foreman,
1347
- format="json",
1348
- )
1349
-
1350
- if isinstance(follow_up, dict) and isinstance(
1351
- follow_up.get("response"), dict
1352
- ):
1353
- print(
1354
- "response finished? ",
1355
- follow_up.get("response", {}).get("complete", False),
1356
- )
1357
- print(
1358
- "explanation provided",
1359
- follow_up.get("response", {}).get("explanation", ""),
1360
- )
1361
-
1362
- if not follow_up["response"].get("complete", False):
1363
- return self.orchestrate(
1364
- request
1365
- + " /n The request has not yet been fully completed."
1366
- + follow_up["response"]["explanation"]
1367
- + " /n"
1368
- + "please ensure that you tackle only the remaining parts of the request"
1369
- )
1370
- else:
1371
- # Get final summary and recommendations
1372
- debrief = get_llm_response(
1373
- f"""Context:
1374
- Original request: {request}
1375
-
1376
- Execution history: {self.shared_context['execution_history']}
1377
-
1378
- Instructions:
1379
- Provide summary of actions taken and any recommendations.
1380
- Return a JSON object with fields:
1381
- - 'summary': Overview of what was accomplished
1382
- - 'recommendations': Suggested next steps
1383
- Do not include markdown formatting or ```json tags.
1384
- Return only the JSON object.""",
1385
- model=self.foreman.model,
1386
- provider=self.foreman.provider,
1387
- npc=self.foreman,
1388
- format="json",
1389
- )
1390
-
1391
- return {
1392
- "debrief": debrief.get("response"),
1393
- "execution_history": self.shared_context[
1394
- "execution_history"
1395
- ],
1396
- }
1397
-
1398
- return result
1399
-
1400
- except KeyboardInterrupt:
1401
- print("\nExecution interrupted. Options:")
1402
- print("1. Provide additional context")
1403
- print("2. Skip this step")
1404
- print("3. Resume execution")
1405
-
1406
- choice = input("Enter choice (1-3): ")
1407
-
1408
- if choice == "1":
1409
- new_context = input("Enter additional context: ")
1410
- self.context["additional_context"] = new_context
1411
- continue
1412
- elif choice == "2":
1413
- return {"response": "Step skipped by user"}
1414
- elif choice == "3":
1415
- continue
1416
- else:
1417
- print("Invalid choice, resuming...")
1418
- continue
1419
-
1420
- except Exception as e:
1421
- # Get the full traceback
1422
- tb_lines = traceback.format_exc().splitlines()
1423
-
1424
- # Keep first 2 lines and last 3 lines
1425
- if len(tb_lines) > 5:
1426
- limited_tb = "\n".join(tb_lines[:2] + ["..."] + tb_lines[-3:])
1427
- else:
1428
- limited_tb = "\n".join(tb_lines)
1429
-
1430
- print(f"Error in orchestration: {str(e)}")
1431
- print(f"Limited traceback:\n{limited_tb}")
1432
- return {"error": f"{str(e)}\n{limited_tb}"}
1433
-
1434
-
1435
- # perhaps the npc compiling is more than just for jinja reasons.
1436
- # we can turn each agent into a referenceable program executable.
1437
- # finish testing out a python based version rather than jinja only
1438
- class NPCCompiler:
1439
- def __init__(
1440
- self,
1441
- npc_directory,
1442
- db_path,
1443
- ):
1444
- self.npc_directory = npc_directory
1445
- self.dirs = [self.npc_directory]
1446
- # import pdb
1447
- self.is_global_dir = self.npc_directory == os.path.expanduser(
1448
- "~/.npcsh/npc_team/"
1449
- )
1450
-
1451
- # pdb.set_trace()
1452
- if self.is_global_dir:
1453
- self.project_npc_directory = None
1454
- self.project_tools_directory = None
1455
- else:
1456
- self.project_npc_directory = npc_directory
1457
- self.project_tools_directory = os.path.join(
1458
- self.project_npc_directory, "tools"
1459
- )
1460
- self.dirs.append(self.project_npc_directory)
1461
-
1462
- self.db_path = db_path
1463
- self.npc_cache = {}
1464
- self.resolved_npcs = {}
1465
- self.pipe_cache = {}
1466
-
1467
- # Set tools directories
1468
- self.global_tools_directory = os.path.join(
1469
- os.path.expanduser("~/.npcsh/npc_team/"), "tools"
1470
- )
1471
-
1472
- # Initialize Jinja environment with multiple loaders
1473
- self.jinja_env = Environment(
1474
- loader=FileSystemLoader(self.dirs),
1475
- undefined=SilentUndefined,
1476
- )
1477
-
1478
- self.all_tools_dict = self.load_tools()
1479
- self.all_tools = list(self.all_tools_dict.values())
1480
-
1481
- def generate_tool_script(self, tool: Tool):
1482
- script_content = f"""
1483
- # Auto-generated script for tool: {tool.tool_name}
1484
-
1485
- def {tool.tool_name}_execute(inputs):
1486
- # Preprocess steps
1487
- """
1488
- # Add preprocess steps
1489
- for step in tool.preprocess:
1490
- script_content += f" # Preprocess: {step}\n"
1491
-
1492
- # Add prompt rendering
1493
- script_content += f"""
1494
- # Render prompt
1495
- prompt = '''{tool.prompt}'''
1496
- # You might need to render the prompt with inputs
1497
-
1498
- # Call the LLM (this is simplified)
1499
- llm_response = get_llm_response(prompt)
1500
-
1501
- # Postprocess steps
1502
- """
1503
- for step in tool.postprocess:
1504
- script_content += f" # Postprocess: {step}\n"
1505
-
1506
- script_content += f" return llm_response\n"
1507
-
1508
- # Write the script to a file
1509
- script_filename = f"{tool.tool_name}_script.py"
1510
- with open(script_filename, "w") as script_file:
1511
- script_file.write(script_content)
1512
-
1513
- def compile(self, npc_file: str):
1514
- self.npc_cache.clear() # Clear the cache
1515
- self.resolved_npcs.clear()
1516
- if isinstance(npc_file, NPC):
1517
- npc_file = npc_file.name + ".npc"
1518
- if not npc_file.endswith(".npc"):
1519
- raise ValueError("File must have .npc extension")
1520
- # get the absolute path
1521
- npc_file = os.path.abspath(npc_file)
1522
-
1523
- self.parse_all_npcs()
1524
- # Resolve NPCs
1525
- self.resolve_all_npcs()
1526
-
1527
- # Finalize NPC profile
1528
- # print(npc_file)
1529
- # print(npc_file, "npc_file")
1530
- parsed_content = self.finalize_npc_profile(npc_file)
1531
-
1532
- # Load tools from both global and project directories
1533
- parsed_content["tools"] = [tool.to_dict() for tool in self.all_tools]
1534
-
1535
- self.update_compiled_npcs_table(npc_file, parsed_content)
1536
- return parsed_content
1537
-
1538
- def load_tools(self):
1539
- tools = []
1540
- # Load tools from global and project directories
1541
- tool_paths = []
1542
-
1543
- if os.path.exists(self.global_tools_directory):
1544
- for filename in os.listdir(self.global_tools_directory):
1545
- if filename.endswith(".tool"):
1546
- tool_paths.append(
1547
- os.path.join(self.global_tools_directory, filename)
1548
- )
1549
- if self.project_tools_directory is not None:
1550
- if os.path.exists(self.project_tools_directory):
1551
- for filename in os.listdir(self.project_tools_directory):
1552
- if filename.endswith(".tool"):
1553
- tool_paths.append(
1554
- os.path.join(self.project_tools_directory, filename)
1555
- )
1556
-
1557
- tool_dict = {}
1558
- for tool_path in tool_paths:
1559
- tool_data = self.load_tool_from_file(tool_path)
1560
- if tool_data:
1561
- tool = Tool(tool_data)
1562
- # Project tools override global tools
1563
- tool_dict[tool.tool_name] = tool
1564
-
1565
- return tool_dict
1566
-
1567
- def load_tool_from_file(self, tool_path: str) -> Union[dict, None]:
1568
- try:
1569
- with open(tool_path, "r") as f:
1570
- tool_content = f.read()
1571
- if not tool_content.strip():
1572
- print(f"Tool file {tool_path} is empty. Skipping.")
1573
- return None
1574
- tool_data = yaml.safe_load(tool_content)
1575
- if tool_data is None:
1576
- print(f"Tool file {tool_path} is invalid or empty. Skipping.")
1577
- return None
1578
- return tool_data
1579
- except yaml.YAMLError as e:
1580
- print(f"Error parsing tool {tool_path}: {e}")
1581
- return None
1582
- except Exception as e:
1583
- print(f"Error loading tool {tool_path}: {e}")
1584
- return None
1585
-
1586
- def parse_all_npcs(self) -> None:
1587
- # print(self.dirs)
1588
- for directory in self.dirs:
1589
- if os.path.exists(directory):
1590
-
1591
- for filename in os.listdir(directory):
1592
- if filename.endswith(".npc"):
1593
- npc_path = os.path.join(directory, filename)
1594
- self.parse_npc_file(npc_path)
1595
-
1596
- def parse_npc_file(self, npc_file_path: str) -> dict:
1597
- npc_file = os.path.basename(npc_file_path)
1598
- if npc_file in self.npc_cache:
1599
- # Project NPCs override global NPCs
1600
- if self.project_npc_directory is not None:
1601
- if npc_file_path.startswith(self.project_npc_directory):
1602
- print(f"Overriding NPC {npc_file} with project version.")
1603
- else:
1604
- # Skip if already loaded from project directory
1605
- return self.npc_cache[npc_file]
1606
-
1607
- try:
1608
- with open(npc_file_path, "r") as f:
1609
- npc_content = f.read()
1610
- # Parse YAML without resolving Jinja templates
1611
- profile = yaml.safe_load(npc_content)
1612
- self.npc_cache[npc_file] = profile
1613
- return profile
1614
- except yaml.YAMLError as e:
1615
- raise ValueError(f"Invalid YAML in NPC profile {npc_file}: {str(e)}")
1616
-
1617
- def resolve_all_npcs(self):
1618
- for npc_file in self.npc_cache:
1619
- npc = self.resolve_npc_profile(npc_file)
1620
- # print(npc)
1621
-
1622
- def resolve_npc_profile(self, npc_file: str) -> dict:
1623
- if npc_file in self.resolved_npcs:
1624
- return self.resolved_npcs[npc_file]
1625
-
1626
- profile = self.npc_cache[npc_file].copy()
1627
-
1628
- # Resolve Jinja templates
1629
- for key, value in profile.items():
1630
- if isinstance(value, str):
1631
- template = self.jinja_env.from_string(value)
1632
- profile[key] = template.render(self.npc_cache)
1633
-
1634
- # Handle inheritance
1635
- if "inherits_from" in profile:
1636
- parent_profile = self.resolve_npc_profile(profile["inherits_from"] + ".npc")
1637
- profile = self.merge_profiles(parent_profile, profile)
1638
-
1639
- self.resolved_npcs[npc_file] = profile
1640
- return profile
1641
-
1642
- def finalize_npc_profile(self, npc_file: str) -> dict:
1643
- profile = self.resolved_npcs.get(os.path.basename(npc_file))
1644
- if not profile:
1645
- # try to resolve it with load_npc_from_file
1646
- profile = load_npc_from_file(
1647
- npc_file, sqlite3.connect(self.db_path)
1648
- ).to_dict()
1649
-
1650
- # Resolve any remaining references
1651
- # Log the profile content before processing
1652
- # print(f"Initial profile for {npc_file}: {profile}")
1653
-
1654
- for key, value in profile.items():
1655
- if isinstance(value, str):
1656
- template = self.jinja_env.from_string(value)
1657
- profile[key] = template.render(self.resolved_npcs)
1658
-
1659
- required_keys = ["name", "primary_directive"]
1660
- for key in required_keys:
1661
- if key not in profile:
1662
- raise ValueError(f"Missing required key in NPC profile: {key}")
1663
-
1664
- return profile
1665
-
1666
- def execute_stage(self, stage, context, jinja_env):
1667
- step_name = stage["step_name"]
1668
- npc_name = stage["npc"]
1669
- npc_name = jinja_env.from_string(npc_name).render(context)
1670
- # print("npc name: ", npc_name)
1671
- npc_path = get_npc_path(npc_name, self.db_path)
1672
- # print("npc path: ", npc_path)
1673
- prompt_template = stage["task"]
1674
- num_samples = stage.get("num_samples", 1)
1675
-
1676
- step_results = []
1677
- for sample_index in range(num_samples):
1678
- # Load the NPC
1679
- npc = load_npc_from_file(npc_path, sqlite3.connect(self.db_path))
1680
-
1681
- # Render the prompt using Jinja2
1682
- rendered_template = jinja_env.from_string(prompt_template)
- prompt = rendered_template.render(context, sample_index=sample_index)
1684
-
1685
- response = npc.get_llm_response(prompt)
1686
- # print(response)
1687
- step_results.append({"npc": npc_name, "response": response["response"]})
1688
-
1689
- # Update context with the response for the next step
1690
- context[f"{step_name}_{sample_index}"] = response[
1691
- "response"
1692
- ] # Update context with step's response
1693
-
1694
- return step_results
1695
-
1696
- def aggregate_step_results(self, step_results, aggregation_strategy):
1697
- responses = [result["response"] for result in step_results]
1698
- if len(responses) == 1:
1699
- return responses[0]
1700
- if aggregation_strategy == "concat":
1701
- return "\n".join(responses)
1702
- elif aggregation_strategy == "summary":
1703
- # Use the LLM to generate a summary of the responses
1704
- response_text = "\n".join(responses)
1705
- summary_prompt = (
1706
- f"Please provide a concise summary of the following responses: "
1707
- + response_text
1708
- )
1709
-
1710
- summary = self.get_llm_response(summary_prompt)["response"]
1711
- return summary
1712
- elif aggregation_strategy == "pessimistic_critique":
1713
- # Use the LLM to provide a pessimistic critique of the responses
1714
- response_text = "\n".join(responses)
1715
- critique_prompt = f"Please provide a pessimistic critique of the following responses:\n\n{response_text}"
1716
-
1717
- critique = self.get_llm_response(critique_prompt)["response"]
1718
- return critique
1719
- elif aggregation_strategy == "optimistic_view":
1720
- # Use the LLM to provide an optimistic view of the responses
1721
- response_text = "\n".join(responses)
1722
- optimistic_prompt = f"Please provide an optimistic view of the following responses:\n\n{response_text}"
1723
- optimistic_view = self.get_llm_response(optimistic_prompt)["response"]
1724
- return optimistic_view
1725
- elif aggregation_strategy == "balanced_analysis":
1726
- # Use the LLM to provide a balanced analysis of the responses
1727
- response = "\n".join(responses)
1728
- analysis_prompt = f"Please provide a balanced analysis of the following responses:\n\n{response}"
1729
-
1730
- balanced_analysis = self.get_llm_response(analysis_prompt)["response"]
1731
- return balanced_analysis
1732
- elif aggregation_strategy == "first":
1733
- return responses[0]
1734
- elif aggregation_strategy == "last":
1735
- return responses[-1]
1736
- else:
1737
- raise ValueError(f"Invalid aggregation strategy: {aggregation_strategy}")
1738
-
1739
- def compile_pipe(self, pipe_file: str, initial_input=None) -> dict:
1740
- if pipe_file in self.pipe_cache:
1741
- return self.pipe_cache[pipe_file]
1742
-
1743
- if not pipe_file.endswith(".pipe"):
1744
- raise ValueError("Pipeline file must have .pipe extension")
1745
-
1746
- # print(pipe_file)
1747
-
1748
- with open(pipe_file, "r") as f:
1749
- pipeline_data = yaml.safe_load(f)
1750
-
1751
- final_output = {}
1752
- jinja_env = Environment(loader=FileSystemLoader("."), undefined=SilentUndefined)
1753
-
1754
- context = {"input": initial_input, **self.npc_cache}
1755
-
1756
- with sqlite3.connect(self.db_path) as conn:
1757
- cursor = conn.cursor()
1758
- pipeline_name = os.path.basename(pipe_file).replace(".pipe", "")
1759
-
1760
- for stage in pipeline_data["steps"]:
1761
- step_results = self.execute_stage(stage, context, jinja_env)
1762
- aggregated_result = self.aggregate_step_results(
1763
- step_results, stage.get("aggregation_strategy", "first")
1764
- )
1765
-
1766
- # Store in database
1767
- cursor.execute(
1768
- "INSERT INTO pipeline_runs (pipeline_name, step_name, output) VALUES (?, ?, ?)",
1769
- (pipeline_name, stage["step_name"], str(aggregated_result)),
1770
- )
1771
-
1772
- final_output[stage["step_name"]] = aggregated_result
1773
- context[stage["step_name"]] = aggregated_result
1774
-
1775
- conn.commit()
1776
-
1777
- self.pipe_cache[pipe_file] = final_output # Cache the results
1778
-
1779
- return final_output
1780
-
1781
- def merge_profiles(self, parent, child) -> dict:
1782
- merged = parent.copy()
1783
- for key, value in child.items():
1784
- if isinstance(value, list) and key in merged:
1785
- merged[key] = merged[key] + value
1786
- elif isinstance(value, dict) and key in merged:
1787
- merged[key] = self.merge_profiles(merged[key], value)
1788
- else:
1789
- merged[key] = value
1790
- return merged
1791
-
1792
- def update_compiled_npcs_table(self, npc_file, parsed_content) -> None:
1793
- try:
1794
- with sqlite3.connect(self.db_path) as conn:
1795
- cursor = conn.cursor()
1796
- npc_name = parsed_content["name"]
1797
- source_path = npc_file
1798
-
1799
- cursor.execute(
1800
- "INSERT OR REPLACE INTO compiled_npcs (name, source_path, compiled_content) VALUES (?, ?, ?)", # Correct column name
1801
- (npc_name, source_path, yaml.dump(parsed_content)),
1802
- )
1803
- conn.commit()
1804
- except Exception as e:
1805
- print(
1806
- f"Error updating compiled_npcs table: {str(e)}"
1807
- ) # Print the full error
1808
-
1809
-
1810
- def load_npc_from_file(npc_file: str, db_conn: sqlite3.Connection) -> NPC:
1811
- if not npc_file.endswith(".npc"):
1812
- # append it just in case
1813
- name += ".npc"
1814
-
1815
- try:
1816
- if "~" in npc_file:
1817
- npc_file = os.path.expanduser(npc_file)
1818
- if not os.path.isabs(npc_file):
1819
- npc_file = os.path.abspath(npc_file)
1820
-
1821
- with open(npc_file, "r") as f:
1822
- npc_data = yaml.safe_load(f)
1823
-
1824
- # Extract fields from YAML
1825
- name = npc_data["name"]
1826
-
1827
- primary_directive = npc_data.get("primary_directive")
1828
- tools = npc_data.get("tools")
1829
- model = npc_data.get("model", os.environ.get("NPCSH_CHAT_MODEL", "llama3.2"))
1830
- provider = npc_data.get(
1831
- "provider", os.environ.get("NPCSH_CHAT_PROVIDER", "ollama")
1832
- )
1833
- api_url = npc_data.get("api_url", os.environ.get("NPCSH_API_URL", None))
1834
- use_global_tools = npc_data.get("use_global_tools", True)
1835
- # print(use_global_tools)
1836
- # Load tools from global and project-specific directories
1837
- all_tools = []
1838
- # 1. Load tools defined within the NPC profile
1839
- if "tools" in npc_data:
1840
- for tool_data in npc_data["tools"]:
1841
- tool = Tool(tool_data)
1842
- tools.append(tool)
1843
- # 2. Load global tools from ~/.npcsh/npc_team/tools
1844
- user_home = os.path.expanduser("~")
1845
- global_tools_directory = os.path.join(user_home, ".npcsh", "npc_team", "tools")
1846
- all_tools.extend(load_tools_from_directory(global_tools_directory))
1847
- # 3. Load project-specific tools from ./npc_team/tools
1848
- project_tools_directory = os.path.abspath("./npc_team/tools")
1849
- all_tools.extend(load_tools_from_directory(project_tools_directory))
1850
-
1851
- # Remove duplicates, giving precedence to project-specific tools
1852
- tool_dict = {}
1853
- for tool in all_tools:
1854
- tool_dict[tool.tool_name] = tool # Project tools overwrite global tools
1855
-
1856
- all_tools = list(tool_dict.values())
1857
-
1858
- # Initialize and return the NPC object
1859
- return NPC(
1860
- name,
1861
- db_conn=db_conn,
1862
- primary_directive=primary_directive,
1863
- tools=tools,
1864
- use_global_tools=use_global_tools,
1865
- model=model,
1866
- provider=provider,
1867
- api_url=api_url,
1868
- all_tools=all_tools, # Pass the tools
1869
- )
1870
-
1871
- except FileNotFoundError:
1872
- raise ValueError(f"NPC file not found: {npc_file}")
1873
- except yaml.YAMLError as e:
1874
- raise ValueError(f"Error parsing YAML in NPC file {npc_file}: {str(e)}")
1875
- except KeyError as e:
1876
- raise ValueError(f"Missing required key in NPC file {npc_file}: {str(e)}")
1877
- except Exception as e:
1878
- raise ValueError(f"Error loading NPC from file {npc_file}: {str(e)}")
1879
-
1880
-
1881
- import os
1882
- import yaml
1883
- import hashlib
1884
- import sqlite3
1885
- from sqlalchemy import create_engine
1886
- import pandas as pd
1887
- import json
1888
- from datetime import datetime
1889
- from jinja2 import Template
1890
- import re
- import random  # used by PipelineRunner.conduct_voting
1891
-
1892
-
1893
- ###
1894
- ###
1895
- ###
1896
- ###
1897
- ### What is a pipeline file?
1898
- """
1899
-
1900
- steps:
1901
- - step_name: "step_name"
1902
- npc: npc_name
1903
- task: "task"
1904
- tools: ['tool1', 'tool2']
1905
-
1906
-
1907
- # Results within the pipeline need to be referenceable from the shared context via the step name.
- #
- # So if a step is named review_email and a tool is called, we can refer to the intermediate objects
- # as review_email['tool1']['var_name_in_tool_definition'].
-
- So in step 2 the task can reference an earlier step's output, for example:
- task: "sort the emails by tone by reviewing the outputs from the email review tool: {{ review_email['email_review']['tone'] }}"
1914
- """
1915
-
1916
-
1917
- """
1918
- adding in context and fabs
1919
- """
1920
-
1921
-
1922
- class PipelineRunner:
1923
- def __init__(
1924
- self,
1925
- pipeline_file: str,
1926
- db_path: str = "~/npcsh_history.db",
1927
- npc_root_dir: str = "../",
1928
- ):
1929
- self.pipeline_file = pipeline_file
1930
- self.pipeline_data = self.load_pipeline()
1931
- self.db_path = os.path.expanduser(db_path)
1932
- self.npc_root_dir = npc_root_dir
1933
- self.npc_cache = {}
1934
- self.db_engine = create_engine(f"sqlite:///{self.db_path}")
1935
-
1936
- def load_pipeline(self):
1937
- with open(self.pipeline_file, "r") as f:
1938
- return yaml.safe_load(f)
1939
-
1940
- def compute_pipeline_hash(self):
1941
- with open(self.pipeline_file, "r") as f:
1942
- content = f.read()
1943
- return hashlib.sha256(content.encode()).hexdigest()
1944
-
1945
- def execute_pipeline(self):
1946
- context = {
1947
- "npc": self.npc_ref,
1948
- "ref": lambda step_name: step_name, # Directly use step name
1949
- "source": self.fetch_data_from_source,
1950
- }
1951
-
1952
- pipeline_hash = self.compute_pipeline_hash()
1953
- pipeline_name = os.path.splitext(os.path.basename(self.pipeline_file))[0]
1954
- results_table_name = f"{pipeline_name}_results"
1955
- self.ensure_tables_exist(results_table_name)
1956
- run_id = self.create_run_entry(pipeline_hash)
1957
-
1958
- for step in self.pipeline_data["steps"]:
1959
- self.execute_step(step, context, run_id, results_table_name)
1960
-
1961
- def npc_ref(self, npc_name: str):
1962
- clean_name = npc_name.replace("MISSING_REF_", "")
1963
- try:
1964
- npc_path = self.find_npc_path(clean_name)
1965
- return clean_name if npc_path else f"MISSING_REF_{clean_name}"
1966
- except Exception:
1967
- return f"MISSING_REF_{clean_name}"
1968
-
1969
- def execute_step(
1970
- self, step: dict, context: dict, run_id: int, results_table_name: str
1971
- ):
1972
- """Execute pipeline step and store results in the database."""
1973
- print("\nStarting step execution...")
1974
-
1975
- mixa = step.get("mixa", False)
1976
- mixa_turns = step.get("mixa_turns", 5 if mixa else None)
1977
-
1978
- npc_name = Template(step.get("npc", "")).render(context)
1979
- npc = self.load_npc(npc_name)
1980
- model = step.get("model", npc.model)
1981
- provider = step.get("provider", npc.provider)
1982
-
1983
- response_text = ""
1984
-
1985
- if mixa:
1986
- print("Executing mixture of agents strategy...")
1987
- response_text = self.execute_mixture_of_agents(
1988
- step,
1989
- context,
1990
- run_id,
1991
- results_table_name,
1992
- npc,
1993
- model,
1994
- provider,
1995
- mixa_turns,
1996
- )
1997
- else:
1998
- source_matches = re.findall(
1999
- r"{{\s*source\('([^']+)'\)\s*}}", step.get("task", "")
2000
- )
2001
- print(f"Found source matches: {source_matches}")
2002
-
2003
- if not source_matches:
2004
- rendered_task = Template(step.get("task", "")).render(context)
2005
- response = get_llm_response(
2006
- rendered_task, model=model, provider=provider, npc=npc
2007
- )
2008
- response_text = response.get("response", "")
2009
- else:
2010
- table_name = source_matches[0]
2011
- df = pd.read_sql(f"SELECT * FROM {table_name}", self.db_engine)
2012
- print(f"\nQuerying table: {table_name}")
2013
- print(f"Found {len(df)} rows")
2014
-
2015
- if step.get("batch_mode", False):
2016
- data_str = df.to_json(orient="records")
2017
- rendered_task = step.get("task", "").replace(
2018
- f"{{{{ source('{table_name}') }}}}", data_str
2019
- )
2020
- rendered_task = Template(rendered_task).render(context)
2021
-
2022
- response = get_llm_response(
2023
- rendered_task, model=model, provider=provider, npc=npc
2024
- )
2025
- response_text = response.get("response", "")
2026
- else:
2027
- all_responses = []
2028
- for idx, row in df.iterrows():
2029
- row_data = json.dumps(row.to_dict())
2030
- row_task = step.get("task", "").replace(
2031
- f"{{{{ source('{table_name}') }}}}", row_data
2032
- )
2033
- rendered_task = Template(row_task).render(context)
2034
-
2035
- response = get_llm_response(
2036
- rendered_task, model=model, provider=provider, npc=npc
2037
- )
2038
- result = response.get("response", "")
2039
- all_responses.append(result)
2040
-
2041
- response_text = all_responses
2042
-
2043
- # Storing the final result in the database
2044
- self.store_result(
2045
- run_id,
2046
- step["step_name"],
2047
- npc_name,
2048
- model,
2049
- provider,
2050
- {"response": response_text},
2051
- response_text,
2052
- results_table_name,
2053
- )
2054
-
2055
- context[step["step_name"]] = response_text
2056
- print(f"\nStep complete. Response stored in context[{step['step_name']}]")
2057
- return response_text
2058
-
2059
- def store_result(
2060
- self,
2061
- run_id,
2062
- task_name,
2063
- npc_name,
2064
- model,
2065
- provider,
2066
- inputs,
2067
- outputs,
2068
- results_table_name,
2069
- ):
2070
- """Store results into the specified results table in the database."""
2071
- cleaned_inputs = self.clean_for_json(inputs)
2072
- conn = sqlite3.connect(self.db_path)
2073
- try:
2074
- conn.execute(
2075
- f"""
2076
- INSERT INTO {results_table_name} (run_id, task_name, npc_name,
2077
- model, provider, inputs, outputs) VALUES (?, ?, ?, ?, ?, ?, ?)
2078
- """,
2079
- (
2080
- run_id,
2081
- task_name,
2082
- npc_name,
2083
- model,
2084
- provider,
2085
- json.dumps(cleaned_inputs),
2086
- json.dumps(outputs),
2087
- ),
2088
- )
2089
- conn.commit()
2090
- except Exception as e:
2091
- print(f"Error storing result: {e}")
2092
- finally:
2093
- conn.close()
2094
-
2095
- def execute_mixture_of_agents(
2096
- self,
2097
- step,
2098
- context,
2099
- run_id,
2100
- results_table_name,
2101
- npc,
2102
- model,
2103
- provider,
2104
- mixa_turns,
2105
- ):
2106
- """Facilitates multi-agent decision-making with feedback for refinement."""
2107
-
2108
- # Read agent counts from the step configuration
2109
- num_generating_agents = len(step.get("mixa_agents", []))
2110
- num_voting_agents = len(step.get("mixa_voters", []))
2111
- num_voters = step.get("mixa_voter_count", num_voting_agents)
2112
-
2113
- # Step 1: Initial Response Generation
2114
- round_responses = []
2115
- print("\nInitial responses generation:")
2116
- for agent_index in range(num_generating_agents):
2117
- task_template = Template(step.get("task", "")).render(context)
2118
- response = get_llm_response(
2119
- task_template, model=model, provider=provider, npc=npc
2120
- )
2121
- round_responses.append(response.get("response", ""))
2122
- print(
2123
- f"Agent {agent_index + 1} generated: " f"{response.get('response', '')}"
2124
- )
2125
-
2126
- # Loop for each round of voting and refining
2127
- for turn in range(1, mixa_turns + 1):
2128
- print(f"\n--- Round {turn}/{mixa_turns} ---")
2129
-
2130
- # Step 2: Voting Logic by voting agents
2131
- votes = self.conduct_voting(round_responses, num_voters)
2132
-
2133
- # Step 3: Report results to generating agents
2134
- print("\nVoting Results:")
2135
- for idx, response in enumerate(round_responses):
2136
- print(f"Response {idx + 1} received {votes[idx]} votes.")
2137
-
2138
- # Provide feedback on the responses
2139
- feedback_message = "Responses and their votes:\n" + "\n".join(
2140
- f"Response {i + 1}: {resp} - Votes: {votes[i]} "
2141
- for i, resp in enumerate(round_responses)
2142
- )
2143
-
2144
- # Step 4: Refinement feedback to each agent
2145
- refined_responses = []
2146
- for agent_index in range(num_generating_agents):
2147
- refined_task = (
2148
- feedback_message
2149
- + f"\nRefine your response: {round_responses[agent_index]}"
2150
- )
2151
- response = get_llm_response(
2152
- refined_task, model=model, provider=provider, npc=npc
2153
- )
2154
- refined_responses.append(response.get("response", ""))
2155
- print(
2156
- f"Agent {agent_index + 1} refined response: "
2157
- f"{response.get('response', '')}"
2158
- )
2159
-
2160
- # Update responses for the next round
2161
- round_responses = refined_responses
2162
-
2163
- # Step 5: Final synthesis using the LLM
2164
- final_synthesis_input = (
2165
- "Synthesize the following refined responses into a coherent answer:\n"
2166
- + "\n".join(round_responses)
2167
- )
2168
- final_synthesis = get_llm_response(
2169
- final_synthesis_input, model=model, provider=provider, npc=npc
2170
- )
2171
-
2172
- return final_synthesis.get("response", "")  # return the synthesized text, matching the other branches
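For reference, a hedged sketch of what a pipeline step using this mixture-of-agents path might look like; the step and NPC names and the turn count are hypothetical, and mixa_voter_count defaults to the number of listed voters when omitted.

# Hypothetical mixa step as consumed by execute_step / execute_mixture_of_agents.
mixa_step = {
    "step_name": "draft_reply",
    "npc": "foreman",                                    # placeholder NPC; supplies model/provider defaults
    "task": "Draft a reply to: {{ input }}",
    "mixa": True,
    "mixa_agents": ["writer_a", "writer_b", "writer_c"], # three generating agents
    "mixa_voters": ["critic_a", "critic_b"],             # two voting agents
    "mixa_turns": 2,                                     # vote-and-refine rounds before final synthesis
}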
2173
-
2174
- def conduct_voting(self, responses, num_voting_agents):
2175
- """Conducts voting among agents on the given responses."""
2176
- votes = [0] * len(responses)
2177
- for _ in range(num_voting_agents):
2178
- voted_index = random.choice(range(len(responses))) # Randomly vote
2179
- votes[voted_index] += 1
2180
- return votes
2181
-
2182
- def synthesize_responses(self, votes):
2183
- """Synthesizes the responses based on votes."""
2184
- # Example: Choose the highest voted response
2185
- max_votes = max(votes)
2186
- chosen_idx = votes.index(max_votes)
2187
- return f"Synthesized response based on votes from agents: " f"{chosen_idx + 1}"
2188
-
2189
- def resolve_sources_in_task(self, task: str, context: dict) -> str:
2190
- # Use Jinja2 template rendering directly for simplicity
2191
- template = Template(task)
2192
- return template.render(context)
2193
-
2194
- def fetch_data_from_source(self, table_name):
2195
- query = f"SELECT * FROM {table_name}"
2196
- try:
2197
- df = pd.read_sql(query, con=self.db_engine)
2198
- except Exception as e:
2199
- raise RuntimeError(f"Error fetching data from '{table_name}': {e}")
2200
- return self.format_data_as_string(df)
2201
-
2202
- def format_data_as_string(self, df):
2203
- return df.to_json(orient="records", lines=True, indent=2)
2204
-
2205
- def ensure_tables_exist(self, results_table_name):
2206
- conn = sqlite3.connect(self.db_path)
2207
- try:
2208
- conn.execute(
2209
- "CREATE TABLE IF NOT EXISTS pipeline_runs ("
2210
- "run_id INTEGER PRIMARY KEY AUTOINCREMENT, "
2211
- "pipeline_hash TEXT, timestamp DATETIME)"
2212
- )
2213
- conn.execute(
2214
- f"CREATE TABLE IF NOT EXISTS {results_table_name} ("
2215
- "result_id INTEGER PRIMARY KEY AUTOINCREMENT, "
2216
- "run_id INTEGER, task_name TEXT, npc_name TEXT, "
2217
- "model TEXT, provider TEXT, inputs JSON, "
2218
- "outputs JSON, FOREIGN KEY(run_id) "
2219
- "REFERENCES pipeline_runs(run_id))"
2220
- )
2221
- conn.commit()
2222
- finally:
2223
- conn.close()
2224
-
2225
- def create_run_entry(self, pipeline_hash):
2226
- conn = sqlite3.connect(self.db_path)
2227
- try:
2228
- conn.execute(
2229
- "INSERT INTO pipeline_runs (pipeline_hash, timestamp) VALUES (?, ?)",
2230
- (pipeline_hash, datetime.now()),
2231
- )
2232
- conn.commit()
2233
- return conn.execute("SELECT last_insert_rowid()").fetchone()[0]
2234
- finally:
2235
- conn.close()
2236
-
2237
- def clean_for_json(self, obj):
2238
- if isinstance(obj, dict):
2239
- return {
2240
- k: self.clean_for_json(v)
2241
- for k, v in obj.items()
2242
- if not k.startswith("_") and not callable(v)
2243
- }
2244
- elif isinstance(obj, list):
2245
- return [self.clean_for_json(i) for i in obj]
2246
- elif isinstance(obj, (str, int, float, bool, type(None))):
2247
- return obj
2248
- else:
2249
- return str(obj)
2250
-
2251
- def load_npc(self, npc_name: str):
2252
- if npc_name in self.npc_cache:
2253
- return self.npc_cache[npc_name]
2254
-
2255
- npc_path = self.find_npc_path(npc_name)
2256
- try:
2257
- if npc_path:
2258
- connection = sqlite3.connect(self.db_path)
2259
- npc = load_npc_from_file(npc_path, db_conn=connection)
2260
- self.npc_cache[npc_name] = npc
2261
- return npc
2262
- else:
2263
- raise FileNotFoundError(f"NPC file not found for {npc_name}")
2264
- except Exception as e:
2265
- raise RuntimeError(f"Error loading NPC {npc_name}: {e}")
2266
-
2267
- def find_npc_path(self, npc_name: str) -> str:
2268
- for root, _, files in os.walk(self.npc_root_dir):
2269
- print(f"Checking in directory: {root}") # Debug output
2270
- for file in files:
2271
- if file.startswith(npc_name) and file.endswith(".npc"):
2272
- print(f"Found NPC file: {file} at {root}") # Debug output
2273
- return os.path.join(root, file)
2274
- print(f"NPC file not found for: {npc_name}") # Debug output
2275
- return None
2276
-
2277
-
2278
- import pandas as pd
2279
- import yaml
2280
- from typing import List, Dict, Any, Set, Union
- from pathlib import Path  # used by ModelCompiler for models_dir
- from collections import defaultdict, deque  # used by ModelCompiler.topological_sort
2281
-
2282
-
2283
- class NPCSQLOperations(NPCCompiler):
2284
- def __init__(self, npc_directory, db_path):
2285
- super().__init__(npc_directory, db_path)
2286
-
2287
- def _get_context(
2288
- self, df: pd.DataFrame, context: Union[str, Dict, List[str]]
2289
- ) -> str:
2290
- """Resolve context from different sources"""
2291
- if isinstance(context, str):
2292
- # Check if it's a column reference
2293
- if context in df.columns:
2294
- return df[context].to_string()
2295
- # Assume it's static text
2296
- return context
2297
- elif isinstance(context, list):
2298
- # List of column names to include
2299
- return " ".join(df[col].to_string() for col in context if col in df.columns)
2300
- elif isinstance(context, dict):
2301
- # YAML-style context
2302
- return yaml.dump(context)
2303
- return ""
2304
-
2305
- # SINGLE PROMPT OPERATIONS
2306
- def synthesize(
2307
- self,
2308
- query,
2309
- df: pd.DataFrame,
2310
- columns: List[str],
2311
- npc: str,
2312
- context: Union[str, Dict, List[str]],
2313
- framework: str,
2314
- ) -> pd.Series:
2315
- context_text = self._get_context(df, context)
2316
-
2317
- def apply_synthesis(row):
2318
- # we have f strings from the query, we want to fill those back in in the request
2319
- request = query.format(**row[columns])
2320
- prompt = f"""Framework: {framework}
2321
- Context: {context_text}
2322
- Text to synthesize: {request}
2323
- Synthesize the above text."""
2324
-
2325
- result = self.execute_stage(
2326
- {"step_name": "synthesize", "npc": npc, "task": prompt},
2327
- {},
2328
- self.jinja_env,
2329
- )
2330
-
2331
- return result[0]["response"]
2332
-
2333
- # columns a list
2334
- columns_str = "_".join(columns)
2335
- df_out = df[columns].apply(apply_synthesis, axis=1)
2336
- return df_out
2337
-
2338
- # MULTI-PROMPT/PARALLEL OPERATIONS
2339
- def spread_and_sync(
2340
- self,
2341
- df: pd.DataFrame,
2342
- column: str,
2343
- npc: str,
2344
- variations: List[str],
2345
- sync_strategy: str,
2346
- context: Union[str, Dict, List[str]],
2347
- ) -> pd.Series:
2348
- context_text = self._get_context(df, context)
2349
-
2350
- def apply_spread_sync(text):
2351
- results = []
2352
- for variation in variations:
2353
- prompt = f"""Variation: {variation}
2354
- Context: {context_text}
2355
- Text to analyze: {text}
2356
- Analyze the above text with {variation} perspective."""
2357
-
2358
- result = self.execute_stage(
2359
- {"step_name": f"spread_{variation}", "npc": npc, "task": prompt},
2360
- {},
2361
- self.jinja_env,
2362
- )
2363
-
2364
- results.append(result[0]["response"])
2365
-
2366
- # Sync results
2367
- sync_result = self.aggregate_step_results(
2368
- [{"response": r} for r in results], sync_strategy
2369
- )
2370
-
2371
- return sync_result
2372
-
2373
- return df[column].apply(apply_spread_sync)
2374
- # COMPARISON OPERATIONS
2375
-
2376
- def contrast(
2377
- self,
2378
- df: pd.DataFrame,
2379
- col1: str,
2380
- col2: str,
2381
- npc: str,
2382
- context: Union[str, Dict, List[str]],
2383
- comparison_framework: str,
2384
- ) -> pd.Series:
2385
- context_text = self._get_context(df, context)
2386
-
2387
- def apply_contrast(row):
2388
- prompt = f"""Framework: {comparison_framework}
2389
- Context: {context_text}
2390
- Text 1: {row[col1]}
2391
- Text 2: {row[col2]}
2392
- Compare and contrast the above texts."""
2393
-
2394
- result = self.execute_stage(
2395
- {"step_name": "contrast", "npc": npc, "task": prompt},
2396
- {},
2397
- self.jinja_env,
2398
- )
2399
-
2400
- return result[0]["response"]
2401
-
2402
- return df.apply(apply_contrast, axis=1)
2403
-
2404
- def sql_operations(self, sql: str) -> pd.DataFrame:
2405
- # Execute the SQL query
2406
-
2407
- """
2408
- 1. delegate(COLUMN, npc, query, context, tools, reviewers)
2409
- 2. dilate(COLUMN, npc, query, context, scope, reviewers)
2410
- 3. erode(COLUMN, npc, query, context, scope, reviewers)
2411
- 4. strategize(COLUMN, npc, query, context, timeline, constraints)
2412
- 5. validate(COLUMN, npc, query, context, criteria)
2413
- 6. synthesize(COLUMN, npc, query, context, framework)
2414
- 7. decompose(COLUMN, npc, query, context, granularity)
2415
- 8. criticize(COLUMN, npc, query, context, framework)
2416
- 9. summarize(COLUMN, npc, query, context, style)
2417
- 10. advocate(COLUMN, npc, query, context, perspective)
2418
-
2419
- MULTI-PROMPT/PARALLEL OPERATIONS
2420
- 11. spread_and_sync(COLUMN, npc, query, variations, sync_strategy, context)
2421
- 12. bootstrap(COLUMN, npc, query, sample_params, sync_strategy, context)
2422
- 13. resample(COLUMN, npc, query, variation_strategy, sync_strategy, context)
2423
-
2424
- COMPARISON OPERATIONS
2425
- 14. mediate(COL1, COL2, npc, query, context, resolution_strategy)
2426
- 15. contrast(COL1, COL2, npc, query, context, comparison_framework)
2427
- 16. reconcile(COL1, COL2, npc, query, context, alignment_strategy)
2428
-
2429
- MULTI-COLUMN INTEGRATION
2430
- 17. integrate(COLS[], npc, query, context, integration_method)
2431
- 18. harmonize(COLS[], npc, query, context, harmony_rules)
2432
- 19. orchestrate(COLS[], npc, query, context, workflow)
2433
- """
2434
-
2435
- # Example usage in SQL-like syntax:
2436
- """
2437
- def execute_sql(self, sql: str) -> pd.DataFrame:
2438
- # This would be implemented to parse and execute SQL with our custom functions
2439
- # Example SQL:
2440
- '''
2441
- SELECT
2442
- customer_id,
2443
- synthesize(feedback_text,
2444
- npc='analyst',
2445
- context=customer_segment,
2446
- framework='satisfaction') as analysis,
2447
- spread_and_sync(price_sensitivity,
2448
- npc='pricing_agent',
2449
- variations=['conservative', 'aggressive'],
2450
- sync_strategy='balanced_analysis',
2451
- context=market_context) as price_strategy
2452
- FROM customer_data
2453
- '''
2454
- pass
2455
- """
2456
-
2457
-
2458
- class NPCDBTAdapter:
2459
- def __init__(self, npc_sql: NPCSQLOperations):
2460
- self.npc_sql = npc_sql
2461
- self.models = {}
2462
-
2463
- def ref(self, model_name: str) -> pd.DataFrame:
2464
- # Implementation for model referencing
2465
- return self.models.get(model_name)
2466
-
2467
- def parse_model(self, model_sql: str) -> pd.DataFrame:
2468
- # Parse the SQL model and execute with our custom functions
2469
- pass
2470
-
2471
-
2472
- class AIFunctionParser:
2473
- """Handles parsing and extraction of AI function calls from SQL"""
2474
-
2475
- @staticmethod
2476
- def extract_function_params(sql: str) -> Dict[str, Dict]:
2477
- """Extract AI function parameters from SQL"""
2478
- ai_functions = {}
2479
-
2480
- pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
2481
- matches = re.finditer(pattern, sql)
2482
-
2483
- for match in matches:
2484
- func_name = match.group(1)
2485
- if func_name in ["synthesize", "spread_and_sync"]:
2486
- params = match.group(2).split(",")
2487
- ai_functions[func_name] = {
2488
- "query": params[0].strip().strip("\"'"),
2489
- "npc": params[1].strip().strip("\"'"),
2490
- "context": params[2].strip().strip("\"'"),
2491
- }
2492
-
2493
- return ai_functions
2494
-
2495
-
2496
- class SQLModel:
2497
- def __init__(self, name: str, content: str, path: str, npc_directory: str):
2498
- self.name = name
2499
- self.content = content
2500
- self.path = path
2501
- self.npc_directory = npc_directory # This sets the npc_directory attribute
2502
-
2503
- self.dependencies = self._extract_dependencies()
2504
- self.has_ai_function = self._check_ai_functions()
2505
- self.ai_functions = self._extract_ai_functions()
2506
- print(f"Initializing SQLModel with NPC directory: {npc_directory}")
2507
-
2508
- def _extract_dependencies(self) -> Set[str]:
2509
- """Extract model dependencies using ref() calls"""
2510
- pattern = r"\{\{\s*ref\(['\"]([^'\"]+)['\"]\)\s*\}\}"
2511
- return set(re.findall(pattern, self.content))
2512
-
2513
- def _check_ai_functions(self) -> bool:
2514
- """Check if the model contains AI function calls"""
2515
- ai_functions = [
2516
- "synthesize",
2517
- "spread_and_sync",
2518
- "delegate",
2519
- "dilate",
2520
- "erode",
2521
- "strategize",
2522
- "validate",
2523
- "decompose",
2524
- "criticize",
2525
- "summarize",
2526
- "advocate",
2527
- "bootstrap",
2528
- "resample",
2529
- "mediate",
2530
- "contrast",
2531
- "reconcile",
2532
- "integrate",
2533
- "harmonize",
2534
- "orchestrate",
2535
- ]
2536
- return any(func in self.content for func in ai_functions)
2537
-
2538
- def _extract_ai_functions(self) -> Dict[str, Dict]:
2539
- """Extract all AI functions and their parameters from the SQL content."""
2540
- ai_functions = {}
2541
- pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
2542
- matches = re.finditer(pattern, self.content)
2543
-
2544
- for match in matches:
2545
- func_name = match.group(1)
2546
- if func_name in [
2547
- "synthesize",
2548
- "spread_and_sync",
2549
- "delegate",
2550
- "dilate",
2551
- "erode",
2552
- "strategize",
2553
- "validate",
2554
- "decompose",
2555
- "criticize",
2556
- "summarize",
2557
- "advocate",
2558
- "bootstrap",
2559
- "resample",
2560
- "mediate",
2561
- "contrast",
2562
- "reconcile",
2563
- "integrate",
2564
- "harmonize",
2565
- "orchestrate",
2566
- ]:
2567
- params = [
2568
- param.strip().strip("\"'") for param in match.group(2).split(",")
2569
- ]
2570
- npc = params[1]
2571
- if not npc.endswith(".npc"):
2572
- npc = npc.replace(".npc", "")
2573
- if self.npc_directory in npc:
2574
- npc = npc.replace(self.npc_directory, "")
2575
-
2576
- # print(npc)
2577
- ai_functions[func_name] = {
2578
- "column": params[0],
2579
- "npc": npc,
2580
- "query": params[2],
2581
- "context": params[3] if len(params) > 3 else None,
2582
- }
2583
- return ai_functions
2584
-
2585
-
2586
- class ModelCompiler:
2587
- def __init__(self, models_dir: str, db_path: str, npc_directory: str):
2588
- self.models_dir = Path(models_dir)
2589
- self.db_path = db_path
2590
- self.models: Dict[str, SQLModel] = {}
2591
- self.npc_operations = NPCSQLOperations(npc_directory, db_path)
2592
- self.npc_directory = npc_directory
2593
-
2594
- def discover_models(self):
2595
- """Discover all SQL models in the models directory"""
2596
- self.models = {}
2597
- for sql_file in self.models_dir.glob("**/*.sql"):
2598
- model_name = sql_file.stem
2599
- with open(sql_file, "r") as f:
2600
- content = f.read()
2601
- self.models[model_name] = SQLModel(
2602
- model_name, content, str(sql_file), self.npc_directory
2603
- )
2604
- print(f"Discovered model: {model_name}")
2605
- return self.models
2606
-
2607
- def build_dag(self) -> Dict[str, Set[str]]:
2608
- """Build dependency graph"""
2609
- dag = {}
2610
- for model_name, model in self.models.items():
2611
- dag[model_name] = model.dependencies
2612
- print(f"Built DAG: {dag}")
2613
- return dag
2614
-
2615
- def topological_sort(self) -> List[str]:
2616
- """Generate execution order using topological sort"""
2617
- dag = self.build_dag()
2618
- in_degree = defaultdict(int)
2619
-
2620
- for node, deps in list(dag.items()):  # iterate over a copy; missing deps are added to dag below
2621
- for dep in deps:
2622
- in_degree[dep] += 1
2623
- if dep not in dag:
2624
- dag[dep] = set()
2625
-
2626
- queue = deque([node for node in dag.keys() if len(dag[node]) == 0])
2627
- result = []
2628
-
2629
- while queue:
2630
- node = queue.popleft()
2631
- result.append(node)
2632
-
2633
- for dependent, deps in dag.items():
2634
- if node in deps:
2635
- deps.remove(node)
2636
- if len(deps) == 0:
2637
- queue.append(dependent)
2638
-
2639
- if len(result) != len(dag):
2640
- raise ValueError("Circular dependency detected")
2641
-
2642
- print(f"Execution order: {result}")
2643
- return result
2644
-
2645
- def _replace_model_references(self, sql: str) -> str:
2646
- ref_pattern = r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
2647
-
2648
- def replace_ref(match):
2649
- model_name = match.group(1)
2650
- if model_name not in self.models:
2651
- raise ValueError(
2652
- f"Model '{model_name}' not found during ref replacement."
2653
- )
2654
- return model_name
2655
-
2656
- replaced_sql = re.sub(ref_pattern, replace_ref, sql)
2657
- return replaced_sql
2658
-
2659
- def compile_model(self, model_name: str) -> str:
2660
- """Compile a single model, resolving refs."""
2661
- model = self.models[model_name]
2662
- compiled_sql = model.content
2663
- compiled_sql = self._replace_model_references(compiled_sql)
2664
- print(f"Compiled SQL for {model_name}:\n{compiled_sql}")
2665
- return compiled_sql
2666
-
2667
- def _extract_base_query(self, sql: str) -> str:
2668
- for dep in self.models[self.current_model].dependencies:
2669
- sql = sql.replace(f"{{{{ ref('{dep}') }}}}", dep)
2670
-
2671
- parts = sql.split("FROM", 1)
2672
- if len(parts) != 2:
2673
- raise ValueError("Invalid SQL syntax")
2674
-
2675
- select_part = parts[0].replace("SELECT", "").strip()
2676
- from_part = "FROM" + parts[1]
2677
-
2678
- columns = re.split(r",\s*(?![^()]*\))", select_part.strip())
2679
-
2680
- final_columns = []
2681
- for col in columns:
2682
- if "synthesize(" not in col:
2683
- final_columns.append(col)
2684
- else:
2685
- alias_match = re.search(r"as\s+(\w+)\s*$", col, re.IGNORECASE)
2686
- if alias_match:
2687
- final_columns.append(f"NULL as {alias_match.group(1)}")
2688
-
2689
- final_sql = f"SELECT {', '.join(final_columns)} {from_part}"
2690
- print(f"Extracted base query:\n{final_sql}")
2691
-
2692
- return final_sql
2693
-
2694
- def execute_model(self, model_name: str) -> pd.DataFrame:
2695
- """Execute a model and materialize it to the database"""
2696
- self.current_model = model_name
2697
- model = self.models[model_name]
2698
- compiled_sql = self.compile_model(model_name)
2699
-
2700
- try:
2701
- if model.has_ai_function:
2702
- df = self._execute_ai_model(compiled_sql, model)
2703
- else:
2704
- df = self._execute_standard_sql(compiled_sql)
2705
-
2706
- self._materialize_to_db(model_name, df)
2707
- return df
2708
-
2709
- except Exception as e:
2710
- print(f"Error executing model {model_name}: {str(e)}")
2711
- raise
2712
-
2713
- def _execute_standard_sql(self, sql: str) -> pd.DataFrame:
2714
- with sqlite3.connect(self.db_path) as conn:
2715
- try:
2716
- sql = re.sub(r"--.*?\n", "\n", sql)
2717
- sql = re.sub(r"\s+", " ", sql).strip()
2718
- return pd.read_sql(sql, conn)
2719
- except Exception as e:
2720
- print(f"Failed to execute SQL: {sql}")
2721
- print(f"Error: {str(e)}")
2722
- raise
2723
-
2724
- def execute_ai_function(self, query, npc, column_value, context):
2725
- """Execute a specific AI function logic - placeholder"""
2726
- print(f"Executing AI function on value: {column_value}")
2727
- synthesized_value = (
2728
- f"Processed({query}): {column_value} in context {context} with npc {npc}"
2729
- )
2730
- return synthesized_value
2731
-
2732
- def _execute_ai_model(self, sql: str, model: SQLModel) -> pd.DataFrame:
2733
- try:
2734
- base_sql = self._extract_base_query(sql)
2735
- print(f"Executing base SQL:\n{base_sql}")
2736
- df = self._execute_standard_sql(base_sql)
2737
-
2738
- # extract the columns they are between {} pairs
2739
- columns = re.findall(r"\{([^}]+)\}", sql)
2740
-
2741
- # Handle AI function a
2742
- for func_name, params in model.ai_functions.items():
2743
- if func_name == "synthesize":
2744
- query_template = params["query"]
2745
-
2746
- npc = params["npc"]
2747
- # only take the after the split "/"
2748
- npc = npc.split("/")[-1]
2749
- context = params["context"]
2750
- # Call the synthesize method using DataFrame directly
2751
- synthesized_df = self.npc_operations.synthesize(
2752
- query=query_template, # The raw query to format
2753
- df=df, # The DataFrame containing the data
2754
- columns=columns, # The column(s) used to format the query
2755
- npc=npc, # NPC parameter
2756
- context=context, # Context parameter
2757
- framework="default_framework", # Adjust this as per your needs
2758
- )
2759
-
2760
- # Optionally pull the synthesized data into a new column
2761
- df["ai_analysis"] = (
2762
- synthesized_df # Adjust as per what synthesize returns
2763
- )
2764
-
2765
- return df
2766
-
2767
- except Exception as e:
2768
- print(f"Error in AI model execution: {str(e)}")
2769
- raise
2770
-
2771
- def _materialize_to_db(self, model_name: str, df: pd.DataFrame):
2772
- with sqlite3.connect(self.db_path) as conn:
2773
- conn.execute(f"DROP TABLE IF EXISTS {model_name}")
2774
- df.to_sql(model_name, conn, index=False)
2775
- print(f"Materialized model {model_name} to database")
2776
-
2777
- def _table_exists(self, table_name: str) -> bool:
2778
- with sqlite3.connect(self.db_path) as conn:
2779
- cursor = conn.cursor()
2780
- cursor.execute(
2781
- """
2782
- SELECT name FROM sqlite_master
2783
- WHERE type='table' AND name=?;
2784
- """,
2785
- (table_name,),
2786
- )
2787
- return cursor.fetchone() is not None
2788
-
2789
- def run_all_models(self):
2790
- """Execute all models in dependency order"""
2791
- self.discover_models()
2792
- execution_order = self.topological_sort()
2793
- print(f"Running models in order: {execution_order}")
2794
-
2795
- results = {}
2796
- for model_name in execution_order:
2797
- print(f"\nExecuting model: {model_name}")
2798
-
2799
- model = self.models[model_name]
2800
- for dep in model.dependencies:
2801
- if not self._table_exists(dep):
2802
- raise ValueError(
2803
- f"Dependency {dep} not found in database for model {model_name}"
2804
- )
2805
-
2806
- results[model_name] = self.execute_model(model_name)
2807
-
2808
- return results
2809
-
2810
-
2811
- def create_example_models(
2812
- models_dir: str = os.path.abspath("./npc_team/factory/models/"),
2813
- db_path: str = "~/npcsh_history.db",
2814
- npc_directory: str = "./npc_team/",
2815
- ):
2816
- """Create example SQL model files"""
2817
- os.makedirs(os.path.abspath("./npc_team/factory/"), exist_ok=True)
2818
- os.makedirs(models_dir, exist_ok=True)
2819
- db_path = os.path.expanduser(db_path)
2820
- conn = sqlite3.connect(db_path)
2821
- df = pd.DataFrame(
2822
- {
2823
- "feedback": ["Great product!", "Could be better", "Amazing service"],
2824
- "customer_id": [1, 2, 3],
2825
- "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
2826
- }
2827
- )
2828
-
2829
- df.to_sql("raw_customer_feedback", conn, index=False, if_exists="replace")
2830
- print("Created raw_customer_feedback table")
2831
-
2832
2838
-
2839
- customer_feedback = """
2840
- SELECT
2841
- feedback,
2842
- customer_id,
2843
- timestamp
2844
- FROM raw_customer_feedback
2845
- WHERE LENGTH(feedback) > 10;
2846
- """
2847
-
2848
- customer_insights = """
2849
- SELECT
2850
- customer_id,
2851
- feedback,
2852
- timestamp,
2853
- synthesize(
2854
- "feedback text: {feedback}",
2855
- "analyst",
2856
- "feedback_analysis"
2857
- ) as ai_analysis
2858
- FROM {{ ref('customer_feedback') }};
2859
- """
2860
-
2861
- models = {
2862
- "customer_feedback.sql": customer_feedback,
2863
- "customer_insights.sql": customer_insights,
2864
- }
2865
-
2866
- for name, content in models.items():
2867
- path = os.path.join(models_dir, name)
2868
- with open(path, "w") as f:
2869
- f.write(content)
2870
- print(f"Created model: {name}")