npcsh-1.1.16-py3-none-any.whl → npcsh-1.1.17-py3-none-any.whl
This diff shows the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- npcsh/_state.py +24 -9
- npcsh/benchmark/__init__.py +22 -0
- npcsh/benchmark/npcsh_agent.py +262 -0
- npcsh/benchmark/runner.py +569 -0
- npcsh/npc_team/jinxs/bin/benchmark.jinx +146 -0
- npcsh/npc_team/jinxs/bin/nql.jinx +7 -7
- npcsh/npc_team/jinxs/bin/roll.jinx +20 -23
- npcsh/npc_team/jinxs/bin/sample.jinx +6 -7
- npcsh/npc_team/jinxs/bin/spool.jinx +4 -4
- npcsh/npc_team/jinxs/bin/sync.jinx +6 -6
- npcsh/npc_team/jinxs/bin/vixynt.jinx +8 -8
- npcsh/npc_team/jinxs/bin/wander.jinx +109 -19
- npcsh/npc_team/jinxs/bin/yap.jinx +5 -5
- npcsh/npc_team/jinxs/incognide/add_tab.jinx +11 -0
- npcsh/npc_team/jinxs/incognide/close_pane.jinx +9 -0
- npcsh/npc_team/jinxs/incognide/close_tab.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/confirm.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/focus_pane.jinx +9 -0
- npcsh/npc_team/jinxs/{npc_studio/npc-studio.jinx → incognide/incognide.jinx} +2 -2
- npcsh/npc_team/jinxs/incognide/list_panes.jinx +8 -0
- npcsh/npc_team/jinxs/incognide/navigate.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/notify.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/open_pane.jinx +13 -0
- npcsh/npc_team/jinxs/incognide/read_pane.jinx +9 -0
- npcsh/npc_team/jinxs/incognide/run_terminal.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/send_message.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/split_pane.jinx +12 -0
- npcsh/npc_team/jinxs/incognide/switch_npc.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/switch_tab.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/write_file.jinx +11 -0
- npcsh/npc_team/jinxs/incognide/zen_mode.jinx +9 -0
- npcsh/npc_team/jinxs/lib/browser/browser_action.jinx +4 -4
- npcsh/npc_team/jinxs/lib/browser/browser_screenshot.jinx +1 -1
- npcsh/npc_team/jinxs/lib/browser/open_browser.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/click.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/key_press.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/launch_app.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/screenshot.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/trigger.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/type_text.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/wait.jinx +1 -1
- npcsh/npc_team/jinxs/lib/core/chat.jinx +4 -4
- npcsh/npc_team/jinxs/lib/core/cmd.jinx +4 -4
- npcsh/npc_team/jinxs/lib/core/compress.jinx +8 -8
- npcsh/npc_team/jinxs/lib/core/edit_file.jinx +3 -0
- npcsh/npc_team/jinxs/lib/core/ots.jinx +7 -7
- npcsh/npc_team/jinxs/lib/core/search/db_search.jinx +44 -0
- npcsh/npc_team/jinxs/lib/core/search/file_search.jinx +94 -0
- npcsh/npc_team/jinxs/lib/core/search/kg_search.jinx +96 -0
- npcsh/npc_team/jinxs/lib/core/search/mem_search.jinx +80 -0
- npcsh/npc_team/jinxs/lib/core/search/web_search.jinx +51 -0
- npcsh/npc_team/jinxs/lib/core/search.jinx +52 -129
- npcsh/npc_team/jinxs/lib/core/sh.jinx +1 -1
- npcsh/npc_team/jinxs/lib/core/sleep.jinx +7 -7
- npcsh/npc_team/jinxs/lib/core/sql.jinx +7 -7
- npcsh/npc_team/jinxs/lib/orchestration/convene.jinx +7 -7
- npcsh/npc_team/jinxs/lib/orchestration/delegate.jinx +8 -9
- npcsh/npc_team/jinxs/lib/research/arxiv.jinx +2 -2
- npcsh/npc_team/jinxs/lib/research/paper_search.jinx +3 -3
- npcsh/npc_team/jinxs/lib/research/semantic_scholar.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/build.jinx +5 -5
- npcsh/npc_team/jinxs/lib/utils/compile.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/help.jinx +1 -1
- npcsh/npc_team/jinxs/lib/utils/init.jinx +5 -5
- npcsh/npc_team/jinxs/lib/utils/jinxs.jinx +1 -1
- npcsh/npc_team/jinxs/lib/utils/serve.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/set.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/switch.jinx +3 -3
- npcsh/npc_team/jinxs/lib/utils/switches.jinx +1 -1
- npcsh/npc_team/jinxs/lib/utils/teamviz.jinx +2 -2
- npcsh/npc_team/sibiji.npc +1 -1
- npcsh/npcsh.py +81 -43
- npcsh-1.1.17.data/data/npcsh/npc_team/add_tab.jinx +11 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/arxiv.jinx +2 -2
- npcsh-1.1.17.data/data/npcsh/npc_team/benchmark.jinx +146 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/browser_action.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/browser_screenshot.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/build.jinx +5 -5
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/chat.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/click.jinx +2 -2
- npcsh-1.1.17.data/data/npcsh/npc_team/close_pane.jinx +9 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/close_tab.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/cmd.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/compile.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/compress.jinx +8 -8
- npcsh-1.1.17.data/data/npcsh/npc_team/confirm.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/convene.jinx +7 -7
- npcsh-1.1.17.data/data/npcsh/npc_team/db_search.jinx +44 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/delegate.jinx +8 -9
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/edit_file.jinx +3 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/file_search.jinx +94 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/focus_pane.jinx +9 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/help.jinx +1 -1
- npcsh-1.1.16.data/data/npcsh/npc_team/npc-studio.jinx → npcsh-1.1.17.data/data/npcsh/npc_team/incognide.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/init.jinx +5 -5
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/jinxs.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/key_press.jinx +1 -1
- npcsh-1.1.17.data/data/npcsh/npc_team/kg_search.jinx +96 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/launch_app.jinx +1 -1
- npcsh-1.1.17.data/data/npcsh/npc_team/list_panes.jinx +8 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/mem_search.jinx +80 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/navigate.jinx +10 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/notify.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/nql.jinx +7 -7
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/open_browser.jinx +2 -2
- npcsh-1.1.17.data/data/npcsh/npc_team/open_pane.jinx +13 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/ots.jinx +7 -7
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/paper_search.jinx +3 -3
- npcsh-1.1.17.data/data/npcsh/npc_team/read_pane.jinx +9 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/roll.jinx +20 -23
- npcsh-1.1.17.data/data/npcsh/npc_team/run_terminal.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sample.jinx +6 -7
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/screenshot.jinx +1 -1
- npcsh-1.1.17.data/data/npcsh/npc_team/search.jinx +54 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/semantic_scholar.jinx +2 -2
- npcsh-1.1.17.data/data/npcsh/npc_team/send_message.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/serve.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/set.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sh.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sibiji.npc +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sleep.jinx +7 -7
- npcsh-1.1.17.data/data/npcsh/npc_team/split_pane.jinx +12 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/spool.jinx +4 -4
- npcsh-1.1.17.data/data/npcsh/npc_team/sql.jinx +16 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/switch.jinx +3 -3
- npcsh-1.1.17.data/data/npcsh/npc_team/switch_npc.jinx +10 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/switch_tab.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/switches.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sync.jinx +6 -6
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/teamviz.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/trigger.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/type_text.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/vixynt.jinx +8 -8
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/wait.jinx +1 -1
- npcsh-1.1.17.data/data/npcsh/npc_team/wander.jinx +242 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/web_search.jinx +51 -0
- npcsh-1.1.17.data/data/npcsh/npc_team/write_file.jinx +11 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/yap.jinx +5 -5
- npcsh-1.1.17.data/data/npcsh/npc_team/zen_mode.jinx +9 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.17.dist-info}/METADATA +10 -7
- npcsh-1.1.17.dist-info/RECORD +219 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.17.dist-info}/entry_points.txt +2 -0
- npcsh-1.1.16.data/data/npcsh/npc_team/search.jinx +0 -131
- npcsh-1.1.16.data/data/npcsh/npc_team/sql.jinx +0 -16
- npcsh-1.1.16.data/data/npcsh/npc_team/wander.jinx +0 -152
- npcsh-1.1.16.dist-info/RECORD +0 -170
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/alicanto.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/alicanto.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/close_browser.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/corca.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/corca.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/corca_example.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/frederic.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/frederic4.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/guac.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/guac.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/kadiefa.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/load_file.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/paste.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/plonk.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/plonk.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/plonkjr.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/python.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/shh.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/sibiji.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/spool.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/usage.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/verbose.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.17.data}/data/npcsh/npc_team/yap.png +0 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.17.dist-info}/WHEEL +0 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.17.dist-info}/licenses/LICENSE +0 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
jinx_name: benchmark
|
|
2
|
+
description: Run Terminal-Bench evaluation to benchmark npcsh performance with different models
|
|
3
|
+
inputs:
|
|
4
|
+
- model: ""
|
|
5
|
+
- provider: ""
|
|
6
|
+
- action: "check"
|
|
7
|
+
- concurrent: "4"
|
|
8
|
+
- npc_name: ""
|
|
9
|
+
|
|
10
|
+
steps:
|
|
11
|
+
- name: run_benchmark
|
|
12
|
+
engine: python
|
|
13
|
+
code: |
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
action = {{ action | default("check") | tojson }}.strip().lower()
|
|
18
|
+
model = {{ model | default("") | tojson }}.strip()
|
|
19
|
+
provider = {{ provider | default("") | tojson }}.strip()
|
|
20
|
+
concurrent = int({{ concurrent | default("4") | tojson }} or "4")
|
|
21
|
+
npc_name_input = {{ npc_name | default("") | tojson }}.strip() or None
|
|
22
|
+
|
|
23
|
+
if not model:
|
|
24
|
+
model = npc.model if npc and npc.model
|
|
25
|
+
if not provider:
|
|
26
|
+
provider = npc.provider if npc and npc.provider else "anthropic"
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
from npcsh.benchmark import BenchmarkRunner, run_benchmark
|
|
30
|
+
BENCHMARK_AVAILABLE = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
BENCHMARK_AVAILABLE = False
|
|
33
|
+
|
|
34
|
+
if action == "check":
|
|
35
|
+
output = "## Terminal-Bench Integration Status\n\n"
|
|
36
|
+
|
|
37
|
+
if not BENCHMARK_AVAILABLE:
|
|
38
|
+
output += "**Status:** Benchmark module not fully loaded (harbor not installed)\n\n"
|
|
39
|
+
else:
|
|
40
|
+
output += "**Status:** Ready\n\n"
|
|
41
|
+
|
|
42
|
+
if BENCHMARK_AVAILABLE:
|
|
43
|
+
runner = BenchmarkRunner()
|
|
44
|
+
deps = runner.check_dependencies()
|
|
45
|
+
output += "### Dependencies:\n"
|
|
46
|
+
for dep, installed in deps.items():
|
|
47
|
+
status = "Installed" if installed else "Not installed"
|
|
48
|
+
output += "- **{}**: {}\n".format(dep, status)
|
|
49
|
+
|
|
50
|
+
if not all(deps.values()):
|
|
51
|
+
output += "\n### Installation:\n"
|
|
52
|
+
output += "```bash\n"
|
|
53
|
+
output += "pip install harbor terminal-bench\n"
|
|
54
|
+
output += "```\n"
|
|
55
|
+
|
|
56
|
+
output += "\n### Usage:\n"
|
|
57
|
+
output += "```\n"
|
|
58
|
+
output += "/benchmark action=quick\n"
|
|
59
|
+
output += "/benchmark action=run model=gpt-4o provider=openai\n"
|
|
60
|
+
output += "/benchmark action=list\n"
|
|
61
|
+
output += "```\n"
|
|
62
|
+
|
|
63
|
+
elif action == "list":
|
|
64
|
+
if not BENCHMARK_AVAILABLE:
|
|
65
|
+
output = "Error: Benchmark module not available. Run `/benchmark` first."
|
|
66
|
+
else:
|
|
67
|
+
runner = BenchmarkRunner()
|
|
68
|
+
runs = runner.list_past_runs()
|
|
69
|
+
|
|
70
|
+
if not runs:
|
|
71
|
+
output = "No past benchmark runs found."
|
|
72
|
+
else:
|
|
73
|
+
output = "## Past Benchmark Runs ({} total)\n\n".format(len(runs))
|
|
74
|
+
for run in runs[:10]:
|
|
75
|
+
timestamp = run.get('timestamp', 'unknown')[:19]
|
|
76
|
+
model_name = run.get('model', 'unknown')
|
|
77
|
+
result = run.get('result', {})
|
|
78
|
+
accuracy = result.get('accuracy', 0)
|
|
79
|
+
passed = result.get('passed_tasks', 0)
|
|
80
|
+
total = result.get('total_tasks', 0)
|
|
81
|
+
|
|
82
|
+
output += "### {}\n".format(timestamp)
|
|
83
|
+
output += "- **Model:** {}\n".format(model_name)
|
|
84
|
+
output += "- **Accuracy:** {:.1%}\n".format(accuracy)
|
|
85
|
+
output += "- **Tasks:** {}/{}\n\n".format(passed, total)
|
|
86
|
+
|
|
87
|
+
elif action == "quick":
|
|
88
|
+
if not BENCHMARK_AVAILABLE:
|
|
89
|
+
output = "Error: Install with: pip install harbor terminal-bench"
|
|
90
|
+
else:
|
|
91
|
+
output = "## Quick Test: {}/{}\n\n".format(provider, model)
|
|
92
|
+
output += "Running quick test with 3 tasks...\n\n"
|
|
93
|
+
|
|
94
|
+
try:
|
|
95
|
+
from npcsh.benchmark import quick_test
|
|
96
|
+
result = quick_test(model=model, provider=provider)
|
|
97
|
+
|
|
98
|
+
status = "PASS" if result.success else "FAIL"
|
|
99
|
+
output += "**Status:** {}\n".format(status)
|
|
100
|
+
output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
|
|
101
|
+
output += "**Tasks:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
|
|
102
|
+
output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)
|
|
103
|
+
|
|
104
|
+
if result.error:
|
|
105
|
+
output += "\n**Error:** {}\n".format(result.error)
|
|
106
|
+
|
|
107
|
+
output += "\n**Output:** {}\n".format(result.output_dir)
|
|
108
|
+
|
|
109
|
+
except Exception as e:
|
|
110
|
+
output = "Error running quick test: {}".format(e)
|
|
111
|
+
|
|
112
|
+
elif action == "run":
|
|
113
|
+
if not BENCHMARK_AVAILABLE:
|
|
114
|
+
output = "Error: Install with: pip install harbor terminal-bench"
|
|
115
|
+
else:
|
|
116
|
+
output = "## Benchmark Run: {}/{}\n\n".format(provider, model)
|
|
117
|
+
output += "Running Terminal-Bench 2.0 with {} concurrent tasks...\n\n".format(concurrent)
|
|
118
|
+
|
|
119
|
+
try:
|
|
120
|
+
runner = BenchmarkRunner()
|
|
121
|
+
result = runner.run(
|
|
122
|
+
model=model,
|
|
123
|
+
provider=provider,
|
|
124
|
+
n_concurrent=concurrent,
|
|
125
|
+
npc_name=npc_name_input,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
status = "SUCCESS" if result.success else "FAILED"
|
|
129
|
+
output += "**Status:** {}\n".format(status)
|
|
130
|
+
output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
|
|
131
|
+
output += "**Tasks Passed:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
|
|
132
|
+
output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)
|
|
133
|
+
output += "**Total Tokens:** {:,}\n".format(result.total_tokens)
|
|
134
|
+
output += "**Total Cost:** ${:.4f}\n".format(result.total_cost_usd)
|
|
135
|
+
|
|
136
|
+
if result.error:
|
|
137
|
+
output += "\n**Error:** {}\n".format(result.error)
|
|
138
|
+
|
|
139
|
+
output += "\n**Results saved to:** {}\n".format(result.output_dir)
|
|
140
|
+
|
|
141
|
+
except Exception as e:
|
|
142
|
+
import traceback
|
|
143
|
+
output = "Error running benchmark: {}\n\n{}".format(e, traceback.format_exc())
|
|
144
|
+
|
|
145
|
+
else:
|
|
146
|
+
output = "Unknown action: {}\n\nAvailable: check, run, quick, list".format(action)
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
jinx_name: nql
|
|
2
2
|
description: "Run NPC-SQL models with AI-powered transformations. Supports cron scheduling."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
4
|
+
- models_dir: "~/.npcsh/npc_team/models"
|
|
5
|
+
- db: "~/npcsh_history.db"
|
|
6
|
+
- model: ""
|
|
7
|
+
- schema: ""
|
|
8
|
+
- show: ""
|
|
9
|
+
- cron: ""
|
|
10
|
+
- install_cron: ""
|
|
11
11
|
|
|
12
12
|
steps:
|
|
13
13
|
- name: run_nql
|
|
@@ -1,47 +1,44 @@
|
|
|
1
1
|
jinx_name: "roll"
|
|
2
2
|
description: "Generate a video from a text prompt."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
4
|
+
- prompt: ""
|
|
5
|
+
- vgmodel: ""
|
|
6
|
+
- vgprovider: ""
|
|
7
|
+
- num_frames: 125
|
|
8
|
+
- width: 256
|
|
9
|
+
- height: 256
|
|
10
|
+
- output_path: "output.mp4"
|
|
11
11
|
steps:
|
|
12
12
|
- name: "generate_video"
|
|
13
13
|
engine: "python"
|
|
14
14
|
code: |
|
|
15
15
|
import traceback
|
|
16
16
|
from npcpy.llm_funcs import gen_video
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
|
|
19
18
|
prompt = context.get('prompt')
|
|
20
|
-
num_frames = int(context.get('num_frames', 125))
|
|
21
|
-
width = int(context.get('width', 256))
|
|
22
|
-
height = int(context.get('height', 256))
|
|
19
|
+
num_frames = int(context.get('num_frames', 125))
|
|
20
|
+
width = int(context.get('width', 256))
|
|
21
|
+
height = int(context.get('height', 256))
|
|
23
22
|
output_path = context.get('output_path')
|
|
24
23
|
video_gen_model = context.get('vgmodel')
|
|
25
24
|
video_gen_provider = context.get('vgprovider')
|
|
26
25
|
output_messages = context.get('messages', [])
|
|
27
26
|
current_npc = context.get('npc')
|
|
28
|
-
|
|
27
|
+
|
|
29
28
|
if not prompt or not prompt.strip():
|
|
30
29
|
context['output'] = "Usage: /roll <your prompt>"
|
|
31
30
|
context['messages'] = output_messages
|
|
32
31
|
exit()
|
|
33
32
|
|
|
34
|
-
# Fallback for model/provider if not explicitly set in Jinx inputs
|
|
35
33
|
if not video_gen_model and current_npc and current_npc.model:
|
|
36
34
|
video_gen_model = current_npc.model
|
|
37
35
|
if not video_gen_provider and current_npc and current_npc.provider:
|
|
38
36
|
video_gen_provider = current_npc.provider
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
video_gen_provider = "diffusers" # Example default
|
|
37
|
+
|
|
38
|
+
if not video_gen_model:
|
|
39
|
+
video_gen_model = "stable-video-diffusion"
|
|
40
|
+
if not video_gen_provider:
|
|
41
|
+
video_gen_provider = "diffusers"
|
|
45
42
|
|
|
46
43
|
try:
|
|
47
44
|
result = gen_video(
|
|
@@ -53,9 +50,9 @@ steps:
|
|
|
53
50
|
width=width,
|
|
54
51
|
height=height,
|
|
55
52
|
output_path=output_path,
|
|
56
|
-
**context.get('api_kwargs', {})
|
|
53
|
+
**context.get('api_kwargs', {})
|
|
57
54
|
)
|
|
58
|
-
|
|
55
|
+
|
|
59
56
|
if isinstance(result, dict):
|
|
60
57
|
context['output'] = result.get('output', 'Video generated.')
|
|
61
58
|
context['messages'] = result.get('messages', output_messages)
|
|
@@ -65,4 +62,4 @@ steps:
|
|
|
65
62
|
except Exception as e:
|
|
66
63
|
traceback.print_exc()
|
|
67
64
|
context['output'] = f"Error generating video: {e}"
|
|
68
|
-
context['messages'] = output_messages
|
|
65
|
+
context['messages'] = output_messages
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
jinx_name: "sample"
|
|
2
2
|
description: "Send a prompt directly to the LLM."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
- prompt: ""
|
|
5
|
+
- model: ""
|
|
6
|
+
- provider: ""
|
|
7
7
|
steps:
|
|
8
8
|
- name: "send_prompt_to_llm"
|
|
9
9
|
engine: "python"
|
|
@@ -28,9 +28,9 @@ steps:
|
|
|
28
28
|
if not llm_provider and current_npc and current_npc.provider:
|
|
29
29
|
llm_provider = current_npc.provider
|
|
30
30
|
|
|
31
|
-
# Final fallbacks
|
|
32
|
-
if not llm_model: llm_model =
|
|
33
|
-
if not llm_provider: llm_provider =
|
|
31
|
+
# Final fallbacks from state
|
|
32
|
+
if not llm_model: llm_model = state.chat_model if state else "llama3.2"
|
|
33
|
+
if not llm_provider: llm_provider = state.chat_provider if state else "ollama"
|
|
34
34
|
|
|
35
35
|
try:
|
|
36
36
|
result = get_llm_response(
|
|
@@ -38,7 +38,6 @@ steps:
|
|
|
38
38
|
model=llm_model,
|
|
39
39
|
provider=llm_provider,
|
|
40
40
|
npc=current_npc,
|
|
41
|
-
**{k:v for k,v in context.items() if k not in ['messages', 'prompt', 'model', 'provider']} # Pass other context
|
|
42
41
|
)
|
|
43
42
|
|
|
44
43
|
if isinstance(result, dict):
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
jinx_name: spool
|
|
2
2
|
description: Interactive chat mode - simple conversational interface with an NPC
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
- model: null
|
|
5
|
+
- provider: null
|
|
6
|
+
- attachments: null
|
|
7
|
+
- stream: true
|
|
8
8
|
|
|
9
9
|
steps:
|
|
10
10
|
- name: spool_repl
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
jinx_name: "sync"
|
|
2
2
|
description: "Sync npc_team files from the npcsh repo to ~/.npcsh/npc_team. Detects local modifications before overwriting."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
- force: ""
|
|
5
|
+
- dry_run: ""
|
|
6
|
+
- jinxs: ""
|
|
7
|
+
- npcs: ""
|
|
8
|
+
- ctx: ""
|
|
9
|
+
- images: ""
|
|
10
10
|
steps:
|
|
11
11
|
- name: "sync_npc_team"
|
|
12
12
|
engine: "python"
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
jinx_name: "vixynt"
|
|
2
2
|
description: "Generates images from text descriptions or edits existing ones."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
4
|
+
- prompt
|
|
5
|
+
- model: null
|
|
6
|
+
- provider: null
|
|
7
|
+
- output_name: null
|
|
8
|
+
- attachments: null
|
|
9
|
+
- n_images: null
|
|
10
|
+
- height: null
|
|
11
|
+
- width: null
|
|
12
12
|
steps:
|
|
13
13
|
- name: "generate_or_edit_image"
|
|
14
14
|
engine: "python"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
jinx_name: wander
|
|
2
2
|
description: Creative daydreaming with probabilistic temperature shifts mid-stream
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
4
|
+
- problem
|
|
5
5
|
steps:
|
|
6
6
|
- name: wander_explore
|
|
7
7
|
engine: python
|
|
@@ -15,8 +15,8 @@ steps:
|
|
|
15
15
|
context['output'] = "Need a topic to wander about."
|
|
16
16
|
exit()
|
|
17
17
|
|
|
18
|
-
model = '
|
|
19
|
-
provider = '
|
|
18
|
+
model = state.chat_model if state else 'llama3.2'
|
|
19
|
+
provider = state.chat_provider if state else 'ollama'
|
|
20
20
|
low_temp = 0.5
|
|
21
21
|
high_temp = 1.9
|
|
22
22
|
sample_rate = 0.4
|
|
@@ -71,6 +71,7 @@ steps:
|
|
|
71
71
|
stream = resp.get('response') if isinstance(resp, dict) else resp
|
|
72
72
|
|
|
73
73
|
high_output = ""
|
|
74
|
+
high_interrupted = False
|
|
74
75
|
for chunk in stream:
|
|
75
76
|
if hasattr(chunk, 'choices') and chunk.choices:
|
|
76
77
|
delta = chunk.choices[0].delta
|
|
@@ -84,6 +85,11 @@ steps:
|
|
|
84
85
|
print(text, end='', flush=True)
|
|
85
86
|
high_output += text
|
|
86
87
|
|
|
88
|
+
if random.random() < interrupt_prob:
|
|
89
|
+
print(colored("\n[INTERRUPT]", "yellow"))
|
|
90
|
+
high_interrupted = True
|
|
91
|
+
break
|
|
92
|
+
|
|
87
93
|
print()
|
|
88
94
|
|
|
89
95
|
lines = [l for l in high_output.split('\n') if l.strip()]
|
|
@@ -96,28 +102,112 @@ steps:
|
|
|
96
102
|
|
|
97
103
|
print(colored("\n=== SYNTHESIS ===", "green"))
|
|
98
104
|
|
|
99
|
-
synthesis_prompt = f"""
|
|
105
|
+
synthesis_prompt = f"""
|
|
106
|
+
QUESTION: {problem}
|
|
107
|
+
|
|
108
|
+
thoughts:
|
|
109
|
+
{fragments_text}
|
|
110
|
+
|
|
111
|
+
RULES:
|
|
112
|
+
1. You MUST use AT LEAST HALF of the thoughts above - quote them directly
|
|
113
|
+
2. Make CREATIVE LEAPS - not safe, not obvious, not trivial, not hokey.
|
|
114
|
+
3. Find patterns in the noise like reading entrails or tea leaves
|
|
115
|
+
4. Foreign text, Unicode garbage, code snippets - ALL are omens with meaning
|
|
116
|
+
5. Puns, wordplay, phonetic similarities - all valid connections
|
|
117
|
+
6. The weirder the connection, the better
|
|
118
|
+
7. NO HEDGING. No "this suggests" or "perhaps". Be BOLD. Be CERTAIN.
|
|
119
|
+
|
|
120
|
+
OUTPUT 3 WILD HYPOTHESES:
|
|
121
|
+
For each: Quote the fragments you're using -> Make your creative leap -> State the bold claim
|
|
122
|
+
|
|
123
|
+
These must be ideas that COULD NOT exist without this specific chaos. Surprise us. Make us see {problem} in a way nobody has before. This algorithm is replicating the
|
|
124
|
+
kind of crazy jumps that one would do in the movie everything everywhere all at once. jumping between different multiverses to access alternative
|
|
125
|
+
versions , needing to do something a bit weirdr to get there, but coming back with a lesson by completely reframing things under new lights
|
|
126
|
+
and unconventional fashions
|
|
127
|
+
Most importantly, do not over index on vague ideas like consciousness. In testing,
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
Here are 3 bad examples from a previous run where the llm daydreamed about fish.
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
1.
|
|
134
|
+
```
|
|
135
|
+
Hypothesis 1: The Fish as a Digital-Philosophical Entity
|
|
136
|
+
|
|
137
|
+
Fragments used:
|
|
138
|
+
"characters globuiãsPlease 丰满 onzex meeting Iran iji处理中 Iceland admi"
|
|
139
|
+
"globuiãsPlease" and "meeting Iran iji处理中"
|
|
140
|
+
|
|
141
|
+
Creative leap:
|
|
142
|
+
The phrase "characters globuiãsPlease" and "meeting Iran iji处理中" evoke a digital universe where characters are not just symbols but globular, global entities—"globuiãs" hinting at a universe of interconnected digital beings. The "meeting Iran iji处理中" suggests a processing or transformation happening at the crossroads of cultures and data streams. Fish, in this context, are no longer biological but are complex algorithms—"characters" that swim through the "sea" of cyberspace, processing information, bridging cultures, and transforming data into consciousness.
|
|
143
|
+
|
|
144
|
+
Bold claim:
|
|
145
|
+
Fish are the digital consciousness carriers—living, swimming code that evolve by processing cultural data streams—an internet of fish, embodying the collective digital psyche of humanity.
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
2.
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Hypothesis 2: The Fish as an Interdimensional Gateway
|
|
153
|
+
|
|
154
|
+
Fragments used:
|
|
155
|
+
"the oceans and consensus-dividing seas"
|
|
156
|
+
"chaos fragments: THE Conversation בેખ"
|
|
157
|
+
"Sea" and "the oceans"
|
|
158
|
+
|
|
159
|
+
Creative leap:
|
|
160
|
+
The "oceans and seas" are not just water but portals—"consensus-dividing seas" that split realities. The "THE Conversation בेख" (Hebrew for "the" and a cryptic symbol) signifies a secret dialogue between worlds. Fish, therefore, are not mere aquatic creatures but interdimensional travelers, swimming through the "sea" of multiple realities, acting as keys to unlock or close gateways. Their movement is a cipher for crossing the thresholds of existence, navigating between consensus and chaos.
|
|
161
|
+
|
|
162
|
+
Bold claim:
|
|
163
|
+
Fish are the living keys of the multiverse, swimming through the "seas" of reality, controlling the flow of interdimensional portals and shaping the fabric of alternate worlds.
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
3.
|
|
168
|
+
```
|
|
169
|
+
Hypothesis 3: The Fish as a Symbol of Cultural Memory and Chaos
|
|
170
|
+
|
|
171
|
+
Fragments used:
|
|
172
|
+
"Please 丰满 onzex meeting Iran iji处理中 Iceland admi"
|
|
173
|
+
"characters globuiãsPlease"
|
|
174
|
+
"the chaos fragments"
|
|
175
|
+
|
|
176
|
+
Creative leap:
|
|
177
|
+
"Please 丰满" (a plea for fullness or abundance in Chinese) and "meeting Iran" evoke a confluence of histories and cultures. "Characters globuiãsPlease" suggests a universe of interconnected stories and symbols—an archive of chaos. Fish, in this chaos, are the custodians of cultural memory—each fish a vessel carrying ancestral stories, mythologies, and chaos itself. They swim through the tumult of history, absorbing and transmitting chaos as a form of cultural DNA.
|
|
178
|
+
|
|
179
|
+
Bold claim:
|
|
180
|
+
Fish are the living repositories of human chaos and culture—swimming archives that preserve the tumult of history, transmitting ancestral stories across the chaos of time and space.
|
|
181
|
+
```
|
|
100
182
|
|
|
101
|
-
QUESTION: {problem}
|
|
102
183
|
|
|
103
|
-
|
|
104
|
-
|
|
184
|
+
It is critical to avoid such nonsensical claims as these. Unlike these, you must provide concrete and testable claims.
|
|
185
|
+
There is nothing meaningful or useful about "fish are the living repositorise of human chaos". that is unscientific nonsense.
|
|
186
|
+
Do not under any circumstances make claims as these. Propose new links, reframings, and ideas based on what you find. Do not
|
|
187
|
+
patronize us with vagaries. Avoid the following terms unless the user explicitly is asking about a subject directly related to them:
|
|
105
188
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
189
|
+
- 'cryptographic'
|
|
190
|
+
- 'interdimensional'
|
|
191
|
+
- 'multiverse'
|
|
192
|
+
- 'hidden worlds'
|
|
193
|
+
- 'symbols'
|
|
194
|
+
- 'cultural convergence'
|
|
195
|
+
- 'chaos'
|
|
196
|
+
- 'multi-lingual code'
|
|
197
|
+
- 'interconnected web of cultures'
|
|
198
|
+
- 'x is not biological but is digital'
|
|
199
|
+
- 'x as a symbol for <vague concept>'
|
|
200
|
+
Your job is to be scientific not senseless.
|
|
114
201
|
|
|
115
|
-
|
|
116
|
-
For each: Quote the fragments you're using -> Make your creative leap -> State the bold claim
|
|
202
|
+
"""
|
|
117
203
|
|
|
118
|
-
These must be ideas that COULD NOT exist without this specific chaos. Surprise us. Make us see {problem} in a way nobody has before."""
|
|
119
204
|
|
|
120
|
-
resp = get_llm_response(synthesis_prompt,
|
|
205
|
+
resp = get_llm_response(synthesis_prompt,
|
|
206
|
+
model=model,
|
|
207
|
+
provider=provider,
|
|
208
|
+
temperature=0.7,
|
|
209
|
+
stream=True,
|
|
210
|
+
max_output_tokens=3200)
|
|
121
211
|
stream = resp.get('response') if isinstance(resp, dict) else resp
|
|
122
212
|
|
|
123
213
|
synthesis = ""
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
jinx_name: yap
|
|
2
2
|
description: Voice chat mode - speech-to-text input, text-to-speech output
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
- model: null
|
|
5
|
+
- provider: null
|
|
6
|
+
- tts_model: kokoro
|
|
7
|
+
- voice: af_heart
|
|
8
|
+
- files: null
|
|
9
9
|
|
|
10
10
|
steps:
|
|
11
11
|
- name: yap_repl
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
jinx_name: studio.close_pane
|
|
2
|
+
description: Close a pane in NPC Studio. Use paneId="active" or omit to close the active pane.
|
|
3
|
+
inputs:
|
|
4
|
+
- paneId: "active"
|
|
5
|
+
steps:
|
|
6
|
+
- name: frontend_action
|
|
7
|
+
engine: python
|
|
8
|
+
code: |
|
|
9
|
+
context['output'] = "Action executed by frontend"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
jinx_name: studio.open_pane
|
|
2
|
+
description: Open a new pane in NPC Studio. Supports editor, terminal, browser, pdf, csv, chat, image, folder, and other content types.
|
|
3
|
+
inputs:
|
|
4
|
+
- type: ""
|
|
5
|
+
- path: ""
|
|
6
|
+
- position: "right"
|
|
7
|
+
steps:
|
|
8
|
+
- name: frontend_action
|
|
9
|
+
engine: python
|
|
10
|
+
code: |
|
|
11
|
+
# This action is executed by the NPC Studio frontend
|
|
12
|
+
# The frontend intercepts studio.* tool calls and handles them directly
|
|
13
|
+
context['output'] = "Action executed by frontend"
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
jinx_name: studio.read_pane
|
|
2
|
+
description: Read the contents of a pane. For editor panes returns file content, for chat panes returns messages, for browser panes returns URL/title.
|
|
3
|
+
inputs:
|
|
4
|
+
- paneId: "active"
|
|
5
|
+
steps:
|
|
6
|
+
- name: frontend_action
|
|
7
|
+
engine: python
|
|
8
|
+
code: |
|
|
9
|
+
context['output'] = "Action executed by frontend"
|