npcsh 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/_state.py +138 -100
- npcsh/alicanto.py +2 -2
- npcsh/benchmark/__init__.py +28 -0
- npcsh/benchmark/npcsh_agent.py +296 -0
- npcsh/benchmark/runner.py +611 -0
- npcsh/benchmark/templates/install-npcsh.sh.j2 +35 -0
- npcsh/build.py +2 -4
- npcsh/completion.py +2 -6
- npcsh/config.py +1 -3
- npcsh/conversation_viewer.py +389 -0
- npcsh/corca.py +0 -1
- npcsh/execution.py +0 -1
- npcsh/guac.py +0 -1
- npcsh/mcp_helpers.py +2 -3
- npcsh/mcp_server.py +5 -10
- npcsh/npc.py +10 -11
- npcsh/npc_team/jinxs/bin/benchmark.jinx +146 -0
- npcsh/npc_team/jinxs/bin/nql.jinx +7 -7
- npcsh/npc_team/jinxs/bin/roll.jinx +20 -23
- npcsh/npc_team/jinxs/bin/sample.jinx +6 -7
- npcsh/npc_team/jinxs/bin/sync.jinx +6 -6
- npcsh/npc_team/jinxs/bin/vixynt.jinx +8 -8
- npcsh/npc_team/jinxs/incognide/add_tab.jinx +11 -0
- npcsh/npc_team/jinxs/incognide/close_pane.jinx +9 -0
- npcsh/npc_team/jinxs/incognide/close_tab.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/confirm.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/focus_pane.jinx +9 -0
- npcsh/npc_team/jinxs/{npc_studio/npc-studio.jinx → incognide/incognide.jinx} +2 -2
- npcsh/npc_team/jinxs/incognide/list_panes.jinx +8 -0
- npcsh/npc_team/jinxs/incognide/navigate.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/notify.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/open_pane.jinx +13 -0
- npcsh/npc_team/jinxs/incognide/read_pane.jinx +9 -0
- npcsh/npc_team/jinxs/incognide/run_terminal.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/send_message.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/split_pane.jinx +12 -0
- npcsh/npc_team/jinxs/incognide/switch_npc.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/switch_tab.jinx +10 -0
- npcsh/npc_team/jinxs/incognide/write_file.jinx +11 -0
- npcsh/npc_team/jinxs/incognide/zen_mode.jinx +9 -0
- npcsh/npc_team/jinxs/lib/browser/browser_action.jinx +4 -4
- npcsh/npc_team/jinxs/lib/browser/browser_screenshot.jinx +1 -1
- npcsh/npc_team/jinxs/lib/browser/open_browser.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/click.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/key_press.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/launch_app.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/screenshot.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/trigger.jinx +2 -2
- npcsh/npc_team/jinxs/lib/computer_use/type_text.jinx +1 -1
- npcsh/npc_team/jinxs/lib/computer_use/wait.jinx +1 -1
- npcsh/npc_team/jinxs/lib/core/chat.jinx +4 -4
- npcsh/npc_team/jinxs/lib/core/cmd.jinx +4 -4
- npcsh/npc_team/jinxs/lib/core/compress.jinx +8 -8
- npcsh/npc_team/jinxs/lib/core/edit_file.jinx +3 -0
- npcsh/npc_team/jinxs/lib/core/ots.jinx +7 -7
- npcsh/npc_team/jinxs/lib/core/search/db_search.jinx +348 -0
- npcsh/npc_team/jinxs/lib/core/search/file_search.jinx +339 -0
- npcsh/npc_team/jinxs/lib/core/search/kg_search.jinx +418 -0
- npcsh/npc_team/jinxs/lib/core/search/mem_review.jinx +73 -0
- npcsh/npc_team/jinxs/lib/core/search/mem_search.jinx +388 -0
- npcsh/npc_team/jinxs/lib/core/search/web_search.jinx +283 -0
- npcsh/npc_team/jinxs/lib/core/search.jinx +52 -129
- npcsh/npc_team/jinxs/lib/core/sh.jinx +1 -1
- npcsh/npc_team/jinxs/lib/core/sleep.jinx +29 -18
- npcsh/npc_team/jinxs/lib/core/sql.jinx +15 -11
- npcsh/npc_team/jinxs/lib/orchestration/convene.jinx +7 -7
- npcsh/npc_team/jinxs/lib/orchestration/delegate.jinx +8 -9
- npcsh/npc_team/jinxs/lib/research/paper_search.jinx +389 -78
- npcsh/npc_team/jinxs/lib/research/semantic_scholar.jinx +373 -56
- npcsh/npc_team/jinxs/lib/utils/build.jinx +5 -5
- npcsh/npc_team/jinxs/lib/utils/compile.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/help.jinx +1 -1
- npcsh/npc_team/jinxs/lib/utils/init.jinx +5 -5
- npcsh/npc_team/jinxs/lib/utils/jinxs.jinx +300 -145
- npcsh/npc_team/jinxs/lib/utils/serve.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/set.jinx +2 -2
- npcsh/npc_team/jinxs/lib/utils/switch.jinx +3 -3
- npcsh/npc_team/jinxs/lib/utils/switches.jinx +1 -1
- npcsh/npc_team/jinxs/lib/utils/teamviz.jinx +2 -2
- npcsh/npc_team/jinxs/modes/alicanto.jinx +356 -0
- npcsh/npc_team/jinxs/modes/arxiv.jinx +720 -0
- npcsh/npc_team/jinxs/modes/corca.jinx +430 -0
- npcsh/npc_team/jinxs/modes/guac.jinx +544 -0
- npcsh/npc_team/jinxs/modes/plonk.jinx +379 -0
- npcsh/npc_team/jinxs/modes/pti.jinx +357 -0
- npcsh/npc_team/jinxs/modes/reattach.jinx +291 -0
- npcsh/npc_team/jinxs/modes/spool.jinx +350 -0
- npcsh/npc_team/jinxs/modes/wander.jinx +455 -0
- {npcsh-1.1.16.data/data/npcsh/npc_team → npcsh/npc_team/jinxs/modes}/yap.jinx +8 -2
- npcsh/npc_team/sibiji.npc +1 -1
- npcsh/npcsh.py +87 -46
- npcsh/plonk.py +0 -1
- npcsh/pti.py +0 -1
- npcsh/routes.py +1 -3
- npcsh/spool.py +0 -1
- npcsh/ui.py +0 -1
- npcsh/wander.py +0 -1
- npcsh/yap.py +0 -1
- npcsh-1.1.18.data/data/npcsh/npc_team/add_tab.jinx +11 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/alicanto.jinx +356 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/arxiv.jinx +720 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/benchmark.jinx +146 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_action.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_screenshot.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/build.jinx +5 -5
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/chat.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/click.jinx +2 -2
- npcsh-1.1.18.data/data/npcsh/npc_team/close_pane.jinx +9 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/close_tab.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/cmd.jinx +4 -4
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compile.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compress.jinx +8 -8
- npcsh-1.1.18.data/data/npcsh/npc_team/confirm.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/convene.jinx +7 -7
- npcsh-1.1.18.data/data/npcsh/npc_team/corca.jinx +430 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/db_search.jinx +348 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/delegate.jinx +8 -9
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/edit_file.jinx +3 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/file_search.jinx +339 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/focus_pane.jinx +9 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/guac.jinx +544 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/help.jinx +1 -1
- npcsh-1.1.16.data/data/npcsh/npc_team/npc-studio.jinx → npcsh-1.1.18.data/data/npcsh/npc_team/incognide.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/init.jinx +5 -5
- npcsh-1.1.18.data/data/npcsh/npc_team/jinxs.jinx +331 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/key_press.jinx +1 -1
- npcsh-1.1.18.data/data/npcsh/npc_team/kg_search.jinx +418 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/launch_app.jinx +1 -1
- npcsh-1.1.18.data/data/npcsh/npc_team/list_panes.jinx +8 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/mem_review.jinx +73 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/mem_search.jinx +388 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/navigate.jinx +10 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/notify.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/nql.jinx +7 -7
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/open_browser.jinx +2 -2
- npcsh-1.1.18.data/data/npcsh/npc_team/open_pane.jinx +13 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/ots.jinx +7 -7
- npcsh-1.1.18.data/data/npcsh/npc_team/paper_search.jinx +412 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/plonk.jinx +379 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/pti.jinx +357 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/read_pane.jinx +9 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/reattach.jinx +291 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/roll.jinx +20 -23
- npcsh-1.1.18.data/data/npcsh/npc_team/run_terminal.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sample.jinx +6 -7
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/screenshot.jinx +1 -1
- npcsh-1.1.18.data/data/npcsh/npc_team/search.jinx +54 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/semantic_scholar.jinx +386 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/send_message.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/serve.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/set.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sh.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.npc +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sleep.jinx +29 -18
- npcsh-1.1.18.data/data/npcsh/npc_team/split_pane.jinx +12 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/spool.jinx +350 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/sql.jinx +20 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switch.jinx +3 -3
- npcsh-1.1.18.data/data/npcsh/npc_team/switch_npc.jinx +10 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/switch_tab.jinx +10 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switches.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sync.jinx +6 -6
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/teamviz.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/trigger.jinx +2 -2
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/type_text.jinx +1 -1
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/vixynt.jinx +8 -8
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/wait.jinx +1 -1
- npcsh-1.1.18.data/data/npcsh/npc_team/wander.jinx +455 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/web_search.jinx +283 -0
- npcsh-1.1.18.data/data/npcsh/npc_team/write_file.jinx +11 -0
- {npcsh/npc_team/jinxs/bin → npcsh-1.1.18.data/data/npcsh/npc_team}/yap.jinx +8 -2
- npcsh-1.1.18.data/data/npcsh/npc_team/zen_mode.jinx +9 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/METADATA +99 -7
- npcsh-1.1.18.dist-info/RECORD +235 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/WHEEL +1 -1
- {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/entry_points.txt +2 -3
- npcsh/npc_team/jinxs/bin/spool.jinx +0 -161
- npcsh/npc_team/jinxs/bin/wander.jinx +0 -152
- npcsh/npc_team/jinxs/lib/research/arxiv.jinx +0 -76
- npcsh-1.1.16.data/data/npcsh/npc_team/arxiv.jinx +0 -76
- npcsh-1.1.16.data/data/npcsh/npc_team/jinxs.jinx +0 -176
- npcsh-1.1.16.data/data/npcsh/npc_team/paper_search.jinx +0 -101
- npcsh-1.1.16.data/data/npcsh/npc_team/search.jinx +0 -131
- npcsh-1.1.16.data/data/npcsh/npc_team/semantic_scholar.jinx +0 -69
- npcsh-1.1.16.data/data/npcsh/npc_team/spool.jinx +0 -161
- npcsh-1.1.16.data/data/npcsh/npc_team/sql.jinx +0 -16
- npcsh-1.1.16.data/data/npcsh/npc_team/wander.jinx +0 -152
- npcsh-1.1.16.dist-info/RECORD +0 -170
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/close_browser.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca_example.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic4.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/load_file.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/paste.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/python.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/shh.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/spool.png +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/usage.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/verbose.jinx +0 -0
- {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/yap.png +0 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/licenses/LICENSE +0 -0
- {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# benchmark jinx: front end for the Terminal-Bench integration in npcsh.benchmark.
# Inputs are rendered into the Python step below through Jinja before execution.
#   action     - one of: check, list, quick, run (blank falls back to "check")
#   model      - model name; blank falls back to the active NPC's model
#   provider   - provider name; blank falls back to the active NPC's provider, then "anthropic"
#   concurrent - number of concurrent tasks for action=run (invalid values fall back to 4)
#   npc_name   - optional NPC name forwarded to BenchmarkRunner.run
jinx_name: benchmark
description: Run Terminal-Bench evaluation to benchmark npcsh performance with different models
inputs:
  - model: ""
  - provider: ""
  - action: "check"
  - concurrent: "4"
  - npc_name: ""

steps:
  - name: run_benchmark
    engine: python
    code: |
      import os
      import sys

      DEFAULT_CONCURRENCY = 4
      INSTALL_COMMAND = "pip install harbor terminal-bench"

      # default(..., true) also replaces empty and None values, so tojson never
      # renders a bare null here; str() keeps .strip() safe for non-string inputs.
      action = str({{ action | default("check", true) | tojson }}).strip().lower()
      model = str({{ model | default("", true) | tojson }}).strip()
      provider = str({{ provider | default("", true) | tojson }}).strip()
      npc_name_input = str({{ npc_name | default("", true) | tojson }}).strip() or None

      # A non-numeric or non-positive concurrency must not abort the whole step:
      # fall back to the default and never go below one worker.
      try:
          concurrent = int(str({{ concurrent | default("4", true) | tojson }}).strip())
      except (TypeError, ValueError):
          concurrent = DEFAULT_CONCURRENCY
      concurrent = max(1, concurrent)

      # Blank model/provider fall back to the active NPC when one is in scope.
      if not model:
          model = npc.model if npc and npc.model else ""
      if not provider:
          provider = npc.provider if npc and npc.provider else "anthropic"

      # The benchmark module is optional; every action below checks this flag.
      try:
          from npcsh.benchmark import BenchmarkRunner, run_benchmark
          BENCHMARK_AVAILABLE = True
      except ImportError:
          BENCHMARK_AVAILABLE = False

      if action == "check":
          output = "## Terminal-Bench Integration Status\n\n"

          if BENCHMARK_AVAILABLE:
              output += "**Status:** Ready\n\n"

              runner = BenchmarkRunner()
              deps = runner.check_dependencies()
              output += "### Dependencies:\n"
              for dep, installed in deps.items():
                  status = "Installed" if installed else "Not installed"
                  output += "- **{}**: {}\n".format(dep, status)
              needs_install = not all(deps.values())
          else:
              output += "**Status:** Benchmark module not fully loaded (harbor not installed)\n\n"
              # The list action sends users here when the module is missing,
              # so the install hint has to be shown in this case as well.
              needs_install = True

          if needs_install:
              output += "\n### Installation:\n"
              output += "```bash\n"
              output += INSTALL_COMMAND + "\n"
              output += "```\n"

          output += "\n### Usage:\n"
          output += "```\n"
          output += "/benchmark action=quick\n"
          output += "/benchmark action=run model=gpt-4o provider=openai\n"
          output += "/benchmark action=list\n"
          output += "```\n"

      elif action == "list":
          if not BENCHMARK_AVAILABLE:
              output = "Error: Benchmark module not available. Run `/benchmark` first."
          else:
              runner = BenchmarkRunner()
              runs = runner.list_past_runs()

              if not runs:
                  output = "No past benchmark runs found."
              else:
                  output = "## Past Benchmark Runs ({} total)\n\n".format(len(runs))
                  # Only the first ten entries returned by the runner are shown.
                  for run in runs[:10]:
                      # Stored records may carry explicit None values; treat them
                      # like missing keys instead of failing on slicing/formatting.
                      timestamp = str(run.get('timestamp') or 'unknown')[:19]
                      model_name = run.get('model') or 'unknown'
                      result = run.get('result') or {}
                      accuracy = result.get('accuracy') or 0
                      passed = result.get('passed_tasks') or 0
                      total = result.get('total_tasks') or 0

                      output += "### {}\n".format(timestamp)
                      output += "- **Model:** {}\n".format(model_name)
                      output += "- **Accuracy:** {:.1%}\n".format(accuracy)
                      output += "- **Tasks:** {}/{}\n\n".format(passed, total)

      elif action == "quick":
          if not BENCHMARK_AVAILABLE:
              output = "Error: Install with: " + INSTALL_COMMAND
          else:
              output = "## Quick Test: {}/{}\n\n".format(provider, model)
              output += "Running quick test with 3 tasks...\n\n"

              try:
                  from npcsh.benchmark import quick_test
                  result = quick_test(model=model, provider=provider)

                  status = "PASS" if result.success else "FAIL"
                  output += "**Status:** {}\n".format(status)
                  output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
                  output += "**Tasks:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
                  output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)

                  if result.error:
                      output += "\n**Error:** {}\n".format(result.error)

                  output += "\n**Output:** {}\n".format(result.output_dir)

              except Exception as e:
                  # Boundary of the step: report the failure as the step output.
                  output = "Error running quick test: {}".format(e)

      elif action == "run":
          if not BENCHMARK_AVAILABLE:
              output = "Error: Install with: " + INSTALL_COMMAND
          else:
              output = "## Benchmark Run: {}/{}\n\n".format(provider, model)
              output += "Running Terminal-Bench 2.0 with {} concurrent tasks...\n\n".format(concurrent)

              try:
                  runner = BenchmarkRunner()
                  result = runner.run(
                      model=model,
                      provider=provider,
                      n_concurrent=concurrent,
                      npc_name=npc_name_input,
                  )

                  status = "SUCCESS" if result.success else "FAILED"
                  output += "**Status:** {}\n".format(status)
                  output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
                  output += "**Tasks Passed:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
                  output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)
                  output += "**Total Tokens:** {:,}\n".format(result.total_tokens)
                  output += "**Total Cost:** ${:.4f}\n".format(result.total_cost_usd)

                  if result.error:
                      output += "\n**Error:** {}\n".format(result.error)

                  output += "\n**Results saved to:** {}\n".format(result.output_dir)

              except Exception as e:
                  # Boundary of the step: include the traceback so the failure
                  # can be diagnosed from the step output alone.
                  import traceback
                  output = "Error running benchmark: {}\n\n{}".format(e, traceback.format_exc())

      else:
          output = "Unknown action: {}\n\nAvailable: check, run, quick, list".format(action)
|
|
@@ -12,14 +12,14 @@ description: |
|
|
|
12
12
|
- get_page: Get page title, URL, and visible text
|
|
13
13
|
- get_elements: Get interactive elements with their selectors
|
|
14
14
|
- press_key: Press a key (enter, tab, escape, etc)
|
|
15
|
-
Selectors: CSS (
|
|
15
|
+
Selectors: CSS (
|
|
16
16
|
inputs:
|
|
17
|
-
|
|
17
|
+
- action:
|
|
18
18
|
description: "Action: click, type, type_and_enter, set_value, select, wait, scroll, get_text, get_page, get_elements, press_key"
|
|
19
|
-
|
|
19
|
+
- selector:
|
|
20
20
|
description: "CSS selector or XPath (prefix xpath: for XPath)"
|
|
21
21
|
default: ""
|
|
22
|
-
|
|
22
|
+
- value:
|
|
23
23
|
description: "Value for type/select, or scroll direction, or key name"
|
|
24
24
|
default: ""
|
|
25
25
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
jinx_name: "build"
|
|
2
2
|
description: "Build deployment artifacts for NPC team"
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
- target: "flask"
|
|
5
|
+
- outdir: "./build"
|
|
6
|
+
- team: "./npc_team"
|
|
7
|
+
- port: 5337
|
|
8
|
+
- cors: ""
|
|
9
9
|
steps:
|
|
10
10
|
- name: "execute_build"
|
|
11
11
|
engine: "python"
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
jinx_name: chat
|
|
2
2
|
description: Simple chat mode - LLM conversation without tool execution
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
- query: null
|
|
5
|
+
- model: null
|
|
6
|
+
- provider: null
|
|
7
|
+
- stream: true
|
|
8
8
|
|
|
9
9
|
steps:
|
|
10
10
|
- name: chat_response
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# studio.close_pane: declares a single input, paneId, defaulting to "active".
# The Python step performs no pane operation itself; it only stores a fixed
# acknowledgement string in context['output'].
# NOTE(review): presumably the pane is actually closed by the frontend that
# intercepts this jinx - confirm against the caller.
jinx_name: "studio.close_pane"
description: 'Close a pane in NPC Studio. Use paneId="active" or omit to close the active pane.'
inputs:
  - paneId: active
steps:
  - name: "frontend_action"
    engine: "python"
    code: |
      context["output"] = 'Action executed by frontend'
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
jinx_name: cmd
|
|
2
2
|
description: Command mode - LLM generates and executes shell commands
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
- query: null
|
|
5
|
+
- model: null
|
|
6
|
+
- provider: null
|
|
7
|
+
- stream: true
|
|
8
8
|
|
|
9
9
|
steps:
|
|
10
10
|
- name: cmd_execute
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
jinx_name: "compile"
|
|
2
2
|
description: "Compile NPC profiles"
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
- npc_file_path: ""
|
|
5
|
+
- npc_team_dir: "./npc_team"
|
|
6
6
|
steps:
|
|
7
7
|
- name: "compile_npcs"
|
|
8
8
|
engine: "python"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
jinx_name: "compress"
|
|
2
2
|
description: "Manages conversation and knowledge context. Defaults to compacting context. Use flags for other operations."
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
- flush: ""
|
|
5
|
+
- sleep: False
|
|
6
|
+
- dream: False
|
|
7
|
+
- ops: ""
|
|
8
|
+
- model: ""
|
|
9
|
+
- provider: ""
|
|
10
10
|
steps:
|
|
11
11
|
- name: "manage_context_and_memory"
|
|
12
12
|
engine: "python"
|
|
@@ -53,8 +53,8 @@ steps:
|
|
|
53
53
|
operations_config = [op.strip() for op in operations_str.split(',')] if operations_str else None
|
|
54
54
|
if not llm_model and current_npc: llm_model = current_npc.model
|
|
55
55
|
if not llm_provider and current_npc: llm_provider = current_npc.provider
|
|
56
|
-
if not llm_model: llm_model = "
|
|
57
|
-
if not llm_provider: llm_provider = "
|
|
56
|
+
if not llm_model: llm_model = state.chat_model if state else "llama3.2"
|
|
57
|
+
if not llm_provider: llm_provider = state.chat_provider if state else "ollama"
|
|
58
58
|
|
|
59
59
|
team_name = current_team.name if current_team else "__none__"
|
|
60
60
|
npc_name = current_npc.name if current_npc else "__none__"
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
jinx_name: convene
|
|
2
2
|
description: Run a cycle of discussions between NPCs on a topic. The orchestrator convenes agents to discuss and synthesize.
|
|
3
3
|
inputs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
- topic: ""
|
|
5
|
+
- npcs: "alicanto,corca,guac"
|
|
6
|
+
- rounds: 3
|
|
7
|
+
- model: null
|
|
8
|
+
- provider: null
|
|
9
9
|
steps:
|
|
10
10
|
- name: convene_discussion
|
|
11
11
|
engine: python
|
|
@@ -21,8 +21,8 @@ steps:
|
|
|
21
21
|
team = context.get('team')
|
|
22
22
|
messages = context.get('messages', [])
|
|
23
23
|
|
|
24
|
-
model = context.get('model') or (npc.model if npc else '
|
|
25
|
-
provider = context.get('provider') or (npc.provider if npc else '
|
|
24
|
+
model = context.get('model') or (npc.model if npc else (state.chat_model if state else 'llama3.2'))
|
|
25
|
+
provider = context.get('provider') or (npc.provider if npc else (state.chat_provider if state else 'ollama'))
|
|
26
26
|
|
|
27
27
|
if not topic:
|
|
28
28
|
context['output'] = """Usage: /convene <topic>
|