npcsh-1.1.16-py3-none-any.whl → npcsh-1.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217):
  1. npcsh/_state.py +138 -100
  2. npcsh/alicanto.py +2 -2
  3. npcsh/benchmark/__init__.py +28 -0
  4. npcsh/benchmark/npcsh_agent.py +296 -0
  5. npcsh/benchmark/runner.py +611 -0
  6. npcsh/benchmark/templates/install-npcsh.sh.j2 +35 -0
  7. npcsh/build.py +2 -4
  8. npcsh/completion.py +2 -6
  9. npcsh/config.py +1 -3
  10. npcsh/conversation_viewer.py +389 -0
  11. npcsh/corca.py +0 -1
  12. npcsh/execution.py +0 -1
  13. npcsh/guac.py +0 -1
  14. npcsh/mcp_helpers.py +2 -3
  15. npcsh/mcp_server.py +5 -10
  16. npcsh/npc.py +10 -11
  17. npcsh/npc_team/jinxs/bin/benchmark.jinx +146 -0
  18. npcsh/npc_team/jinxs/bin/nql.jinx +7 -7
  19. npcsh/npc_team/jinxs/bin/roll.jinx +20 -23
  20. npcsh/npc_team/jinxs/bin/sample.jinx +6 -7
  21. npcsh/npc_team/jinxs/bin/sync.jinx +6 -6
  22. npcsh/npc_team/jinxs/bin/vixynt.jinx +8 -8
  23. npcsh/npc_team/jinxs/incognide/add_tab.jinx +11 -0
  24. npcsh/npc_team/jinxs/incognide/close_pane.jinx +9 -0
  25. npcsh/npc_team/jinxs/incognide/close_tab.jinx +10 -0
  26. npcsh/npc_team/jinxs/incognide/confirm.jinx +10 -0
  27. npcsh/npc_team/jinxs/incognide/focus_pane.jinx +9 -0
  28. npcsh/npc_team/jinxs/{npc_studio/npc-studio.jinx → incognide/incognide.jinx} +2 -2
  29. npcsh/npc_team/jinxs/incognide/list_panes.jinx +8 -0
  30. npcsh/npc_team/jinxs/incognide/navigate.jinx +10 -0
  31. npcsh/npc_team/jinxs/incognide/notify.jinx +10 -0
  32. npcsh/npc_team/jinxs/incognide/open_pane.jinx +13 -0
  33. npcsh/npc_team/jinxs/incognide/read_pane.jinx +9 -0
  34. npcsh/npc_team/jinxs/incognide/run_terminal.jinx +10 -0
  35. npcsh/npc_team/jinxs/incognide/send_message.jinx +10 -0
  36. npcsh/npc_team/jinxs/incognide/split_pane.jinx +12 -0
  37. npcsh/npc_team/jinxs/incognide/switch_npc.jinx +10 -0
  38. npcsh/npc_team/jinxs/incognide/switch_tab.jinx +10 -0
  39. npcsh/npc_team/jinxs/incognide/write_file.jinx +11 -0
  40. npcsh/npc_team/jinxs/incognide/zen_mode.jinx +9 -0
  41. npcsh/npc_team/jinxs/lib/browser/browser_action.jinx +4 -4
  42. npcsh/npc_team/jinxs/lib/browser/browser_screenshot.jinx +1 -1
  43. npcsh/npc_team/jinxs/lib/browser/open_browser.jinx +2 -2
  44. npcsh/npc_team/jinxs/lib/computer_use/click.jinx +2 -2
  45. npcsh/npc_team/jinxs/lib/computer_use/key_press.jinx +1 -1
  46. npcsh/npc_team/jinxs/lib/computer_use/launch_app.jinx +1 -1
  47. npcsh/npc_team/jinxs/lib/computer_use/screenshot.jinx +1 -1
  48. npcsh/npc_team/jinxs/lib/computer_use/trigger.jinx +2 -2
  49. npcsh/npc_team/jinxs/lib/computer_use/type_text.jinx +1 -1
  50. npcsh/npc_team/jinxs/lib/computer_use/wait.jinx +1 -1
  51. npcsh/npc_team/jinxs/lib/core/chat.jinx +4 -4
  52. npcsh/npc_team/jinxs/lib/core/cmd.jinx +4 -4
  53. npcsh/npc_team/jinxs/lib/core/compress.jinx +8 -8
  54. npcsh/npc_team/jinxs/lib/core/edit_file.jinx +3 -0
  55. npcsh/npc_team/jinxs/lib/core/ots.jinx +7 -7
  56. npcsh/npc_team/jinxs/lib/core/search/db_search.jinx +348 -0
  57. npcsh/npc_team/jinxs/lib/core/search/file_search.jinx +339 -0
  58. npcsh/npc_team/jinxs/lib/core/search/kg_search.jinx +418 -0
  59. npcsh/npc_team/jinxs/lib/core/search/mem_review.jinx +73 -0
  60. npcsh/npc_team/jinxs/lib/core/search/mem_search.jinx +388 -0
  61. npcsh/npc_team/jinxs/lib/core/search/web_search.jinx +283 -0
  62. npcsh/npc_team/jinxs/lib/core/search.jinx +52 -129
  63. npcsh/npc_team/jinxs/lib/core/sh.jinx +1 -1
  64. npcsh/npc_team/jinxs/lib/core/sleep.jinx +29 -18
  65. npcsh/npc_team/jinxs/lib/core/sql.jinx +15 -11
  66. npcsh/npc_team/jinxs/lib/orchestration/convene.jinx +7 -7
  67. npcsh/npc_team/jinxs/lib/orchestration/delegate.jinx +8 -9
  68. npcsh/npc_team/jinxs/lib/research/paper_search.jinx +389 -78
  69. npcsh/npc_team/jinxs/lib/research/semantic_scholar.jinx +373 -56
  70. npcsh/npc_team/jinxs/lib/utils/build.jinx +5 -5
  71. npcsh/npc_team/jinxs/lib/utils/compile.jinx +2 -2
  72. npcsh/npc_team/jinxs/lib/utils/help.jinx +1 -1
  73. npcsh/npc_team/jinxs/lib/utils/init.jinx +5 -5
  74. npcsh/npc_team/jinxs/lib/utils/jinxs.jinx +300 -145
  75. npcsh/npc_team/jinxs/lib/utils/serve.jinx +2 -2
  76. npcsh/npc_team/jinxs/lib/utils/set.jinx +2 -2
  77. npcsh/npc_team/jinxs/lib/utils/switch.jinx +3 -3
  78. npcsh/npc_team/jinxs/lib/utils/switches.jinx +1 -1
  79. npcsh/npc_team/jinxs/lib/utils/teamviz.jinx +2 -2
  80. npcsh/npc_team/jinxs/modes/alicanto.jinx +356 -0
  81. npcsh/npc_team/jinxs/modes/arxiv.jinx +720 -0
  82. npcsh/npc_team/jinxs/modes/corca.jinx +430 -0
  83. npcsh/npc_team/jinxs/modes/guac.jinx +544 -0
  84. npcsh/npc_team/jinxs/modes/plonk.jinx +379 -0
  85. npcsh/npc_team/jinxs/modes/pti.jinx +357 -0
  86. npcsh/npc_team/jinxs/modes/reattach.jinx +291 -0
  87. npcsh/npc_team/jinxs/modes/spool.jinx +350 -0
  88. npcsh/npc_team/jinxs/modes/wander.jinx +455 -0
  89. {npcsh-1.1.16.data/data/npcsh/npc_team → npcsh/npc_team/jinxs/modes}/yap.jinx +8 -2
  90. npcsh/npc_team/sibiji.npc +1 -1
  91. npcsh/npcsh.py +87 -46
  92. npcsh/plonk.py +0 -1
  93. npcsh/pti.py +0 -1
  94. npcsh/routes.py +1 -3
  95. npcsh/spool.py +0 -1
  96. npcsh/ui.py +0 -1
  97. npcsh/wander.py +0 -1
  98. npcsh/yap.py +0 -1
  99. npcsh-1.1.18.data/data/npcsh/npc_team/add_tab.jinx +11 -0
  100. npcsh-1.1.18.data/data/npcsh/npc_team/alicanto.jinx +356 -0
  101. npcsh-1.1.18.data/data/npcsh/npc_team/arxiv.jinx +720 -0
  102. npcsh-1.1.18.data/data/npcsh/npc_team/benchmark.jinx +146 -0
  103. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_action.jinx +4 -4
  104. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_screenshot.jinx +1 -1
  105. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/build.jinx +5 -5
  106. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/chat.jinx +4 -4
  107. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/click.jinx +2 -2
  108. npcsh-1.1.18.data/data/npcsh/npc_team/close_pane.jinx +9 -0
  109. npcsh-1.1.18.data/data/npcsh/npc_team/close_tab.jinx +10 -0
  110. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/cmd.jinx +4 -4
  111. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compile.jinx +2 -2
  112. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compress.jinx +8 -8
  113. npcsh-1.1.18.data/data/npcsh/npc_team/confirm.jinx +10 -0
  114. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/convene.jinx +7 -7
  115. npcsh-1.1.18.data/data/npcsh/npc_team/corca.jinx +430 -0
  116. npcsh-1.1.18.data/data/npcsh/npc_team/db_search.jinx +348 -0
  117. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/delegate.jinx +8 -9
  118. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/edit_file.jinx +3 -0
  119. npcsh-1.1.18.data/data/npcsh/npc_team/file_search.jinx +339 -0
  120. npcsh-1.1.18.data/data/npcsh/npc_team/focus_pane.jinx +9 -0
  121. npcsh-1.1.18.data/data/npcsh/npc_team/guac.jinx +544 -0
  122. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/help.jinx +1 -1
  123. npcsh-1.1.16.data/data/npcsh/npc_team/npc-studio.jinx → npcsh-1.1.18.data/data/npcsh/npc_team/incognide.jinx +2 -2
  124. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/init.jinx +5 -5
  125. npcsh-1.1.18.data/data/npcsh/npc_team/jinxs.jinx +331 -0
  126. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/key_press.jinx +1 -1
  127. npcsh-1.1.18.data/data/npcsh/npc_team/kg_search.jinx +418 -0
  128. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/launch_app.jinx +1 -1
  129. npcsh-1.1.18.data/data/npcsh/npc_team/list_panes.jinx +8 -0
  130. npcsh-1.1.18.data/data/npcsh/npc_team/mem_review.jinx +73 -0
  131. npcsh-1.1.18.data/data/npcsh/npc_team/mem_search.jinx +388 -0
  132. npcsh-1.1.18.data/data/npcsh/npc_team/navigate.jinx +10 -0
  133. npcsh-1.1.18.data/data/npcsh/npc_team/notify.jinx +10 -0
  134. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/nql.jinx +7 -7
  135. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/open_browser.jinx +2 -2
  136. npcsh-1.1.18.data/data/npcsh/npc_team/open_pane.jinx +13 -0
  137. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/ots.jinx +7 -7
  138. npcsh-1.1.18.data/data/npcsh/npc_team/paper_search.jinx +412 -0
  139. npcsh-1.1.18.data/data/npcsh/npc_team/plonk.jinx +379 -0
  140. npcsh-1.1.18.data/data/npcsh/npc_team/pti.jinx +357 -0
  141. npcsh-1.1.18.data/data/npcsh/npc_team/read_pane.jinx +9 -0
  142. npcsh-1.1.18.data/data/npcsh/npc_team/reattach.jinx +291 -0
  143. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/roll.jinx +20 -23
  144. npcsh-1.1.18.data/data/npcsh/npc_team/run_terminal.jinx +10 -0
  145. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sample.jinx +6 -7
  146. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/screenshot.jinx +1 -1
  147. npcsh-1.1.18.data/data/npcsh/npc_team/search.jinx +54 -0
  148. npcsh-1.1.18.data/data/npcsh/npc_team/semantic_scholar.jinx +386 -0
  149. npcsh-1.1.18.data/data/npcsh/npc_team/send_message.jinx +10 -0
  150. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/serve.jinx +2 -2
  151. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/set.jinx +2 -2
  152. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sh.jinx +1 -1
  153. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.npc +1 -1
  154. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sleep.jinx +29 -18
  155. npcsh-1.1.18.data/data/npcsh/npc_team/split_pane.jinx +12 -0
  156. npcsh-1.1.18.data/data/npcsh/npc_team/spool.jinx +350 -0
  157. npcsh-1.1.18.data/data/npcsh/npc_team/sql.jinx +20 -0
  158. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switch.jinx +3 -3
  159. npcsh-1.1.18.data/data/npcsh/npc_team/switch_npc.jinx +10 -0
  160. npcsh-1.1.18.data/data/npcsh/npc_team/switch_tab.jinx +10 -0
  161. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switches.jinx +1 -1
  162. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sync.jinx +6 -6
  163. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/teamviz.jinx +2 -2
  164. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/trigger.jinx +2 -2
  165. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/type_text.jinx +1 -1
  166. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/vixynt.jinx +8 -8
  167. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/wait.jinx +1 -1
  168. npcsh-1.1.18.data/data/npcsh/npc_team/wander.jinx +455 -0
  169. npcsh-1.1.18.data/data/npcsh/npc_team/web_search.jinx +283 -0
  170. npcsh-1.1.18.data/data/npcsh/npc_team/write_file.jinx +11 -0
  171. {npcsh/npc_team/jinxs/bin → npcsh-1.1.18.data/data/npcsh/npc_team}/yap.jinx +8 -2
  172. npcsh-1.1.18.data/data/npcsh/npc_team/zen_mode.jinx +9 -0
  173. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/METADATA +99 -7
  174. npcsh-1.1.18.dist-info/RECORD +235 -0
  175. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/WHEEL +1 -1
  176. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/entry_points.txt +2 -3
  177. npcsh/npc_team/jinxs/bin/spool.jinx +0 -161
  178. npcsh/npc_team/jinxs/bin/wander.jinx +0 -152
  179. npcsh/npc_team/jinxs/lib/research/arxiv.jinx +0 -76
  180. npcsh-1.1.16.data/data/npcsh/npc_team/arxiv.jinx +0 -76
  181. npcsh-1.1.16.data/data/npcsh/npc_team/jinxs.jinx +0 -176
  182. npcsh-1.1.16.data/data/npcsh/npc_team/paper_search.jinx +0 -101
  183. npcsh-1.1.16.data/data/npcsh/npc_team/search.jinx +0 -131
  184. npcsh-1.1.16.data/data/npcsh/npc_team/semantic_scholar.jinx +0 -69
  185. npcsh-1.1.16.data/data/npcsh/npc_team/spool.jinx +0 -161
  186. npcsh-1.1.16.data/data/npcsh/npc_team/sql.jinx +0 -16
  187. npcsh-1.1.16.data/data/npcsh/npc_team/wander.jinx +0 -152
  188. npcsh-1.1.16.dist-info/RECORD +0 -170
  189. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.npc +0 -0
  190. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.png +0 -0
  191. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/close_browser.jinx +0 -0
  192. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.npc +0 -0
  193. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.png +0 -0
  194. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca_example.png +0 -0
  195. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic.npc +0 -0
  196. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic4.png +0 -0
  197. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.npc +0 -0
  198. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.png +0 -0
  199. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
  200. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.png +0 -0
  201. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/load_file.jinx +0 -0
  202. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
  203. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
  204. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/paste.jinx +0 -0
  205. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.npc +0 -0
  206. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.png +0 -0
  207. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
  208. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.png +0 -0
  209. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/python.jinx +0 -0
  210. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/shh.jinx +0 -0
  211. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.png +0 -0
  212. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/spool.png +0 -0
  213. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/usage.jinx +0 -0
  214. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/verbose.jinx +0 -0
  215. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/yap.png +0 -0
  216. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/licenses/LICENSE +0 -0
  217. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,146 @@
1
+ jinx_name: benchmark
2
+ description: Run Terminal-Bench evaluation to benchmark npcsh performance with different models
3
+ inputs:
4
+ - model: ""
5
+ - provider: ""
6
+ - action: "check"
7
+ - concurrent: "4"
8
+ - npc_name: ""
9
+
10
+ steps:
11
+ - name: run_benchmark
12
+ engine: python
13
+ code: |
14
+ import os
15
+ import sys
16
+
17
+ action = {{ action | default("check") | tojson }}.strip().lower()
18
+ model = {{ model | default("") | tojson }}.strip()
19
+ provider = {{ provider | default("") | tojson }}.strip()
20
+ concurrent = int({{ concurrent | default("4") | tojson }} or "4")
21
+ npc_name_input = {{ npc_name | default("") | tojson }}.strip() or None
22
+
23
+ if not model:
24
+ model = npc.model if npc and npc.model else ""
25
+ if not provider:
26
+ provider = npc.provider if npc and npc.provider else "anthropic"
27
+
28
+ try:
29
+ from npcsh.benchmark import BenchmarkRunner, run_benchmark
30
+ BENCHMARK_AVAILABLE = True
31
+ except ImportError:
32
+ BENCHMARK_AVAILABLE = False
33
+
34
+ if action == "check":
35
+ output = "## Terminal-Bench Integration Status\n\n"
36
+
37
+ if not BENCHMARK_AVAILABLE:
38
+ output += "**Status:** Benchmark module not fully loaded (harbor not installed)\n\n"
39
+ else:
40
+ output += "**Status:** Ready\n\n"
41
+
42
+ if BENCHMARK_AVAILABLE:
43
+ runner = BenchmarkRunner()
44
+ deps = runner.check_dependencies()
45
+ output += "### Dependencies:\n"
46
+ for dep, installed in deps.items():
47
+ status = "Installed" if installed else "Not installed"
48
+ output += "- **{}**: {}\n".format(dep, status)
49
+
50
+ if not all(deps.values()):
51
+ output += "\n### Installation:\n"
52
+ output += "```bash\n"
53
+ output += "pip install harbor terminal-bench\n"
54
+ output += "```\n"
55
+
56
+ output += "\n### Usage:\n"
57
+ output += "```\n"
58
+ output += "/benchmark action=quick\n"
59
+ output += "/benchmark action=run model=gpt-4o provider=openai\n"
60
+ output += "/benchmark action=list\n"
61
+ output += "```\n"
62
+
63
+ elif action == "list":
64
+ if not BENCHMARK_AVAILABLE:
65
+ output = "Error: Benchmark module not available. Run `/benchmark` first."
66
+ else:
67
+ runner = BenchmarkRunner()
68
+ runs = runner.list_past_runs()
69
+
70
+ if not runs:
71
+ output = "No past benchmark runs found."
72
+ else:
73
+ output = "## Past Benchmark Runs ({} total)\n\n".format(len(runs))
74
+ for run in runs[:10]:
75
+ timestamp = run.get('timestamp', 'unknown')[:19]
76
+ model_name = run.get('model', 'unknown')
77
+ result = run.get('result', {})
78
+ accuracy = result.get('accuracy', 0)
79
+ passed = result.get('passed_tasks', 0)
80
+ total = result.get('total_tasks', 0)
81
+
82
+ output += "### {}\n".format(timestamp)
83
+ output += "- **Model:** {}\n".format(model_name)
84
+ output += "- **Accuracy:** {:.1%}\n".format(accuracy)
85
+ output += "- **Tasks:** {}/{}\n\n".format(passed, total)
86
+
87
+ elif action == "quick":
88
+ if not BENCHMARK_AVAILABLE:
89
+ output = "Error: Install with: pip install harbor terminal-bench"
90
+ else:
91
+ output = "## Quick Test: {}/{}\n\n".format(provider, model)
92
+ output += "Running quick test with 3 tasks...\n\n"
93
+
94
+ try:
95
+ from npcsh.benchmark import quick_test
96
+ result = quick_test(model=model, provider=provider)
97
+
98
+ status = "PASS" if result.success else "FAIL"
99
+ output += "**Status:** {}\n".format(status)
100
+ output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
101
+ output += "**Tasks:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
102
+ output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)
103
+
104
+ if result.error:
105
+ output += "\n**Error:** {}\n".format(result.error)
106
+
107
+ output += "\n**Output:** {}\n".format(result.output_dir)
108
+
109
+ except Exception as e:
110
+ output = "Error running quick test: {}".format(e)
111
+
112
+ elif action == "run":
113
+ if not BENCHMARK_AVAILABLE:
114
+ output = "Error: Install with: pip install harbor terminal-bench"
115
+ else:
116
+ output = "## Benchmark Run: {}/{}\n\n".format(provider, model)
117
+ output += "Running Terminal-Bench 2.0 with {} concurrent tasks...\n\n".format(concurrent)
118
+
119
+ try:
120
+ runner = BenchmarkRunner()
121
+ result = runner.run(
122
+ model=model,
123
+ provider=provider,
124
+ n_concurrent=concurrent,
125
+ npc_name=npc_name_input,
126
+ )
127
+
128
+ status = "SUCCESS" if result.success else "FAILED"
129
+ output += "**Status:** {}\n".format(status)
130
+ output += "**Accuracy:** {:.1%}\n".format(result.accuracy)
131
+ output += "**Tasks Passed:** {}/{}\n".format(result.passed_tasks, result.total_tasks)
132
+ output += "**Duration:** {:.1f}s\n".format(result.duration_seconds)
133
+ output += "**Total Tokens:** {:,}\n".format(result.total_tokens)
134
+ output += "**Total Cost:** ${:.4f}\n".format(result.total_cost_usd)
135
+
136
+ if result.error:
137
+ output += "\n**Error:** {}\n".format(result.error)
138
+
139
+ output += "\n**Results saved to:** {}\n".format(result.output_dir)
140
+
141
+ except Exception as e:
142
+ import traceback
143
+ output = "Error running benchmark: {}\n\n{}".format(e, traceback.format_exc())
144
+
145
+ else:
146
+ output = "Unknown action: {}\n\nAvailable: check, run, quick, list".format(action)
@@ -12,14 +12,14 @@ description: |
12
12
  - get_page: Get page title, URL, and visible text
13
13
  - get_elements: Get interactive elements with their selectors
14
14
  - press_key: Press a key (enter, tab, escape, etc)
15
- Selectors: CSS (#id, .class, input[name="x"]) or xpath://... for XPath
15
+ Selectors: CSS (
16
16
  inputs:
17
- - action:
17
+ - action:
18
18
  description: "Action: click, type, type_and_enter, set_value, select, wait, scroll, get_text, get_page, get_elements, press_key"
19
- - selector:
19
+ - selector:
20
20
  description: "CSS selector or XPath (prefix xpath: for XPath)"
21
21
  default: ""
22
- - value:
22
+ - value:
23
23
  description: "Value for type/select, or scroll direction, or key name"
24
24
  default: ""
25
25
 
@@ -1,7 +1,7 @@
1
1
  jinx_name: browser_screenshot
2
2
  description: Take a screenshot of the current browser page.
3
3
  inputs:
4
- - filename:
4
+ - filename:
5
5
  description: "Optional filename for screenshot"
6
6
  default: ""
7
7
 
@@ -1,11 +1,11 @@
1
1
  jinx_name: "build"
2
2
  description: "Build deployment artifacts for NPC team"
3
3
  inputs:
4
- - target: "flask" # The type of deployment target (e.g., flask, docker, cli, static).
5
- - outdir: "./build" # The output directory for built artifacts.
6
- - team: "./npc_team" # The path to the NPC team directory.
7
- - port: 5337 # The port for flask server builds.
8
- - cors: "" # Comma-separated CORS origins for flask server builds.
4
+ - target: "flask"
5
+ - outdir: "./build"
6
+ - team: "./npc_team"
7
+ - port: 5337
8
+ - cors: ""
9
9
  steps:
10
10
  - name: "execute_build"
11
11
  engine: "python"
@@ -1,10 +1,10 @@
1
1
  jinx_name: chat
2
2
  description: Simple chat mode - LLM conversation without tool execution
3
3
  inputs:
4
- - query: null
5
- - model: null
6
- - provider: null
7
- - stream: true
4
+ - query: null
5
+ - model: null
6
+ - provider: null
7
+ - stream: true
8
8
 
9
9
  steps:
10
10
  - name: chat_response
@@ -1,8 +1,8 @@
1
1
  jinx_name: click
2
2
  description: Click at screen coordinates (0-100 percentage)
3
3
  inputs:
4
- - x: 50 # X coordinate as percentage (0-100)
5
- - y: 50 # Y coordinate as percentage (0-100)
4
+ - x: 50
5
+ - y: 50
6
6
 
7
7
  steps:
8
8
  - name: perform_click
@@ -0,0 +1,9 @@
1
+ jinx_name: studio.close_pane
2
+ description: Close a pane in NPC Studio. Use paneId="active" or omit to close the active pane.
3
+ inputs:
4
+ - paneId: "active"
5
+ steps:
6
+ - name: frontend_action
7
+ engine: python
8
+ code: |
9
+ context['output'] = "Action executed by frontend"
@@ -0,0 +1,10 @@
1
+ jinx_name: studio.close_tab
2
+ description: Close a specific tab in a pane.
3
+ inputs:
4
+ - paneId: "active"
5
+ - tabIndex: 0
6
+ steps:
7
+ - name: frontend_action
8
+ engine: python
9
+ code: |
10
+ context['output'] = "Action executed by frontend"
@@ -1,10 +1,10 @@
1
1
  jinx_name: cmd
2
2
  description: Command mode - LLM generates and executes shell commands
3
3
  inputs:
4
- - query: null
5
- - model: null
6
- - provider: null
7
- - stream: true
4
+ - query: null
5
+ - model: null
6
+ - provider: null
7
+ - stream: true
8
8
 
9
9
  steps:
10
10
  - name: cmd_execute
@@ -1,8 +1,8 @@
1
1
  jinx_name: "compile"
2
2
  description: "Compile NPC profiles"
3
3
  inputs:
4
- - npc_file_path: "" # Optional path to a specific NPC file to compile.
5
- - npc_team_dir: "./npc_team" # Directory containing NPC profiles to compile, if no specific file is given.
4
+ - npc_file_path: ""
5
+ - npc_team_dir: "./npc_team"
6
6
  steps:
7
7
  - name: "compile_npcs"
8
8
  engine: "python"
@@ -1,12 +1,12 @@
1
1
  jinx_name: "compress"
2
2
  description: "Manages conversation and knowledge context. Defaults to compacting context. Use flags for other operations."
3
3
  inputs:
4
- - flush: "" # The number of recent messages to flush.
5
- - sleep: False # If true, evolves the knowledge graph.
6
- - dream: False # Used with --sleep. Runs creative synthesis.
7
- - ops: "" # Used with --sleep. Comma-separated list of KG operations.
8
- - model: "" # Used with --sleep. LLM model for KG evolution.
9
- - provider: "" # Used with --sleep. LLM provider for KG evolution.
4
+ - flush: ""
5
+ - sleep: False
6
+ - dream: False
7
+ - ops: ""
8
+ - model: ""
9
+ - provider: ""
10
10
  steps:
11
11
  - name: "manage_context_and_memory"
12
12
  engine: "python"
@@ -53,8 +53,8 @@ steps:
53
53
  operations_config = [op.strip() for op in operations_str.split(',')] if operations_str else None
54
54
  if not llm_model and current_npc: llm_model = current_npc.model
55
55
  if not llm_provider and current_npc: llm_provider = current_npc.provider
56
- if not llm_model: llm_model = "gemini-1.5-pro"
57
- if not llm_provider: llm_provider = "gemini"
56
+ if not llm_model: llm_model = state.chat_model if state else "llama3.2"
57
+ if not llm_provider: llm_provider = state.chat_provider if state else "ollama"
58
58
 
59
59
  team_name = current_team.name if current_team else "__none__"
60
60
  npc_name = current_npc.name if current_npc else "__none__"
@@ -0,0 +1,10 @@
1
+ jinx_name: studio.confirm
2
+ description: Show a confirmation dialog and return the user's choice.
3
+ inputs:
4
+ - message: ""
5
+ - title: "Confirm"
6
+ steps:
7
+ - name: frontend_action
8
+ engine: python
9
+ code: |
10
+ context['output'] = "Action executed by frontend"
@@ -1,11 +1,11 @@
1
1
  jinx_name: convene
2
2
  description: Run a cycle of discussions between NPCs on a topic. The orchestrator convenes agents to discuss and synthesize.
3
3
  inputs:
4
- - topic: ""
5
- - npcs: "alicanto,corca,guac"
6
- - rounds: 3
7
- - model: null
8
- - provider: null
4
+ - topic: ""
5
+ - npcs: "alicanto,corca,guac"
6
+ - rounds: 3
7
+ - model: null
8
+ - provider: null
9
9
  steps:
10
10
  - name: convene_discussion
11
11
  engine: python
@@ -21,8 +21,8 @@ steps:
21
21
  team = context.get('team')
22
22
  messages = context.get('messages', [])
23
23
 
24
- model = context.get('model') or (npc.model if npc else 'gemini-1.5-flash')
25
- provider = context.get('provider') or (npc.provider if npc else 'gemini')
24
+ model = context.get('model') or (npc.model if npc else (state.chat_model if state else 'llama3.2'))
25
+ provider = context.get('provider') or (npc.provider if npc else (state.chat_provider if state else 'ollama'))
26
26
 
27
27
  if not topic:
28
28
  context['output'] = """Usage: /convene <topic>