npcsh 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217)
  1. npcsh/_state.py +138 -100
  2. npcsh/alicanto.py +2 -2
  3. npcsh/benchmark/__init__.py +28 -0
  4. npcsh/benchmark/npcsh_agent.py +296 -0
  5. npcsh/benchmark/runner.py +611 -0
  6. npcsh/benchmark/templates/install-npcsh.sh.j2 +35 -0
  7. npcsh/build.py +2 -4
  8. npcsh/completion.py +2 -6
  9. npcsh/config.py +1 -3
  10. npcsh/conversation_viewer.py +389 -0
  11. npcsh/corca.py +0 -1
  12. npcsh/execution.py +0 -1
  13. npcsh/guac.py +0 -1
  14. npcsh/mcp_helpers.py +2 -3
  15. npcsh/mcp_server.py +5 -10
  16. npcsh/npc.py +10 -11
  17. npcsh/npc_team/jinxs/bin/benchmark.jinx +146 -0
  18. npcsh/npc_team/jinxs/bin/nql.jinx +7 -7
  19. npcsh/npc_team/jinxs/bin/roll.jinx +20 -23
  20. npcsh/npc_team/jinxs/bin/sample.jinx +6 -7
  21. npcsh/npc_team/jinxs/bin/sync.jinx +6 -6
  22. npcsh/npc_team/jinxs/bin/vixynt.jinx +8 -8
  23. npcsh/npc_team/jinxs/incognide/add_tab.jinx +11 -0
  24. npcsh/npc_team/jinxs/incognide/close_pane.jinx +9 -0
  25. npcsh/npc_team/jinxs/incognide/close_tab.jinx +10 -0
  26. npcsh/npc_team/jinxs/incognide/confirm.jinx +10 -0
  27. npcsh/npc_team/jinxs/incognide/focus_pane.jinx +9 -0
  28. npcsh/npc_team/jinxs/{npc_studio/npc-studio.jinx → incognide/incognide.jinx} +2 -2
  29. npcsh/npc_team/jinxs/incognide/list_panes.jinx +8 -0
  30. npcsh/npc_team/jinxs/incognide/navigate.jinx +10 -0
  31. npcsh/npc_team/jinxs/incognide/notify.jinx +10 -0
  32. npcsh/npc_team/jinxs/incognide/open_pane.jinx +13 -0
  33. npcsh/npc_team/jinxs/incognide/read_pane.jinx +9 -0
  34. npcsh/npc_team/jinxs/incognide/run_terminal.jinx +10 -0
  35. npcsh/npc_team/jinxs/incognide/send_message.jinx +10 -0
  36. npcsh/npc_team/jinxs/incognide/split_pane.jinx +12 -0
  37. npcsh/npc_team/jinxs/incognide/switch_npc.jinx +10 -0
  38. npcsh/npc_team/jinxs/incognide/switch_tab.jinx +10 -0
  39. npcsh/npc_team/jinxs/incognide/write_file.jinx +11 -0
  40. npcsh/npc_team/jinxs/incognide/zen_mode.jinx +9 -0
  41. npcsh/npc_team/jinxs/lib/browser/browser_action.jinx +4 -4
  42. npcsh/npc_team/jinxs/lib/browser/browser_screenshot.jinx +1 -1
  43. npcsh/npc_team/jinxs/lib/browser/open_browser.jinx +2 -2
  44. npcsh/npc_team/jinxs/lib/computer_use/click.jinx +2 -2
  45. npcsh/npc_team/jinxs/lib/computer_use/key_press.jinx +1 -1
  46. npcsh/npc_team/jinxs/lib/computer_use/launch_app.jinx +1 -1
  47. npcsh/npc_team/jinxs/lib/computer_use/screenshot.jinx +1 -1
  48. npcsh/npc_team/jinxs/lib/computer_use/trigger.jinx +2 -2
  49. npcsh/npc_team/jinxs/lib/computer_use/type_text.jinx +1 -1
  50. npcsh/npc_team/jinxs/lib/computer_use/wait.jinx +1 -1
  51. npcsh/npc_team/jinxs/lib/core/chat.jinx +4 -4
  52. npcsh/npc_team/jinxs/lib/core/cmd.jinx +4 -4
  53. npcsh/npc_team/jinxs/lib/core/compress.jinx +8 -8
  54. npcsh/npc_team/jinxs/lib/core/edit_file.jinx +3 -0
  55. npcsh/npc_team/jinxs/lib/core/ots.jinx +7 -7
  56. npcsh/npc_team/jinxs/lib/core/search/db_search.jinx +348 -0
  57. npcsh/npc_team/jinxs/lib/core/search/file_search.jinx +339 -0
  58. npcsh/npc_team/jinxs/lib/core/search/kg_search.jinx +418 -0
  59. npcsh/npc_team/jinxs/lib/core/search/mem_review.jinx +73 -0
  60. npcsh/npc_team/jinxs/lib/core/search/mem_search.jinx +388 -0
  61. npcsh/npc_team/jinxs/lib/core/search/web_search.jinx +283 -0
  62. npcsh/npc_team/jinxs/lib/core/search.jinx +52 -129
  63. npcsh/npc_team/jinxs/lib/core/sh.jinx +1 -1
  64. npcsh/npc_team/jinxs/lib/core/sleep.jinx +29 -18
  65. npcsh/npc_team/jinxs/lib/core/sql.jinx +15 -11
  66. npcsh/npc_team/jinxs/lib/orchestration/convene.jinx +7 -7
  67. npcsh/npc_team/jinxs/lib/orchestration/delegate.jinx +8 -9
  68. npcsh/npc_team/jinxs/lib/research/paper_search.jinx +389 -78
  69. npcsh/npc_team/jinxs/lib/research/semantic_scholar.jinx +373 -56
  70. npcsh/npc_team/jinxs/lib/utils/build.jinx +5 -5
  71. npcsh/npc_team/jinxs/lib/utils/compile.jinx +2 -2
  72. npcsh/npc_team/jinxs/lib/utils/help.jinx +1 -1
  73. npcsh/npc_team/jinxs/lib/utils/init.jinx +5 -5
  74. npcsh/npc_team/jinxs/lib/utils/jinxs.jinx +300 -145
  75. npcsh/npc_team/jinxs/lib/utils/serve.jinx +2 -2
  76. npcsh/npc_team/jinxs/lib/utils/set.jinx +2 -2
  77. npcsh/npc_team/jinxs/lib/utils/switch.jinx +3 -3
  78. npcsh/npc_team/jinxs/lib/utils/switches.jinx +1 -1
  79. npcsh/npc_team/jinxs/lib/utils/teamviz.jinx +2 -2
  80. npcsh/npc_team/jinxs/modes/alicanto.jinx +356 -0
  81. npcsh/npc_team/jinxs/modes/arxiv.jinx +720 -0
  82. npcsh/npc_team/jinxs/modes/corca.jinx +430 -0
  83. npcsh/npc_team/jinxs/modes/guac.jinx +544 -0
  84. npcsh/npc_team/jinxs/modes/plonk.jinx +379 -0
  85. npcsh/npc_team/jinxs/modes/pti.jinx +357 -0
  86. npcsh/npc_team/jinxs/modes/reattach.jinx +291 -0
  87. npcsh/npc_team/jinxs/modes/spool.jinx +350 -0
  88. npcsh/npc_team/jinxs/modes/wander.jinx +455 -0
  89. {npcsh-1.1.16.data/data/npcsh/npc_team → npcsh/npc_team/jinxs/modes}/yap.jinx +8 -2
  90. npcsh/npc_team/sibiji.npc +1 -1
  91. npcsh/npcsh.py +87 -46
  92. npcsh/plonk.py +0 -1
  93. npcsh/pti.py +0 -1
  94. npcsh/routes.py +1 -3
  95. npcsh/spool.py +0 -1
  96. npcsh/ui.py +0 -1
  97. npcsh/wander.py +0 -1
  98. npcsh/yap.py +0 -1
  99. npcsh-1.1.18.data/data/npcsh/npc_team/add_tab.jinx +11 -0
  100. npcsh-1.1.18.data/data/npcsh/npc_team/alicanto.jinx +356 -0
  101. npcsh-1.1.18.data/data/npcsh/npc_team/arxiv.jinx +720 -0
  102. npcsh-1.1.18.data/data/npcsh/npc_team/benchmark.jinx +146 -0
  103. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_action.jinx +4 -4
  104. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/browser_screenshot.jinx +1 -1
  105. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/build.jinx +5 -5
  106. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/chat.jinx +4 -4
  107. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/click.jinx +2 -2
  108. npcsh-1.1.18.data/data/npcsh/npc_team/close_pane.jinx +9 -0
  109. npcsh-1.1.18.data/data/npcsh/npc_team/close_tab.jinx +10 -0
  110. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/cmd.jinx +4 -4
  111. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compile.jinx +2 -2
  112. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/compress.jinx +8 -8
  113. npcsh-1.1.18.data/data/npcsh/npc_team/confirm.jinx +10 -0
  114. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/convene.jinx +7 -7
  115. npcsh-1.1.18.data/data/npcsh/npc_team/corca.jinx +430 -0
  116. npcsh-1.1.18.data/data/npcsh/npc_team/db_search.jinx +348 -0
  117. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/delegate.jinx +8 -9
  118. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/edit_file.jinx +3 -0
  119. npcsh-1.1.18.data/data/npcsh/npc_team/file_search.jinx +339 -0
  120. npcsh-1.1.18.data/data/npcsh/npc_team/focus_pane.jinx +9 -0
  121. npcsh-1.1.18.data/data/npcsh/npc_team/guac.jinx +544 -0
  122. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/help.jinx +1 -1
  123. npcsh-1.1.16.data/data/npcsh/npc_team/npc-studio.jinx → npcsh-1.1.18.data/data/npcsh/npc_team/incognide.jinx +2 -2
  124. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/init.jinx +5 -5
  125. npcsh-1.1.18.data/data/npcsh/npc_team/jinxs.jinx +331 -0
  126. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/key_press.jinx +1 -1
  127. npcsh-1.1.18.data/data/npcsh/npc_team/kg_search.jinx +418 -0
  128. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/launch_app.jinx +1 -1
  129. npcsh-1.1.18.data/data/npcsh/npc_team/list_panes.jinx +8 -0
  130. npcsh-1.1.18.data/data/npcsh/npc_team/mem_review.jinx +73 -0
  131. npcsh-1.1.18.data/data/npcsh/npc_team/mem_search.jinx +388 -0
  132. npcsh-1.1.18.data/data/npcsh/npc_team/navigate.jinx +10 -0
  133. npcsh-1.1.18.data/data/npcsh/npc_team/notify.jinx +10 -0
  134. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/nql.jinx +7 -7
  135. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/open_browser.jinx +2 -2
  136. npcsh-1.1.18.data/data/npcsh/npc_team/open_pane.jinx +13 -0
  137. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/ots.jinx +7 -7
  138. npcsh-1.1.18.data/data/npcsh/npc_team/paper_search.jinx +412 -0
  139. npcsh-1.1.18.data/data/npcsh/npc_team/plonk.jinx +379 -0
  140. npcsh-1.1.18.data/data/npcsh/npc_team/pti.jinx +357 -0
  141. npcsh-1.1.18.data/data/npcsh/npc_team/read_pane.jinx +9 -0
  142. npcsh-1.1.18.data/data/npcsh/npc_team/reattach.jinx +291 -0
  143. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/roll.jinx +20 -23
  144. npcsh-1.1.18.data/data/npcsh/npc_team/run_terminal.jinx +10 -0
  145. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sample.jinx +6 -7
  146. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/screenshot.jinx +1 -1
  147. npcsh-1.1.18.data/data/npcsh/npc_team/search.jinx +54 -0
  148. npcsh-1.1.18.data/data/npcsh/npc_team/semantic_scholar.jinx +386 -0
  149. npcsh-1.1.18.data/data/npcsh/npc_team/send_message.jinx +10 -0
  150. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/serve.jinx +2 -2
  151. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/set.jinx +2 -2
  152. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sh.jinx +1 -1
  153. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.npc +1 -1
  154. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sleep.jinx +29 -18
  155. npcsh-1.1.18.data/data/npcsh/npc_team/split_pane.jinx +12 -0
  156. npcsh-1.1.18.data/data/npcsh/npc_team/spool.jinx +350 -0
  157. npcsh-1.1.18.data/data/npcsh/npc_team/sql.jinx +20 -0
  158. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switch.jinx +3 -3
  159. npcsh-1.1.18.data/data/npcsh/npc_team/switch_npc.jinx +10 -0
  160. npcsh-1.1.18.data/data/npcsh/npc_team/switch_tab.jinx +10 -0
  161. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/switches.jinx +1 -1
  162. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sync.jinx +6 -6
  163. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/teamviz.jinx +2 -2
  164. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/trigger.jinx +2 -2
  165. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/type_text.jinx +1 -1
  166. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/vixynt.jinx +8 -8
  167. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/wait.jinx +1 -1
  168. npcsh-1.1.18.data/data/npcsh/npc_team/wander.jinx +455 -0
  169. npcsh-1.1.18.data/data/npcsh/npc_team/web_search.jinx +283 -0
  170. npcsh-1.1.18.data/data/npcsh/npc_team/write_file.jinx +11 -0
  171. {npcsh/npc_team/jinxs/bin → npcsh-1.1.18.data/data/npcsh/npc_team}/yap.jinx +8 -2
  172. npcsh-1.1.18.data/data/npcsh/npc_team/zen_mode.jinx +9 -0
  173. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/METADATA +99 -7
  174. npcsh-1.1.18.dist-info/RECORD +235 -0
  175. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/WHEEL +1 -1
  176. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/entry_points.txt +2 -3
  177. npcsh/npc_team/jinxs/bin/spool.jinx +0 -161
  178. npcsh/npc_team/jinxs/bin/wander.jinx +0 -152
  179. npcsh/npc_team/jinxs/lib/research/arxiv.jinx +0 -76
  180. npcsh-1.1.16.data/data/npcsh/npc_team/arxiv.jinx +0 -76
  181. npcsh-1.1.16.data/data/npcsh/npc_team/jinxs.jinx +0 -176
  182. npcsh-1.1.16.data/data/npcsh/npc_team/paper_search.jinx +0 -101
  183. npcsh-1.1.16.data/data/npcsh/npc_team/search.jinx +0 -131
  184. npcsh-1.1.16.data/data/npcsh/npc_team/semantic_scholar.jinx +0 -69
  185. npcsh-1.1.16.data/data/npcsh/npc_team/spool.jinx +0 -161
  186. npcsh-1.1.16.data/data/npcsh/npc_team/sql.jinx +0 -16
  187. npcsh-1.1.16.data/data/npcsh/npc_team/wander.jinx +0 -152
  188. npcsh-1.1.16.dist-info/RECORD +0 -170
  189. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.npc +0 -0
  190. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/alicanto.png +0 -0
  191. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/close_browser.jinx +0 -0
  192. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.npc +0 -0
  193. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca.png +0 -0
  194. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/corca_example.png +0 -0
  195. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic.npc +0 -0
  196. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/frederic4.png +0 -0
  197. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.npc +0 -0
  198. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/guac.png +0 -0
  199. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.npc +0 -0
  200. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/kadiefa.png +0 -0
  201. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/load_file.jinx +0 -0
  202. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh.ctx +0 -0
  203. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/npcsh_sibiji.png +0 -0
  204. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/paste.jinx +0 -0
  205. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.npc +0 -0
  206. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonk.png +0 -0
  207. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.npc +0 -0
  208. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/plonkjr.png +0 -0
  209. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/python.jinx +0 -0
  210. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/shh.jinx +0 -0
  211. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/sibiji.png +0 -0
  212. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/spool.png +0 -0
  213. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/usage.jinx +0 -0
  214. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/verbose.jinx +0 -0
  215. {npcsh-1.1.16.data → npcsh-1.1.18.data}/data/npcsh/npc_team/yap.png +0 -0
  216. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/licenses/LICENSE +0 -0
  217. {npcsh-1.1.16.dist-info → npcsh-1.1.18.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,296 @@
1
+ """
2
+ npcsh Harbor Agent Adapter for Terminal-Bench.
3
+
4
+ This module implements the BaseInstalledAgent interface for running npcsh
5
+ as an agent in Terminal-Bench evaluations.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import shlex
11
+ from pathlib import Path
12
+
13
+ from harbor.agents.installed.base import BaseInstalledAgent, ExecInput
14
+ from harbor.models.agent.context import AgentContext
15
+
16
+
17
# Provider prefixes accepted in ``model_name`` ("<provider>/<model>") mapped to
# the provider name handed to npcsh through NPCSH_CHAT_PROVIDER.
_PROVIDER_MAP = {
    "anthropic": "anthropic",
    "openai": "openai",
    "google": "gemini",
    "gemini": "gemini",
    "deepseek": "deepseek",
    "ollama": "ollama",
    "groq": "groq",
    "openrouter": "openrouter",
}

# API-key environment variables copied from the current process environment
# into the command line that launches npcsh (only those that are set).
_API_KEY_ENV_VARS = (
    "ANTHROPIC_API_KEY",
    "OPENAI_API_KEY",
    "GOOGLE_API_KEY",
    "DEEPSEEK_API_KEY",
    "GROQ_API_KEY",
    "OPENROUTER_API_KEY",
)

_OUTPUT_SUBDIR = "npcsh_output"
_OUTPUT_FILENAME = "output.jsonl"
_RUN_TIMEOUT_SEC = 600  # 10 minute timeout for complex tasks


def _wrap_instruction(instruction: str) -> str:
    """Return the task instruction wrapped with explicit jinx usage directions."""
    return (
        "You have access to jinxs including edit_file (for writing/creating files), "
        "sh (for running shell commands), and python (for running Python code).\n"
        "\n"
        "IMPORTANT: You MUST use these jinxs to complete the task. "
        "Do NOT just output code as text - use the edit_file jinx to actually "
        "write files to disk.\n"
        "\n"
        f"Task: {instruction}\n"
        "\n"
        "Remember: Use edit_file to write any code files. "
        "Use sh to run shell commands like gcc, make, etc."
    )


def _numeric(value):
    """Return ``value`` if it is a real number, otherwise 0.

    Keeps one malformed usage record (``null``, a string, ...) from aborting
    the aggregation of every other record in the log.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value
    return 0


class NpcshAgent(BaseInstalledAgent):
    """
    Harbor agent adapter for npcsh.

    This allows npcsh to be evaluated on Terminal-Bench tasks by:
    1. Installing npcsh in the benchmark container
    2. Running npcsh with the task instruction
    3. Parsing output for token usage and results

    Usage:
        harbor run -d terminal-bench@2.0 \\
            --agent-import-path npcsh.benchmark:NpcshAgent \\
            -m anthropic/claude-sonnet-4-20250514 -n 4
    """

    SUPPORTS_ATIF = True  # Agent Trajectory Interchange Format

    def __init__(self, logs_dir: Path = None, model_name: str = None, logger=None, **kwargs):
        super().__init__(logs_dir=logs_dir, model_name=model_name, logger=logger, **kwargs)

    @staticmethod
    def name() -> str:
        return "npcsh"

    @property
    def _install_agent_template_path(self) -> Path:
        """Path to the jinja template script for installing npcsh in the container."""
        return Path(__file__).parent / "templates" / "install-npcsh.sh.j2"

    def _resolve_provider_and_model(self):
        """Return ``(npcsh_provider, model)`` for this run.

        ``model_name`` of the form "<provider>/<model>" is split on the first
        slash. Otherwise the provider comes from NPCSH_CHAT_PROVIDER and the
        model from ``model_name`` or, when that is unset, NPCSH_CHAT_MODEL.
        Unknown providers are passed through unchanged.
        """
        model_name = self.model_name
        if model_name and "/" in model_name:
            provider, model = model_name.split("/", 1)
        else:
            provider = os.environ.get("NPCSH_CHAT_PROVIDER", "")
            model = model_name or os.environ.get("NPCSH_CHAT_MODEL", "")
        return _PROVIDER_MAP.get(provider.lower(), provider), model

    def _build_run_commands(self, instruction: str, npc_name: str) -> list:
        """Build the command sequence that runs ``npc_name`` on ``instruction``.

        Every interpolated value (API keys, model, provider, NPC name, prompt,
        paths) goes through ``shlex.quote`` so that quotes, ``$`` or backticks
        in a value cannot be interpreted by the shell.
        """
        npcsh_provider, model = self._resolve_provider_and_model()

        # NOTE(review): keys end up in the command string and may therefore be
        # visible in logs; if ExecInput can carry environment variables
        # separately, prefer that - confirm against the harbor API.
        assignments = [
            (env_key, os.environ[env_key])
            for env_key in _API_KEY_ENV_VARS
            if env_key in os.environ
        ]
        assignments += [
            ("NPCSH_CHAT_MODEL", model),
            ("NPCSH_CHAT_PROVIDER", npcsh_provider),
            ("NPCSH_STREAM_OUTPUT", "0"),
            # NPCSH_DEFAULT_MODE=agent enables automatic tool execution
            ("NPCSH_DEFAULT_MODE", "agent"),
        ]
        env_prefix = " ".join(
            f"{key}={shlex.quote(value)}" for key, value in assignments
        )

        output_dir = self.logs_dir / _OUTPUT_SUBDIR
        output_file = output_dir / _OUTPUT_FILENAME

        # Uses the npc CLI, which supports single-command execution; stdout and
        # stderr are merged and copied to the output file via tee.
        npcsh_cmd = (
            f"{env_prefix} "
            f"npc --npc {shlex.quote(npc_name)} "
            f"{shlex.quote(_wrap_instruction(instruction))} "
            f"2>&1 | tee {shlex.quote(str(output_file))}"
        )

        return [
            # Create output directory
            ExecInput(
                command=f"mkdir -p {shlex.quote(str(output_dir))}",
                timeout_sec=30,
            ),
            # Create .npcsh_global file to use global team and avoid interactive prompts
            ExecInput(
                command="touch /app/.npcsh_global",
                timeout_sec=10,
            ),
            ExecInput(
                command=npcsh_cmd,
                timeout_sec=_RUN_TIMEOUT_SEC,
            ),
        ]

    def create_run_agent_commands(self, instruction: str) -> list:
        """
        Create the commands to run npcsh in the container.

        Uses the corca NPC, which has the edit_file tool for writing files.

        Args:
            instruction: The task instruction from Terminal-Bench

        Returns:
            List of ExecInput commands to execute
        """
        return self._build_run_commands(instruction, "corca")

    def populate_context_post_run(self, context: AgentContext) -> None:
        """
        Populate the context with results of the agent execution.

        Reads the output file line by line; every line that parses as a JSON
        object with a ``usage`` mapping contributes its ``input_tokens``,
        ``output_tokens`` and ``cost_usd`` to the totals. Non-JSON lines
        (regular program output) and malformed usage records are skipped.

        Args:
            context: The AgentContext to populate with metrics
        """
        output_file = self.logs_dir / _OUTPUT_SUBDIR / _OUTPUT_FILENAME

        total_input_tokens = 0
        total_output_tokens = 0
        total_cost_usd = 0.0

        if output_file.exists():
            try:
                # errors="replace": the log is raw tee output and may contain
                # bytes that are not valid UTF-8.
                with open(output_file, "r", encoding="utf-8", errors="replace") as f:
                    for line in f:
                        if not line.strip():
                            continue
                        try:
                            event = json.loads(line)
                        except ValueError:
                            # Not JSON, just regular output
                            continue
                        if not isinstance(event, dict):
                            continue
                        usage = event.get("usage")
                        if not isinstance(usage, dict):
                            continue
                        total_input_tokens += _numeric(usage.get("input_tokens"))
                        total_output_tokens += _numeric(usage.get("output_tokens"))
                        total_cost_usd += _numeric(usage.get("cost_usd"))
            except OSError as e:
                self.logger.warning(f"Failed to parse npcsh output: {e}")

        # Set context metrics on whichever attributes the context exposes.
        # NOTE(review): harbor's AgentContext appears to name the token fields
        # n_input_tokens / n_output_tokens - confirm; both spellings are tried.
        metrics = (
            (("n_input_tokens", "input_tokens"), total_input_tokens),
            (("n_output_tokens", "output_tokens"), total_output_tokens),
            (("cost_usd",), total_cost_usd),
        )
        for attr_names, value in metrics:
            for attr in attr_names:
                if hasattr(context, attr):
                    setattr(context, attr, value)


class NpcshAgentWithNpc(NpcshAgent):
    """
    Variant that uses a specific NPC for task execution.

    This allows benchmarking specific NPCs like sibiji (orchestrator),
    corca (coding), or custom NPCs.

    Usage:
        harbor run -d terminal-bench@2.0 \\
            --agent-import-path "npcsh.benchmark:NpcshAgentWithNpc" \\
            -m anthropic/claude-sonnet-4-20250514 -n 4
    """

    def __init__(self, *args, npc_name: str = "sibiji", **kwargs):
        super().__init__(*args, **kwargs)
        self.npc_name = npc_name

    @staticmethod
    def name() -> str:
        return "npcsh-npc"

    def create_run_agent_commands(self, instruction: str) -> list:
        """Create commands using the NPC selected at construction time."""
        return self._build_run_commands(instruction, self.npc_name)