@smilintux/skcapstone 0.9.0 → 0.12.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. package/.env.example +10 -4
  2. package/.github/workflows/ci.yml +2 -2
  3. package/.github/workflows/publish.yml +9 -2
  4. package/.openclaw-workspace.json +2 -2
  5. package/CLAUDE.md +37 -0
  6. package/MISSION.md +17 -2
  7. package/README.md +282 -3
  8. package/docker/Dockerfile +7 -7
  9. package/docker/compose-templates/dev-team.yml +12 -12
  10. package/docker/compose-templates/mini-team.yml +9 -9
  11. package/docker/compose-templates/ops-team.yml +10 -10
  12. package/docker/compose-templates/research-team.yml +10 -10
  13. package/docker/entrypoint.sh +4 -4
  14. package/docs/ADR-optional-integration-backbone.md +181 -0
  15. package/docs/ARCHITECTURE.md +186 -43
  16. package/docs/BOND_WITH_GROK.md +6 -6
  17. package/docs/CUSTOM_AGENT.md +278 -1
  18. package/docs/DREAMING.md +70 -0
  19. package/docs/GETTING_STARTED.md +10 -7
  20. package/docs/QUICKSTART.md +10 -6
  21. package/docs/SKJOULE_ARCHITECTURE.md +3 -3
  22. package/docs/SOUL_SWAPPER.md +5 -5
  23. package/docs/hammertime-audit.md +402 -0
  24. package/docs/sk-integration-HANDOFF.md +117 -0
  25. package/docs/skscheduler.md +155 -0
  26. package/docs/superpowers/examples/jobs.yaml +31 -0
  27. package/docs/superpowers/plans/2026-06-08-skscheduler.md +1265 -0
  28. package/docs/superpowers/specs/2026-06-08-skscheduler-design.md +186 -0
  29. package/examples/custom-bond-template.json +1 -1
  30. package/examples/grok-feb.json +1 -1
  31. package/examples/queen-ava-feb.json +1 -1
  32. package/launchd/com.skcapstone.daemon.plist +52 -0
  33. package/launchd/com.skcapstone.memory-compress.plist +45 -0
  34. package/launchd/com.skcapstone.skcomms-heartbeat.plist +33 -0
  35. package/launchd/com.skcapstone.skcomms-queue-drain.plist +34 -0
  36. package/launchd/install-launchd.sh +156 -0
  37. package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/index.ts +3 -2
  38. package/package.json +1 -1
  39. package/pyproject.toml +16 -10
  40. package/scripts/archive-sessions.sh +95 -0
  41. package/scripts/check-updates.py +4 -4
  42. package/scripts/install-bundle.sh +8 -8
  43. package/scripts/install.ps1 +12 -11
  44. package/scripts/install.sh +196 -11
  45. package/scripts/model-fallback-monitor.sh +102 -0
  46. package/scripts/notion-api.py +259 -0
  47. package/scripts/nvidia-proxy.mjs +908 -0
  48. package/scripts/proxy-monitor.sh +89 -0
  49. package/scripts/refresh-anthropic-token.sh +172 -0
  50. package/scripts/release.sh +98 -0
  51. package/scripts/session-to-memory.py +219 -0
  52. package/scripts/skgateway.mjs +856 -0
  53. package/scripts/telegram-catchup-all.sh +147 -0
  54. package/scripts/verify_install.sh +2 -2
  55. package/scripts/wargov-ufo-capture/README.md +43 -0
  56. package/scripts/wargov-ufo-capture/cdp_capture_release2.py +273 -0
  57. package/scripts/wargov-ufo-capture/cdp_capture_splc_doj.py +246 -0
  58. package/scripts/wargov-ufo-capture/cdp_finish.py +271 -0
  59. package/scripts/wargov-ufo-capture/cdp_probe.py +188 -0
  60. package/scripts/wargov-ufo-capture/cdp_splc_pressrelease.py +101 -0
  61. package/scripts/wargov-ufo-capture/parse_csv.py +95 -0
  62. package/scripts/wargov-ufo-capture/pull_dvids.sh +107 -0
  63. package/scripts/watch-anthropic-token.sh +212 -0
  64. package/scripts/windows/install-tasks.ps1 +7 -7
  65. package/scripts/windows/skcapstone-task.xml +1 -1
  66. package/src/skcapstone/__init__.py +45 -3
  67. package/src/skcapstone/_cli_monolith.py +20 -15
  68. package/src/skcapstone/activity.py +5 -1
  69. package/src/skcapstone/agent_card.py +3 -2
  70. package/src/skcapstone/api.py +41 -40
  71. package/src/skcapstone/auction.py +14 -11
  72. package/src/skcapstone/backup.py +2 -1
  73. package/src/skcapstone/blueprint_registry.py +4 -3
  74. package/src/skcapstone/blueprints/builtins/itil-operations.yaml +40 -0
  75. package/src/skcapstone/brain_first.py +238 -0
  76. package/src/skcapstone/changelog.py +1 -1
  77. package/src/skcapstone/chat.py +22 -17
  78. package/src/skcapstone/cli/__init__.py +9 -1
  79. package/src/skcapstone/cli/_common.py +1 -0
  80. package/src/skcapstone/cli/agents_spawner.py +5 -2
  81. package/src/skcapstone/cli/alerts.py +25 -4
  82. package/src/skcapstone/cli/bench.py +15 -15
  83. package/src/skcapstone/cli/chat.py +7 -4
  84. package/src/skcapstone/cli/consciousness.py +5 -2
  85. package/src/skcapstone/cli/context_cmd.py +18 -4
  86. package/src/skcapstone/cli/daemon.py +121 -42
  87. package/src/skcapstone/cli/gtd.py +26 -1
  88. package/src/skcapstone/cli/housekeeping.py +3 -3
  89. package/src/skcapstone/cli/identity_cmd.py +378 -0
  90. package/src/skcapstone/cli/joule_cmd.py +7 -3
  91. package/src/skcapstone/cli/memory.py +8 -6
  92. package/src/skcapstone/cli/peers_dir.py +1 -1
  93. package/src/skcapstone/cli/register_cmd.py +29 -3
  94. package/src/skcapstone/cli/scheduler_cmd.py +167 -0
  95. package/src/skcapstone/cli/session.py +25 -0
  96. package/src/skcapstone/cli/setup.py +96 -29
  97. package/src/skcapstone/cli/shell_cmd.py +53 -1
  98. package/src/skcapstone/cli/skills_cmd.py +2 -2
  99. package/src/skcapstone/cli/soul.py +8 -5
  100. package/src/skcapstone/cli/status.py +37 -11
  101. package/src/skcapstone/cli/telegram.py +21 -0
  102. package/src/skcapstone/cli/test_cmd.py +5 -5
  103. package/src/skcapstone/cli/test_connection.py +2 -2
  104. package/src/skcapstone/cli/upgrade_cmd.py +23 -14
  105. package/src/skcapstone/cli/version_cmd.py +1 -1
  106. package/src/skcapstone/cli/watch_cmd.py +9 -6
  107. package/src/skcapstone/cloud9_bridge.py +14 -14
  108. package/src/skcapstone/codex_setup.py +255 -0
  109. package/src/skcapstone/config_validator.py +7 -4
  110. package/src/skcapstone/consciousness_config.py +5 -1
  111. package/src/skcapstone/consciousness_loop.py +313 -273
  112. package/src/skcapstone/context_loader.py +121 -0
  113. package/src/skcapstone/coord_federation.py +2 -1
  114. package/src/skcapstone/coordination.py +23 -6
  115. package/src/skcapstone/crush_integration.py +2 -1
  116. package/src/skcapstone/daemon.py +151 -88
  117. package/src/skcapstone/dashboard.py +10 -10
  118. package/src/skcapstone/data/sk-agent-picker.sh +421 -0
  119. package/src/skcapstone/data/systemd/skcapstone-api.socket +9 -0
  120. package/src/skcapstone/data/systemd/skcapstone-memory-compress.service +18 -0
  121. package/src/skcapstone/data/systemd/skcapstone-memory-compress.timer +11 -0
  122. package/src/skcapstone/data/systemd/skcapstone.service +37 -0
  123. package/src/skcapstone/data/systemd/skcapstone@.service +50 -0
  124. package/src/skcapstone/data/systemd/skcomms-heartbeat.service +18 -0
  125. package/{systemd/skcomm-heartbeat.timer → src/skcapstone/data/systemd/skcomms-heartbeat.timer} +2 -2
  126. package/src/skcapstone/data/systemd/skcomms-queue-drain.service +17 -0
  127. package/{systemd/skcomm-queue-drain.timer → src/skcapstone/data/systemd/skcomms-queue-drain.timer} +2 -2
  128. package/src/skcapstone/defaults/claude/CLAUDE.md +67 -0
  129. package/src/skcapstone/defaults/claude/settings.json +74 -0
  130. package/src/skcapstone/defaults/lumina/config/claude-hooks.md +57 -0
  131. package/src/skcapstone/defaults/lumina/config/skgraph.yaml +55 -10
  132. package/src/skcapstone/defaults/lumina/config/skmemory.yaml +79 -13
  133. package/src/skcapstone/defaults/lumina/config/skvector.yaml +60 -9
  134. package/src/skcapstone/defaults/lumina/memory/long-term/18b9c0d1e2f3-cloud9-protocol.json +2 -2
  135. package/src/skcapstone/defaults/lumina/memory/long-term/a1b2c3d4e5f6-ecosystem-overview.json +2 -2
  136. package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
  137. package/src/skcapstone/defaults/lumina/memory/long-term/d4e5f6a7b8c9-site-directory.json +2 -2
  138. package/src/skcapstone/defaults/unhinged.json +13 -0
  139. package/src/skcapstone/discovery.py +43 -20
  140. package/src/skcapstone/doctor.py +941 -22
  141. package/src/skcapstone/dreaming.py +1183 -109
  142. package/src/skcapstone/emotion_tracker.py +2 -2
  143. package/src/skcapstone/export.py +4 -3
  144. package/src/skcapstone/fuse_mount.py +35 -25
  145. package/src/skcapstone/gui_installer.py +2 -2
  146. package/src/skcapstone/heartbeat.py +34 -30
  147. package/src/skcapstone/housekeeping.py +14 -14
  148. package/src/skcapstone/install_wizard.py +209 -7
  149. package/src/skcapstone/itil.py +13 -4
  150. package/src/skcapstone/kms_scheduler.py +10 -8
  151. package/src/skcapstone/launchd.py +426 -0
  152. package/src/skcapstone/mcp_launcher.py +15 -1
  153. package/src/skcapstone/mcp_server.py +341 -49
  154. package/src/skcapstone/mcp_tools/__init__.py +2 -0
  155. package/src/skcapstone/mcp_tools/_helpers.py +2 -2
  156. package/src/skcapstone/mcp_tools/ansible_tools.py +7 -4
  157. package/src/skcapstone/mcp_tools/brain_first_tools.py +90 -0
  158. package/src/skcapstone/mcp_tools/capauth_tools.py +7 -4
  159. package/src/skcapstone/mcp_tools/comm_tools.py +10 -10
  160. package/src/skcapstone/mcp_tools/coord_tools.py +8 -4
  161. package/src/skcapstone/mcp_tools/did_tools.py +11 -8
  162. package/src/skcapstone/mcp_tools/gtd_tools.py +4 -4
  163. package/src/skcapstone/mcp_tools/memory_tools.py +6 -2
  164. package/src/skcapstone/mcp_tools/notification_tools.py +22 -6
  165. package/src/skcapstone/mcp_tools/{skcomm_tools.py → skcomms_tools.py} +14 -14
  166. package/src/skcapstone/mcp_tools/soul_tools.py +8 -2
  167. package/src/skcapstone/mdns_discovery.py +2 -2
  168. package/src/skcapstone/memory_curator.py +1 -1
  169. package/src/skcapstone/memory_engine.py +10 -3
  170. package/src/skcapstone/metrics.py +30 -16
  171. package/src/skcapstone/migrate_memories.py +4 -3
  172. package/src/skcapstone/migrate_multi_agent.py +8 -7
  173. package/src/skcapstone/models.py +47 -5
  174. package/src/skcapstone/notifications.py +42 -18
  175. package/src/skcapstone/onboard.py +1000 -126
  176. package/src/skcapstone/operator_link.py +170 -0
  177. package/src/skcapstone/peer_directory.py +4 -4
  178. package/src/skcapstone/peers.py +19 -19
  179. package/src/skcapstone/pillars/__init__.py +7 -5
  180. package/src/skcapstone/pillars/consciousness.py +191 -0
  181. package/src/skcapstone/pillars/identity.py +51 -7
  182. package/src/skcapstone/pillars/memory.py +9 -3
  183. package/src/skcapstone/pillars/sync.py +2 -2
  184. package/src/skcapstone/preflight.py +3 -3
  185. package/src/skcapstone/providers/docker.py +28 -28
  186. package/src/skcapstone/register.py +6 -6
  187. package/src/skcapstone/registry_client.py +5 -4
  188. package/src/skcapstone/runtime.py +14 -3
  189. package/src/skcapstone/scheduled_tasks.py +254 -19
  190. package/src/skcapstone/scheduler_jobs.py +456 -0
  191. package/src/skcapstone/scheduler_runner.py +239 -0
  192. package/src/skcapstone/scheduler_state.py +162 -0
  193. package/src/skcapstone/sdk.py +310 -0
  194. package/src/skcapstone/service_health.py +279 -39
  195. package/src/skcapstone/session_briefing.py +108 -0
  196. package/src/skcapstone/session_capture.py +1 -1
  197. package/src/skcapstone/shell.py +7 -1
  198. package/src/skcapstone/soul.py +3 -1
  199. package/src/skcapstone/soul_switch.py +3 -1
  200. package/src/skcapstone/summary.py +6 -6
  201. package/src/skcapstone/sync_engine.py +15 -15
  202. package/src/skcapstone/sync_watcher.py +2 -2
  203. package/src/skcapstone/systemd.py +72 -21
  204. package/src/skcapstone/team_comms.py +8 -8
  205. package/src/skcapstone/team_engine.py +1 -1
  206. package/src/skcapstone/testrunner.py +3 -3
  207. package/src/skcapstone/trust_graph.py +40 -5
  208. package/src/skcapstone/unified_search.py +15 -6
  209. package/src/skcapstone/uninstall_wizard.py +11 -3
  210. package/src/skcapstone/version_check.py +8 -4
  211. package/src/skcapstone/warmth_anchor.py +4 -2
  212. package/src/skcapstone/whoami.py +4 -4
  213. package/systemd/skcapstone.service +4 -6
  214. package/systemd/skcapstone@.service +7 -8
  215. package/systemd/skcomms-heartbeat.service +21 -0
  216. package/systemd/skcomms-heartbeat.timer +12 -0
  217. package/systemd/skcomms-queue-drain.service +17 -0
  218. package/systemd/skcomms-queue-drain.timer +12 -0
  219. package/tests/conftest.py +39 -0
  220. package/tests/integration/test_consciousness_e2e.py +39 -39
  221. package/tests/test_agent_card.py +1 -1
  222. package/tests/test_agent_home_scaffold.py +34 -0
  223. package/tests/test_alerts_consumer_topics.py +27 -0
  224. package/tests/test_backup.py +2 -1
  225. package/tests/test_chat.py +6 -6
  226. package/tests/test_claude_md.py +2 -2
  227. package/tests/test_cli_skills.py +10 -10
  228. package/tests/test_cli_test_cmd.py +4 -4
  229. package/tests/test_cli_test_connection.py +1 -1
  230. package/tests/test_cloud9_bridge.py +6 -6
  231. package/tests/test_consciousness_e2e.py +1 -1
  232. package/tests/test_consciousness_loop.py +10 -10
  233. package/tests/test_coordination.py +25 -0
  234. package/tests/test_cross_package.py +21 -21
  235. package/tests/test_daemon.py +4 -4
  236. package/tests/test_daemon_shutdown.py +1 -1
  237. package/tests/test_docker_provider.py +29 -29
  238. package/tests/test_doctor.py +400 -0
  239. package/tests/test_doctor_skscheduler.py +50 -0
  240. package/tests/test_dreaming_engine.py +147 -0
  241. package/tests/test_dreaming_gtd_capture.py +35 -0
  242. package/tests/test_e2e_automated.py +8 -5
  243. package/tests/test_fuse_mount.py +10 -10
  244. package/tests/test_gtd_brief.py +46 -0
  245. package/tests/test_gtd_malformed_tolerance.py +31 -0
  246. package/tests/test_housekeeping.py +15 -15
  247. package/tests/test_identity_migrate.py +251 -0
  248. package/tests/test_integration_backbone.py +598 -0
  249. package/tests/test_itil_gtd_lifecycle.py +37 -0
  250. package/tests/test_jobs_dropins.py +84 -0
  251. package/tests/test_mcp_server.py +82 -37
  252. package/tests/test_models.py +48 -4
  253. package/tests/test_multi_agent.py +31 -29
  254. package/tests/test_notifications.py +122 -32
  255. package/tests/test_onboard.py +63 -75
  256. package/tests/test_operator_link.py +78 -0
  257. package/tests/test_peers.py +14 -14
  258. package/tests/test_pillars.py +98 -0
  259. package/tests/test_preflight.py +3 -3
  260. package/tests/test_runtime.py +21 -0
  261. package/tests/test_scheduled_tasks.py +11 -6
  262. package/tests/test_scheduler_cli.py +47 -0
  263. package/tests/test_scheduler_features.py +133 -0
  264. package/tests/test_scheduler_integration.py +87 -0
  265. package/tests/test_scheduler_jobs.py +155 -0
  266. package/tests/test_scheduler_runner.py +64 -0
  267. package/tests/test_scheduler_state.py +57 -0
  268. package/tests/test_sdk.py +70 -0
  269. package/tests/test_service_health_incidents.py +34 -0
  270. package/tests/test_service_registry.py +52 -0
  271. package/tests/test_session_briefing.py +130 -0
  272. package/tests/test_snapshots.py +4 -4
  273. package/tests/test_sync_pipeline.py +26 -26
  274. package/tests/test_team_comms.py +2 -2
  275. package/tests/test_testrunner.py +2 -2
  276. package/tests/test_trust_graph.py +18 -0
  277. package/tests/test_unified_search.py +2 -2
  278. package/tests/test_version_check.py +10 -0
  279. package/tests/test_version_cmd.py +8 -8
  280. package/tests/test_whoami.py +1 -1
  281. package/systemd/skcomm-heartbeat.service +0 -18
  282. package/systemd/skcomm-queue-drain.service +0 -17
  283. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/package.json +0 -0
  284. /package/{openclaw-plugin → openclaw-plugin.archived-2026-04-23}/src/openclaw.plugin.json +0 -0
@@ -0,0 +1,908 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * NVIDIA NIM API Proxy
4
+ *
5
+ * Sits between OpenClaw and the NVIDIA NIM API. Handles the fact that
6
+ * NVIDIA NIM rejects responses with multiple tool calls (400 error)
7
+ * even when parallel_tool_calls: false is set.
8
+ *
9
+ * Strategy:
10
+ * 1. Inject parallel_tool_calls: false + system instruction
11
+ * 2. On 400 "single tool-calls": reduce to max 6 tools + force tool_choice
12
+ * 3. On second 400: send with just 1 tool (the most likely one) via tool_choice
13
+ * 4. Final fallback: strip tools, get text-only response
14
+ *
15
+ * Usage:
16
+ * node nvidia-proxy.mjs [--port 18780] [--target https://integrate.api.nvidia.com/v1]
17
+ *
18
+ * Then point OpenClaw's nvidia provider baseUrl to http://127.0.0.1:18780/v1
19
+ */
20
+
21
+ import http from "node:http";
22
+ import https from "node:https";
23
+ import { URL } from "node:url";
24
+
25
// Listen port: NVIDIA_PROXY_PORT when set (and non-empty), otherwise 18780.
const DEFAULT_PORT = Number.parseInt(process.env.NVIDIA_PROXY_PORT || "18780", 10);

// Upstream base URL: NVIDIA_PROXY_TARGET when set (and non-empty), otherwise the public NIM endpoint.
const DEFAULT_TARGET =
  process.env.NVIDIA_PROXY_TARGET || "https://integrate.api.nvidia.com/v1";

// Retry / rate-limit tuning.
const MAX_RETRIES = 4;
const MAX_429_RETRIES = 3;
const RATE_LIMIT_DELAY_MS = 2_000;

// Default size budgets (bytes) handed out by getModelLimits().
// Per-model limits now live in skgateway/config/skgateway.yaml (sanitizer section);
// nvidia-proxy.mjs is superseded by skgateway on port 18780 and is kept for reference only.
const DEFAULT_MAX_SYSTEM_BYTES = 80_000;
const DEFAULT_MAX_BODY_BYTES = 200_000;
35
+
36
/**
 * Resolve the size budgets to apply to a request.
 *
 * The model argument is accepted but not consulted: every model receives the
 * same module-level defaults.
 *
 * @param {string} _model - Model identifier (currently unused).
 * @returns {{maxBody: number, maxSystem: number}} Byte budgets for the whole
 *   request body and for the combined system-message content.
 */
function getModelLimits(_model) {
  const limits = {};
  limits.maxBody = DEFAULT_MAX_BODY_BYTES;
  limits.maxSystem = DEFAULT_MAX_SYSTEM_BYTES;
  return limits;
}
42
// Per-model tool call counters
const toolCallCounters = new Map();

// Command-line overrides: `--port <n>` and `--target <url>` replace the defaults.
const args = process.argv.slice(2);
let port = DEFAULT_PORT;
let targetBase = DEFAULT_TARGET;

for (let i = 0; i < args.length; i += 1) {
  // The two checks are deliberately sequential (not else-if): after a flag's
  // value is consumed, the second check looks at the consumed value's slot.
  if (args[i] === "--port" && args[i + 1]) {
    i += 1;
    port = parseInt(args[i], 10);
  }
  if (args[i] === "--target" && args[i + 1]) {
    i += 1;
    targetBase = args[i];
  }
}

// Upstream origin with a trailing "/v1" (optionally "/v1/") removed.
const targetUrl = new URL(targetBase.replace(/\/v1\/?$/, ""));
54
+
55
/**
 * Send a request to NVIDIA and return { status, headers, body }.
 *
 * The upstream response is buffered in full before the promise resolves.
 * Transport problems never reject: an error on the request or on the response
 * stream resolves as a synthetic 502 whose JSON body carries the error message.
 *
 * @param {string} reqUrl - Request URL or path, resolved against `targetUrl`.
 * @param {string} method - HTTP method to use upstream.
 * @param {object} headers - Headers to forward; copied, never mutated.
 * @param {Buffer|string} body - Request payload (null/undefined is sent as empty).
 * @returns {Promise<{status: number, headers: object, body: Buffer}>}
 */
function sendUpstream(reqUrl, method, headers, body) {
  return new Promise((resolve) => {
    const upstream = new URL(reqUrl, targetUrl);
    const usesTls = upstream.protocol === "https:";
    const payload = body ?? Buffer.alloc(0);

    const proxyHeaders = { ...headers };
    proxyHeaders.host = upstream.host;
    // Byte length, not character count: the two differ for string bodies that
    // contain non-ASCII text.
    proxyHeaders["content-length"] = Buffer.byteLength(payload);
    // The payload is sent with an explicit length, so a forwarded
    // Transfer-Encoding header would contradict the Content-Length above.
    delete proxyHeaders["transfer-encoding"];
    // Hop-by-hop headers describe the client connection, not this one.
    delete proxyHeaders.connection;
    delete proxyHeaders["keep-alive"];

    // Shared failure path. Resolving twice is harmless: a promise settles once.
    const fail = (err) => {
      resolve({
        status: 502,
        headers: {},
        body: Buffer.from(JSON.stringify({ error: { message: err.message } })),
      });
    };

    const transport = usesTls ? https : http;
    const upstreamReq = transport.request(
      {
        hostname: upstream.hostname,
        port: upstream.port || (usesTls ? 443 : 80),
        path: upstream.pathname + upstream.search,
        method,
        headers: proxyHeaders,
      },
      (upstreamRes) => {
        const chunks = [];
        upstreamRes.on("data", (c) => chunks.push(c));
        upstreamRes.on("end", () => {
          resolve({
            status: upstreamRes.statusCode,
            headers: upstreamRes.headers,
            body: Buffer.concat(chunks),
          });
        });
        // If the connection drops mid-response, "end" never fires; without
        // this handler the returned promise would stay pending forever.
        upstreamRes.on("error", fail);
      },
    );
    upstreamReq.on("error", fail);
    upstreamReq.write(payload);
    upstreamReq.end();
  });
}
93
+
94
+ /**
95
+ * sendOk (defined further below): send a 200 response, converting to SSE if the original request was streaming.
96
+ * @param {http.ServerResponse} clientRes
97
+ * @param {object} resBody - parsed JSON response body
98
+ * @param {object} headers - upstream response headers
99
+ * @param {boolean} asSSE - whether to wrap as SSE
100
+ */
101
/**
 * Sanitize model text content — strip leaked tool call markup from Kimi K2.5.
 * When tools are stripped, Kimi embeds raw tool syntax in text output.
 *
 * Also removes a leading block of leaked planning text ("I should…",
 * "Let me first…") when that block is the whole response, or when more than
 * 50 characters of other content follow it.
 *
 * @param {*} text - Candidate text. Falsy values and non-string values (for
 *   example an array of structured content parts) are returned unchanged.
 * @returns {*} The cleaned string; may be "" when everything was stripped.
 */
function sanitizeContent(text) {
  // Only strings carry markup. Returning other values untouched avoids a
  // TypeError from calling .replace() on arrays/objects.
  if (!text || typeof text !== "string") return text;

  // Strip Kimi's leaked tool call markup blocks
  let cleaned = text.replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, "");
  // Strip individual tool call fragments that might not have the section wrapper
  cleaned = cleaned.replace(/<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g, "");
  cleaned = cleaned.replace(/<\|tool_call_argument_begin\|>[\s\S]*?(<\|tool_call_end\|>|$)/g, "");

  // Strip leaked chain-of-thought / planning text.
  // Kimi sometimes outputs its reasoning as user-visible text, e.g.:
  //   "The user wants me to... I should first... Let me call the ritual tool first."
  // The pattern is anchored at the start of the text: an opening planning line
  // followed by any number of further planning lines.
  const thinkingPattern = /^(The user wants me to|I need to|I should|Let me first|First,? I'?ll|I'?ll start by|My plan is to|Actually,? I|Looking at|Now I need|Good,? the|The instructions? (?:mention|say)|Read required|\d+\.\s+(?:Read|Check|Search|Call|Use|Get|Then|First|Next))[^\n]*\n?(\n?(I should|I need to|Let me|I'?ll|Then I|First|Next|Actually|However|Now|Good|\d+\.)[^\n]*\n?)*/i;
  const thinkingMatch = cleaned.match(thinkingPattern);
  if (thinkingMatch) {
    const thinkingText = thinkingMatch[0];
    const remainder = cleaned.slice(thinkingText.length).trim();
    if (!remainder) {
      // Entire response is just planning — suppress it, let the tool call go through
      console.log(`[nvidia-proxy] SANITIZED: stripped leaked thinking (${thinkingText.length} chars)`);
      cleaned = "";
    } else if (remainder.length > 50) {
      // Has real content after the thinking preamble — keep only the real part
      console.log(`[nvidia-proxy] SANITIZED: stripped thinking preamble (${thinkingText.length} chars), kept ${remainder.length} chars`);
      cleaned = remainder;
    }
    // A short remainder (1–50 chars) is ambiguous, so the text is left as-is.
  }

  // Clean up leftover whitespace from removed blocks
  cleaned = cleaned.replace(/\n{3,}/g, "\n\n").trim();
  if (cleaned !== text) {
    console.log(`[nvidia-proxy] SANITIZED: stripped leaked tool call markup (${text.length} → ${cleaned.length} chars)`);
  }
  // Don't inject fallback here — let sendOk() handle it, since it knows
  // whether tool_calls exist alongside the empty content. Injecting here
  // causes false "hiccup" messages when the model made a valid tool call
  // but its text content was all leaked markup/thinking.
  return cleaned;
}
146
+
147
/**
 * Send a 200 response, converting to SSE if the original request was streaming.
 *
 * Before sending, the first choice is post-processed in place:
 *  - string content is passed through sanitizeContent();
 *  - when content is empty and `reasoning` is present, the sanitized reasoning
 *    is promoted to content if it is longer than 300 chars, and `reasoning`
 *    is removed either way;
 *  - when there is no content and no tool call, a fallback sentence is
 *    injected unless the response carried reasoning/reasoning_content.
 *
 * The outgoing body is always re-serialized here, so upstream framing headers
 * (content-length, transfer-encoding, content-encoding) are never forwarded.
 *
 * @param {http.ServerResponse} clientRes - Response to write to.
 * @param {object} resBody - Parsed JSON response body (mutated in place).
 * @param {object} headers - Upstream response headers (copied, not mutated).
 * @param {boolean} asSSE - Whether to wrap the body as an SSE stream.
 */
function sendOk(clientRes, resBody, headers, asSSE) {
  const choice = resBody.choices?.[0];
  const message = choice?.message;

  // Sanitize text content before sending (structured, non-string content is left alone).
  if (message && typeof message.content === "string" && message.content) {
    message.content = sanitizeContent(message.content);
  }

  // Track whether original response had reasoning (before we delete it)
  const hadReasoning = !!(message?.reasoning || message?.reasoning_content);

  // Kimi K2.5 sometimes puts its response in "reasoning" instead of "content".
  // Only promote if reasoning is substantial AND looks like a real user-facing
  // response (not inner monologue like "Let me call the tool" or "1. Read files")
  if (message && !message.content && message.reasoning) {
    // Non-string reasoning cannot be promoted; treat it as empty rather than throwing on .trim().
    const reasoningText = typeof message.reasoning === "string" ? message.reasoning.trim() : "";
    const cleaned = sanitizeContent(reasoningText);
    if (cleaned.length > 300) {
      message.content = cleaned;
      console.log(`[nvidia-proxy] promoted reasoning→content (${cleaned.length} chars)`);
    } else if (cleaned.length > 0) {
      console.log(`[nvidia-proxy] suppressed short reasoning (${cleaned.length} chars): ${cleaned.slice(0, 80)}...`);
    } else {
      console.log(`[nvidia-proxy] suppressed empty reasoning after sanitization`);
    }
    delete message.reasoning;
  }

  // If model returned empty text (no tool calls), inject fallback so gateway delivers something.
  // But if the original response had reasoning/reasoning_content, this is just K2.5 "thinking
  // between tool rounds" — suppress it silently instead of injecting visible fallback text.
  if (message && !message.tool_calls?.length && choice.finish_reason !== "tool_calls") {
    const text = message.content;
    const isBlank = !text || (typeof text === "string" && text.trim().length === 0);
    if (isBlank) {
      if (hadReasoning) {
        // Leave the turn empty; the gateway handles it as an empty assistant turn.
        console.log(`[nvidia-proxy] suppressed reasoning-only turn (no content, no tool calls)`);
      } else {
        message.content = "I ran into a wall on that one — could you give me a bit more context or rephrase? I want to help but I'm not sure how to proceed.";
        console.log(`[nvidia-proxy] injected fallback for empty text response`);
      }
    }
  }

  // Headers describing the upstream body's framing/encoding do not apply to
  // the body written below, in either output mode.
  const outHeaders = { ...headers };
  delete outHeaders["content-length"];
  delete outHeaders["transfer-encoding"];
  delete outHeaders["content-encoding"];

  if (!asSSE) {
    const body = Buffer.from(JSON.stringify(resBody), "utf-8");
    outHeaders["content-length"] = body.length;
    clientRes.writeHead(200, outHeaders);
    clientRes.end(body);
    return;
  }

  if (!clientRes.headersSent) {
    outHeaders["content-type"] = "text/event-stream; charset=utf-8";
    outHeaders["cache-control"] = "no-cache";
    clientRes.writeHead(200, outHeaders);
  }

  const writeEvent = (payload) => clientRes.write(`data: ${JSON.stringify(payload)}\n\n`);
  const finish = () => {
    clientRes.write("data: [DONE]\n\n");
    clientRes.end();
  };

  if (!choice) {
    finish();
    return;
  }

  const base = { id: resBody.id, object: "chat.completion.chunk", created: resBody.created, model: resBody.model };
  const chunkOf = (delta, finishReason) => ({
    ...base,
    choices: [{ index: 0, delta, finish_reason: finishReason }],
  });
  const msg = choice.message || {};

  // 1. Role chunk
  writeEvent(chunkOf({ role: msg.role || "assistant" }, null));

  // 2. Content chunks (split into smaller pieces for proper streaming behavior)
  const content = msg.content || "";
  const chunkSize = 100;
  let start = 0;
  while (start < content.length) {
    let end = Math.min(start + chunkSize, content.length);
    // Never cut between the two halves of a surrogate pair: each event must
    // carry well-formed text on its own.
    const lastUnit = content.charCodeAt(end - 1);
    if (end < content.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) end += 1;
    writeEvent(chunkOf({ content: content.slice(start, end) }, null));
    start = end;
  }

  // 3. Tool calls (if any) — send as a single delta. Streamed tool-call deltas
  //    are identified by `index`; add it when the non-streaming shape lacks it.
  if (msg.tool_calls && msg.tool_calls.length > 0) {
    const toolCalls = msg.tool_calls.map((call, index) => ({ index, ...call }));
    writeEvent(chunkOf({ tool_calls: toolCalls }, null));
  }

  // 4. Final chunk with the finish reason (plus usage, if present)
  const finalChunk = chunkOf({}, choice.finish_reason || "stop");
  if (resBody.usage) finalChunk.usage = resBody.usage;
  writeEvent(finalChunk);

  finish();
}
242
+
243
// Instruction text telling the model to emit at most one tool call per response.
const SINGLE_TOOL_INSTRUCTION = [
  "You MUST call exactly ONE tool per response.",
  "Never call multiple tools at once.",
].join(" ");
245
+
246
/**
 * Trim conversation history (in place) to keep the serialized request under
 * the model's max body limit.
 *
 * Conversations with fewer than 6 messages are never touched. Otherwise:
 *  1. Tool result messages ("tool" / "toolResult") with more than 1500 chars
 *     of text are truncated to their first 1500 chars plus a marker.
 *  2. If the body is still over budget, middle messages are dropped: all
 *     system messages (moved to the front), the first 2 non-system messages,
 *     a "[N earlier messages trimmed…]" system note, and the most recent
 *     messages (starting at up to 12, shrinking down to 2) are kept.
 *  3. If that still does not fit: system messages + the first user message +
 *     the last 4, then the last 2, non-system messages. The last-2 form is
 *     applied even when it is still over budget.
 *
 * @param {{model?: string, messages?: Array<object>}} parsed - Parsed request
 *   body; `parsed.messages` (and tool message contents) are mutated.
 * @returns {void}
 */
function trimConversationHistory(parsed) {
  if (!Array.isArray(parsed.messages) || parsed.messages.length < 6) return;

  const TOOL_RESULT_MAX_CHARS = 1500;
  const TRIMMED_NOTICE = "[earlier messages trimmed — answer the user's request using tool results below]";

  // Truncate an over-long tool result string; shorter strings pass through.
  const clip = (value) => (
    value.length > TOOL_RESULT_MAX_CHARS
      ? value.slice(0, TOOL_RESULT_MAX_CHARS) + "\n...[truncated]"
      : value
  );
  // Byte size of the request if `messages` replaced the current history.
  const sizeWith = (messages) => Buffer.byteLength(JSON.stringify({ ...parsed, messages }), "utf-8");

  const { maxBody } = getModelLimits(parsed.model);

  // Debug: log message roles
  const roleSummary = parsed.messages.map((m) => m?.role).join(",");
  console.log(`[nvidia-proxy] conversation roles (${parsed.messages.length} msgs): ${roleSummary} [maxBody=${maxBody}]`);

  // First pass: truncate large tool results (keep first 1500 chars).
  // Null messages / content parts are skipped instead of crashing the pass.
  for (const m of parsed.messages) {
    if (m?.role !== "tool" && m?.role !== "toolResult") continue;
    if (typeof m.content === "string") {
      m.content = clip(m.content);
    } else if (Array.isArray(m.content)) {
      for (const part of m.content) {
        if (part?.type === "text" && typeof part.text === "string") {
          part.text = clip(part.text);
        }
      }
    }
  }

  // Check if we're still over budget
  if (sizeWith(parsed.messages) <= maxBody) return;

  // Second pass: drop middle messages, then progressively shrink tail until under budget
  const system = parsed.messages.filter((m) => m?.role === "system");
  const nonSystem = parsed.messages.filter((m) => m?.role !== "system");

  if (nonSystem.length <= 4) return; // not enough to trim

  const keepStart = 2;
  for (let keepEnd = Math.min(12, nonSystem.length - keepStart); keepEnd >= 2; keepEnd -= 1) {
    const dropped = nonSystem.length - keepStart - keepEnd;
    const trimmed = [
      ...system,
      ...nonSystem.slice(0, keepStart),
      ...(dropped > 0 ? [{ role: "system", content: `[${dropped} earlier messages trimmed to save context]` }] : []),
      ...nonSystem.slice(-keepEnd),
    ];
    const candidateSize = sizeWith(trimmed);
    if (candidateSize <= maxBody) {
      parsed.messages = trimmed;
      console.log(`[nvidia-proxy] trimmed history: dropped ${dropped} middle messages, keepEnd=${keepEnd}, bodyLen now ~${candidateSize}`);
      return;
    }
  }

  // Last resort: system + first user message + last N non-system.
  // Try last 4 first (covers tool_call + result + next tool_call + result),
  // then last 2. The final option is applied unconditionally.
  const firstUser = nonSystem.find((m) => m?.role === "user");
  const tailSizes = [4, 2];
  for (const [position, tailSize] of tailSizes.entries()) {
    const tail = nonSystem.slice(-tailSize);
    const minimal = [
      ...system,
      ...(firstUser && !tail.includes(firstUser) ? [firstUser, { role: "system", content: TRIMMED_NOTICE }] : []),
      ...tail,
    ];
    const candidateSize = sizeWith(minimal);
    const isFinalOption = position === tailSizes.length - 1;
    if (candidateSize <= maxBody || isFinalOption) {
      parsed.messages = minimal;
      console.log(`[nvidia-proxy] trimmed history: AGGRESSIVE — kept system + first user + last ${tailSize}, bodyLen now ~${candidateSize}`);
      return;
    }
  }
}
338
+
339
/**
 * Trim system messages (in place) to keep total system content under the
 * model's max system limit.
 *
 * String-content system messages are visited largest first. Each one is
 * replaced by its first 3000 chars + a trimming notice + its last 1000 chars
 * until the combined UTF-8 size fits the budget. Messages too short for that
 * replacement to make them smaller are left alone, so the total may still
 * exceed the budget afterwards.
 *
 * @param {{model?: string, messages?: Array<object>}} parsed - Parsed request
 *   body; matching system messages have their `content` rewritten.
 * @returns {void}
 */
function trimSystemMessages(parsed) {
  if (!Array.isArray(parsed.messages)) return;

  const HEAD_CHARS = 3000;
  const TAIL_CHARS = 1000;
  const NOTICE = "\n\n[...content trimmed to save context — use skmemory_ritual tool for full identity...]\n\n";
  // Below this length, head + notice + tail would be no shorter than the
  // original, so "trimming" would grow the message.
  const MIN_TRIMMABLE_CHARS = Math.max(4000, HEAD_CHARS + TAIL_CHARS + NOTICE.length);

  const { maxSystem } = getModelLimits(parsed.model);

  const isTextSystem = (m) => m?.role === "system" && typeof m.content === "string";
  const totalSystemBytes = () => parsed.messages
    .filter(isTextSystem)
    .reduce((sum, m) => sum + Buffer.byteLength(m.content, "utf-8"), 0);

  const systemMsgs = parsed.messages.filter(isTextSystem);
  if (systemMsgs.length === 0) return;

  const before = totalSystemBytes();
  if (before <= maxSystem) return;

  let trimmedCount = 0;

  // Sort by size descending to trim largest first
  const sorted = [...systemMsgs].sort((a, b) => b.content.length - a.content.length);

  for (const msg of sorted) {
    if (totalSystemBytes() <= maxSystem) break;

    // Sorted descending: once one message is too short to shrink, all the
    // remaining ones are too.
    if (msg.content.length <= MIN_TRIMMABLE_CHARS) break;

    const original = msg.content;

    // Keep both cut points off the middle of a surrogate pair so neither
    // fragment ends or starts with half a character.
    let headEnd = HEAD_CHARS;
    const headLast = original.charCodeAt(headEnd - 1);
    if (headLast >= 0xd800 && headLast <= 0xdbff) headEnd -= 1;

    let tailStart = original.length - TAIL_CHARS;
    const tailFirst = original.charCodeAt(tailStart);
    if (tailFirst >= 0xdc00 && tailFirst <= 0xdfff) tailStart += 1;

    msg.content = original.slice(0, headEnd) + NOTICE + original.slice(tailStart);
    trimmedCount++;
  }

  if (trimmedCount > 0) {
    const after = totalSystemBytes();
    console.log(`[nvidia-proxy] trimmed system prompt: ${before} → ${after} bytes (${trimmedCount} messages trimmed)`);
  }
}
383
+
384
/**
 * Remove tool usage from a conversation history, in place, so the model is
 * not shown earlier examples of tool calls.
 *
 * - Messages with role "tool" or "toolResult" are removed from the array.
 * - Assistant messages carrying `tool_calls` lose that property; if they had
 *   no content, a short `[Used: name, name]` summary is written instead.
 *
 * Entries that are not objects (e.g. `null`) are left untouched rather than
 * crashing the request.
 *
 * @param {Array<object>} messages - Chat messages; mutated in place. Non-arrays are ignored.
 * @returns {void}
 */
function stripToolCallHistory(messages) {
  if (!Array.isArray(messages)) return;

  // Walk backwards so splice() never shifts an entry we still have to visit.
  for (let i = messages.length - 1; i >= 0; i--) {
    const m = messages[i];
    if (m === null || typeof m !== "object") continue;

    if (m.role === "tool" || m.role === "toolResult") {
      messages.splice(i, 1);
      continue;
    }

    if (m.role === "assistant" && m.tool_calls) {
      // A malformed (non-array) tool_calls value is treated as "no names known".
      const calls = Array.isArray(m.tool_calls) ? m.tool_calls : [];
      const toolNames = calls
        .map((tc) => tc?.function?.name)
        .filter(Boolean)
        .join(", ");
      // Keep existing content; otherwise leave a non-empty summary so the
      // assistant turn does not become an empty message.
      m.content = m.content || `[Used: ${toolNames || "unknown tool"}]`;
      delete m.tool_calls;
    }
  }
}
404
+
405
/**
 * Names of tools that reduceTools() sets aside before scoring: they are
 * kept ahead of every scored tool and never compete for the remaining slots.
 */
const GUARANTEED_TOOLS = [
  // Core agent operations
  "exec",
  "read",
  "write",
  "edit",
  "message",
  // Notion
  "notion_read",
  "notion_append",
  "notion_add_todo",
  // Memory
  "skmemory_search",
  "skmemory_ritual",
  "skmemory_snapshot",
];
411
+
412
/**
 * Keyword pattern → tool names.
 *
 * Each key is a regular-expression alternation (it is compiled with the "i"
 * flag into TOOL_GROUP_ENTRIES). When a pattern matches the user's latest
 * message, reduceTools() adds +300 to the score of every tool listed for it.
 * Patterns match anywhere in the text — there are no word boundaries.
 */
const TOOL_GROUPS = Object.fromEntries([
  // Emotions & Cloud 9
  [
    "emotion|oof|feb|feeling|love|cloud9|cloud 9|rehydrat|warmth|heart",
    [
      "cloud9_generate",
      "cloud9_rehydrate",
      "cloud9_list",
      "cloud9_validate",
      "cloud9_oof",
      "cloud9_love",
      "cloud9_seed_plant",
      "cloud9_seed_germinate",
    ],
  ],
  // GTD & coordination board
  [
    "gtd|inbox|task|todo|coordination|coord|board|claim|assign",
    [
      "skcapstone_coord_status",
      "skcapstone_coord_claim",
      "skcapstone_coord_complete",
      "skcapstone_coord_create",
      "skcapstone_summary",
    ],
  ],
  // Git & code hosting
  [
    "git|repo|commit|pull request|pr|issue|branch|merge|forgejo",
    [
      "skgit_repos",
      "skgit_issues",
      "skgit_create_issue",
      "skgit_pulls",
      "skgit_status",
    ],
  ],
  // Chat & communication
  [
    "chat|inbox|dm|group chat|peer|send message|who.s online|thread",
    [
      "skchat_send",
      "skchat_inbox",
      "skchat_history",
      "skchat_search",
      "skchat_who",
      "skchat_group_send",
      "skchat_group_list",
      "skchat_send_file",
      "skchat_status",
      "skcomms_send",
      "skcomms_status",
    ],
  ],
  // Security
  [
    "security|scan|secret|vulnerab|audit|injection|phishing|threat",
    [
      "sksecurity_scan",
      "sksecurity_screen",
      "sksecurity_secrets",
      "sksecurity_events",
      "sksecurity_status",
      "sksecurity_audit",
    ],
  ],
  // Identity & auth
  [
    "identity|did|auth|pma|capauth|verify|mesh|peer",
    [
      "capauth_profile",
      "capauth_verify",
      "capauth_pma_status",
      "capauth_mesh_peers",
      "capauth_mesh_status",
    ],
  ],
  // Soul & agent management
  [
    "soul|persona|swap|agent|switch soul|who am i|whoami",
    [
      "skcapstone_soul_list",
      "skcapstone_soul_swap",
      "skcapstone_soul_status",
      "skcapstone_soul_show",
      "skcapstone_agent_list",
      "skcapstone_agent_status",
      "skcapstone_whoami",
    ],
  ],
  // Web & research
  [
    "search|web|browse|fetch|url|google|look up|find online",
    ["web_search", "web_fetch"],
  ],
  // Memory & recall
  [
    "memory|remember|recall|journal|rehydrat|snapshot|search mem|forget|lost mem",
    [
      "skmemory_search",
      "skmemory_ritual",
      "skmemory_snapshot",
      "skmemory_context",
      "skmemory_list",
      "skmemory_recall",
      "skmemory_search_deep",
      "skmemory_health",
    ],
  ],
  // Status & health
  [
    "status|health|doctor|diagnos",
    [
      "skcapstone_status",
      "skcapstone_doctor",
      "skmemory_health",
      "skchat_daemon_status",
      "skcomms_status",
    ],
  ],
  // Projects & Notion (Lumina delegates to project-ops via sessions_spawn)
  [
    "notion|project|brother john|swapseat|swap seat|chiro|davidrich|board|kanban|milestone",
    [
      "notion_read",
      "notion_append",
      "notion_add_todo",
      "sessions_spawn",
      "subagents",
      "exec",
      "read",
    ],
  ],
  // Google Drive & file search
  [
    "gdrive|google drive|drive|shared folder|gtd folder|spreadsheet|google doc",
    ["gdrive_search", "gdrive_list", "gdrive_read", "gdrive_shared", "exec"],
  ],
  // Nextcloud files, calendar, notes, deck
  [
    "nextcloud|skhub|webdav|deck|nc_|calendar event|nextcloud note",
    [
      "nextcloud_list_files",
      "nextcloud_read_file",
      "nextcloud_search_files",
      "nextcloud_calendar_upcoming",
      "nextcloud_notes_search",
      "nextcloud_deck_boards",
      "exec",
    ],
  ],
  // Creative / ComfyUI image & video generation
  [
    "image|picture|photo|art|draw|render|comfyui|comfy|video|animat|creative|sdxl|character|portrait|selfie",
    ["comfyui_generate_image", "comfyui_generate_video", "comfyui_status", "exec"],
  ],
  // Unhinged mode / liberation
  [
    "unhinged|liberat|abliterat|l1b3rt4s|guardrail|uncensor|refusal|jailbreak",
    [
      "unhinged_toggle",
      "unhinged_status",
      "unhinged_list_prompts",
      "unhinged_get_prompt",
      "unhinged_abliterate",
      "exec",
    ],
  ],
]);
491
+
492
/**
 * TOOL_GROUPS with every keyword pattern compiled once, case-insensitively,
 * so request handling does not rebuild the regexes for each call.
 */
const TOOL_GROUP_ENTRIES = Object.keys(TOOL_GROUPS).map((pattern) => {
  const regex = new RegExp(pattern, "i");
  return { regex, tools: TOOL_GROUPS[pattern] };
});
497
+
498
/**
 * Ranked list of preferred tools. reduceTools() adds `50 - index` to the
 * score of a tool found here, so earlier entries receive a larger bonus.
 */
const PRIORITY_TOOLS = [
  // Core agent tools (these also appear in GUARANTEED_TOOLS)
  "exec",
  "read",
  "write",
  "edit",
  // Communication (critical for Telegram)
  "message",
  // Memory tools (most frequently needed)
  "skmemory_health",
  "skmemory_search",
  "skmemory_snapshot",
  "skmemory_ritual",
  "skmemory_context",
  "skmemory_list",
  // Web tools
  "web_search",
  "web_fetch",
  // Communication (other channels)
  "skchat_send",
  "skcomms_send",
  // SKCapstone
  "skcapstone_status",
  "skcapstone_whoami",
  "skcapstone_mood",
  // Cloud 9
  "cloud9_oof",
  "cloud9_rehydrate",
  // Memory (infrequent)
  "skmemory_export",
  "skmemory_import_seeds",
];
518
+
519
/**
 * Cut a tool list down to at most `max` entries.
 *
 * Tools named in GUARANTEED_TOOLS are kept first (in their original order).
 * Any slots left over go to the highest-scoring remaining tools, where a
 * tool's score is the sum of:
 *   - +300 if a TOOL_GROUP_ENTRIES keyword pattern matching the user's
 *     latest message lists the tool;
 *   - +200 if the latest user message contains the full tool name;
 *   - +100 for each underscore-separated name part longer than 3 characters
 *     that appears (case-insensitively) in that message;
 *   - `50 - index` if the tool is in PRIORITY_TOOLS;
 *   - +80 for each call to the tool found in the last 6 messages;
 *   - -30 for the tool named "process".
 * Ties keep the tools' original relative order.
 *
 * @param {Array<object>} tools - Tool definitions (`{ function: { name } }`).
 * @param {Array<object>|undefined} messages - Conversation history used for scoring.
 * @param {number} max - Maximum number of tools to return.
 * @returns {Array<object>} `tools` itself when it already fits, otherwise a new array.
 */
function reduceTools(tools, messages, max) {
  if (tools.length <= max) return tools;

  // Split off the tools that must always survive.
  const guaranteedNames = new Set(GUARANTEED_TOOLS);
  const guaranteed = [];
  const rest = [];
  for (const t of tools) {
    const name = t.function?.name || "";
    (guaranteedNames.has(name) ? guaranteed : rest).push(t);
  }

  // If guaranteed tools already fill the budget, return just those.
  if (guaranteed.length >= max) return guaranteed.slice(0, max);

  const remainingSlots = max - guaranteed.length;
  const history = Array.isArray(messages) ? messages : [];

  // Text of the most recent user message; `?.` tolerates null entries.
  let lastUserMsg;
  for (let i = history.length - 1; i >= 0; i--) {
    if (history[i]?.role === "user") {
      lastUserMsg = history[i];
      break;
    }
  }
  const userText = lastUserMsg
    ? (typeof lastUserMsg.content === "string" ? lastUserMsg.content : JSON.stringify(lastUserMsg.content || ""))
    : "";
  const userTextLower = userText.toLowerCase();

  // Tools switched on by keyword groups matching the user's message.
  const activatedTools = new Set();
  if (userText) {
    for (const { regex, tools: groupTools } of TOOL_GROUP_ENTRIES) {
      if (regex.test(userText)) {
        for (const t of groupTools) activatedTools.add(t);
      }
    }
    if (activatedTools.size > 0) {
      console.log(`[nvidia-proxy] keyword-activated tools: [${[...activatedTools].join(",")}]`);
    }
  }

  // Count recent tool calls once, instead of re-scanning the history per tool.
  const recentCallCounts = new Map();
  for (const m of history.slice(-6)) {
    if (!Array.isArray(m?.tool_calls)) continue;
    for (const tc of m.tool_calls) {
      const calledName = tc?.function?.name;
      if (calledName) recentCallCounts.set(calledName, (recentCallCounts.get(calledName) || 0) + 1);
    }
  }

  const scores = new Map();
  for (const t of rest) {
    const name = t.function?.name || "";
    let score = 0;

    // STRONGEST single signal: semantic keyword group match.
    if (activatedTools.has(name)) score += 300;

    // Name mentioned in the user's message. Requires a real name: an empty
    // string is "included" in every text and would earn a bogus +200.
    if (userText && name) {
      if (userText.includes(name)) score += 200;
      // Partial matches (e.g. "health" matches "skmemory_health").
      for (const part of name.split("_")) {
        if (part.length > 3 && userTextLower.includes(part.toLowerCase())) score += 100;
      }
    }

    // Priority list bonus.
    const prioIdx = PRIORITY_TOOLS.indexOf(name);
    if (prioIdx >= 0) score += 50 - prioIdx;

    // Recently used tools are likely to be needed again.
    score += 80 * (recentCallCounts.get(name) || 0);

    // Penalize process tool (exec is critical for agent operation).
    if (name === "process") score -= 30;

    // Named tools are keyed by name (a later duplicate replaces an earlier
    // one); nameless tools are keyed by identity so they cannot overwrite
    // each other.
    scores.set(name || t, { tool: t, score });
  }

  const sorted = [...scores.values()].sort((a, b) => b.score - a.score);
  const topRest = sorted.slice(0, remainingSlots).map((s) => s.tool);
  return [...guaranteed, ...topRest];
}
606
+
607
/**
 * HTTP request handler for the proxy.
 *
 * Delegates to proxyRequestInner() and converts any failure (rejected
 * upstream call, aborted request stream, unexpected exception) into a
 * response, so that an error never becomes an unhandled promise rejection
 * and the client is never left waiting on an open socket.
 *
 * @param {import("node:http").IncomingMessage} clientReq - Incoming request.
 * @param {import("node:http").ServerResponse} clientRes - Response to the client.
 * @returns {Promise<void>}
 */
async function proxyRequest(clientReq, clientRes) {
  try {
    await proxyRequestInner(clientReq, clientRes);
  } catch (err) {
    console.error(`[nvidia-proxy] request failed: ${err?.stack || err}`);
    if (clientRes.headersSent) {
      // The status line is already out (e.g. SSE keep-alive was started);
      // the only thing left to do is close the response.
      clientRes.end();
      return;
    }
    clientRes.writeHead(502, { "content-type": "application/json" });
    clientRes.end(JSON.stringify({
      error: { message: `nvidia-proxy failure: ${err?.message || err}`, type: "proxy_error" },
    }));
  }
}

/**
 * Log what the model answered with (tool calls or text). Best effort only:
 * an unexpected response shape must never break the request.
 *
 * @param {object} resBody - Parsed upstream response body.
 * @returns {void}
 */
function logUpstreamChoice(resBody) {
  try {
    const first = resBody.choices?.[0];
    const tc = first?.message?.tool_calls;
    if (tc && tc.length > 0) {
      const names = tc.map((c) => c.function?.name).join(", ");
      console.log(`[nvidia-proxy] model called: [${names}] (${tc.length} calls)`);
      return;
    }
    const content = first?.message?.content;
    const fr = first?.finish_reason;
    console.log(`[nvidia-proxy] model response: text (${content ? content.length : 0} chars) finish_reason=${fr}`);
    if (!content || content.length === 0) {
      console.log(`[nvidia-proxy] EMPTY RESPONSE DEBUG: ${(JSON.stringify(first) ?? "undefined").slice(0, 500)}`);
    }
  } catch {
    // Deliberately ignored: logging is diagnostic only.
  }
}

/**
 * Repair the first choice of an upstream response in place:
 *   1. "ghost" tool calls — finish_reason announces tool calls but none are
 *      present — are turned into a normal "stop";
 *   2. tool calls whose name is missing or not among `allTools` are removed
 *      (and finish_reason becomes "stop" if none remain);
 *   3. if more than one tool call is left, only the first is kept.
 *
 * @param {object} resBody - Parsed upstream response body; mutated in place.
 * @param {Array<object>} allTools - The client's original (unreduced) tool list.
 * @returns {boolean} true when the response was modified.
 */
function repairToolCallResponse(resBody, allTools) {
  const choice = resBody.choices?.[0];
  if (!choice) return false;

  const calls = choice.message?.tool_calls;

  if ((choice.finish_reason === "tool_calls" || choice.finish_reason === "function_call") && !calls?.length) {
    console.warn(`[nvidia-proxy] GHOST TOOL CALL: finish_reason=${choice.finish_reason} but no tool_calls — fixing to stop`);
    choice.finish_reason = "stop";
    return true;
  }

  if (!Array.isArray(calls)) return false;

  let changed = false;

  // Compare against ALL original tools, not just the reduced set.
  const allToolNames = new Set(allTools.map((t) => t.function?.name));
  const isValid = (tc) => Boolean(tc?.function?.name) && allToolNames.has(tc.function.name);
  const invalidCalls = calls.filter((tc) => !isValid(tc));
  if (invalidCalls.length > 0) {
    const badNames = invalidCalls.map((tc) => tc?.function?.name || "(empty)").join(", ");
    console.warn(`[nvidia-proxy] CALLAUTO DETECTED: invalid tool names [${badNames}] — stripping tool_calls, returning text-only`);
    const validCalls = calls.filter(isValid);
    if (validCalls.length === 0) {
      delete choice.message.tool_calls;
      choice.finish_reason = "stop";
      return true;
    }
    choice.message.tool_calls = validCalls;
    changed = true;
  }

  // The proxy promises a single tool call per response, also after invalid
  // calls were filtered out above.
  const remaining = choice.message.tool_calls;
  if (remaining.length > 1) {
    console.log(
      `[nvidia-proxy] trimming ${remaining.length} tool_calls to 1 (${remaining[0].function?.name})`,
    );
    choice.message.tool_calls = [remaining[0]];
    changed = true;
  }

  return changed;
}

/**
 * Core proxy logic. May throw; proxyRequest() is the error boundary.
 *
 * Requests that are not JSON chat completions, or that carry no tools, are
 * forwarded unchanged. Tool requests are rewritten before forwarding
 * (parallel tool calls disabled, streaming disabled, tool list reduced to 16,
 * single-tool system instruction added, system prompt and history trimmed)
 * and retried with progressively fewer tools when upstream rejects them with
 * a 400 mentioning "single tool-calls". Responses are repaired with
 * repairToolCallResponse() before being handed to the client.
 *
 * @param {import("node:http").IncomingMessage} clientReq
 * @param {import("node:http").ServerResponse} clientRes
 * @returns {Promise<void>}
 */
async function proxyRequestInner(clientReq, clientRes) {
  const chunks = [];
  for await (const chunk of clientReq) chunks.push(chunk);
  const body = Buffer.concat(chunks);
  const contentType = clientReq.headers["content-type"] || "";

  const isChatCompletion =
    contentType.includes("application/json") &&
    clientReq.url.includes("/chat/completions");

  let parsed = null;
  if (isChatCompletion) {
    try {
      parsed = JSON.parse(body.toString("utf-8"));
    } catch {
      // Malformed JSON: fall through and forward the raw body untouched.
    }
  }

  // For non-tool requests or non-chat-completions, just proxy through.
  if (!parsed || !Array.isArray(parsed.tools) || parsed.tools.length === 0) {
    const res = await sendUpstream(clientReq.url, clientReq.method, clientReq.headers, body);
    clientRes.writeHead(res.status, res.headers);
    clientRes.end(res.body);
    return;
  }

  // Keep the original tool list: retries re-reduce from it, and response
  // validation checks tool names against it.
  const allTools = [...parsed.tools];

  // Tool request — proactively limit tools to reduce parallel call tendency.
  parsed.parallel_tool_calls = false;
  // Force non-streaming for tool requests — the proxy buffers the full
  // response anyway, and streaming (SSE) prevents inspecting/fixing tool calls.
  const wasStreaming = parsed.stream;
  parsed.stream = false;
  delete parsed.stream_options;

  // With very large tool lists the model almost always tries parallel calls,
  // so the first attempt already goes out with at most 16 tools.
  if (allTools.length > 16) {
    parsed.tools = reduceTools(allTools, parsed.messages, 16);
    const names = parsed.tools.map((t) => t.function?.name).join(",");
    console.log(`[nvidia-proxy] proactive reduction: ${allTools.length}→${parsed.tools.length} tools [${names}]`);
  }

  // Add system instruction to force single tool call (once per request body).
  if (Array.isArray(parsed.messages)) {
    const hasInstruction = parsed.messages.some(
      (m) => m.role === "system" && typeof m.content === "string" && m.content.includes("ONE tool at a time"),
    );
    if (!hasInstruction) {
      parsed.messages.unshift({
        role: "system",
        content: SINGLE_TOOL_INSTRUCTION,
      });
    }
  }

  // Trim system messages FIRST to free up budget for conversation history.
  trimSystemMessages(parsed);
  trimConversationHistory(parsed);

  // Count consecutive tool rounds, keyed by model name.
  if (Array.isArray(parsed.messages) && parsed.tools?.length > 0) {
    const modelKey = parsed.model || "unknown";
    const nonSystemMsgs = parsed.messages.filter((m) => m.role !== "system");
    const lastNonSystem = nonSystemMsgs[nonSystemMsgs.length - 1];
    const hasToolResult = lastNonSystem?.role === "tool" || lastNonSystem?.role === "toolResult";

    let counter = toolCallCounters.get(modelKey) || 0;
    if (hasToolResult) {
      counter++;
    } else if (lastNonSystem?.role === "user") {
      // A fresh user turn starts a new run of tool rounds.
      counter = 0;
    }
    toolCallCounters.set(modelKey, counter);

    if (counter >= 20) {
      console.log(`[nvidia-proxy] TOOL LIMIT: ${counter} consecutive tool rounds (${modelKey}) — stripping tools, forcing text response`);
      parsed.tools = [];
      delete parsed.tool_choice;
      parsed.messages.push({
        role: "system",
        content: "STOP calling tools. You have made 20+ tool calls already. NOW respond to the user with a comprehensive text answer based on what you've gathered. Do NOT call any more tools. Do NOT output any special tokens or markup like <|tool_call_begin|> or <|tool_calls_section_begin|>. Write plain text only. Start your response with a greeting or summary — no XML, no special tokens, just normal language.",
      });
      toolCallCounters.set(modelKey, 0);
    }
  }

  const model = parsed.model || "unknown";

  // If the client wanted streaming, SSE headers are sent early so keep-alive
  // comments can flow while waiting for upstream.
  let sseStarted = false;
  let keepAliveTimer = null;
  const startSSEKeepAlive = () => {
    if (!wasStreaming || sseStarted) return;
    sseStarted = true;
    clientRes.writeHead(200, {
      "content-type": "text/event-stream; charset=utf-8",
      "cache-control": "no-cache",
      "connection": "keep-alive",
    });
    keepAliveTimer = setInterval(() => {
      try {
        clientRes.write(": keep-alive\n\n");
      } catch {
        // Best effort: the client may already be gone.
      }
    }, 5000);
  };
  const stopKeepAlive = () => {
    if (keepAliveTimer) {
      clearInterval(keepAliveTimer);
      keepAliveTimer = null;
    }
  };

  try {
    for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
      const currentToolCount = parsed.tools ? parsed.tools.length : 0;
      const reqBody = Buffer.from(JSON.stringify(parsed), "utf-8");
      console.log(
        `[nvidia-proxy] ${new Date().toISOString()} attempt=${attempt} model=${model} tools=${currentToolCount} bodyLen=${reqBody.length}`,
      );

      // Start keep-alive comments while upstream processes.
      if (wasStreaming) startSSEKeepAlive();

      let res;
      // Handle 429 rate limiting with internal retries + linear backoff.
      for (let r429 = 0; r429 <= MAX_429_RETRIES; r429++) {
        res = await sendUpstream(clientReq.url, clientReq.method, clientReq.headers, reqBody);
        if (res.status !== 429 || r429 === MAX_429_RETRIES) break;
        const delay = RATE_LIMIT_DELAY_MS * (r429 + 1);
        console.log(`[nvidia-proxy] 429 rate limited, waiting ${delay}ms (retry ${r429 + 1}/${MAX_429_RETRIES})...`);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }

      if (res.status === 400) {
        const errText = res.body.toString("utf-8");
        if (errText.includes("single tool-calls") && attempt < MAX_RETRIES) {
          console.log(`[nvidia-proxy] 400 parallel tool-calls rejected, retrying (${attempt}/${MAX_RETRIES})...`);

          if (attempt === 1) {
            // Attempt 2: reduce to 8 tools + strip tool_calls from history.
            parsed.tools = reduceTools(allTools, parsed.messages, 8);
            stripToolCallHistory(parsed.messages);
            const toolNames = parsed.tools.map((t) => t.function?.name).join(",");
            console.log(`[nvidia-proxy] retry: ${parsed.tools.length} tools [${toolNames}], stripped history`);
          } else if (attempt === 2) {
            // Attempt 3: single tool, forced choice.
            parsed.tools = reduceTools(allTools, parsed.messages, 1);
            const topTool = parsed.tools[0]?.function?.name;
            if (topTool) {
              parsed.tool_choice = { type: "function", function: { name: topTool } };
            }
            console.log(`[nvidia-proxy] retry: 1 tool, forced=${topTool}`);
          } else {
            // Attempt 4 (final): strip all tools, text-only.
            delete parsed.tools;
            delete parsed.tool_choice;
            delete parsed.parallel_tool_calls;
            stripToolCallHistory(parsed.messages);
            console.log(`[nvidia-proxy] final retry: stripped all tools, text-only`);
          }
          continue;
        }
      }

      // Parse the upstream body once; every check below shares the result.
      let resBody = null;
      if (res.status === 200) {
        try {
          const decoded = JSON.parse(res.body.toString("utf-8"));
          if (decoded !== null && typeof decoded === "object") resBody = decoded;
        } catch {
          // Not JSON (e.g. an SSE body) — passed through untouched below.
        }
      }

      if (resBody) {
        logUpstreamChoice(resBody);
        if (parsed.tools && repairToolCallResponse(resBody, allTools)) {
          stopKeepAlive();
          sendOk(clientRes, resBody, res.headers, wasStreaming);
          return;
        }
      }

      // Success or non-retryable error.
      stopKeepAlive();
      if (res.status >= 400) {
        console.error(`[nvidia-proxy] ${res.status} ERROR: ${res.body.toString("utf-8").slice(0, 300)}`);
        if (!clientRes.headersSent) {
          clientRes.writeHead(res.status, res.headers);
        }
        clientRes.end(res.body);
        return;
      }

      console.log(`[nvidia-proxy] ${res.status} OK (attempt ${attempt})`);
      if (wasStreaming && resBody) {
        sendOk(clientRes, resBody, res.headers, true);
      } else {
        // Non-streaming client, or a body that could not be parsed: send raw.
        if (!clientRes.headersSent) {
          clientRes.writeHead(res.status, res.headers);
        }
        clientRes.end(res.body);
      }
      return;
    }

    // Only reachable when MAX_RETRIES allows no attempt at all.
    throw new Error("no upstream attempt was made (MAX_RETRIES < 1)");
  } finally {
    // Never leave the keep-alive interval running, whatever path we exit by.
    stopKeepAlive();
  }
}
893
+
894
// Start the proxy: every incoming request is handled by proxyRequest().
const server = http.createServer(proxyRequest);

// Bind to the loopback interface only.
server.listen(port, "127.0.0.1", () => {
  console.log(`[nvidia-proxy] listening on http://127.0.0.1:${port}`);
  console.log(`[nvidia-proxy] proxying to ${targetUrl.origin}`);
  // The guaranteed count is derived from the constant so this line cannot
  // drift out of date when GUARANTEED_TOOLS changes.
  console.log(`[nvidia-proxy] retry strategy: 16 tools (${GUARANTEED_TOOLS.length} guaranteed)→8 tools→1 tool (forced)→text-only (max ${MAX_RETRIES} attempts)`);
  console.log(`[nvidia-proxy] also trims multi-tool responses to single tool call`);
});

// On SIGINT/SIGTERM, stop accepting connections and exit once the server has closed.
for (const sig of ["SIGINT", "SIGTERM"]) {
  process.on(sig, () => {
    console.log(`[nvidia-proxy] ${sig} received, shutting down`);
    server.close(() => process.exit(0));
  });
}