@miller-tech/uap 1.4.3 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/benchmarks/agents/naive-agent.d.ts.map +1 -1
  2. package/dist/benchmarks/agents/naive-agent.js +1 -1
  3. package/dist/benchmarks/agents/naive-agent.js.map +1 -1
  4. package/dist/benchmarks/agents/uap-agent.d.ts.map +1 -1
  5. package/dist/benchmarks/agents/uap-agent.js +1 -1
  6. package/dist/benchmarks/agents/uap-agent.js.map +1 -1
  7. package/dist/benchmarks/improved-benchmark.d.ts.map +1 -1
  8. package/dist/benchmarks/improved-benchmark.js +3 -3
  9. package/dist/benchmarks/improved-benchmark.js.map +1 -1
  10. package/dist/benchmarks/model-integration.js +1 -1
  11. package/dist/benchmarks/model-integration.js.map +1 -1
  12. package/dist/benchmarks/multi-turn-agent.d.ts.map +1 -1
  13. package/dist/benchmarks/multi-turn-agent.js +3 -3
  14. package/dist/benchmarks/multi-turn-agent.js.map +1 -1
  15. package/dist/bin/cli.js +4 -0
  16. package/dist/bin/cli.js.map +1 -1
  17. package/dist/bin/llama-server-optimize.js +1 -1
  18. package/dist/bin/llama-server-optimize.js.map +1 -1
  19. package/dist/cli/dashboard.js +1 -1
  20. package/dist/cli/dashboard.js.map +1 -1
  21. package/dist/cli/model.d.ts.map +1 -1
  22. package/dist/cli/model.js +222 -1
  23. package/dist/cli/model.js.map +1 -1
  24. package/dist/cli/policy.d.ts +16 -0
  25. package/dist/cli/policy.d.ts.map +1 -0
  26. package/dist/cli/policy.js +159 -0
  27. package/dist/cli/policy.js.map +1 -0
  28. package/dist/cli/rtk.js +2 -2
  29. package/dist/cli/rtk.js.map +1 -1
  30. package/dist/cli/uap.js +2 -2
  31. package/dist/cli/uap.js.map +1 -1
  32. package/dist/cli/update.d.ts.map +1 -1
  33. package/dist/cli/update.js +7 -10
  34. package/dist/cli/update.js.map +1 -1
  35. package/dist/cli/worktree.d.ts +1 -1
  36. package/dist/cli/worktree.d.ts.map +1 -1
  37. package/dist/cli/worktree.js +77 -1
  38. package/dist/cli/worktree.js.map +1 -1
  39. package/dist/coordination/deploy-batcher.d.ts.map +1 -1
  40. package/dist/coordination/deploy-batcher.js +3 -2
  41. package/dist/coordination/deploy-batcher.js.map +1 -1
  42. package/dist/mcp-router/executor/client.d.ts +2 -0
  43. package/dist/mcp-router/executor/client.d.ts.map +1 -1
  44. package/dist/mcp-router/executor/client.js +21 -1
  45. package/dist/mcp-router/executor/client.js.map +1 -1
  46. package/dist/mcp-router/output-compressor.js +1 -1
  47. package/dist/mcp-router/output-compressor.js.map +1 -1
  48. package/dist/mcp-router/server.d.ts.map +1 -1
  49. package/dist/mcp-router/server.js +116 -31
  50. package/dist/mcp-router/server.js.map +1 -1
  51. package/dist/mcp-router/tools/execute.d.ts.map +1 -1
  52. package/dist/mcp-router/tools/execute.js +3 -1
  53. package/dist/mcp-router/tools/execute.js.map +1 -1
  54. package/dist/memory/adaptive-context.d.ts.map +1 -1
  55. package/dist/memory/adaptive-context.js +2 -0
  56. package/dist/memory/adaptive-context.js.map +1 -1
  57. package/dist/memory/backends/factory.d.ts.map +1 -1
  58. package/dist/memory/backends/factory.js +2 -2
  59. package/dist/memory/backends/factory.js.map +1 -1
  60. package/dist/memory/backends/github.d.ts.map +1 -1
  61. package/dist/memory/backends/github.js +1 -1
  62. package/dist/memory/backends/github.js.map +1 -1
  63. package/dist/memory/dynamic-retrieval.d.ts.map +1 -1
  64. package/dist/memory/dynamic-retrieval.js +4 -2
  65. package/dist/memory/dynamic-retrieval.js.map +1 -1
  66. package/dist/memory/embeddings.d.ts.map +1 -1
  67. package/dist/memory/embeddings.js +12 -2
  68. package/dist/memory/embeddings.js.map +1 -1
  69. package/dist/memory/knowledge-graph.js +12 -12
  70. package/dist/memory/knowledge-graph.js.map +1 -1
  71. package/dist/models/analytics.js +8 -8
  72. package/dist/models/analytics.js.map +1 -1
  73. package/dist/models/router.d.ts +1 -0
  74. package/dist/models/router.d.ts.map +1 -1
  75. package/dist/models/router.js +13 -0
  76. package/dist/models/router.js.map +1 -1
  77. package/dist/models/types.d.ts +1 -1
  78. package/dist/models/types.d.ts.map +1 -1
  79. package/dist/policies/database-manager.d.ts +5 -0
  80. package/dist/policies/database-manager.d.ts.map +1 -1
  81. package/dist/policies/database-manager.js +20 -0
  82. package/dist/policies/database-manager.js.map +1 -1
  83. package/dist/policies/policy-gate.d.ts +16 -0
  84. package/dist/policies/policy-gate.d.ts.map +1 -1
  85. package/dist/policies/policy-gate.js +88 -2
  86. package/dist/policies/policy-gate.js.map +1 -1
  87. package/dist/policies/policy-memory.d.ts +1 -0
  88. package/dist/policies/policy-memory.d.ts.map +1 -1
  89. package/dist/policies/policy-memory.js +33 -4
  90. package/dist/policies/policy-memory.js.map +1 -1
  91. package/dist/policies/policy-tools.d.ts.map +1 -1
  92. package/dist/policies/policy-tools.js +2 -1
  93. package/dist/policies/policy-tools.js.map +1 -1
  94. package/dist/policies/schemas/policy.d.ts +3 -3
  95. package/dist/policies/schemas/policy.js +1 -1
  96. package/dist/policies/schemas/policy.js.map +1 -1
  97. package/dist/tasks/service.js +1 -1
  98. package/dist/tasks/service.js.map +1 -1
  99. package/docs/MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md +281 -0
  100. package/docs/MODEL_ROUTING_OPTIMIZATION_PLAN.md +320 -0
  101. package/docs/POLICY_GATE_IMPLEMENTATION.md +245 -0
  102. package/package.json +4 -1
  103. package/templates/hooks/session-start.sh +36 -6
  104. package/tools/agents/scripts/tool-choice-proxy.cjs +29 -33
  105. package/tools/agents/scripts/tool_call_wrapper.py +14 -5
@@ -52,8 +52,6 @@ const MAX_OUTPUT_HISTORY = 10;
52
52
  let consecutiveIdenticalOutputs = 0;
53
53
 
54
54
  // --- Option 6: Semantic dedup ---
55
- const recentCommandPrefixes = [];
56
- const MAX_CMD_HISTORY = 10;
57
55
 
58
56
  function simpleHash(s) {
59
57
  let h = 0;
@@ -127,7 +125,10 @@ const server = http.createServer((req, res) => {
127
125
  console.log(`[proxy] #${n} SOFT BUDGET: tool_choice=auto`);
128
126
  } else if (parsed.tools && parsed.tools.length > 0) {
129
127
  const original = parsed.tool_choice;
130
- parsed.tool_choice = FORCE_TOOL_CHOICE;
128
+ // Only override string values, preserve object structures (per-tool choice)
129
+ if (typeof parsed.tool_choice === 'string') {
130
+ parsed.tool_choice = FORCE_TOOL_CHOICE;
131
+ }
131
132
  toolForceCount++;
132
133
  }
133
134
 
@@ -233,43 +234,38 @@ const server = http.createServer((req, res) => {
233
234
  res.writeHead(proxyRes.statusCode || 200, proxyRes.headers);
234
235
 
235
236
  // === Track response for output-diff detection ===
236
- const responseChunks = [];
237
+ // Use incremental hashing to avoid buffering the entire response body.
238
+ // Previously the proxy accumulated all chunks in memory and re-parsed
239
+ // the full response just to compute a hash — doubling memory usage.
240
+ let runningHash = 0;
241
+ const isChatEndpoint = req.url && req.url.includes('/chat/completions');
242
+
237
243
  proxyRes.on('data', (chunk) => {
238
- responseChunks.push(chunk);
239
244
  res.write(chunk);
245
+ // Compute hash incrementally from each chunk
246
+ if (isChatEndpoint) {
247
+ const s = chunk.toString();
248
+ for (let i = 0; i < Math.min(s.length, 2000); i++) {
249
+ runningHash = ((runningHash << 5) - runningHash + s.charCodeAt(i)) | 0;
250
+ }
251
+ }
240
252
  });
241
253
  proxyRes.on('end', () => {
242
254
  res.end();
243
255
 
244
256
  // Track output hash for diff detection
245
- if (req.url && req.url.includes('/chat/completions')) {
246
- const responseBody = Buffer.concat(responseChunks).toString();
247
- try {
248
- const respParsed = JSON.parse(responseBody);
249
- const content = respParsed?.choices?.[0]?.message?.content || '';
250
- const toolCalls = respParsed?.choices?.[0]?.message?.tool_calls || [];
251
-
252
- // Hash the response content + tool call args
253
- const hashInput =
254
- content + toolCalls.map((tc) => tc?.function?.arguments || '').join('');
255
-
256
- if (hashInput.length > 0) {
257
- const hash = simpleHash(hashInput);
258
- if (
259
- recentOutputHashes.length > 0 &&
260
- recentOutputHashes[recentOutputHashes.length - 1] === hash
261
- ) {
262
- consecutiveIdenticalOutputs++;
263
- } else {
264
- consecutiveIdenticalOutputs = 0;
265
- }
266
- recentOutputHashes.push(hash);
267
- if (recentOutputHashes.length > MAX_OUTPUT_HISTORY) {
268
- recentOutputHashes.shift();
269
- }
270
- }
271
- } catch (e) {
272
- // Ignore parse errors on response
257
+ if (isChatEndpoint && runningHash !== 0) {
258
+ if (
259
+ recentOutputHashes.length > 0 &&
260
+ recentOutputHashes[recentOutputHashes.length - 1] === runningHash
261
+ ) {
262
+ consecutiveIdenticalOutputs++;
263
+ } else {
264
+ consecutiveIdenticalOutputs = 0;
265
+ }
266
+ recentOutputHashes.push(runningHash);
267
+ if (recentOutputHashes.length > MAX_OUTPUT_HISTORY) {
268
+ recentOutputHashes.shift();
273
269
  }
274
270
  }
275
271
  });
@@ -83,7 +83,7 @@ MODEL_PROFILES: Dict[str, Dict[str, Any]] = {
83
83
  "dynamic_temp_floor": 0.2,
84
84
  },
85
85
  "qwen35": {
86
- "temperature": 0.6,
86
+ "temperature": 0.3,
87
87
  "top_p": 0.9,
88
88
  "presence_penalty": 0.0,
89
89
  "max_tokens": 4096,
@@ -105,7 +105,7 @@ MODEL_PROFILES: Dict[str, Dict[str, Any]] = {
105
105
  "CRITICAL: You MUST emit ALL tool calls in a SINGLE response. "
106
106
  "Each tool call must be a separate <tool_call>...</tool_call> block. "
107
107
  "Do NOT call one tool and wait - emit ALL tool calls together NOW. "
108
- "If asked to do 3 things, you must produce 3 tool calls in one response."
108
+ "You must produce all required tool calls in one response."
109
109
  ),
110
110
  },
111
111
  "llama": {
@@ -206,7 +206,7 @@ class ToolCallClient:
206
206
  "max_tokens": 4096,
207
207
  "enable_thinking": False,
208
208
  "max_retries": 3,
209
- "backoff_factor": 2.0,
209
+ "backoff_factor": 1.0,
210
210
  "base_url": "http://127.0.0.1:8080/v1",
211
211
  "api_key": "not-needed",
212
212
  "model": "default",
@@ -337,8 +337,12 @@ class ToolCallClient:
337
337
  # Make a copy of messages to avoid modifying original
338
338
  current_messages = [msg.copy() for msg in messages]
339
339
 
340
- # Strategy 2: Inject multi-tool system prompt
341
- if self.config.get("batch_tool_calls") and len(tools) > 1:
340
+ # Strategy 2: Inject multi-tool system prompt (only when multiple tool calls expected)
341
+ if (
342
+ self.config.get("batch_tool_calls")
343
+ and expected_tool_calls
344
+ and expected_tool_calls > 1
345
+ ):
342
346
  batch_prompt = self.config.get(
343
347
  "batch_system_prompt", DEFAULT_BATCH_SYSTEM_PROMPT
344
348
  )
@@ -402,6 +406,11 @@ class ToolCallClient:
402
406
  "enable_thinking": self.config.get("enable_thinking", False)
403
407
  }
404
408
  }
409
+ # Version check: llama.cpp >= 3761 supports chat_template_kwargs
410
+ # Older versions will ignore unknown extra_body keys
411
+ logger.debug(
412
+ f"Sending chat_template_kwargs with enable_thinking={self.config.get('enable_thinking', False)}"
413
+ )
405
414
 
406
415
  logger.debug(
407
416
  f"Attempt {attempt + 1}/{max_retries}: "