@miller-tech/uap 1.4.3 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/agents/naive-agent.d.ts.map +1 -1
- package/dist/benchmarks/agents/naive-agent.js +1 -1
- package/dist/benchmarks/agents/naive-agent.js.map +1 -1
- package/dist/benchmarks/agents/uap-agent.d.ts.map +1 -1
- package/dist/benchmarks/agents/uap-agent.js +1 -1
- package/dist/benchmarks/agents/uap-agent.js.map +1 -1
- package/dist/benchmarks/improved-benchmark.d.ts.map +1 -1
- package/dist/benchmarks/improved-benchmark.js +3 -3
- package/dist/benchmarks/improved-benchmark.js.map +1 -1
- package/dist/benchmarks/model-integration.js +1 -1
- package/dist/benchmarks/model-integration.js.map +1 -1
- package/dist/benchmarks/multi-turn-agent.d.ts.map +1 -1
- package/dist/benchmarks/multi-turn-agent.js +3 -3
- package/dist/benchmarks/multi-turn-agent.js.map +1 -1
- package/dist/bin/cli.js +4 -0
- package/dist/bin/cli.js.map +1 -1
- package/dist/bin/llama-server-optimize.js +1 -1
- package/dist/bin/llama-server-optimize.js.map +1 -1
- package/dist/cli/dashboard.js +1 -1
- package/dist/cli/dashboard.js.map +1 -1
- package/dist/cli/model.d.ts.map +1 -1
- package/dist/cli/model.js +222 -1
- package/dist/cli/model.js.map +1 -1
- package/dist/cli/policy.d.ts +16 -0
- package/dist/cli/policy.d.ts.map +1 -0
- package/dist/cli/policy.js +159 -0
- package/dist/cli/policy.js.map +1 -0
- package/dist/cli/rtk.js +2 -2
- package/dist/cli/rtk.js.map +1 -1
- package/dist/cli/uap.js +2 -2
- package/dist/cli/uap.js.map +1 -1
- package/dist/cli/update.d.ts.map +1 -1
- package/dist/cli/update.js +7 -10
- package/dist/cli/update.js.map +1 -1
- package/dist/cli/worktree.d.ts +1 -1
- package/dist/cli/worktree.d.ts.map +1 -1
- package/dist/cli/worktree.js +77 -1
- package/dist/cli/worktree.js.map +1 -1
- package/dist/coordination/deploy-batcher.d.ts.map +1 -1
- package/dist/coordination/deploy-batcher.js +3 -2
- package/dist/coordination/deploy-batcher.js.map +1 -1
- package/dist/mcp-router/executor/client.d.ts +2 -0
- package/dist/mcp-router/executor/client.d.ts.map +1 -1
- package/dist/mcp-router/executor/client.js +21 -1
- package/dist/mcp-router/executor/client.js.map +1 -1
- package/dist/mcp-router/output-compressor.js +1 -1
- package/dist/mcp-router/output-compressor.js.map +1 -1
- package/dist/mcp-router/server.d.ts.map +1 -1
- package/dist/mcp-router/server.js +116 -31
- package/dist/mcp-router/server.js.map +1 -1
- package/dist/mcp-router/tools/execute.d.ts.map +1 -1
- package/dist/mcp-router/tools/execute.js +3 -1
- package/dist/mcp-router/tools/execute.js.map +1 -1
- package/dist/memory/adaptive-context.d.ts.map +1 -1
- package/dist/memory/adaptive-context.js +2 -0
- package/dist/memory/adaptive-context.js.map +1 -1
- package/dist/memory/backends/factory.d.ts.map +1 -1
- package/dist/memory/backends/factory.js +2 -2
- package/dist/memory/backends/factory.js.map +1 -1
- package/dist/memory/backends/github.d.ts.map +1 -1
- package/dist/memory/backends/github.js +1 -1
- package/dist/memory/backends/github.js.map +1 -1
- package/dist/memory/dynamic-retrieval.d.ts.map +1 -1
- package/dist/memory/dynamic-retrieval.js +4 -2
- package/dist/memory/dynamic-retrieval.js.map +1 -1
- package/dist/memory/embeddings.d.ts.map +1 -1
- package/dist/memory/embeddings.js +12 -2
- package/dist/memory/embeddings.js.map +1 -1
- package/dist/memory/knowledge-graph.js +12 -12
- package/dist/memory/knowledge-graph.js.map +1 -1
- package/dist/models/analytics.js +8 -8
- package/dist/models/analytics.js.map +1 -1
- package/dist/models/router.d.ts +1 -0
- package/dist/models/router.d.ts.map +1 -1
- package/dist/models/router.js +13 -0
- package/dist/models/router.js.map +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.d.ts.map +1 -1
- package/dist/policies/database-manager.d.ts +5 -0
- package/dist/policies/database-manager.d.ts.map +1 -1
- package/dist/policies/database-manager.js +20 -0
- package/dist/policies/database-manager.js.map +1 -1
- package/dist/policies/policy-gate.d.ts +16 -0
- package/dist/policies/policy-gate.d.ts.map +1 -1
- package/dist/policies/policy-gate.js +88 -2
- package/dist/policies/policy-gate.js.map +1 -1
- package/dist/policies/policy-memory.d.ts +1 -0
- package/dist/policies/policy-memory.d.ts.map +1 -1
- package/dist/policies/policy-memory.js +33 -4
- package/dist/policies/policy-memory.js.map +1 -1
- package/dist/policies/policy-tools.d.ts.map +1 -1
- package/dist/policies/policy-tools.js +2 -1
- package/dist/policies/policy-tools.js.map +1 -1
- package/dist/policies/schemas/policy.d.ts +3 -3
- package/dist/policies/schemas/policy.js +1 -1
- package/dist/policies/schemas/policy.js.map +1 -1
- package/dist/tasks/service.js +1 -1
- package/dist/tasks/service.js.map +1 -1
- package/docs/MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md +281 -0
- package/docs/MODEL_ROUTING_OPTIMIZATION_PLAN.md +320 -0
- package/docs/POLICY_GATE_IMPLEMENTATION.md +245 -0
- package/package.json +4 -1
- package/templates/hooks/session-start.sh +36 -6
- package/tools/agents/scripts/tool-choice-proxy.cjs +29 -33
- package/tools/agents/scripts/tool_call_wrapper.py +14 -5
|
@@ -52,8 +52,6 @@ const MAX_OUTPUT_HISTORY = 10;
|
|
|
52
52
|
let consecutiveIdenticalOutputs = 0;
|
|
53
53
|
|
|
54
54
|
// --- Option 6: Semantic dedup ---
|
|
55
|
-
const recentCommandPrefixes = [];
|
|
56
|
-
const MAX_CMD_HISTORY = 10;
|
|
57
55
|
|
|
58
56
|
function simpleHash(s) {
|
|
59
57
|
let h = 0;
|
|
@@ -127,7 +125,10 @@ const server = http.createServer((req, res) => {
|
|
|
127
125
|
console.log(`[proxy] #${n} SOFT BUDGET: tool_choice=auto`);
|
|
128
126
|
} else if (parsed.tools && parsed.tools.length > 0) {
|
|
129
127
|
const original = parsed.tool_choice;
|
|
130
|
-
|
|
128
|
+
// Only override string values, preserve object structures (per-tool choice)
|
|
129
|
+
if (typeof parsed.tool_choice === 'string') {
|
|
130
|
+
parsed.tool_choice = FORCE_TOOL_CHOICE;
|
|
131
|
+
}
|
|
131
132
|
toolForceCount++;
|
|
132
133
|
}
|
|
133
134
|
|
|
@@ -233,43 +234,38 @@ const server = http.createServer((req, res) => {
|
|
|
233
234
|
res.writeHead(proxyRes.statusCode || 200, proxyRes.headers);
|
|
234
235
|
|
|
235
236
|
// === Track response for output-diff detection ===
|
|
236
|
-
|
|
237
|
+
// Use incremental hashing to avoid buffering the entire response body.
|
|
238
|
+
// Previously the proxy accumulated all chunks in memory and re-parsed
|
|
239
|
+
// the full response just to compute a hash — doubling memory usage.
|
|
240
|
+
let runningHash = 0;
|
|
241
|
+
const isChatEndpoint = req.url && req.url.includes('/chat/completions');
|
|
242
|
+
|
|
237
243
|
proxyRes.on('data', (chunk) => {
|
|
238
|
-
responseChunks.push(chunk);
|
|
239
244
|
res.write(chunk);
|
|
245
|
+
// Compute hash incrementally from each chunk
|
|
246
|
+
if (isChatEndpoint) {
|
|
247
|
+
const s = chunk.toString();
|
|
248
|
+
for (let i = 0; i < Math.min(s.length, 2000); i++) {
|
|
249
|
+
runningHash = ((runningHash << 5) - runningHash + s.charCodeAt(i)) | 0;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
240
252
|
});
|
|
241
253
|
proxyRes.on('end', () => {
|
|
242
254
|
res.end();
|
|
243
255
|
|
|
244
256
|
// Track output hash for diff detection
|
|
245
|
-
if (
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
const hash = simpleHash(hashInput);
|
|
258
|
-
if (
|
|
259
|
-
recentOutputHashes.length > 0 &&
|
|
260
|
-
recentOutputHashes[recentOutputHashes.length - 1] === hash
|
|
261
|
-
) {
|
|
262
|
-
consecutiveIdenticalOutputs++;
|
|
263
|
-
} else {
|
|
264
|
-
consecutiveIdenticalOutputs = 0;
|
|
265
|
-
}
|
|
266
|
-
recentOutputHashes.push(hash);
|
|
267
|
-
if (recentOutputHashes.length > MAX_OUTPUT_HISTORY) {
|
|
268
|
-
recentOutputHashes.shift();
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
} catch (e) {
|
|
272
|
-
// Ignore parse errors on response
|
|
257
|
+
if (isChatEndpoint && runningHash !== 0) {
|
|
258
|
+
if (
|
|
259
|
+
recentOutputHashes.length > 0 &&
|
|
260
|
+
recentOutputHashes[recentOutputHashes.length - 1] === runningHash
|
|
261
|
+
) {
|
|
262
|
+
consecutiveIdenticalOutputs++;
|
|
263
|
+
} else {
|
|
264
|
+
consecutiveIdenticalOutputs = 0;
|
|
265
|
+
}
|
|
266
|
+
recentOutputHashes.push(runningHash);
|
|
267
|
+
if (recentOutputHashes.length > MAX_OUTPUT_HISTORY) {
|
|
268
|
+
recentOutputHashes.shift();
|
|
273
269
|
}
|
|
274
270
|
}
|
|
275
271
|
});
|
|
@@ -83,7 +83,7 @@ MODEL_PROFILES: Dict[str, Dict[str, Any]] = {
|
|
|
83
83
|
"dynamic_temp_floor": 0.2,
|
|
84
84
|
},
|
|
85
85
|
"qwen35": {
|
|
86
|
-
"temperature": 0.
|
|
86
|
+
"temperature": 0.3,
|
|
87
87
|
"top_p": 0.9,
|
|
88
88
|
"presence_penalty": 0.0,
|
|
89
89
|
"max_tokens": 4096,
|
|
@@ -105,7 +105,7 @@ MODEL_PROFILES: Dict[str, Dict[str, Any]] = {
|
|
|
105
105
|
"CRITICAL: You MUST emit ALL tool calls in a SINGLE response. "
|
|
106
106
|
"Each tool call must be a separate <tool_call>...</tool_call> block. "
|
|
107
107
|
"Do NOT call one tool and wait - emit ALL tool calls together NOW. "
|
|
108
|
-
"
|
|
108
|
+
"You must produce all required tool calls in one response."
|
|
109
109
|
),
|
|
110
110
|
},
|
|
111
111
|
"llama": {
|
|
@@ -206,7 +206,7 @@ class ToolCallClient:
|
|
|
206
206
|
"max_tokens": 4096,
|
|
207
207
|
"enable_thinking": False,
|
|
208
208
|
"max_retries": 3,
|
|
209
|
-
"backoff_factor":
|
|
209
|
+
"backoff_factor": 1.0,
|
|
210
210
|
"base_url": "http://127.0.0.1:8080/v1",
|
|
211
211
|
"api_key": "not-needed",
|
|
212
212
|
"model": "default",
|
|
@@ -337,8 +337,12 @@ class ToolCallClient:
|
|
|
337
337
|
# Make a copy of messages to avoid modifying original
|
|
338
338
|
current_messages = [msg.copy() for msg in messages]
|
|
339
339
|
|
|
340
|
-
# Strategy 2: Inject multi-tool system prompt
|
|
341
|
-
if
|
|
340
|
+
# Strategy 2: Inject multi-tool system prompt (only when multiple tool calls expected)
|
|
341
|
+
if (
|
|
342
|
+
self.config.get("batch_tool_calls")
|
|
343
|
+
and expected_tool_calls
|
|
344
|
+
and expected_tool_calls > 1
|
|
345
|
+
):
|
|
342
346
|
batch_prompt = self.config.get(
|
|
343
347
|
"batch_system_prompt", DEFAULT_BATCH_SYSTEM_PROMPT
|
|
344
348
|
)
|
|
@@ -402,6 +406,11 @@ class ToolCallClient:
|
|
|
402
406
|
"enable_thinking": self.config.get("enable_thinking", False)
|
|
403
407
|
}
|
|
404
408
|
}
|
|
409
|
+
# Version check: llama.cpp >= 3761 supports chat_template_kwargs
|
|
410
|
+
# Older versions will ignore unknown extra_body keys
|
|
411
|
+
logger.debug(
|
|
412
|
+
f"Sending chat_template_kwargs with enable_thinking={self.config.get('enable_thinking', False)}"
|
|
413
|
+
)
|
|
405
414
|
|
|
406
415
|
logger.debug(
|
|
407
416
|
f"Attempt {attempt + 1}/{max_retries}: "
|