@webmcp-auto-ui/agent 2.5.24 → 2.5.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@webmcp-auto-ui/agent",
-  "version": "2.5.24",
+  "version": "2.5.25",
   "description": "LLM agent loop + remote/WASM/local providers + MCP wrapper",
   "license": "AGPL-3.0-or-later",
   "type": "module",
package/src/loop.ts CHANGED
@@ -92,6 +92,8 @@ export interface AgentLoopOptions {
   maxIterations?: number;
   maxTokens?: number;
   maxTools?: number;
+  /** WASM-only: cap on conversation messages sent to the model. Default: derived from contextSize. */
+  maxMessages?: number;
   temperature?: number;
   topK?: number;
   cacheEnabled?: boolean;
@@ -125,6 +127,7 @@ export async function runAgentLoop(
     maxIterations = 5,
     maxTokens,
     maxTools,
+    maxMessages,
     temperature,
     topK,
     cacheEnabled = true,
@@ -264,7 +267,7 @@ export async function runAgentLoop(
     const t0 = performance.now();
     let streamingText = '';
     const response = await provider.chat(messages, iterationTools, {
-      signal, cacheEnabled, system: iterationSystemPrompt, maxTokens, maxTools, temperature, topK,
+      signal, cacheEnabled, system: iterationSystemPrompt, maxTokens, maxTools, maxMessages, temperature, topK,
       onToken: callbacks.onToken ? (token) => {
         callbacks.onToken!(token);
         streamingText += token;
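
For orientation: the new field sits alongside the other per-request caps in AgentLoopOptions and is forwarded as-is to provider.chat. A minimal caller-side sketch (values are illustrative, not package defaults):

  // Illustrative only; every field shown is optional.
  const opts: Partial<AgentLoopOptions> = {
    maxIterations: 5,
    maxTokens: 1024,
    maxMessages: 8,   // WASM-only: send at most the last 8 conversation messages
    temperature: 0.7,
  };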
@@ -176,7 +176,7 @@ export class WasmProvider implements LLMProvider {
   async chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
-    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
+    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number; maxMessages?: number }
   ): Promise<LLMResponse> {
     if (this.status !== 'ready') await this.initialize();
     if (!this.inference) throw new Error('Model not initialized');
@@ -202,7 +202,7 @@ export class WasmProvider implements LLMProvider {
   private async _chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
-    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
+    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number; maxMessages?: number }
   ): Promise<LLMResponse> {
     // Apply per-request options
     if (options?.maxTokens || options?.temperature || options?.topK) {
@@ -222,7 +222,7 @@ export class WasmProvider implements LLMProvider {
 
     // Aggressive clipping: Gemma struggles with long conversations — dynamic cap based on context size
     const contextTokens = this.opts.contextSize ?? 4096;
-    const MAX_MESSAGES = contextTokens <= 4096 ? 8 : contextTokens <= 8192 ? 16 : 32;
+    const MAX_MESSAGES = options?.maxMessages ?? Math.max(4, Math.floor(contextTokens / 512));
     while (messages.length > MAX_MESSAGES) {
       messages = messages.slice(1);
     }
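
When the caller does not set maxMessages, the cap now scales linearly with the configured context window rather than snapping to three fixed tiers; it matches the old values at 4096, 8192, and 16384 tokens and interpolates in between. A quick check of the formula (a sketch, not code from the package):

  const capFor = (contextTokens: number) => Math.max(4, Math.floor(contextTokens / 512));
  capFor(4096);   // 8  (old tier: 8)
  capFor(8192);   // 16 (old tier: 16)
  capFor(16384);  // 32 (old tier: 32)
  capFor(2048);   // 4  (old tier: 8; small contexts are now clipped harder)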
@@ -660,9 +660,23 @@ export class WasmProvider implements LLMProvider {
   private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string, maxTools?: number): string {
     const systemParts: string[] = [];
 
-    // Inject system prompt from settings if provided
+    // Inject system prompt from settings if provided.
+    // Rewrite paren syntax "tool_name()" / "tool_name(args)" to Gemma 4 native call syntax.
+    // Without this, Gemma mimics the paren syntax as plain text (regression from commit 2724b9e).
     if (systemPrompt) {
-      systemParts.push(systemPrompt);
+      const gemmaPrompt = systemPrompt.replace(
+        /\b([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]*)\)/g,
+        (_full, name, args) => {
+          const trimmed = args.trim();
+          if (!trimmed) return `<|tool_call>call:${name}{}<tool_call|>`;
+          const argBody = trimmed
+            .split(',')
+            .map((a: string) => `${a.trim()}:<|"|>...<|"|>`)
+            .join(',');
+          return `<|tool_call>call:${name}{${argBody}}<tool_call|>`;
+        }
+      );
+      systemParts.push(gemmaPrompt);
     }
 
     if (tools.length > 0) {
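
To make the rewrite concrete, here is its effect on a hypothetical system prompt (tool names and prompt text invented for illustration; the output format follows the template strings above):

  Before: Use search_web(query) to look things up, or call reset().
  After:  Use <|tool_call>call:search_web{query:<|"|>...<|"|>}<tool_call|> to look things up, or call <|tool_call>call:reset{}<tool_call|>.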
package/src/types.ts CHANGED
@@ -51,7 +51,7 @@ export interface LLMProvider {
   chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
-    options?: { signal?: AbortSignal; cacheEnabled?: boolean; system?: string; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; maxTools?: number }
+    options?: { signal?: AbortSignal; cacheEnabled?: boolean; system?: string; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; maxTools?: number; maxMessages?: number }
   ): Promise<LLMResponse>;
 }
 
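
Downstream consumers can pass the new field through any LLMProvider uniformly; per the option docs above it is only acted on by the WASM provider, and other providers can ignore it. A pass-through sketch (variable names here are assumptions, not package code):

  const response: LLMResponse = await provider.chat(messages, tools, {
    system: systemPrompt,
    maxTokens: 512,
    maxMessages: 8,   // new in 2.5.25; WASM-only per the loop.ts option docs
    cacheEnabled: true,
  });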