@probelabs/probe 0.6.0-rc224 → 0.6.0-rc225
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc225-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc225-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc225-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/{probe-v0.6.0-rc224-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc225-x86_64-pc-windows-msvc.zip} +0 -0
- package/bin/binaries/probe-v0.6.0-rc225-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +279 -3
- package/build/agent/index.js +408 -8
- package/build/agent/mcp/xmlBridge.js +10 -7
- package/build/agent/simpleTelemetry.js +198 -0
- package/build/agent/tools.js +8 -5
- package/cjs/agent/ProbeAgent.cjs +231 -8
- package/cjs/agent/simpleTelemetry.cjs +177 -0
- package/cjs/index.cjs +408 -8
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +279 -3
- package/src/agent/mcp/xmlBridge.js +10 -7
- package/src/agent/simpleTelemetry.js +198 -0
- package/src/agent/tools.js +8 -5
- package/bin/binaries/probe-v0.6.0-rc224-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc224-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc224-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc224-x86_64-unknown-linux-musl.tar.gz +0 -0
package/src/agent/ProbeAgent.js
CHANGED
|
@@ -406,6 +406,209 @@ export class ProbeAgent {
|
|
|
406
406
|
return mcpToolNames.filter(toolName => this._isMcpToolAllowed(toolName));
|
|
407
407
|
}
|
|
408
408
|
|
|
409
|
+
/**
|
|
410
|
+
* Check if tracer is AppTracer (expects sessionId as first param) vs SimpleAppTracer
|
|
411
|
+
* @returns {boolean} - True if tracer is AppTracer style (requires sessionId)
|
|
412
|
+
* @private
|
|
413
|
+
*/
|
|
414
|
+
_isAppTracerStyle() {
|
|
415
|
+
// AppTracer has recordThinkingContent(sessionId, iteration, content) signature
|
|
416
|
+
// SimpleAppTracer has recordThinkingContent(content, metadata) signature
|
|
417
|
+
// We detect by checking if there's a sessionSpans map (AppTracer-specific)
|
|
418
|
+
return this.tracer && typeof this.tracer.sessionSpans !== 'undefined';
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
/**
|
|
422
|
+
* Record an error classification event for telemetry
|
|
423
|
+
* Provides unified error recording across all error types
|
|
424
|
+
* @param {string} errorType - Error type (wrapped_tool, unrecognized_tool, no_tool_call, circuit_breaker)
|
|
425
|
+
* @param {string} message - Error message
|
|
426
|
+
* @param {Object} context - Additional context data
|
|
427
|
+
* @param {number} iteration - Current iteration number
|
|
428
|
+
* @private
|
|
429
|
+
*/
|
|
430
|
+
_recordErrorTelemetry(errorType, message, context, iteration) {
|
|
431
|
+
if (!this.tracer) return;
|
|
432
|
+
|
|
433
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordErrorClassification === 'function') {
|
|
434
|
+
// AppTracer style: (sessionId, iteration, errorType, details)
|
|
435
|
+
this.tracer.recordErrorClassification(this.sessionId, iteration, errorType, {
|
|
436
|
+
message,
|
|
437
|
+
context
|
|
438
|
+
});
|
|
439
|
+
} else if (typeof this.tracer.recordErrorEvent === 'function') {
|
|
440
|
+
// SimpleAppTracer style: (errorType, details)
|
|
441
|
+
this.tracer.recordErrorEvent(errorType, {
|
|
442
|
+
message,
|
|
443
|
+
context: { ...context, iteration }
|
|
444
|
+
});
|
|
445
|
+
} else {
|
|
446
|
+
this.tracer.addEvent(`error.${errorType}`, {
|
|
447
|
+
'error.type': errorType,
|
|
448
|
+
'error.message': message,
|
|
449
|
+
'error.recoverable': errorType !== 'circuit_breaker',
|
|
450
|
+
'error.context': JSON.stringify(context).substring(0, 1000),
|
|
451
|
+
'iteration': iteration
|
|
452
|
+
});
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
/**
|
|
457
|
+
* Record AI thinking content for telemetry
|
|
458
|
+
* @param {string} thinkingContent - The thinking content
|
|
459
|
+
* @param {number} iteration - Current iteration number
|
|
460
|
+
* @private
|
|
461
|
+
*/
|
|
462
|
+
_recordThinkingTelemetry(thinkingContent, iteration) {
|
|
463
|
+
if (!this.tracer || !thinkingContent) return;
|
|
464
|
+
|
|
465
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordThinkingContent === 'function') {
|
|
466
|
+
// AppTracer style: (sessionId, iteration, content)
|
|
467
|
+
this.tracer.recordThinkingContent(this.sessionId, iteration, thinkingContent);
|
|
468
|
+
} else if (typeof this.tracer.recordThinkingContent === 'function') {
|
|
469
|
+
// SimpleAppTracer style: (content, metadata)
|
|
470
|
+
this.tracer.recordThinkingContent(thinkingContent, { iteration });
|
|
471
|
+
} else {
|
|
472
|
+
this.tracer.addEvent('ai.thinking', {
|
|
473
|
+
'ai.thinking.content': thinkingContent.substring(0, 50000),
|
|
474
|
+
'ai.thinking.length': thinkingContent.length,
|
|
475
|
+
'iteration': iteration
|
|
476
|
+
});
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
/**
|
|
481
|
+
* Record AI tool decision for telemetry
|
|
482
|
+
* @param {string} toolName - The tool name
|
|
483
|
+
* @param {Object} params - Tool parameters
|
|
484
|
+
* @param {number} responseLength - Length of AI response
|
|
485
|
+
* @param {number} iteration - Current iteration number
|
|
486
|
+
* @private
|
|
487
|
+
*/
|
|
488
|
+
_recordToolDecisionTelemetry(toolName, params, responseLength, iteration) {
|
|
489
|
+
if (!this.tracer) return;
|
|
490
|
+
|
|
491
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordAIToolDecision === 'function') {
|
|
492
|
+
// AppTracer style: (sessionId, iteration, toolName, params)
|
|
493
|
+
this.tracer.recordAIToolDecision(this.sessionId, iteration, toolName, params);
|
|
494
|
+
} else if (typeof this.tracer.recordToolDecision === 'function') {
|
|
495
|
+
// SimpleAppTracer style: (toolName, params, metadata)
|
|
496
|
+
this.tracer.recordToolDecision(toolName, params, {
|
|
497
|
+
iteration,
|
|
498
|
+
'ai.tool_decision.raw_response_length': responseLength
|
|
499
|
+
});
|
|
500
|
+
} else {
|
|
501
|
+
this.tracer.addEvent('ai.tool_decision', {
|
|
502
|
+
'ai.tool_decision.name': toolName,
|
|
503
|
+
'ai.tool_decision.params': JSON.stringify(params || {}).substring(0, 2000),
|
|
504
|
+
'ai.tool_decision.raw_response_length': responseLength,
|
|
505
|
+
'iteration': iteration
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
/**
|
|
511
|
+
* Record tool result for telemetry
|
|
512
|
+
* @param {string} toolName - The tool name
|
|
513
|
+
* @param {string|Object} result - Tool result
|
|
514
|
+
* @param {boolean} success - Whether tool succeeded
|
|
515
|
+
* @param {number} durationMs - Execution duration in milliseconds
|
|
516
|
+
* @param {number} iteration - Current iteration number
|
|
517
|
+
* @private
|
|
518
|
+
*/
|
|
519
|
+
_recordToolResultTelemetry(toolName, result, success, durationMs, iteration) {
|
|
520
|
+
if (!this.tracer) return;
|
|
521
|
+
|
|
522
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordToolResult === 'function') {
|
|
523
|
+
// AppTracer style: (sessionId, iteration, toolName, result, success, durationMs)
|
|
524
|
+
this.tracer.recordToolResult(this.sessionId, iteration, toolName, result, success, durationMs);
|
|
525
|
+
} else if (typeof this.tracer.recordToolResult === 'function') {
|
|
526
|
+
// SimpleAppTracer style: (toolName, result, success, durationMs, metadata)
|
|
527
|
+
this.tracer.recordToolResult(toolName, result, success, durationMs, { iteration });
|
|
528
|
+
} else {
|
|
529
|
+
const resultStr = typeof result === 'string' ? result : JSON.stringify(result || '');
|
|
530
|
+
this.tracer.addEvent('tool.result', {
|
|
531
|
+
'tool.name': toolName,
|
|
532
|
+
'tool.result': resultStr.substring(0, 10000),
|
|
533
|
+
'tool.result.length': resultStr.length,
|
|
534
|
+
'tool.duration_ms': durationMs,
|
|
535
|
+
'tool.success': success,
|
|
536
|
+
'iteration': iteration
|
|
537
|
+
});
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
/**
|
|
542
|
+
* Record MCP tool lifecycle event for telemetry
|
|
543
|
+
* @param {string} phase - 'start' or 'end'
|
|
544
|
+
* @param {string} toolName - MCP tool name
|
|
545
|
+
* @param {Object} params - Tool parameters (for start) or null (for end)
|
|
546
|
+
* @param {number} iteration - Current iteration number
|
|
547
|
+
* @param {Object} [endData] - Additional data for end phase (result, success, durationMs, error)
|
|
548
|
+
* @private
|
|
549
|
+
*/
|
|
550
|
+
_recordMcpToolTelemetry(phase, toolName, params, iteration, endData = null) {
|
|
551
|
+
if (!this.tracer) return;
|
|
552
|
+
|
|
553
|
+
if (phase === 'start') {
|
|
554
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordMcpToolStart === 'function') {
|
|
555
|
+
// AppTracer style: (sessionId, iteration, toolName, serverName, params)
|
|
556
|
+
this.tracer.recordMcpToolStart(this.sessionId, iteration, toolName, 'mcp', params);
|
|
557
|
+
} else if (typeof this.tracer.recordMcpToolStart === 'function') {
|
|
558
|
+
// SimpleAppTracer style: (toolName, serverName, params, metadata)
|
|
559
|
+
this.tracer.recordMcpToolStart(toolName, 'mcp', params, { iteration });
|
|
560
|
+
} else {
|
|
561
|
+
this.tracer.addEvent('mcp.tool.start', {
|
|
562
|
+
'mcp.tool.name': toolName,
|
|
563
|
+
'mcp.tool.server': 'mcp',
|
|
564
|
+
'mcp.tool.params': JSON.stringify(params || {}).substring(0, 2000),
|
|
565
|
+
'iteration': iteration
|
|
566
|
+
});
|
|
567
|
+
}
|
|
568
|
+
} else if (phase === 'end' && endData) {
|
|
569
|
+
const { result, success, durationMs, error } = endData;
|
|
570
|
+
if (this._isAppTracerStyle() && typeof this.tracer.recordMcpToolEnd === 'function') {
|
|
571
|
+
// AppTracer style: (sessionId, iteration, toolName, serverName, result, success, durationMs, error)
|
|
572
|
+
this.tracer.recordMcpToolEnd(this.sessionId, iteration, toolName, 'mcp', result, success, durationMs, error);
|
|
573
|
+
} else if (typeof this.tracer.recordMcpToolEnd === 'function') {
|
|
574
|
+
// SimpleAppTracer style: (toolName, serverName, result, success, durationMs, error, metadata)
|
|
575
|
+
this.tracer.recordMcpToolEnd(toolName, 'mcp', result, success, durationMs, error, { iteration });
|
|
576
|
+
} else {
|
|
577
|
+
const resultStr = typeof result === 'string' ? result : JSON.stringify(result || '');
|
|
578
|
+
this.tracer.addEvent('mcp.tool.end', {
|
|
579
|
+
'mcp.tool.name': toolName,
|
|
580
|
+
'mcp.tool.server': 'mcp',
|
|
581
|
+
'mcp.tool.result': resultStr.substring(0, 10000),
|
|
582
|
+
'mcp.tool.result.length': resultStr.length,
|
|
583
|
+
'mcp.tool.duration_ms': durationMs,
|
|
584
|
+
'mcp.tool.success': success,
|
|
585
|
+
'mcp.tool.error': error,
|
|
586
|
+
'iteration': iteration
|
|
587
|
+
});
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* Record iteration lifecycle event for telemetry
|
|
594
|
+
* @param {string} phase - 'end' (start is already handled elsewhere)
|
|
595
|
+
* @param {number} iteration - Current iteration number
|
|
596
|
+
* @param {Object} data - Additional iteration data
|
|
597
|
+
* @private
|
|
598
|
+
*/
|
|
599
|
+
_recordIterationTelemetry(phase, iteration, data = {}) {
|
|
600
|
+
if (!this.tracer) return;
|
|
601
|
+
|
|
602
|
+
if (typeof this.tracer.recordIterationEvent === 'function') {
|
|
603
|
+
this.tracer.recordIterationEvent(phase, iteration, data);
|
|
604
|
+
} else {
|
|
605
|
+
this.tracer.addEvent(`iteration.${phase}`, {
|
|
606
|
+
'iteration': iteration,
|
|
607
|
+
...data
|
|
608
|
+
});
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
|
|
409
612
|
/**
|
|
410
613
|
* Initialize the agent asynchronously (must be called after constructor)
|
|
411
614
|
* This method initializes MCP and merges MCP tools into the tool list, and loads history from storage
|
|
@@ -2854,8 +3057,18 @@ Follow these instructions carefully:
|
|
|
2854
3057
|
const parsedTool = (this.mcpBridge && !options._disableTools)
|
|
2855
3058
|
? parseHybridXmlToolCall(assistantResponseContent, nativeTools, this.mcpBridge)
|
|
2856
3059
|
: parseXmlToolCallWithThinking(assistantResponseContent, validTools);
|
|
3060
|
+
|
|
3061
|
+
// Capture AI thinking content if present (for debugging and telemetry)
|
|
3062
|
+
if (parsedTool?.thinkingContent) {
|
|
3063
|
+
this._recordThinkingTelemetry(parsedTool.thinkingContent, currentIteration);
|
|
3064
|
+
}
|
|
3065
|
+
|
|
2857
3066
|
if (parsedTool) {
|
|
2858
3067
|
const { toolName, params } = parsedTool;
|
|
3068
|
+
|
|
3069
|
+
// Record AI tool decision for telemetry
|
|
3070
|
+
this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
|
|
3071
|
+
|
|
2859
3072
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
2860
3073
|
|
|
2861
3074
|
if (toolName === 'attempt_completion') {
|
|
@@ -2962,6 +3175,9 @@ Follow these instructions carefully:
|
|
|
2962
3175
|
|
|
2963
3176
|
if (type === 'mcp' && this.mcpBridge && this.mcpBridge.isMcpTool(toolName)) {
|
|
2964
3177
|
// Execute MCP tool
|
|
3178
|
+
const mcpStartTime = Date.now();
|
|
3179
|
+
this._recordMcpToolTelemetry('start', toolName, params, currentIteration);
|
|
3180
|
+
|
|
2965
3181
|
try {
|
|
2966
3182
|
// Log MCP tool execution in debug mode
|
|
2967
3183
|
if (this.debug) {
|
|
@@ -2999,6 +3215,15 @@ Follow these instructions carefully:
|
|
|
2999
3215
|
console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
|
|
3000
3216
|
}
|
|
3001
3217
|
|
|
3218
|
+
// Record MCP tool end event (success)
|
|
3219
|
+
const mcpDurationMs = Date.now() - mcpStartTime;
|
|
3220
|
+
this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
|
|
3221
|
+
result: toolResultContent,
|
|
3222
|
+
success: true,
|
|
3223
|
+
durationMs: mcpDurationMs,
|
|
3224
|
+
error: null
|
|
3225
|
+
});
|
|
3226
|
+
|
|
3002
3227
|
// Log MCP tool result in debug mode
|
|
3003
3228
|
if (this.debug) {
|
|
3004
3229
|
const preview = toolResultContent.length > 500 ? toolResultContent.substring(0, 500) + '...' : toolResultContent;
|
|
@@ -3011,6 +3236,15 @@ Follow these instructions carefully:
|
|
|
3011
3236
|
|
|
3012
3237
|
currentMessages.push({ role: 'user', content: `<tool_result>\n${toolResultContent}\n</tool_result>` });
|
|
3013
3238
|
} catch (error) {
|
|
3239
|
+
// Record MCP tool end event (failure)
|
|
3240
|
+
const mcpDurationMs = Date.now() - mcpStartTime;
|
|
3241
|
+
this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
|
|
3242
|
+
result: null,
|
|
3243
|
+
success: false,
|
|
3244
|
+
durationMs: mcpDurationMs,
|
|
3245
|
+
error: error.message
|
|
3246
|
+
});
|
|
3247
|
+
|
|
3014
3248
|
console.error(`Error executing MCP tool ${toolName}:`, error);
|
|
3015
3249
|
|
|
3016
3250
|
// Log MCP tool error in debug mode
|
|
@@ -3118,6 +3352,7 @@ Follow these instructions carefully:
|
|
|
3118
3352
|
};
|
|
3119
3353
|
|
|
3120
3354
|
let toolResult;
|
|
3355
|
+
const toolStartTime = Date.now();
|
|
3121
3356
|
try {
|
|
3122
3357
|
if (this.tracer) {
|
|
3123
3358
|
toolResult = await this.tracer.withSpan('tool.call', executeToolCall, {
|
|
@@ -3128,7 +3363,11 @@ Follow these instructions carefully:
|
|
|
3128
3363
|
} else {
|
|
3129
3364
|
toolResult = await executeToolCall();
|
|
3130
3365
|
}
|
|
3131
|
-
|
|
3366
|
+
|
|
3367
|
+
// Record tool result in telemetry
|
|
3368
|
+
const toolDurationMs = Date.now() - toolStartTime;
|
|
3369
|
+
this._recordToolResultTelemetry(toolName, toolResult, true, toolDurationMs, currentIteration);
|
|
3370
|
+
|
|
3132
3371
|
// Log tool result in debug mode
|
|
3133
3372
|
if (this.debug) {
|
|
3134
3373
|
const resultPreview = typeof toolResult === 'string'
|
|
@@ -3201,6 +3440,22 @@ Follow these instructions carefully:
|
|
|
3201
3440
|
content: toolResultMessage
|
|
3202
3441
|
});
|
|
3203
3442
|
|
|
3443
|
+
// Record conversation turns in telemetry
|
|
3444
|
+
if (this.tracer) {
|
|
3445
|
+
if (typeof this.tracer.recordConversationTurn === 'function') {
|
|
3446
|
+
this.tracer.recordConversationTurn('assistant', assistantResponseContent, {
|
|
3447
|
+
iteration: currentIteration,
|
|
3448
|
+
has_tool_call: true,
|
|
3449
|
+
tool_name: toolName
|
|
3450
|
+
});
|
|
3451
|
+
this.tracer.recordConversationTurn('tool_result', toolResultContent, {
|
|
3452
|
+
iteration: currentIteration,
|
|
3453
|
+
tool_name: toolName,
|
|
3454
|
+
tool_success: true
|
|
3455
|
+
});
|
|
3456
|
+
}
|
|
3457
|
+
}
|
|
3458
|
+
|
|
3204
3459
|
// NOTE: Automatic image processing removed (GitHub issue #305)
|
|
3205
3460
|
// Images are now only loaded when the AI explicitly calls the readImage tool
|
|
3206
3461
|
// This prevents: 1) implicit behavior that users don't expect
|
|
@@ -3294,6 +3549,10 @@ Follow these instructions carefully:
|
|
|
3294
3549
|
if (this.debug) {
|
|
3295
3550
|
console.log(`[DEBUG] Detected wrapped tool '${wrappedToolName}' in assistant response - wrong XML format.`);
|
|
3296
3551
|
}
|
|
3552
|
+
|
|
3553
|
+
// Record wrapped tool error in telemetry
|
|
3554
|
+
this._recordErrorTelemetry('wrapped_tool', 'Tool call wrapped in markdown', { toolName: wrappedToolName }, currentIteration);
|
|
3555
|
+
|
|
3297
3556
|
const toolError = new ParameterError(
|
|
3298
3557
|
`Tool '${wrappedToolName}' found but in WRONG FORMAT - do not wrap tools in other XML tags.`,
|
|
3299
3558
|
{
|
|
@@ -3318,12 +3577,19 @@ Remove ALL wrapper tags and use <${wrappedToolName}> directly as the outermost t
|
|
|
3318
3577
|
if (this.debug) {
|
|
3319
3578
|
console.log(`[DEBUG] Detected unrecognized tool '${unrecognizedTool}' in assistant response.`);
|
|
3320
3579
|
}
|
|
3580
|
+
|
|
3581
|
+
// Record unrecognized tool error in telemetry
|
|
3582
|
+
this._recordErrorTelemetry('unrecognized_tool', `Unknown tool: ${unrecognizedTool}`, { toolName: unrecognizedTool, validTools }, currentIteration);
|
|
3583
|
+
|
|
3321
3584
|
const toolError = new ParameterError(`Tool '${unrecognizedTool}' is not available in this context.`, {
|
|
3322
3585
|
suggestion: `Available tools: ${validTools.join(', ')}. Please use one of these tools instead.`
|
|
3323
3586
|
});
|
|
3324
3587
|
reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
|
|
3325
3588
|
} else {
|
|
3326
|
-
// No tool call detected at all -
|
|
3589
|
+
// No tool call detected at all - record in telemetry
|
|
3590
|
+
this._recordErrorTelemetry('no_tool_call', 'AI response did not contain tool call', { responsePreview: assistantResponseContent.substring(0, 500) }, currentIteration);
|
|
3591
|
+
|
|
3592
|
+
// Check if this is the last iteration
|
|
3327
3593
|
// On the last iteration, if the AI gave a substantive response without using
|
|
3328
3594
|
// attempt_completion, accept it as the final answer rather than losing the content
|
|
3329
3595
|
if (currentIteration >= maxIterations) {
|
|
@@ -3439,6 +3705,10 @@ Note: <attempt_complete></attempt_complete> reuses your PREVIOUS assistant messa
|
|
|
3439
3705
|
sameFormatErrorCount++;
|
|
3440
3706
|
if (sameFormatErrorCount >= MAX_REPEATED_FORMAT_ERRORS) {
|
|
3441
3707
|
const errorDesc = isWrapped ? 'wrapped tool format' : unrecognizedTool;
|
|
3708
|
+
|
|
3709
|
+
// Record circuit breaker error in telemetry
|
|
3710
|
+
this._recordErrorTelemetry('circuit_breaker', 'Format error limit exceeded', { formatErrorCount: sameFormatErrorCount, errorCategory }, currentIteration);
|
|
3711
|
+
|
|
3442
3712
|
console.error(`[ERROR] Format error category '${errorCategory}' repeated ${sameFormatErrorCount} times. Breaking loop early to prevent infinite iteration.`);
|
|
3443
3713
|
finalResult = `Error: Unable to complete request. The AI model repeatedly used incorrect tool call format (${errorDesc}). Please try rephrasing your question or using a different model.`;
|
|
3444
3714
|
break;
|
|
@@ -3454,13 +3724,19 @@ Note: <attempt_complete></attempt_complete> reuses your PREVIOUS assistant messa
|
|
|
3454
3724
|
}
|
|
3455
3725
|
}
|
|
3456
3726
|
|
|
3727
|
+
// Record iteration end event
|
|
3728
|
+
this._recordIterationTelemetry('end', currentIteration, {
|
|
3729
|
+
'iteration.completed': completionAttempted,
|
|
3730
|
+
'iteration.message_count': currentMessages.length
|
|
3731
|
+
});
|
|
3732
|
+
|
|
3457
3733
|
// Keep message history manageable
|
|
3458
3734
|
if (currentMessages.length > MAX_HISTORY_MESSAGES) {
|
|
3459
3735
|
const messagesBefore = currentMessages.length;
|
|
3460
3736
|
const systemMsg = currentMessages[0]; // Keep system message
|
|
3461
3737
|
const recentMessages = currentMessages.slice(-MAX_HISTORY_MESSAGES + 1);
|
|
3462
3738
|
currentMessages = [systemMsg, ...recentMessages];
|
|
3463
|
-
|
|
3739
|
+
|
|
3464
3740
|
if (this.debug) {
|
|
3465
3741
|
console.log(`[DEBUG] Trimmed message history from ${messagesBefore} to ${currentMessages.length} messages`);
|
|
3466
3742
|
}
|
|
@@ -321,14 +321,17 @@ export function parseHybridXmlToolCall(xmlString, nativeTools = [], mcpBridge =
|
|
|
321
321
|
// This includes thinking tag removal and attempt_complete recovery logic
|
|
322
322
|
const nativeResult = parseNativeXmlToolWithThinking(xmlString, nativeTools);
|
|
323
323
|
if (nativeResult) {
|
|
324
|
-
|
|
324
|
+
const { thinkingContent, ...rest } = nativeResult;
|
|
325
|
+
return { ...rest, type: 'native', thinkingContent };
|
|
325
326
|
}
|
|
326
327
|
|
|
327
328
|
// Then try MCP tools if bridge is available
|
|
328
329
|
if (mcpBridge) {
|
|
329
330
|
const mcpResult = parseXmlMcpToolCall(xmlString, mcpBridge.getToolNames());
|
|
330
331
|
if (mcpResult) {
|
|
331
|
-
|
|
332
|
+
// Extract thinking content for MCP tools as well
|
|
333
|
+
const { thinkingContent } = processXmlWithThinkingAndRecovery(xmlString, []);
|
|
334
|
+
return { ...mcpResult, type: 'mcp', thinkingContent };
|
|
332
335
|
}
|
|
333
336
|
}
|
|
334
337
|
|
|
@@ -344,18 +347,18 @@ export function parseHybridXmlToolCall(xmlString, nativeTools = [], mcpBridge =
|
|
|
344
347
|
*/
|
|
345
348
|
function parseNativeXmlToolWithThinking(xmlString, validTools) {
|
|
346
349
|
// Use the shared processing logic
|
|
347
|
-
const { cleanedXmlString, recoveryResult } = processXmlWithThinkingAndRecovery(xmlString, validTools);
|
|
348
|
-
|
|
349
|
-
// If recovery found an attempt_complete pattern, return it
|
|
350
|
+
const { cleanedXmlString, recoveryResult, thinkingContent } = processXmlWithThinkingAndRecovery(xmlString, validTools);
|
|
351
|
+
|
|
352
|
+
// If recovery found an attempt_complete pattern, return it with thinking content
|
|
350
353
|
if (recoveryResult) {
|
|
351
|
-
return recoveryResult;
|
|
354
|
+
return { ...recoveryResult, thinkingContent };
|
|
352
355
|
}
|
|
353
356
|
|
|
354
357
|
// Use the original parseNativeXmlTool function to parse the cleaned XML string
|
|
355
358
|
for (const toolName of validTools) {
|
|
356
359
|
const result = parseNativeXmlTool(cleanedXmlString, toolName);
|
|
357
360
|
if (result) {
|
|
358
|
-
return result;
|
|
361
|
+
return { ...result, thinkingContent };
|
|
359
362
|
}
|
|
360
363
|
}
|
|
361
364
|
|
|
@@ -264,6 +264,204 @@ export class SimpleAppTracer {
|
|
|
264
264
|
}
|
|
265
265
|
}
|
|
266
266
|
|
|
267
|
+
/**
|
|
268
|
+
* Hash content for deduplication/comparison purposes
|
|
269
|
+
* @param {string} content - The content to hash
|
|
270
|
+
* @returns {string} - Hex string hash
|
|
271
|
+
*/
|
|
272
|
+
hashContent(content) {
|
|
273
|
+
let hash = 0;
|
|
274
|
+
const len = Math.min(content.length, 1000);
|
|
275
|
+
for (let i = 0; i < len; i++) {
|
|
276
|
+
hash = ((hash << 5) - hash) + content.charCodeAt(i);
|
|
277
|
+
hash |= 0; // Convert to 32-bit integer
|
|
278
|
+
}
|
|
279
|
+
return hash.toString(16);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/**
|
|
283
|
+
* Record a conversation turn (assistant response or tool result)
|
|
284
|
+
* @param {string} role - The role (assistant, tool_result)
|
|
285
|
+
* @param {string} content - The turn content
|
|
286
|
+
* @param {Object} metadata - Additional metadata
|
|
287
|
+
*/
|
|
288
|
+
recordConversationTurn(role, content, metadata = {}) {
|
|
289
|
+
if (!this.isEnabled()) return;
|
|
290
|
+
|
|
291
|
+
this.addEvent(`conversation.turn.${role}`, {
|
|
292
|
+
'session.id': this.sessionId,
|
|
293
|
+
'conversation.role': role,
|
|
294
|
+
'conversation.content': content.substring(0, 10000),
|
|
295
|
+
'conversation.content.length': content.length,
|
|
296
|
+
'conversation.content.hash': this.hashContent(content),
|
|
297
|
+
...metadata
|
|
298
|
+
});
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/**
|
|
302
|
+
* Record error events with classification
|
|
303
|
+
* @param {string} errorType - The type of error (wrapped_tool, unrecognized_tool, no_tool_call, circuit_breaker, etc.)
|
|
304
|
+
* @param {Object} errorDetails - Error details including message, stack, context
|
|
305
|
+
*/
|
|
306
|
+
recordErrorEvent(errorType, errorDetails = {}) {
|
|
307
|
+
if (!this.isEnabled()) return;
|
|
308
|
+
|
|
309
|
+
this.addEvent(`error.${errorType}`, {
|
|
310
|
+
'session.id': this.sessionId,
|
|
311
|
+
'error.type': errorType,
|
|
312
|
+
'error.message': errorDetails.message?.substring(0, 1000) || null,
|
|
313
|
+
'error.stack': errorDetails.stack?.substring(0, 2000) || null,
|
|
314
|
+
'error.recoverable': errorDetails.recoverable ?? true,
|
|
315
|
+
'error.context': JSON.stringify(errorDetails.context || {}).substring(0, 1000),
|
|
316
|
+
...Object.fromEntries(
|
|
317
|
+
Object.entries(errorDetails)
|
|
318
|
+
.filter(([k]) => !['message', 'stack', 'context', 'recoverable'].includes(k))
|
|
319
|
+
.map(([k, v]) => [`error.${k}`, v])
|
|
320
|
+
)
|
|
321
|
+
});
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Record AI thinking/reasoning content
|
|
326
|
+
* @param {string} thinkingContent - The thinking content from AI response
|
|
327
|
+
* @param {Object} metadata - Additional metadata
|
|
328
|
+
*/
|
|
329
|
+
recordThinkingContent(thinkingContent, metadata = {}) {
|
|
330
|
+
if (!this.isEnabled() || !thinkingContent) return;
|
|
331
|
+
|
|
332
|
+
this.addEvent('ai.thinking', {
|
|
333
|
+
'session.id': this.sessionId,
|
|
334
|
+
'ai.thinking.content': thinkingContent.substring(0, 50000),
|
|
335
|
+
'ai.thinking.length': thinkingContent.length,
|
|
336
|
+
'ai.thinking.hash': this.hashContent(thinkingContent),
|
|
337
|
+
...metadata
|
|
338
|
+
});
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Record AI tool call decision
|
|
343
|
+
* @param {string} toolName - The tool name AI decided to call
|
|
344
|
+
* @param {Object} params - The parameters AI provided
|
|
345
|
+
* @param {Object} metadata - Additional metadata
|
|
346
|
+
*/
|
|
347
|
+
recordToolDecision(toolName, params, metadata = {}) {
|
|
348
|
+
if (!this.isEnabled()) return;
|
|
349
|
+
|
|
350
|
+
this.addEvent('ai.tool_decision', {
|
|
351
|
+
'session.id': this.sessionId,
|
|
352
|
+
'ai.tool_decision.name': toolName,
|
|
353
|
+
'ai.tool_decision.params': JSON.stringify(params || {}).substring(0, 2000),
|
|
354
|
+
...metadata
|
|
355
|
+
});
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Record tool result after execution
|
|
360
|
+
* @param {string} toolName - The tool that was executed
|
|
361
|
+
* @param {string|Object} result - The tool result
|
|
362
|
+
* @param {boolean} success - Whether the tool succeeded
|
|
363
|
+
* @param {number} durationMs - Execution duration in milliseconds
|
|
364
|
+
* @param {Object} metadata - Additional metadata
|
|
365
|
+
*/
|
|
366
|
+
recordToolResult(toolName, result, success, durationMs, metadata = {}) {
|
|
367
|
+
if (!this.isEnabled()) return;
|
|
368
|
+
|
|
369
|
+
const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
|
|
370
|
+
this.addEvent('tool.result', {
|
|
371
|
+
'session.id': this.sessionId,
|
|
372
|
+
'tool.name': toolName,
|
|
373
|
+
'tool.result': resultStr.substring(0, 10000),
|
|
374
|
+
'tool.result.length': resultStr.length,
|
|
375
|
+
'tool.result.hash': this.hashContent(resultStr),
|
|
376
|
+
'tool.duration_ms': durationMs,
|
|
377
|
+
'tool.success': success,
|
|
378
|
+
...metadata
|
|
379
|
+
});
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
/**
|
|
383
|
+
* Record MCP tool execution start
|
|
384
|
+
* @param {string} toolName - MCP tool name
|
|
385
|
+
* @param {string} serverName - MCP server name
|
|
386
|
+
* @param {Object} params - Tool parameters
|
|
387
|
+
* @param {Object} metadata - Additional metadata
|
|
388
|
+
*/
|
|
389
|
+
recordMcpToolStart(toolName, serverName, params, metadata = {}) {
|
|
390
|
+
if (!this.isEnabled()) return;
|
|
391
|
+
|
|
392
|
+
this.addEvent('mcp.tool.start', {
|
|
393
|
+
'session.id': this.sessionId,
|
|
394
|
+
'mcp.tool.name': toolName,
|
|
395
|
+
'mcp.tool.server': serverName || 'unknown',
|
|
396
|
+
'mcp.tool.params': JSON.stringify(params || {}).substring(0, 2000),
|
|
397
|
+
...metadata
|
|
398
|
+
});
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
/**
|
|
402
|
+
* Record MCP tool execution end
|
|
403
|
+
* @param {string} toolName - MCP tool name
|
|
404
|
+
* @param {string} serverName - MCP server name
|
|
405
|
+
* @param {string|Object} result - Tool result
|
|
406
|
+
* @param {boolean} success - Whether succeeded
|
|
407
|
+
* @param {number} durationMs - Execution duration
|
|
408
|
+
* @param {string} errorMessage - Error message if failed
|
|
409
|
+
* @param {Object} metadata - Additional metadata
|
|
410
|
+
*/
|
|
411
|
+
recordMcpToolEnd(toolName, serverName, result, success, durationMs, errorMessage = null, metadata = {}) {
|
|
412
|
+
if (!this.isEnabled()) return;
|
|
413
|
+
|
|
414
|
+
const resultStr = typeof result === 'string' ? result : JSON.stringify(result || '');
|
|
415
|
+
this.addEvent('mcp.tool.end', {
|
|
416
|
+
'session.id': this.sessionId,
|
|
417
|
+
'mcp.tool.name': toolName,
|
|
418
|
+
'mcp.tool.server': serverName || 'unknown',
|
|
419
|
+
'mcp.tool.result': resultStr.substring(0, 10000),
|
|
420
|
+
'mcp.tool.result.length': resultStr.length,
|
|
421
|
+
'mcp.tool.duration_ms': durationMs,
|
|
422
|
+
'mcp.tool.success': success,
|
|
423
|
+
'mcp.tool.error': errorMessage,
|
|
424
|
+
...metadata
|
|
425
|
+
});
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
/**
|
|
429
|
+
* Record iteration lifecycle event
|
|
430
|
+
* @param {string} eventType - start or end
|
|
431
|
+
* @param {number} iteration - Iteration number
|
|
432
|
+
* @param {Object} data - Additional data
|
|
433
|
+
*/
|
|
434
|
+
recordIterationEvent(eventType, iteration, data = {}) {
|
|
435
|
+
if (!this.isEnabled()) return;
|
|
436
|
+
|
|
437
|
+
this.addEvent(`iteration.${eventType}`, {
|
|
438
|
+
'session.id': this.sessionId,
|
|
439
|
+
'iteration': iteration,
|
|
440
|
+
...data
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
/**
|
|
445
|
+
* Record per-turn token breakdown
|
|
446
|
+
* @param {number} iteration - Iteration number
|
|
447
|
+
* @param {Object} tokenData - Token metrics
|
|
448
|
+
*/
|
|
449
|
+
recordTokenTurn(iteration, tokenData = {}) {
|
|
450
|
+
if (!this.isEnabled()) return;
|
|
451
|
+
|
|
452
|
+
this.addEvent('tokens.turn', {
|
|
453
|
+
'session.id': this.sessionId,
|
|
454
|
+
'iteration': iteration,
|
|
455
|
+
'tokens.input': tokenData.inputTokens || 0,
|
|
456
|
+
'tokens.output': tokenData.outputTokens || 0,
|
|
457
|
+
'tokens.total': (tokenData.inputTokens || 0) + (tokenData.outputTokens || 0),
|
|
458
|
+
'tokens.cache_read': tokenData.cacheReadTokens || 0,
|
|
459
|
+
'tokens.cache_write': tokenData.cacheWriteTokens || 0,
|
|
460
|
+
'tokens.context_used': tokenData.contextTokens || 0,
|
|
461
|
+
'tokens.context_remaining': tokenData.maxContextTokens ? (tokenData.maxContextTokens - (tokenData.contextTokens || 0)) : null
|
|
462
|
+
});
|
|
463
|
+
}
|
|
464
|
+
|
|
267
465
|
async withSpan(spanName, fn, attributes = {}) {
|
|
268
466
|
if (!this.isEnabled()) {
|
|
269
467
|
return fn();
|
package/src/agent/tools.js
CHANGED
|
@@ -270,13 +270,16 @@ User: Analyze the diagram in docs/architecture.svg
|
|
|
270
270
|
*/
|
|
271
271
|
export function parseXmlToolCallWithThinking(xmlString, validTools) {
|
|
272
272
|
// Use the shared processing logic
|
|
273
|
-
const { cleanedXmlString, recoveryResult } = processXmlWithThinkingAndRecovery(xmlString, validTools);
|
|
274
|
-
|
|
275
|
-
// If recovery found an attempt_complete pattern, return it
|
|
273
|
+
const { cleanedXmlString, recoveryResult, thinkingContent } = processXmlWithThinkingAndRecovery(xmlString, validTools);
|
|
274
|
+
|
|
275
|
+
// If recovery found an attempt_complete pattern, return it with thinking content
|
|
276
276
|
if (recoveryResult) {
|
|
277
|
-
return recoveryResult;
|
|
277
|
+
return { ...recoveryResult, thinkingContent };
|
|
278
278
|
}
|
|
279
279
|
|
|
280
280
|
// Otherwise, use the original parseXmlToolCall function to parse the cleaned XML string
|
|
281
|
-
|
|
281
|
+
const toolCall = parseXmlToolCall(cleanedXmlString, validTools);
|
|
282
|
+
|
|
283
|
+
// Return tool call with thinking content attached
|
|
284
|
+
return toolCall ? { ...toolCall, thinkingContent } : null;
|
|
282
285
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|