praisonai 1.0.19 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/context.d.ts +68 -0
- package/dist/agent/context.js +119 -0
- package/dist/agent/enhanced.d.ts +92 -0
- package/dist/agent/enhanced.js +267 -0
- package/dist/agent/handoff.d.ts +82 -0
- package/dist/agent/handoff.js +124 -0
- package/dist/agent/image.d.ts +51 -0
- package/dist/agent/image.js +93 -0
- package/dist/agent/prompt-expander.d.ts +40 -0
- package/dist/agent/prompt-expander.js +84 -0
- package/dist/agent/query-rewriter.d.ts +38 -0
- package/dist/agent/query-rewriter.js +79 -0
- package/dist/agent/research.d.ts +52 -0
- package/dist/agent/research.js +118 -0
- package/dist/agent/router.d.ts +77 -0
- package/dist/agent/router.js +113 -0
- package/dist/agent/simple.js +1 -1
- package/dist/agent/types.js +2 -2
- package/dist/auto/index.d.ts +56 -0
- package/dist/auto/index.js +142 -0
- package/dist/cli/index.d.ts +20 -0
- package/dist/cli/index.js +150 -0
- package/dist/db/index.d.ts +23 -0
- package/dist/db/index.js +72 -0
- package/dist/db/memory-adapter.d.ts +42 -0
- package/dist/db/memory-adapter.js +146 -0
- package/dist/db/types.d.ts +113 -0
- package/dist/db/types.js +5 -0
- package/dist/eval/index.d.ts +61 -0
- package/dist/eval/index.js +157 -0
- package/dist/guardrails/index.d.ts +82 -0
- package/dist/guardrails/index.js +202 -0
- package/dist/guardrails/llm-guardrail.d.ts +40 -0
- package/dist/guardrails/llm-guardrail.js +91 -0
- package/dist/index.d.ts +26 -1
- package/dist/index.js +122 -1
- package/dist/knowledge/chunking.d.ts +55 -0
- package/dist/knowledge/chunking.js +157 -0
- package/dist/knowledge/rag.d.ts +80 -0
- package/dist/knowledge/rag.js +147 -0
- package/dist/llm/openai.js +1 -1
- package/dist/llm/providers/anthropic.d.ts +33 -0
- package/dist/llm/providers/anthropic.js +291 -0
- package/dist/llm/providers/base.d.ts +25 -0
- package/dist/llm/providers/base.js +43 -0
- package/dist/llm/providers/google.d.ts +27 -0
- package/dist/llm/providers/google.js +275 -0
- package/dist/llm/providers/index.d.ts +43 -0
- package/dist/llm/providers/index.js +116 -0
- package/dist/llm/providers/openai.d.ts +18 -0
- package/dist/llm/providers/openai.js +203 -0
- package/dist/llm/providers/types.d.ts +94 -0
- package/dist/llm/providers/types.js +5 -0
- package/dist/memory/memory.d.ts +92 -0
- package/dist/memory/memory.js +169 -0
- package/dist/observability/index.d.ts +86 -0
- package/dist/observability/index.js +166 -0
- package/dist/planning/index.d.ts +133 -0
- package/dist/planning/index.js +228 -0
- package/dist/session/index.d.ts +111 -0
- package/dist/session/index.js +250 -0
- package/dist/skills/index.d.ts +70 -0
- package/dist/skills/index.js +233 -0
- package/dist/telemetry/index.d.ts +102 -0
- package/dist/telemetry/index.js +187 -0
- package/dist/tools/decorator.d.ts +91 -0
- package/dist/tools/decorator.js +165 -0
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +3 -0
- package/dist/tools/mcpSse.d.ts +41 -0
- package/dist/tools/mcpSse.js +108 -0
- package/dist/workflows/index.d.ts +97 -0
- package/dist/workflows/index.js +216 -0
- package/package.json +5 -2
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-Memory Database Adapter - Simple implementation for testing and development
|
|
3
|
+
*/
|
|
4
|
+
import type { DbAdapter, DbSession, DbMessage, DbRun, DbToolCall, DbTrace, DbSpan } from './types';
|
|
5
|
+
/**
 * Type surface of the in-memory {@link DbAdapter} implementation.
 *
 * The persistence methods mirror the `DbAdapter` contract one-to-one;
 * `clear` and `getStats` are extras that only this adapter exposes.
 */
export declare class MemoryDbAdapter implements DbAdapter {
    /* Backing stores and the connection flag. */
    private sessions;
    private messages;
    private runs;
    private toolCalls;
    private traces;
    private spans;
    private connected;

    /* Sessions */
    createSession(session: DbSession): Promise<void>;
    getSession(id: string): Promise<DbSession | null>;
    updateSession(id: string, updates: Partial<DbSession>): Promise<void>;
    deleteSession(id: string): Promise<void>;
    listSessions(limit?: number, offset?: number): Promise<DbSession[]>;

    /* Messages */
    saveMessage(message: DbMessage): Promise<void>;
    getMessages(sessionId: string, limit?: number): Promise<DbMessage[]>;
    deleteMessages(sessionId: string): Promise<void>;

    /* Runs */
    createRun(run: DbRun): Promise<void>;
    getRun(id: string): Promise<DbRun | null>;
    updateRun(id: string, updates: Partial<DbRun>): Promise<void>;
    listRuns(sessionId: string, limit?: number): Promise<DbRun[]>;

    /* Tool calls */
    saveToolCall(toolCall: DbToolCall): Promise<void>;
    getToolCalls(runId: string): Promise<DbToolCall[]>;

    /* Traces and spans */
    createTrace(trace: DbTrace): Promise<void>;
    getTrace(id: string): Promise<DbTrace | null>;
    updateTrace(id: string, updates: Partial<DbTrace>): Promise<void>;
    createSpan(span: DbSpan): Promise<void>;
    getSpans(traceId: string): Promise<DbSpan[]>;
    updateSpan(id: string, updates: Partial<DbSpan>): Promise<void>;

    /* Lifecycle */
    connect(): Promise<void>;
    disconnect(): Promise<void>;
    isConnected(): boolean;

    /** Empties every backing store. */
    clear(): void;
    /** Number of stored sessions, messages (summed over all sessions) and runs. */
    getStats(): {
        sessions: number;
        messages: number;
        runs: number;
    };
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* In-Memory Database Adapter - Simple implementation for testing and development
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.MemoryDbAdapter = void 0;
|
|
7
|
+
/**
 * In-memory database adapter for testing and development.
 *
 * All records live in `Map`s owned by the instance, so nothing is persisted
 * and nothing is shared between adapter instances. Child collections are
 * keyed by their parent id: messages by session id, tool calls by run id and
 * spans by trace id.
 *
 * Read methods that return lists hand out copies, so callers cannot alter the
 * stored collections by mutating a returned array.
 */
class MemoryDbAdapter {
    constructor() {
        this.sessions = new Map(); // session id -> session
        this.messages = new Map(); // session id -> messages, in insertion order
        this.runs = new Map(); // run id -> run
        this.toolCalls = new Map(); // run id -> tool calls, in insertion order
        this.traces = new Map(); // trace id -> trace
        this.spans = new Map(); // trace id -> spans, in insertion order
        this.connected = false;
    }
    // Session operations
    /**
     * Stores (or replaces) a session record under `session.id`.
     * Messages already saved for that id are kept.
     */
    async createSession(session) {
        this.sessions.set(session.id, session);
        // saveMessage() may have created the list before the session existed;
        // resetting it here would silently drop those messages.
        if (!this.messages.has(session.id)) {
            this.messages.set(session.id, []);
        }
    }
    /** Returns the session stored under `id`, or `null` when there is none. */
    async getSession(id) {
        return this.sessions.get(id) || null;
    }
    /**
     * Shallow-merges `updates` into an existing session and stamps `updatedAt`
     * with the current time. Unknown ids are ignored.
     */
    async updateSession(id, updates) {
        const session = this.sessions.get(id);
        if (session) {
            this.sessions.set(id, { ...session, ...updates, updatedAt: Date.now() });
        }
    }
    /** Removes the session record and its message list. */
    async deleteSession(id) {
        this.sessions.delete(id);
        this.messages.delete(id);
    }
    /** Returns up to `limit` sessions in insertion order, skipping the first `offset`. */
    async listSessions(limit = 100, offset = 0) {
        const all = Array.from(this.sessions.values());
        return all.slice(offset, offset + limit);
    }
    // Message operations
    /** Appends a message to the list of `message.sessionId`, creating the list if needed. */
    async saveMessage(message) {
        const messages = this.messages.get(message.sessionId) || [];
        messages.push(message);
        this.messages.set(message.sessionId, messages);
    }
    /**
     * Returns a copy of the messages of a session.
     * When `limit` is given only the most recent `limit` messages are returned;
     * a limit of zero or less yields an empty list.
     */
    async getMessages(sessionId, limit) {
        const messages = this.messages.get(sessionId) || [];
        if (limit === undefined || limit === null) {
            return [...messages];
        }
        // slice(-0) is slice(0) and would return everything, so guard explicitly.
        return limit > 0 ? messages.slice(-limit) : [];
    }
    /** Replaces the message list of a session with an empty one. */
    async deleteMessages(sessionId) {
        this.messages.set(sessionId, []);
    }
    // Run operations
    /**
     * Stores (or replaces) a run record under `run.id`.
     * Tool calls already saved for that id are kept.
     */
    async createRun(run) {
        this.runs.set(run.id, run);
        if (!this.toolCalls.has(run.id)) {
            this.toolCalls.set(run.id, []);
        }
    }
    /** Returns the run stored under `id`, or `null` when there is none. */
    async getRun(id) {
        return this.runs.get(id) || null;
    }
    /** Shallow-merges `updates` into an existing run. Unknown ids are ignored. */
    async updateRun(id, updates) {
        const run = this.runs.get(id);
        if (run) {
            this.runs.set(id, { ...run, ...updates });
        }
    }
    /**
     * Returns the most recently inserted `limit` runs of a session.
     * A limit of zero or less yields an empty list.
     */
    async listRuns(sessionId, limit = 100) {
        const all = Array.from(this.runs.values()).filter(r => r.sessionId === sessionId);
        return limit > 0 ? all.slice(-limit) : [];
    }
    // Tool call operations
    /** Appends a tool call to the list of `toolCall.runId`, creating the list if needed. */
    async saveToolCall(toolCall) {
        const calls = this.toolCalls.get(toolCall.runId) || [];
        calls.push(toolCall);
        this.toolCalls.set(toolCall.runId, calls);
    }
    /** Returns a copy of the tool calls recorded for a run. */
    async getToolCalls(runId) {
        return [...(this.toolCalls.get(runId) || [])];
    }
    // Trace operations
    /**
     * Stores (or replaces) a trace record under `trace.id`.
     * Spans already saved for that id are kept.
     */
    async createTrace(trace) {
        this.traces.set(trace.id, trace);
        if (!this.spans.has(trace.id)) {
            this.spans.set(trace.id, []);
        }
    }
    /** Returns the trace stored under `id`, or `null` when there is none. */
    async getTrace(id) {
        return this.traces.get(id) || null;
    }
    /** Shallow-merges `updates` into an existing trace. Unknown ids are ignored. */
    async updateTrace(id, updates) {
        const trace = this.traces.get(id);
        if (trace) {
            this.traces.set(id, { ...trace, ...updates });
        }
    }
    // Span operations
    /** Appends a span to the list of `span.traceId`, creating the list if needed. */
    async createSpan(span) {
        const spans = this.spans.get(span.traceId) || [];
        spans.push(span);
        this.spans.set(span.traceId, spans);
    }
    /** Returns a copy of the spans recorded for a trace. */
    async getSpans(traceId) {
        return [...(this.spans.get(traceId) || [])];
    }
    /**
     * Shallow-merges `updates` into the first span whose id matches.
     * Spans are stored per trace, so every trace is searched; unknown ids are ignored.
     */
    async updateSpan(id, updates) {
        for (const spans of this.spans.values()) {
            const index = spans.findIndex(s => s.id === id);
            if (index !== -1) {
                spans[index] = { ...spans[index], ...updates };
                return;
            }
        }
    }
    // Lifecycle
    /** Marks the adapter as connected; there is no real connection to open. */
    async connect() {
        this.connected = true;
    }
    /** Marks the adapter as disconnected; stored data is left in place. */
    async disconnect() {
        this.connected = false;
    }
    /** Reports the flag set by `connect` / `disconnect`. */
    isConnected() {
        return this.connected;
    }
    // Utility methods
    /** Empties every store. The connection flag is not touched. */
    clear() {
        this.sessions.clear();
        this.messages.clear();
        this.runs.clear();
        this.toolCalls.clear();
        this.traces.clear();
        this.spans.clear();
    }
    /** Returns the number of sessions, messages (summed over all sessions) and runs. */
    getStats() {
        let messageCount = 0;
        for (const msgs of this.messages.values()) {
            messageCount += msgs.length;
        }
        return {
            sessions: this.sessions.size,
            messages: messageCount,
            runs: this.runs.size,
        };
    }
}
|
|
146
|
+
exports.MemoryDbAdapter = MemoryDbAdapter;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database Adapter Types - Protocol definitions for persistence layer
|
|
3
|
+
*/
|
|
4
|
+
/**
 * A single chat message as stored by a database adapter.
 */
export interface DbMessage {
    /** Identifier of the message. */
    id: string;
    /** Identifier of the session the message belongs to. */
    sessionId: string;
    /** Identifier of the run that produced the message, when there is one. */
    runId?: string;
    /** Who authored the message. */
    role: 'system' | 'user' | 'assistant' | 'tool';
    /** Text of the message; `null` is allowed (presumably for messages that only carry tool calls - confirm). */
    content: string | null;
    /** Optional name attached to the message. */
    name?: string;
    /** Identifier of the tool call this message relates to, if any. */
    toolCallId?: string;
    /** Tool calls carried by the message; the element shape is not constrained here. */
    toolCalls?: any[];
    /** Creation timestamp (presumably epoch milliseconds - confirm with producers). */
    createdAt: number;
    /** Free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
16
|
+
/**
 * Record of one tool invocation made during a run.
 */
export interface DbToolCall {
    /** Identifier of the tool call. */
    id: string;
    /** Identifier of the run that issued the call. */
    runId: string;
    /** Name of the tool that was called. */
    name: string;
    /** Call arguments as a string (presumably serialized JSON - confirm with producers). */
    arguments: string;
    /** Result of the call as a string, once available. */
    result?: string;
    /** Current state of the call. */
    status: 'pending' | 'completed' | 'failed';
    /** Start timestamp (presumably epoch milliseconds - confirm). */
    startedAt: number;
    /** Completion timestamp, once the call has finished. */
    completedAt?: number;
    /** Error description for a failed call. */
    error?: string;
}
|
|
27
|
+
/**
 * Record of one agent run inside a session.
 */
export interface DbRun {
    /** Identifier of the run. */
    id: string;
    /** Identifier of the session the run belongs to. */
    sessionId: string;
    /** Name of the agent that executed the run, when known. */
    agentName?: string;
    /** Current state of the run. */
    status: 'pending' | 'running' | 'completed' | 'failed';
    /** Start timestamp (presumably epoch milliseconds - confirm). */
    startedAt: number;
    /** Completion timestamp, once the run has finished. */
    completedAt?: number;
    /** Error description for a failed run. */
    error?: string;
    /** Free-form extra data. */
    metadata?: Record<string, any>;
    /** Token accounting for the run, when recorded. */
    tokenUsage?: {
        promptTokens: number;
        completionTokens: number;
        totalTokens: number;
    };
}
|
|
42
|
+
/**
 * A conversation session: the parent record that messages and runs refer to
 * through their `sessionId`.
 */
export interface DbSession {
    /** Identifier of the session. */
    id: string;
    /** Creation timestamp (presumably epoch milliseconds, like `updatedAt` - confirm). */
    createdAt: number;
    /** Last-modified timestamp; the in-memory adapter writes `Date.now()` here on every update. */
    updatedAt: number;
    /** Free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
48
|
+
/**
 * One timed operation inside a trace.
 */
export interface DbSpan {
    /** Identifier of the span. */
    id: string;
    /** Identifier of the trace the span belongs to. */
    traceId: string;
    /** Identifier of the parent span, when the span is nested. */
    parentId?: string;
    /** Name of the span. */
    name: string;
    /** Start timestamp (presumably epoch milliseconds - confirm). */
    startedAt: number;
    /** Completion timestamp, once the span has finished. */
    completedAt?: number;
    /** Current state of the span. */
    status: 'pending' | 'running' | 'completed' | 'failed';
    /** Free-form attributes attached to the span. */
    attributes?: Record<string, any>;
}
|
|
58
|
+
/**
 * A trace: the parent record that spans refer to through their `traceId`.
 */
export interface DbTrace {
    /** Identifier of the trace. */
    id: string;
    /** Identifier of the session the trace belongs to. */
    sessionId: string;
    /** Identifier of the run being traced, when there is one. */
    runId?: string;
    /** Name of the agent being traced, when known. */
    agentName?: string;
    /** Start timestamp (presumably epoch milliseconds - confirm). */
    startedAt: number;
    /** Completion timestamp, once the trace has finished. */
    completedAt?: number;
    /** Current state of the trace. */
    status: 'pending' | 'running' | 'completed' | 'failed';
    /** Free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
68
|
+
/**
|
|
69
|
+
* Database Adapter Protocol - Interface for persistence implementations
|
|
70
|
+
*/
|
|
71
|
+
/**
 * Contract every persistence backend implements.
 *
 * All data operations are promise-based; only `isConnected` is synchronous.
 * Single-record getters resolve to `null` when nothing is stored under the id.
 */
export interface DbAdapter {
    /* Sessions */
    createSession(session: DbSession): Promise<void>;
    getSession(id: string): Promise<DbSession | null>;
    updateSession(id: string, updates: Partial<DbSession>): Promise<void>;
    deleteSession(id: string): Promise<void>;
    listSessions(limit?: number, offset?: number): Promise<DbSession[]>;

    /* Messages, addressed by the owning session */
    saveMessage(message: DbMessage): Promise<void>;
    getMessages(sessionId: string, limit?: number): Promise<DbMessage[]>;
    deleteMessages(sessionId: string): Promise<void>;

    /* Runs */
    createRun(run: DbRun): Promise<void>;
    getRun(id: string): Promise<DbRun | null>;
    updateRun(id: string, updates: Partial<DbRun>): Promise<void>;
    listRuns(sessionId: string, limit?: number): Promise<DbRun[]>;

    /* Tool calls, addressed by the owning run */
    saveToolCall(toolCall: DbToolCall): Promise<void>;
    getToolCalls(runId: string): Promise<DbToolCall[]>;

    /* Traces and their spans */
    createTrace(trace: DbTrace): Promise<void>;
    getTrace(id: string): Promise<DbTrace | null>;
    updateTrace(id: string, updates: Partial<DbTrace>): Promise<void>;
    createSpan(span: DbSpan): Promise<void>;
    getSpans(traceId: string): Promise<DbSpan[]>;
    updateSpan(id: string, updates: Partial<DbSpan>): Promise<void>;

    /* Lifecycle */
    connect(): Promise<void>;
    disconnect(): Promise<void>;
    isConnected(): boolean;
}
|
|
96
|
+
/**
|
|
97
|
+
* Async Database Adapter - For async-first implementations
|
|
98
|
+
*/
|
|
99
|
+
/**
 * Marker interface for async-first adapters.
 * It currently adds no members on top of {@link DbAdapter}.
 */
export interface AsyncDbAdapter extends DbAdapter {
}
|
|
101
|
+
/**
|
|
102
|
+
* Database configuration
|
|
103
|
+
*/
|
|
104
|
+
/**
 * Settings used to select and reach a database backend.
 *
 * Only `type` is required. Which of the optional fields a given backend reads
 * is not specified by this declaration.
 */
export interface DbConfig {
    /** Backend to use. */
    type: 'sqlite' | 'postgres' | 'redis' | 'memory';
    /** Full connection string, as an alternative to the individual fields. */
    connectionString?: string;
    /** File-system path (presumably the database file for file-based backends - confirm). */
    path?: string;
    /** Server host name. */
    host?: string;
    /** Server port. */
    port?: number;
    /** Database name. */
    database?: string;
    /** Login user name. */
    username?: string;
    /** Login password; treat as a secret and keep it out of logs. */
    password?: string;
}
|
package/dist/db/types.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation Framework - Accuracy, Performance, and Reliability evaluation
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Outcome shared by every evaluation.
 */
export interface EvalResult {
    /** Whether the evaluation met its pass criterion. */
    passed: boolean;
    /** Numeric score; the bundled evaluators report a fraction where 1 is a perfect result. */
    score: number;
    /** Optional human-readable verdict. */
    message?: string;
    /** Evaluator-specific supporting data. */
    details?: Record<string, any>;
    /** Elapsed time of the evaluation; the bundled evaluators measure it with `Date.now()` differences (milliseconds). */
    duration: number;
}
|
|
11
|
+
/**
 * Input to `accuracyEval`.
 */
export interface AccuracyEvalConfig {
    /** Prompt that produced the output under test. */
    input: string;
    /** Reference output. */
    expectedOutput: string;
    /** Output to score against the reference. */
    actualOutput: string;
    /** Minimum score required to pass; `accuracyEval` falls back to 0.8 when omitted. */
    threshold?: number;
}
|
|
17
|
+
/**
 * Input to `performanceEval`.
 */
export interface PerformanceEvalConfig {
    /** Async operation to time; it is awaited once per run. */
    func: () => Promise<any>;
    /** Number of timed runs; `performanceEval` falls back to 10 when omitted. */
    iterations?: number;
    /** Number of untimed runs executed first; `performanceEval` falls back to 2 when omitted. */
    warmupRuns?: number;
}
|
|
22
|
+
/**
 * Result of `performanceEval`: the common {@link EvalResult} fields plus
 * timing statistics over the timed runs. All times come from `Date.now()`
 * differences, i.e. milliseconds.
 */
export interface PerformanceResult extends EvalResult {
    /** Mean run time. */
    avgTime: number;
    /** Fastest run time. */
    minTime: number;
    /** Slowest run time. */
    maxTime: number;
    /** 95th-percentile run time. */
    p95Time: number;
    /** Individual run times in execution order. */
    times: number[];
}
|
|
29
|
+
/**
 * Input to `reliabilityEval`.
 */
export interface ReliabilityEvalConfig {
    /** Names of the tool calls that should have been made. */
    expectedToolCalls: string[];
    /** Names of the tool calls that were actually made. */
    actualToolCalls: string[];
}
|
|
33
|
+
/**
|
|
34
|
+
* Accuracy Evaluation - Compare actual output to expected
|
|
35
|
+
*/
|
|
36
|
+
/**
 * Scores `config.actualOutput` against `config.expectedOutput` by word overlap
 * and passes when the score reaches `config.threshold` (0.8 when omitted).
 *
 * @param config - Expected and actual output plus an optional pass threshold.
 * @returns The verdict, with the similarity as `score`.
 */
export declare function accuracyEval(config: AccuracyEvalConfig): Promise<EvalResult>;
|
|
37
|
+
/**
|
|
38
|
+
* Performance Evaluation - Measure execution time
|
|
39
|
+
*/
|
|
40
|
+
/**
 * Awaits `config.func` for `warmupRuns` untimed runs followed by `iterations`
 * timed runs, one after another, and summarizes the timed runs.
 *
 * @param config - Operation to time plus optional run counts.
 * @returns Timing statistics for the timed runs.
 */
export declare function performanceEval(config: PerformanceEvalConfig): Promise<PerformanceResult>;
|
|
41
|
+
/**
|
|
42
|
+
* Reliability Evaluation - Check tool call accuracy
|
|
43
|
+
*/
|
|
44
|
+
/**
 * Compares the tool calls that were made with the ones that were expected;
 * passes when no expected tool call is missing.
 *
 * @param config - Expected and actual tool-call names.
 * @returns The verdict, with matched, missing and extra names in `details`.
 */
export declare function reliabilityEval(config: ReliabilityEvalConfig): Promise<EvalResult>;
|
|
45
|
+
/**
|
|
46
|
+
* Eval Suite - Run multiple evaluations
|
|
47
|
+
*/
|
|
48
|
+
/**
 * Collects named evaluation results and summarizes them.
 *
 * Each `run*` method executes the matching evaluator, stores the result under
 * `name` (a later result with the same name replaces the earlier one) and
 * resolves to that result.
 */
export declare class EvalSuite {
    private results;

    /* Evaluators */
    runAccuracy(name: string, config: AccuracyEvalConfig): Promise<EvalResult>;
    runPerformance(name: string, config: PerformanceEvalConfig): Promise<PerformanceResult>;
    runReliability(name: string, config: ReliabilityEvalConfig): Promise<EvalResult>;

    /** Returns a copy of the name-to-result map. */
    getResults(): Map<string, EvalResult>;
    /** Returns total, passed and failed counts plus the mean score (0 when nothing was run). */
    getSummary(): {
        total: number;
        passed: number;
        failed: number;
        avgScore: number;
    };
    /** Writes the summary and one line per result to the console. */
    printSummary(): void;
}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Evaluation Framework - Accuracy, Performance, and Reliability evaluation
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.EvalSuite = void 0;
|
|
7
|
+
exports.accuracyEval = accuracyEval;
|
|
8
|
+
exports.performanceEval = performanceEval;
|
|
9
|
+
exports.reliabilityEval = reliabilityEval;
|
|
10
|
+
/**
|
|
11
|
+
* Accuracy Evaluation - Compare actual output to expected
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Accuracy evaluation: scores the actual output against the expected output
 * and passes when the score reaches the threshold.
 *
 * @param config Expected output, actual output and an optional `threshold`
 *               (0.8 when omitted).
 * @returns The verdict; `score` is the similarity and `details` echoes both
 *          outputs together with the threshold that was applied.
 */
async function accuracyEval(config) {
    const startedAt = Date.now();
    const threshold = config.threshold ?? 0.8;
    const { expectedOutput, actualOutput } = config;
    const score = calculateSimilarity(expectedOutput, actualOutput);
    const passed = score >= threshold;
    let message;
    if (passed) {
        message = 'Output matches expected';
    }
    else {
        message = 'Output does not match expected';
    }
    const details = {
        expected: expectedOutput,
        actual: actualOutput,
        threshold
    };
    return { passed, score, message, details, duration: Date.now() - startedAt };
}
|
|
30
|
+
/**
|
|
31
|
+
* Performance Evaluation - Measure execution time
|
|
32
|
+
*/
|
|
33
|
+
/**
 * Performance evaluation: measures how long `config.func` takes.
 *
 * The function is first awaited `warmupRuns` times without timing (default 2),
 * then `iterations` times with timing (default 10). Runs are sequential and
 * timed with `Date.now()`, so all values are whole milliseconds.
 *
 * @param config Operation to time plus optional `iterations` / `warmupRuns`.
 * @returns Always a passing result with score 1. `avgTime`, `minTime`,
 *          `maxTime` and `p95Time` describe the timed runs and are all 0 when
 *          no timed run was requested. `duration` covers the timed phase only.
 */
async function performanceEval(config) {
    const iterations = config.iterations ?? 10;
    const warmupRuns = config.warmupRuns ?? 2;
    const times = [];
    // Warmup runs
    for (let i = 0; i < warmupRuns; i++) {
        await config.func();
    }
    // Actual runs
    const start = Date.now();
    for (let i = 0; i < iterations; i++) {
        const runStart = Date.now();
        await config.func();
        times.push(Date.now() - runStart);
    }
    const sortedTimes = [...times].sort((a, b) => a - b);
    const sampleCount = sortedTimes.length;
    let avgTime = 0;
    let minTime = 0;
    let maxTime = 0;
    let p95Time = 0;
    // With zero samples the statistics stay at 0 instead of NaN / undefined.
    if (sampleCount > 0) {
        avgTime = times.reduce((a, b) => a + b, 0) / sampleCount;
        minTime = sortedTimes[0];
        maxTime = sortedTimes[sampleCount - 1];
        // Nearest-rank percentile: the smallest sample with at least 95% of all
        // samples at or below it. Integer arithmetic avoids rounding surprises.
        const p95Index = Math.ceil((sampleCount * 95) / 100) - 1;
        // A measured 0 ms is a valid percentile and must not be replaced.
        p95Time = sortedTimes[p95Index] ?? maxTime;
    }
    return {
        passed: true,
        score: 1,
        avgTime,
        minTime,
        maxTime,
        p95Time,
        times,
        duration: Date.now() - start,
        details: {
            iterations,
            warmupRuns
        }
    };
}
|
|
69
|
+
/**
|
|
70
|
+
* Reliability Evaluation - Check tool call accuracy
|
|
71
|
+
*/
|
|
72
|
+
/**
 * Reliability evaluation: checks that every expected tool call was made.
 *
 * Tool names are compared as sets, so repeated names count once. The score is
 * the fraction of distinct expected names found among the actual calls (1 when
 * nothing was expected) and therefore always lies between 0 and 1. Calls that
 * were made but not expected are reported as `extra` and do not fail the
 * evaluation.
 *
 * @param config Expected and actual tool-call names.
 * @returns The verdict; `details` lists the distinct matched, missing and
 *          extra names (in first-occurrence order) plus the raw input lists.
 */
async function reliabilityEval(config) {
    const start = Date.now();
    const expected = new Set(config.expectedToolCalls);
    const actual = new Set(config.actualToolCalls);
    // Work on the de-duplicated names so the numerator and the denominator of
    // the score count the same things.
    const matched = [...expected].filter(t => actual.has(t));
    const missing = [...expected].filter(t => !actual.has(t));
    const extra = [...actual].filter(t => !expected.has(t));
    const score = expected.size > 0 ? matched.length / expected.size : 1;
    const passed = missing.length === 0;
    return {
        passed,
        score,
        message: passed ? 'All expected tool calls made' : `Missing tool calls: ${missing.join(', ')}`,
        details: {
            matched,
            missing,
            extra,
            expected: config.expectedToolCalls,
            actual: config.actualToolCalls
        },
        duration: Date.now() - start
    };
}
|
|
95
|
+
/**
|
|
96
|
+
* Calculate text similarity (simple Jaccard similarity)
|
|
97
|
+
*/
|
|
98
|
+
/**
 * Jaccard similarity of the word sets of two texts.
 *
 * Texts are lower-cased and split on runs of whitespace; leading, trailing and
 * repeated whitespace never produces a word. Two texts without any words are
 * treated as identical.
 *
 * @param a First text.
 * @param b Second text.
 * @returns Shared distinct words divided by all distinct words, in [0, 1].
 */
function calculateSimilarity(a, b) {
    // split() yields '' for leading/trailing whitespace and for empty input;
    // those are not words and must not take part in the comparison.
    const toWords = (text) => new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
    const wordsA = toWords(a);
    const wordsB = toWords(b);
    if (wordsA.size === 0 && wordsB.size === 0) {
        return 1;
    }
    let shared = 0;
    for (const word of wordsA) {
        if (wordsB.has(word)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero here because one side has words.
    return shared / (wordsA.size + wordsB.size - shared);
}
|
|
105
|
+
/**
|
|
106
|
+
* Eval Suite - Run multiple evaluations
|
|
107
|
+
*/
|
|
108
|
+
/**
 * Eval suite: runs evaluations, remembers each result under a caller-chosen
 * name and reports on the collection. Storing a result under a name that is
 * already present replaces the earlier result.
 */
class EvalSuite {
    constructor() {
        // name -> result, in insertion order
        this.results = new Map();
    }
    /** Runs `accuracyEval`, stores the outcome under `name` and returns it. */
    async runAccuracy(name, config) {
        const outcome = await accuracyEval(config);
        this.results.set(name, outcome);
        return outcome;
    }
    /** Runs `performanceEval`, stores the outcome under `name` and returns it. */
    async runPerformance(name, config) {
        const outcome = await performanceEval(config);
        this.results.set(name, outcome);
        return outcome;
    }
    /** Runs `reliabilityEval`, stores the outcome under `name` and returns it. */
    async runReliability(name, config) {
        const outcome = await reliabilityEval(config);
        this.results.set(name, outcome);
        return outcome;
    }
    /** Returns a fresh map holding the stored results. */
    getResults() {
        return new Map(this.results);
    }
    /** Returns the counts and the mean score; the mean is 0 for an empty suite. */
    getSummary() {
        const total = this.results.size;
        let passed = 0;
        let scoreSum = 0;
        for (const entry of this.results.values()) {
            if (entry.passed) {
                passed += 1;
            }
            scoreSum += entry.score;
        }
        const avgScore = total > 0 ? scoreSum / total : 0;
        return {
            total,
            passed,
            failed: total - passed,
            avgScore
        };
    }
    /** Prints the summary followed by one line per stored result. */
    printSummary() {
        const { total, passed, failed, avgScore } = this.getSummary();
        const header = [
            '\n=== Evaluation Summary ===',
            `Total: ${total}`,
            `Passed: ${passed}`,
            `Failed: ${failed}`,
            `Avg Score: ${(avgScore * 100).toFixed(1)}%`,
            '\nResults:'
        ];
        for (const line of header) {
            console.log(line);
        }
        this.results.forEach((result, name) => {
            const status = result.passed ? '✅' : '❌';
            console.log(` ${status} ${name}: ${(result.score * 100).toFixed(1)}%`);
        });
    }
}
|
|
157
|
+
exports.EvalSuite = EvalSuite;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guardrails - Input/output validation and safety checks
|
|
3
|
+
*/
|
|
4
|
+
/** Verdict a guardrail check can report. */
export type GuardrailStatus = 'passed' | 'failed' | 'warning';
|
|
5
|
+
/**
 * What a guardrail check reports back.
 */
export interface GuardrailResult {
    /** Verdict of the check. */
    status: GuardrailStatus;
    /** Optional human-readable explanation. */
    message?: string;
    /** Check-specific supporting data. */
    details?: Record<string, any>;
    /** Replacement content proposed by the check (presumably used with the `'modify'` failure mode - confirm). */
    modifiedContent?: string;
}
|
|
11
|
+
/**
 * Signature of a guardrail check: receives the content and an optional
 * context, and returns its result either directly or as a promise.
 */
export type GuardrailFunction = (content: string, context?: GuardrailContext) => Promise<GuardrailResult> | GuardrailResult;
|
|
12
|
+
/**
 * Information about where the content being checked comes from.
 */
export interface GuardrailContext {
    /** Whether the content is an input or an output. */
    role: 'input' | 'output';
    /** Name of the agent involved, when known. */
    agentName?: string;
    /** Identifier of the session involved, when known. */
    sessionId?: string;
    /** Free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
18
|
+
/**
 * Options accepted when building a guardrail.
 */
export interface GuardrailConfig {
    /** Name under which the guardrail is reported. */
    name: string;
    /** Optional human-readable description. */
    description?: string;
    /** The check to run against content. */
    check: GuardrailFunction;
    /** How a failed check should be treated; the default is chosen by the implementation, which is not shown here. */
    onFail?: 'block' | 'warn' | 'modify';
}
|
|
24
|
+
/**
|
|
25
|
+
* Guardrail class
|
|
26
|
+
*/
|
|
27
|
+
/**
 * A named content check built from a {@link GuardrailConfig}.
 *
 * All configuration is exposed read-only; `description` and `onFail` are
 * always present on the instance even though they are optional in the config.
 */
export declare class Guardrail {
    /** Name under which the guardrail is reported. */
    readonly name: string;
    /** Human-readable description. */
    readonly description: string;
    /** The check this guardrail runs. */
    readonly check: GuardrailFunction;
    /** How a failed check is treated. */
    readonly onFail: 'block' | 'warn' | 'modify';

    constructor(config: GuardrailConfig);

    /** Checks `content`, optionally with context, and resolves to the result. */
    run(content: string, context?: GuardrailContext): Promise<GuardrailResult>;
}
|
|
35
|
+
/**
|
|
36
|
+
* Create a guardrail
|
|
37
|
+
*/
|
|
38
|
+
/**
 * Factory that builds a {@link Guardrail} from a configuration object.
 *
 * @param config - Name, check function and optional description / failure mode.
 * @returns The guardrail built from `config`.
 */
export declare function guardrail(config: GuardrailConfig): Guardrail;
|
|
39
|
+
/**
|
|
40
|
+
* Guardrail Manager - Run multiple guardrails
|
|
41
|
+
*/
|
|
42
|
+
/**
 * Holds a collection of guardrails and runs them against content.
 */
export declare class GuardrailManager {
    private guardrails;

    /** Registers a guardrail; returns the manager so calls can be chained. */
    add(g: Guardrail): this;

    /**
     * Runs the registered guardrails against `content`.
     * Resolves to an overall `passed` flag plus one named result per guardrail
     * that was run.
     */
    runAll(content: string, context?: GuardrailContext): Promise<{
        passed: boolean;
        results: Array<{
            name: string;
            result: GuardrailResult;
        }>;
    }>;

    /** Number of registered guardrails. */
    get count(): number;
}
|
|
54
|
+
/**
|
|
55
|
+
* Built-in guardrails
|
|
56
|
+
*/
|
|
57
|
+
/**
 * Factories for the guardrails bundled with the package.
 * Every factory returns a ready-to-use {@link Guardrail}.
 */
export declare const builtinGuardrails: {
    /** Builds a guardrail that checks content against a maximum length of `maxChars`. */
    maxLength: (maxChars: number) => Guardrail;
    /** Builds a guardrail that checks content against a minimum length of `minChars`. */
    minLength: (minChars: number) => Guardrail;
    /** Builds a guardrail that checks content for the given blocked words. */
    blockedWords: (words: string[]) => Guardrail;
    /** Builds a guardrail that checks content for the given required words. */
    requiredWords: (words: string[]) => Guardrail;
    /**
     * Builds a guardrail that tests content against `regex`.
     * `mustMatch` presumably selects whether a match is required or forbidden -
     * confirm against the implementation, which is not shown here.
     */
    pattern: (regex: RegExp, mustMatch?: boolean) => Guardrail;
    /** Builds a guardrail that checks whether content is valid JSON. */
    validJson: () => Guardrail;
};
|