@chanl/eval-cli 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/agent-loader.d.ts +39 -0
- package/dist/agent-loader.d.ts.map +1 -0
- package/dist/agent-loader.js +166 -0
- package/dist/agent-loader.js.map +1 -0
- package/dist/analytics.d.ts +15 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/analytics.js +94 -0
- package/dist/analytics.js.map +1 -0
- package/dist/assertions.d.ts +73 -0
- package/dist/assertions.d.ts.map +1 -0
- package/dist/assertions.js +282 -0
- package/dist/assertions.js.map +1 -0
- package/dist/baseline.d.ts +100 -0
- package/dist/baseline.d.ts.map +1 -0
- package/dist/baseline.js +327 -0
- package/dist/baseline.js.map +1 -0
- package/dist/bin/chanl.d.ts +3 -0
- package/dist/bin/chanl.d.ts.map +1 -0
- package/dist/bin/chanl.js +11 -0
- package/dist/bin/chanl.js.map +1 -0
- package/dist/client.d.ts +40 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +99 -0
- package/dist/client.js.map +1 -0
- package/dist/commands/analytics.d.ts +3 -0
- package/dist/commands/analytics.d.ts.map +1 -0
- package/dist/commands/analytics.js +44 -0
- package/dist/commands/analytics.js.map +1 -0
- package/dist/commands/compare.d.ts +51 -0
- package/dist/commands/compare.d.ts.map +1 -0
- package/dist/commands/compare.js +429 -0
- package/dist/commands/compare.js.map +1 -0
- package/dist/commands/config.d.ts +3 -0
- package/dist/commands/config.d.ts.map +1 -0
- package/dist/commands/config.js +94 -0
- package/dist/commands/config.js.map +1 -0
- package/dist/commands/dataset.d.ts +6 -0
- package/dist/commands/dataset.d.ts.map +1 -0
- package/dist/commands/dataset.js +225 -0
- package/dist/commands/dataset.js.map +1 -0
- package/dist/commands/executions.d.ts +3 -0
- package/dist/commands/executions.d.ts.map +1 -0
- package/dist/commands/executions.js +249 -0
- package/dist/commands/executions.js.map +1 -0
- package/dist/commands/generate.d.ts +3 -0
- package/dist/commands/generate.d.ts.map +1 -0
- package/dist/commands/generate.js +159 -0
- package/dist/commands/generate.js.map +1 -0
- package/dist/commands/init.d.ts +29 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +545 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/login.d.ts +3 -0
- package/dist/commands/login.d.ts.map +1 -0
- package/dist/commands/login.js +65 -0
- package/dist/commands/login.js.map +1 -0
- package/dist/commands/personas.d.ts +3 -0
- package/dist/commands/personas.d.ts.map +1 -0
- package/dist/commands/personas.js +269 -0
- package/dist/commands/personas.js.map +1 -0
- package/dist/commands/scenarios.d.ts +16 -0
- package/dist/commands/scenarios.d.ts.map +1 -0
- package/dist/commands/scenarios.js +755 -0
- package/dist/commands/scenarios.js.map +1 -0
- package/dist/commands/scorecards.d.ts +3 -0
- package/dist/commands/scorecards.d.ts.map +1 -0
- package/dist/commands/scorecards.js +220 -0
- package/dist/commands/scorecards.js.map +1 -0
- package/dist/commands/server.d.ts +8 -0
- package/dist/commands/server.d.ts.map +1 -0
- package/dist/commands/server.js +357 -0
- package/dist/commands/server.js.map +1 -0
- package/dist/commands/test.d.ts +3 -0
- package/dist/commands/test.d.ts.map +1 -0
- package/dist/commands/test.js +410 -0
- package/dist/commands/test.js.map +1 -0
- package/dist/commands/tool-fixtures.d.ts +3 -0
- package/dist/commands/tool-fixtures.d.ts.map +1 -0
- package/dist/commands/tool-fixtures.js +324 -0
- package/dist/commands/tool-fixtures.js.map +1 -0
- package/dist/config.d.ts +32 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +132 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +146 -0
- package/dist/index.js.map +1 -0
- package/dist/output.d.ts +30 -0
- package/dist/output.d.ts.map +1 -0
- package/dist/output.js +77 -0
- package/dist/output.js.map +1 -0
- package/dist/update-check.d.ts +6 -0
- package/dist/update-check.d.ts.map +1 -0
- package/dist/update-check.js +50 -0
- package/dist/update-check.js.map +1 -0
- package/package.json +42 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 chanl.ai
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * An agent under test, as described by an agent YAML file and normalised
 * by `loadAgentYaml`.
 */
export interface AgentDefinition {
    /** Agent name, taken from the YAML `name` field. */
    name: string;
    /** Model name without an explicit provider prefix; empty for the `http` provider. */
    model: string;
    /** Provider identifier, e.g. "openai", "anthropic" or "http". */
    provider: string;
    /** System prompt text, given inline or read from a referenced file. */
    systemPrompt: string;
    /** Sampling temperature; validated to be between 0 and 2 when present. */
    temperature?: number;
    /** Token limit from the YAML `max_tokens` field; validated to be at least 1 when present. */
    maxTokens?: number;
    /** Endpoint URL; only set when the provider is `http`. */
    httpEndpoint?: string;
}
|
|
14
|
+
/**
 * Split a model string into its provider and model name.
 *
 * Recognised forms:
 *   "gpt-4o"                    -> { provider: "openai", model: "gpt-4o" }
 *   "openai:gpt-4o"             -> { provider: "openai", model: "gpt-4o" }
 *   "anthropic:claude-sonnet"   -> { provider: "anthropic", model: "claude-sonnet" }
 *   "http:localhost:8080"       -> { provider: "http", model: "", httpEndpoint: "localhost:8080" }
 *   "http:http://my-agent:8080" -> { provider: "http", model: "", httpEndpoint: "http://my-agent:8080" }
 *
 * @param modelStr - Model string, optionally prefixed with "openai:", "anthropic:" or "http:"
 * @param fallbackProvider - Provider used when there is no recognised prefix and the
 *                           model name does not match a well-known family; "openai" if omitted
 * @returns The provider, the model name, and (http provider only) the endpoint
 */
export declare function parseModelString(modelStr: string, fallbackProvider?: string): {
    provider: string;
    model: string;
    httpEndpoint?: string;
};
|
|
29
|
+
/**
 * Read an agent YAML file from disk and turn it into an agent definition.
 *
 * @param filePath - Location of the agent YAML file (resolved to an absolute path)
 * @param fallbackProvider - Provider applied when the model string carries no prefix
 *                           and cannot be auto-detected (typically from CLI config)
 * @returns The parsed agent definition
 * @throws Error when the file does not exist, the YAML cannot be parsed, or a
 *         required field is missing or invalid
 */
export declare function loadAgentYaml(filePath: string, fallbackProvider?: string): AgentDefinition;
|
|
39
|
+
//# sourceMappingURL=agent-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-loader.d.ts","sourceRoot":"","sources":["../src/agent-loader.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AA0BD;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAC9B,QAAQ,EAAE,MAAM,EAChB,gBAAgB,CAAC,EAAE,MAAM,GACxB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAA;CAAE,CA2B5D;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,MAAM,EAChB,gBAAgB,CAAC,EAAE,MAAM,GACxB,eAAe,CA0GjB"}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.parseModelString = parseModelString;
|
|
37
|
+
exports.loadAgentYaml = loadAgentYaml;
|
|
38
|
+
const fs = __importStar(require("fs"));
|
|
39
|
+
const path = __importStar(require("path"));
|
|
40
|
+
const yaml = __importStar(require("js-yaml"));
|
|
41
|
+
/**
 * Well-known model-name prefixes and the provider each one implies.
 * Used to auto-detect the provider when no explicit "provider:" prefix is
 * given. Keys are lowercase; the lookup lowercases the model name first.
 */
const MODEL_PROVIDER_MAP = {
    'gpt-': 'openai',
    'o1': 'openai',
    'o3': 'openai',
    'o4': 'openai',
    'claude-': 'anthropic',
};
/**
 * Parse a model string into provider + model name.
 *
 * Formats:
 *   "gpt-4o"                    -> { provider: "openai", model: "gpt-4o" }
 *   "openai:gpt-4o"             -> { provider: "openai", model: "gpt-4o" }
 *   "anthropic:claude-sonnet"   -> { provider: "anthropic", model: "claude-sonnet" }
 *   "http:localhost:8080"       -> { provider: "http", model: "", httpEndpoint: "localhost:8080" }
 *   "http:http://my-agent:8080" -> { provider: "http", model: "", httpEndpoint: "http://my-agent:8080" }
 *
 * Surrounding whitespace, and whitespace on either side of the first colon,
 * is ignored, so "openai: gpt-4o" parses the same as "openai:gpt-4o".
 *
 * @param modelStr - Model string, optionally prefixed with a provider
 * @param fallbackProvider - Provider to use when there is no recognised prefix and
 *                           auto-detection fails; defaults to "openai"
 * @returns Object with `provider`, `model` and, for the http provider, `httpEndpoint`
 */
function parseModelString(modelStr, fallbackProvider) {
    // Hand-edited YAML values often carry stray spaces or a trailing newline;
    // left in place they would end up inside the model name sent to the provider.
    const spec = modelStr.trim();
    // Check for explicit provider prefix
    const colonIdx = spec.indexOf(':');
    if (colonIdx > 0) {
        const prefix = spec.slice(0, colonIdx).trim().toLowerCase();
        const rest = spec.slice(colonIdx + 1).trim();
        if (prefix === 'openai' || prefix === 'anthropic') {
            return { provider: prefix, model: rest };
        }
        if (prefix === 'http') {
            // Only the first colon splits, so "http:http://host:8080" keeps its URL intact.
            return { provider: 'http', model: '', httpEndpoint: rest };
        }
        // Any other prefix is treated as part of the model name and falls
        // through to auto-detection with the full string.
    }
    // Auto-detect from model name (case-insensitive)
    const lowered = spec.toLowerCase();
    for (const [prefix, provider] of Object.entries(MODEL_PROVIDER_MAP)) {
        if (lowered.startsWith(prefix)) {
            return { provider, model: spec };
        }
    }
    // Fall back to config's provider or default to openai
    return { provider: fallbackProvider || 'openai', model: spec };
}
|
|
87
|
+
/**
 * Load and parse an agent YAML file.
 *
 * Required YAML fields: `name`, `model`, and exactly one of `system_prompt`
 * (inline text) or `system_prompt_file` (path resolved relative to the YAML
 * file's directory). Optional fields: `temperature` (0 to 2) and `max_tokens`
 * (at least 1).
 *
 * @param filePath - Path to the agent YAML file
 * @param fallbackProvider - Provider to use when model string has no prefix and
 *                           auto-detection fails (typically from CLI config)
 * @returns Parsed agent definition
 * @throws Error if file is missing, YAML is invalid, or required fields are
 *         absent or of the wrong type
 */
function loadAgentYaml(filePath, fallbackProvider) {
    const resolvedPath = path.resolve(filePath);
    if (!fs.existsSync(resolvedPath)) {
        throw new Error(`Agent YAML file not found: ${resolvedPath}`);
    }
    const raw = fs.readFileSync(resolvedPath, 'utf-8');
    let parsed;
    try {
        parsed = yaml.load(raw);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid YAML in agent file ${resolvedPath}: ${message}`);
    }
    // A YAML sequence is also typeof 'object' but cannot carry named fields.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error(`Agent YAML file is empty or not an object: ${resolvedPath}`);
    }
    // Validate required fields
    if (!parsed.name || typeof parsed.name !== 'string') {
        throw new Error(`Agent YAML missing required field "name" in ${resolvedPath}`);
    }
    if (!parsed.model || typeof parsed.model !== 'string') {
        throw new Error(`Agent YAML missing required field "model" in ${resolvedPath}`);
    }
    // Resolve system prompt: inline or from file
    let systemPrompt;
    if (parsed.system_prompt && parsed.system_prompt_file) {
        throw new Error(`Agent YAML has both "system_prompt" and "system_prompt_file" — use only one in ${resolvedPath}`);
    }
    if (parsed.system_prompt_file) {
        // Without this check a non-string value surfaces as a raw TypeError from path.resolve.
        if (typeof parsed.system_prompt_file !== 'string') {
            throw new Error(`Agent YAML "system_prompt_file" must be a string in ${resolvedPath}`);
        }
        const promptPath = path.resolve(path.dirname(resolvedPath), parsed.system_prompt_file);
        if (!fs.existsSync(promptPath)) {
            throw new Error(`system_prompt_file not found: ${promptPath} (referenced from ${resolvedPath})`);
        }
        systemPrompt = fs.readFileSync(promptPath, 'utf-8').trim();
    }
    else if (parsed.system_prompt) {
        // Without this check a number, list or mapping crashes on `.trim()`.
        if (typeof parsed.system_prompt !== 'string') {
            throw new Error(`Agent YAML "system_prompt" must be a string in ${resolvedPath}`);
        }
        systemPrompt = parsed.system_prompt.trim();
    }
    else {
        throw new Error(`Agent YAML missing "system_prompt" or "system_prompt_file" in ${resolvedPath}`);
    }
    if (!systemPrompt) {
        throw new Error(`Agent YAML has empty system prompt in ${resolvedPath}`);
    }
    // Parse model string into provider + model
    const { provider, model, httpEndpoint } = parseModelString(parsed.model, fallbackProvider);
    const result = {
        name: parsed.name,
        model,
        provider,
        systemPrompt,
    };
    if (parsed.temperature !== undefined) {
        // Number.isFinite rejects non-numbers as well as NaN/Infinity; YAML `.nan`
        // would otherwise slip through because every comparison with NaN is false.
        if (!Number.isFinite(parsed.temperature) || parsed.temperature < 0 || parsed.temperature > 2) {
            throw new Error(`Agent YAML "temperature" must be a number between 0 and 2 in ${resolvedPath}`);
        }
        result.temperature = parsed.temperature;
    }
    if (parsed.max_tokens !== undefined) {
        // Same reasoning: NaN and Infinity are typeof 'number' but are not usable limits.
        if (!Number.isFinite(parsed.max_tokens) || parsed.max_tokens < 1) {
            throw new Error(`Agent YAML "max_tokens" must be a positive number in ${resolvedPath}`);
        }
        result.maxTokens = parsed.max_tokens;
    }
    // Only the http provider yields an endpoint; omit the key otherwise.
    if (httpEndpoint) {
        result.httpEndpoint = httpEndpoint;
    }
    return result;
}
|
|
166
|
+
//# sourceMappingURL=agent-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-loader.js","sourceRoot":"","sources":["../src/agent-loader.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoDA,4CA8BC;AAWD,sCA6GC;AA1MD,uCAAyB;AACzB,2CAA6B;AAC7B,8CAAgC;AA4BhC;;;GAGG;AACH,MAAM,kBAAkB,GAA2B;IACjD,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,QAAQ;IACd,SAAS,EAAE,WAAW;CACvB,CAAC;AAEF;;;;;;;;;GASG;AACH,SAAgB,gBAAgB,CAC9B,QAAgB,EAChB,gBAAyB;IAEzB,qCAAqC;IACrC,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;QACjB,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QAEzD,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;YACxB,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC;QACrE,CAAC;QACD,IAAI,MAAM,KAAK,WAAW,EAAE,CAAC;YAC3B,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC;QACxE,CAAC;QACD,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACtB,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;YAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC;QACjE,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,KAAK,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAAE,CAAC;QACpE,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9C,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;QACvC,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,OAAO,EAAE,QAAQ,EAAE,gBAAgB,IAAI,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrE,CAAC;AAED;;;;;;;;GAQG;AACH,SAAgB,aAAa,CAC3B,QAAgB,EAChB,gBAAyB;IAEzB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,8BAA8B,YAAY,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IACnD,IAAI,MAAoB,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAiB,CAAC;IAC1C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,IAAI,KAAK,CAAC,8BAA8B,YAAY,K
AAK,OAAO,EAAE,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CACb,8CAA8C,YAAY,EAAE,CAC7D,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QACpD,MAAM,IAAI,KAAK,CACb,+CAA+C,YAAY,EAAE,CAC9D,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CACb,gDAAgD,YAAY,EAAE,CAC/D,CAAC;IACJ,CAAC;IAED,6CAA6C;IAC7C,IAAI,YAAoB,CAAC;IAEzB,IAAI,MAAM,CAAC,aAAa,IAAI,MAAM,CAAC,kBAAkB,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CACb,kFAAkF,YAAY,EAAE,CACjG,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,kBAAkB,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,EAC1B,MAAM,CAAC,kBAAkB,CAC1B,CAAC;QACF,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CACb,iCAAiC,UAAU,qBAAqB,YAAY,GAAG,CAChF,CAAC;QACJ,CAAC;QACD,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7D,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QAChC,YAAY,GAAG,MAAM,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;IAC7C,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CACb,iEAAiE,YAAY,EAAE,CAChF,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CACb,yCAAyC,YAAY,EAAE,CACxD,CAAC;IACJ,CAAC;IAED,2CAA2C;IAC3C,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,gBAAgB,CACxD,MAAM,CAAC,KAAK,EACZ,gBAAgB,CACjB,CAAC;IAEF,MAAM,MAAM,GAAoB;QAC9B,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,KAAK;QACL,QAAQ;QACR,YAAY;KACb,CAAC;IAEF,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;QACrC,IAAI,OAAO,MAAM,CAAC,WAAW,KAAK,QAAQ,IAAI,MAAM,CAAC,WAAW,GAAG,CAAC,IAAI,MAAM,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;YAC/F,MAAM,IAAI,KAAK,CACb,gEAAgE,YAAY,EAAE,CAC/E,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAC1C,CAAC;IAED,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QACpC,IAAI,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACnE,MAAM,IAAI,KAAK,CACb,wDAAwD,YAAY,EAAE,CACvE,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC;IACvC,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,YAAY,
GAAG,YAAY,CAAC;IACrC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Read the CLI config and set up the analytics state (enabled flag and
 * distinct ID). Analytics stay off when the config disables them or cannot
 * be loaded.
 */
export declare function initAnalytics(): void;
|
|
6
|
+
/**
 * Send an analytics event on a best-effort basis.
 *
 * Does nothing when analytics are disabled. Failures are swallowed, so this
 * function never throws.
 *
 * @param event - Event name
 * @param properties - Optional extra properties attached to the event
 */
export declare function track(event: string, properties?: Record<string, any>): void;
|
|
11
|
+
/**
 * Return the distinct ID currently used for analytics events, initialising
 * analytics first if needed. May be an empty string when analytics are off.
 */
export declare function getAnalyticsId(): string;
|
|
15
|
+
//# sourceMappingURL=analytics.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analytics.d.ts","sourceRoot":"","sources":["../src/analytics.ts"],"names":[],"mappings":"AAiBA;;;GAGG;AACH,wBAAgB,aAAa,IAAI,IAAI,CAgBpC;AAED;;;GAGG;AACH,wBAAgB,KAAK,CACnB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC/B,IAAI,CAqDN;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,MAAM,CAKvC"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.initAnalytics = initAnalytics;
|
|
4
|
+
exports.track = track;
|
|
5
|
+
exports.getAnalyticsId = getAnalyticsId;
|
|
6
|
+
const config_1 = require("./config");
|
|
7
|
+
const crypto_1 = require("crypto");
|
|
8
|
+
// PostHog project key and ingestion host used by track().
const POSTHOG_KEY = 'phc_chanl_eval_placeholder';
const POSTHOG_HOST = 'https://app.posthog.com';
// Module-level analytics state, populated lazily by initAnalytics().
let initialized = false, analyticsEnabled = false, distinctId = '';
/**
 * Set up analytics state from the CLI config.
 *
 * Analytics count as enabled unless the config sets `analytics` to `false`.
 * When enabled and the config has no `analyticsId`, a random UUID is
 * generated in memory. If the config cannot be loaded, analytics are
 * switched off. Either way the module is marked as initialized.
 */
function initAnalytics() {
    try {
        const { analytics, analyticsId } = (0, config_1.loadConfig)();
        analyticsEnabled = analytics !== false;
        distinctId = analyticsId || '';
        if (analyticsEnabled && !distinctId) {
            // Generate a new anonymous ID — it'll be persisted on next config save
            distinctId = (0, crypto_1.randomUUID)();
        }
    }
    catch {
        analyticsEnabled = false;
    }
    finally {
        initialized = true;
    }
}
|
|
33
|
+
/**
 * Track an analytics event. No-op if disabled.
 * Never throws — errors are silently swallowed.
 *
 * The event is sent as a fire-and-forget HTTPS POST to the PostHog batch
 * endpoint. The request is aborted if the socket stays idle for 3 seconds,
 * and the response body is discarded.
 *
 * @param event - Event name
 * @param properties - Optional event properties; `$lib` and `$lib_version`
 *                     are always overwritten by this function
 */
function track(event, properties) {
    if (!initialized) {
        initAnalytics();
    }
    if (!analyticsEnabled || !distinctId)
        return;
    try {
        const payload = {
            event,
            // PostHog's ingestion API identifies the user by the snake_case
            // `distinct_id` field.
            distinct_id: distinctId,
            properties: {
                ...properties,
                $lib: 'chanl-cli',
                // NOTE(review): hard-coded; the published package is 0.4.0 — confirm
                // whether this should follow the package version.
                $lib_version: '0.1.0',
            },
            timestamp: new Date().toISOString(),
        };
        // Fire-and-forget POST to PostHog.
        // Plain https is lazy-required here to avoid adding posthog-node as a hard dependency.
        const https = require('https');
        const data = JSON.stringify({
            api_key: POSTHOG_KEY,
            batch: [payload],
        });
        const url = new URL(`${POSTHOG_HOST}/batch/`);
        const req = https.request({
            hostname: url.hostname,
            // Honour an explicit port in POSTHOG_HOST; default to the HTTPS port.
            port: url.port || 443,
            path: url.pathname,
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Content-Length': Buffer.byteLength(data),
            },
            timeout: 3000,
        }, (res) => {
            // Response ignored — but it must be drained, otherwise the socket
            // and any buffered body are held until the process exits.
            res.resume();
        });
        // The `timeout` option only emits this event; the request has to be
        // destroyed explicitly or it keeps waiting (and keeps the CLI alive).
        req.on('timeout', () => {
            req.destroy();
        });
        req.on('error', () => {
            // Silently ignore network errors (including the abort above)
        });
        req.write(data);
        req.end();
    }
    catch {
        // Never throw from analytics
    }
}
|
|
85
|
+
/**
 * Return the distinct ID used for analytics events, running the one-time
 * analytics initialisation first if it has not happened yet.
 */
function getAnalyticsId() {
    if (initialized) {
        return distinctId;
    }
    initAnalytics();
    return distinctId;
}
|
|
94
|
+
//# sourceMappingURL=analytics.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analytics.js","sourceRoot":"","sources":["../src/analytics.ts"],"names":[],"mappings":";;AAqBA,sCAgBC;AAMD,sBAwDC;AAKD,wCAKC;AA7GD,qCAAsC;AACtC,mCAAoC;AASpC,MAAM,WAAW,GAAG,4BAA4B,CAAC;AACjD,MAAM,YAAY,GAAG,yBAAyB,CAAC;AAE/C,IAAI,WAAW,GAAG,KAAK,CAAC;AACxB,IAAI,gBAAgB,GAAG,KAAK,CAAC;AAC7B,IAAI,UAAU,GAAG,EAAE,CAAC;AAEpB;;;GAGG;AACH,SAAgB,aAAa;IAC3B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAA,mBAAU,GAAE,CAAC;QAC5B,gBAAgB,GAAG,MAAM,CAAC,SAAS,KAAK,KAAK,CAAC;QAC9C,UAAU,GAAG,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;QAEtC,IAAI,CAAC,UAAU,IAAI,gBAAgB,EAAE,CAAC;YACpC,uEAAuE;YACvE,UAAU,GAAG,IAAA,mBAAU,GAAE,CAAC;QAC5B,CAAC;QAED,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;IAAC,MAAM,CAAC;QACP,WAAW,GAAG,IAAI,CAAC;QACnB,gBAAgB,GAAG,KAAK,CAAC;IAC3B,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAgB,KAAK,CACnB,KAAa,EACb,UAAgC;IAEhC,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,aAAa,EAAE,CAAC;IAClB,CAAC;IACD,IAAI,CAAC,gBAAgB,IAAI,CAAC,UAAU;QAAE,OAAO;IAE7C,IAAI,CAAC;QACH,MAAM,OAAO,GAAmB;YAC9B,KAAK;YACL,UAAU;YACV,UAAU,EAAE;gBACV,GAAG,UAAU;gBACb,IAAI,EAAE,WAAW;gBACjB,YAAY,EAAE,OAAO;aACtB;YACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QAEF,kCAAkC;QAClC,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC;YAC1B,OAAO,EAAE,WAAW;YACpB,KAAK,EAAE,CAAC,OAAO,CAAC;SACjB,CAAC,CAAC;QAEH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,YAAY,SAAS,CAAC,CAAC;QAC9C,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CACvB;YACE,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,IAAI,EAAE,GAAG;YACT,IAAI,EAAE,GAAG,CAAC,QAAQ;YAClB,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,gBAAgB,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC;aAC1C;YACD,OAAO,EAAE,IAAI;SACd,EACD,GAAG,EAAE;YACH,qCAAqC;QACvC,CAAC,CACF,CAAC;QAEF,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACnB,iCAAiC;QACnC,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAChB,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC;IAAC,MAAM,CAAC;QACP,6BAA6B;IAC/B,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc;IAC5B,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,aAAa,EAAE,CAAC;IAClB,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assertion engine for `chanl test` — evaluates pass/fail assertions
|
|
3
|
+
* against scenario execution results.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Keyword assertion.
 * NOTE(review): presumably checks the transcript for the listed terms; the
 * matching rules live in the implementation, which is not visible here.
 */
export interface KeywordAssertion {
    type: 'keyword';
    must_include?: string[];
    must_not_include?: string[];
    /** Optional human-readable label for this assertion. */
    description?: string;
}
/**
 * Response-time assertion with an upper bound expressed in seconds.
 * NOTE(review): which timing is compared (average vs. per-step) is decided
 * by the implementation — confirm there.
 */
export interface ResponseTimeAssertion {
    type: 'response_time';
    max_seconds: number;
    /** Optional human-readable label for this assertion. */
    description?: string;
}
/**
 * Rubric-based assertion with a minimum score.
 * NOTE(review): the scoring scale and how the rubric is evaluated are not
 * visible in this declaration file.
 */
export interface PromptAssertion {
    type: 'prompt';
    rubric: string;
    min_score: number;
    /** Optional human-readable label for this assertion. */
    description?: string;
}
/**
 * Tool-call assertion listing expected tool names.
 * NOTE(review): whether order or exact match matters is not visible here.
 */
export interface ToolCallAssertion {
    type: 'tool_call';
    expected: string[];
    /** Optional human-readable label for this assertion. */
    description?: string;
}
/**
 * Score assertion with a minimum score.
 * NOTE(review): presumably compared against the execution's overall score — confirm.
 */
export interface ScoreAssertion {
    type: 'score';
    min_score: number;
    /** Optional human-readable label for this assertion. */
    description?: string;
}
/** Any supported assertion; the `type` field discriminates the variants. */
export type Assertion =
    | KeywordAssertion
    | ResponseTimeAssertion
    | PromptAssertion
    | ToolCallAssertion
    | ScoreAssertion;
|
|
33
|
+
/**
 * Outcome of evaluating a single assertion.
 * NOTE(review): the exact formatting of `actual`, `expected` and `reason`
 * is produced by the implementation, which is not visible here.
 */
export interface AssertionResult {
    /** Assertion type this result belongs to. */
    type: string;
    description: string;
    /** Whether the assertion passed. */
    passed: boolean;
    actual: string;
    expected: string;
    reason: string;
}
|
|
41
|
+
/** Structure of a test YAML file after parsing. */
export interface TestDefinition {
    scenario: string;
    /** ID of the prompt entity that defines the agent under test. */
    promptId?: string;
    /** @deprecated Prefer `promptId`; still parsed for backwards compatibility. */
    agent?: string;
    /** Assertions to evaluate for this test. */
    assertions: Assertion[];
}
|
|
50
|
+
/**
 * Build the full transcript text for an execution by concatenating the
 * `actualResponse` field of each entry in its `stepResults`.
 */
export declare function extractTranscriptText(execution: any): string;
|
|
55
|
+
/**
 * Average agent response time for an execution, in seconds.
 * `execution.metrics` is consulted first; otherwise the value is computed
 * from the `stepResults` durations.
 *
 * @returns The average in seconds, or `null`
 *          (NOTE(review): presumably when no timing data is available — confirm)
 */
export declare function extractAvgResponseTimeSecs(execution: any): number | null;
|
|
60
|
+
/**
 * Collect the tool names mentioned in an execution.
 * Looks at `stepResults` metadata first and falls back to a keyword search
 * of the transcript.
 */
export declare function extractToolCalls(execution: any): string[];
|
|
65
|
+
/**
 * Read the overall score of an execution.
 *
 * @returns The score, or `null` when the execution has none
 */
export declare function extractOverallScore(execution: any): number | null;
|
|
69
|
+
/**
 * Run every assertion against an execution result.
 *
 * @param assertions - Assertions to evaluate
 * @param execution - Execution result to evaluate them against
 * @returns The assertion results
 *          (NOTE(review): presumably one per assertion, in input order — confirm)
 */
export declare function evaluateAssertions(assertions: Assertion[], execution: any): AssertionResult[];
|
|
73
|
+
//# sourceMappingURL=assertions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,eAAe,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,OAAO,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,MAAM,SAAS,GACjB,gBAAgB,GAChB,qBAAqB,GACrB,eAAe,GACf,iBAAiB,GACjB,cAAc,CAAC;AAEnB,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wCAAwC;AACxC,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAMD;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,GAAG,GAAG,MAAM,CAQ5D;AAED;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,SAAS,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CA6BxE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,GAAG,GAAG,MAAM,EAAE,CA4BzD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQjE;AAwMD;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,UAAU,EAAE,SAAS,EAAE,EACvB,SAAS,EAAE,GAAG,GACb,eAAe,EAAE,CA8BnB"}
|