@huydao/karrot 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/GUIDE.md +484 -0
  2. package/README.md +253 -0
  3. package/dist/assertions/assertion.d.ts +18 -0
  4. package/dist/assertions/assertion.js +198 -0
  5. package/dist/assertions/turn-eval.d.ts +22 -0
  6. package/dist/assertions/turn-eval.js +178 -0
  7. package/dist/executors/adapters/ag-ui-post.d.ts +55 -0
  8. package/dist/executors/adapters/ag-ui-post.js +703 -0
  9. package/dist/executors/adapters/ag-ui.d.ts +15 -0
  10. package/dist/executors/adapters/ag-ui.js +275 -0
  11. package/dist/executors/execute.d.ts +16 -0
  12. package/dist/executors/execute.js +145 -0
  13. package/dist/executors/executor.d.ts +37 -0
  14. package/dist/executors/executor.js +203 -0
  15. package/dist/executors/run-result.d.ts +33 -0
  16. package/dist/executors/run-result.js +22 -0
  17. package/dist/index.d.ts +12 -0
  18. package/dist/index.js +28 -0
  19. package/dist/prompts/turn-eval-system-prompt.md +68 -0
  20. package/dist/prompts/turn-message-gen-system-prompt.md +16 -0
  21. package/dist/reports/report.d.ts +68 -0
  22. package/dist/reports/report.js +366 -0
  23. package/dist/scenarios/generated-message.d.ts +15 -0
  24. package/dist/scenarios/generated-message.js +116 -0
  25. package/dist/scenarios/scenario-loader.d.ts +12 -0
  26. package/dist/scenarios/scenario-loader.js +103 -0
  27. package/dist/scenarios/scenario.d.ts +62 -0
  28. package/dist/scenarios/scenario.js +35 -0
  29. package/dist/utils/artifact-files.d.ts +3 -0
  30. package/dist/utils/artifact-files.js +22 -0
  31. package/dist/utils/config.d.ts +101 -0
  32. package/dist/utils/config.js +57 -0
  33. package/dist/utils/openai-eval.d.ts +5 -0
  34. package/dist/utils/openai-eval.js +54 -0
  35. package/package.json +146 -0
@@ -0,0 +1,62 @@
1
/** Minimal shape every scenario context must satisfy: it carries a `projectId` string. */
+ export type BaseAiScenarioContext = {
2
+ projectId: string;
3
+ };
4
/**
 * Argument object received by a turn's `onComplete` callback: the scenario
 * context plus an `output` string.
 * NOTE(review): `output` is presumably the text produced for the turn — confirm against the executor.
 */
+ type AiTurnCompletionArgs<TContext extends BaseAiScenarioContext> = {
5
+ context: TContext;
6
+ output: string;
7
+ };
8
/**
 * One assertion attached to a turn. Two shapes are accepted:
 * - `assert`: either `{ hasText }` or `{ toolcall }` (a list of strings);
 * - `aiAssert`: either `{ hasContent }` or `{ notHasContent }`.
 * Both shapes take an optional `description`. How each shape is evaluated is
 * decided by the assertion module, not by this declaration.
 */
+ export type AiTurnAssertion = {
9
+ assert: {
10
+ hasText: string;
11
+ } | {
12
+ toolcall: string[];
13
+ };
14
+ description?: string;
15
+ } | {
16
+ aiAssert: {
17
+ hasContent: string;
18
+ } | {
19
+ notHasContent: string;
20
+ };
21
+ description?: string;
22
+ };
23
/**
 * Name of an evaluation dimension. Three names are spelled out as literals;
 * the `(string & {})` member additionally accepts any other string while
 * keeping the literal names visible to editor completion.
 */
+ export type AiTurnEvalDimension = 'correctness' | 'coverage' | 'helpfulness' | (string & {});
24
/** An evaluation request: either a bare dimension name, or a dimension paired with a `guidance` string. */
+ export type AiTurnEvalDefinition = AiTurnEvalDimension | {
25
+ dimension: AiTurnEvalDimension;
26
+ guidance: string;
27
+ };
28
/**
 * Descriptor for a turn message that is generated rather than written out.
 * Discriminated on `type`:
 * - `'ai_gen_previous_context'` carries no extra data;
 * - `'ai_gen_guidance'` carries a `guidance` string;
 * - `'ai_gen_content'` carries a `content` string.
 * These are the same three shapes returned by the `aiGen` helpers.
 */
+ export type AiGeneratedMessageDefinition = {
29
+ type: 'ai_gen_previous_context';
30
+ } | {
31
+ type: 'ai_gen_guidance';
32
+ guidance: string;
33
+ } | {
34
+ type: 'ai_gen_content';
35
+ content: string;
36
+ };
37
/** A turn's message: either a function that builds the string from the context, or a generated-message descriptor. */
+ type AiTurnMessage<TContext extends BaseAiScenarioContext> = ((context: TContext) => string) | AiGeneratedMessageDefinition;
38
/**
 * One step of a scenario: a labelled message plus optional timeouts,
 * assertions, evaluation definitions and a completion callback.
 * NOTE(review): the exact meaning of `idleTimeoutMs` / `processTimeoutMs` is
 * defined by the executor and is not visible in this declaration.
 */
+ export type AiTurn<TContext extends BaseAiScenarioContext> = {
39
+ label: string;
40
+ message: AiTurnMessage<TContext>;
41
+ idleTimeoutMs?: number;
42
+ processTimeoutMs?: number;
43
+ assertions?: AiTurnAssertion[];
44
+ eval?: AiTurnEvalDefinition[];
45
+ onComplete?: (args: AiTurnCompletionArgs<TContext>) => void | Promise<void>;
46
+ };
47
/** A scenario: an `id` (the key matched by `AiScenarioSet.select`), a display `name`, and its list of turns. */
+ export type AiScenario<TContext extends BaseAiScenarioContext> = {
48
+ id: string;
49
+ name: string;
50
+ turns: AiTurn<TContext>[];
51
+ };
52
/**
 * A list of scenarios that can be narrowed by id.
 * `select(ids)` yields every scenario when `ids` is omitted or empty, and
 * otherwise only the scenarios whose `id` appears in `ids`.
 */
+ export declare class AiScenarioSet<TContext extends BaseAiScenarioContext> {
53
+ readonly items: AiScenario<TContext>[];
54
+ constructor(items: AiScenario<TContext>[]);
55
+ select(ids?: string[]): AiScenario<TContext>[];
56
+ }
57
/**
 * Factory helpers that build `AiGeneratedMessageDefinition` values:
 * `fromPreviousContext()` -> `'ai_gen_previous_context'`,
 * `fromGuidance(guidance)` -> `'ai_gen_guidance'`,
 * `fromContent(content)` -> `'ai_gen_content'`.
 */
+ export declare const aiGen: {
58
+ fromPreviousContext(): AiGeneratedMessageDefinition;
59
+ fromGuidance(guidance: string): AiGeneratedMessageDefinition;
60
+ fromContent(content: string): AiGeneratedMessageDefinition;
61
+ };
62
+ export {};
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.aiGen = exports.AiScenarioSet = void 0;
4
/**
 * A list of scenarios that can be narrowed down by scenario id.
 */
class AiScenarioSet {
    /** Scenarios in the order they were passed to the constructor. */
    items;
    /**
     * @param items Scenarios to wrap. The array is stored as given (not copied).
     */
    constructor(items) {
        this.items = items;
    }
    /**
     * Picks the scenarios to run.
     *
     * @param ids Optional scenario ids. When omitted or empty, every scenario is selected.
     * @returns A new array (never the internal `items` array) holding the selected
     *          scenarios in their original order. Ids that match nothing are ignored.
     */
    select(ids) {
        if (!ids?.length) {
            // Hand back a copy: the filtered branch below always returns a fresh
            // array, so returning `this.items` itself here would let a caller that
            // sorts or splices the result silently corrupt the set.
            return [...this.items];
        }
        // Set lookup keeps selection linear in items + ids.
        const wanted = new Set(ids);
        return this.items.filter((item) => wanted.has(item.id));
    }
}
16
+ exports.AiScenarioSet = AiScenarioSet;
17
+ exports.aiGen = {
18
+ fromPreviousContext() {
19
+ return {
20
+ type: 'ai_gen_previous_context',
21
+ };
22
+ },
23
+ fromGuidance(guidance) {
24
+ return {
25
+ type: 'ai_gen_guidance',
26
+ guidance,
27
+ };
28
+ },
29
+ fromContent(content) {
30
+ return {
31
+ type: 'ai_gen_content',
32
+ content,
33
+ };
34
+ },
35
+ };
@@ -0,0 +1,3 @@
1
/** Returns `<baseDirectory>/artifacts`; `baseDirectory` defaults to the current working directory. */
+ export declare function getArtifactDirectory(baseDirectory?: string): string;
2
/** Creates `directoryPath` (and any missing parents); resolves once the directory exists. */
+ export declare function ensureArtifactDirectory(directoryPath: string): Promise<void>;
3
/**
 * Ensures `baseDirectory` exists (default: `getArtifactDirectory()`), creates a
 * run subdirectory inside it whose name is derived from `Date.now()`, and
 * resolves to that subdirectory's path.
 */
+ export declare function createRunArtifactDirectory(baseDirectory?: string): Promise<string>;
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getArtifactDirectory = getArtifactDirectory;
7
+ exports.ensureArtifactDirectory = ensureArtifactDirectory;
8
+ exports.createRunArtifactDirectory = createRunArtifactDirectory;
9
+ const promises_1 = __importDefault(require("node:fs/promises"));
10
+ const node_path_1 = __importDefault(require("node:path"));
11
/**
 * Builds the path of the `artifacts` folder under a base directory.
 *
 * @param baseDirectory Directory to place `artifacts` in; defaults to the current working directory.
 * @returns The joined path `<baseDirectory>/artifacts`.
 */
function getArtifactDirectory(baseDirectory = process.cwd()) {
    const artifactsPath = node_path_1.default.join(baseDirectory, 'artifacts');
    return artifactsPath;
}
14
/**
 * Makes sure a directory exists, creating missing parent directories as needed.
 *
 * @param directoryPath Directory to create.
 * @returns A promise that settles when `mkdir` has finished.
 */
async function ensureArtifactDirectory(directoryPath) {
    const mkdirOptions = { recursive: true };
    await promises_1.default.mkdir(directoryPath, mkdirOptions);
}
17
/**
 * Creates a fresh per-run directory inside the artifact directory.
 *
 * The directory is named after the current millisecond timestamp. If that name
 * is already taken (two runs started within the same millisecond), a numeric
 * suffix (`-1`, `-2`, ...) is appended so runs never share a directory.
 *
 * @param baseDirectory Parent directory; defaults to `getArtifactDirectory()`. Created if missing.
 * @returns The path of the newly created run directory.
 * @throws Any file-system error other than the run directory name already existing.
 */
async function createRunArtifactDirectory(baseDirectory = getArtifactDirectory()) {
    await ensureArtifactDirectory(baseDirectory);
    const stamp = String(Date.now());
    for (let attempt = 0;; attempt += 1) {
        const directoryName = attempt === 0 ? stamp : `${stamp}-${attempt}`;
        const runDirectory = node_path_1.default.join(baseDirectory, directoryName);
        try {
            // Deliberately non-recursive: a recursive mkdir succeeds silently when
            // the directory already exists, which would hide a name collision.
            await promises_1.default.mkdir(runDirectory);
            return runDirectory;
        }
        catch (error) {
            if (!error || error.code !== 'EEXIST') {
                throw error;
            }
            // Name taken by another run; try the next suffix.
        }
    }
}
@@ -0,0 +1,101 @@
1
/**
 * String-valued description of the runtime a run was executed against
 * (agent, websocket, account/project and app base URL identifiers).
 * Used as the `report.runtime` field of `KarrotConfig`.
 */
+ export type KarrotRuntimeSnapshot = {
2
+ agentUrl: string;
3
+ agentId: string;
4
+ wsUrl: string;
5
+ wsTopic: string;
6
+ accountId: string;
7
+ projectId: string;
8
+ appBaseUrl: string;
9
+ };
10
/**
 * A scenario file plus an optional list of ids.
 * NOTE(review): presumably `ids` restricts which scenarios from `file` are run — confirm against the scenario loader.
 */
+ export type KarrotScenarioSelection = {
11
+ file: string;
12
+ ids?: string[];
13
+ };
14
/**
 * Shape of a karrot configuration file (schema `version` 1).
 *
 * `transport` is a union discriminated on `type`:
 * - `'ag-ui-wss'`: requires an `env` string map, with optional timeout settings;
 * - `'ag-ui-post'`: optional `run` / `connect` / `request` HTTP descriptors
 *   (url, optional headers, payload), an optional `observe` block and an
 *   optional `completionCheck`, both of type `'http-poll'`.
 *
 * `context` must contain a `projectId` and may carry arbitrary extra keys.
 * `artifacts`, `execution`, `evaluation` and `report` are optional sections.
 * NOTE(review): the runtime meaning of the individual transport fields is
 * defined by the adapters and is not visible in this declaration.
 */
+ export type KarrotConfig = {
15
+ version: 1;
16
+ transport: {
17
+ type: 'ag-ui-wss';
18
+ env: Record<string, string>;
19
+ processTimeoutMs?: number;
20
+ allowIdleTimeoutWithAssistantText?: boolean;
21
+ } | {
22
+ type: 'ag-ui-post';
23
+ injectMessage?: boolean;
24
+ injectRunMetadata?: boolean;
25
+ run?: {
26
+ url: string;
27
+ headers?: Record<string, string>;
28
+ payload: Record<string, unknown>;
29
+ };
30
+ connect?: {
31
+ url: string;
32
+ headers?: Record<string, string>;
33
+ payload: Record<string, unknown>;
34
+ processTimeoutMs?: number;
35
+ };
36
+ request?: {
37
+ url: string;
38
+ headers?: Record<string, string>;
39
+ payload: Record<string, unknown>;
40
+ };
41
+ observe?: {
42
+ type: 'http-poll';
43
+ status: {
44
+ url: string;
45
+ headers?: Record<string, string>;
46
+ statusPath: string;
47
+ successStatuses: string[];
48
+ failureStatuses?: string[];
49
+ };
50
+ progressEndpoints?: Array<{
51
+ url: string;
52
+ headers?: Record<string, string>;
53
+ itemsPath: string;
54
+ idPath?: string;
55
+ eventType?: string;
56
+ }>;
57
+ outputEndpoint?: {
58
+ url: string;
59
+ headers?: Record<string, string>;
60
+ textPath: string;
61
+ };
62
+ intervalMs?: number;
63
+ timeoutMs?: number;
64
+ };
65
+ completionCheck?: {
66
+ type: 'http-poll';
67
+ url: string;
68
+ headers?: Record<string, string>;
69
+ statusPath: string;
70
+ successStatuses: string[];
71
+ failureStatuses?: string[];
72
+ intervalMs?: number;
73
+ timeoutMs?: number;
74
+ };
75
+ processTimeoutMs?: number;
76
+ };
77
+ artifacts?: {
78
+ directory?: string;
79
+ };
80
+ execution?: {
81
+ stopOnFailure?: boolean;
82
+ };
83
+ evaluation?: {
84
+ systemPromptPath?: string;
85
+ promptDirectory?: string;
86
+ };
87
+ context: Record<string, unknown> & {
88
+ projectId: string;
89
+ };
90
+ report?: {
91
+ enabled?: boolean;
92
+ environment: string;
93
+ projectName: string;
94
+ runtime: KarrotRuntimeSnapshot;
95
+ scenarioContext?: Record<string, unknown>;
96
+ };
97
+ };
98
/** Placeholder name -> value map consulted by `resolveVariables`; values may be of any type. */
+ type VariableMap = Record<string, unknown>;
99
/**
 * Reads and parses a config file. Relative paths are resolved against the
 * current working directory. A `.json` extension (case-insensitive) is parsed
 * as JSON; every other extension is parsed as YAML.
 */
+ export declare function loadConfig(configPath: string): Promise<KarrotConfig>;
100
/**
 * Returns a copy of `config` in which `${NAME}` placeholders inside strings
 * are substituted, recursing through arrays and plain objects. Values come
 * from `variables` first and fall back to `process.env`.
 */
+ export declare function resolveVariables<T>(config: T, variables: VariableMap): T;
101
+ export {};
@@ -0,0 +1,57 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.loadConfig = loadConfig;
7
+ exports.resolveVariables = resolveVariables;
8
+ const promises_1 = __importDefault(require("node:fs/promises"));
9
+ const node_path_1 = __importDefault(require("node:path"));
10
+ const yaml_1 = __importDefault(require("yaml"));
11
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param value Anything.
 * @returns `true` for objects other than arrays and `null`; `false` otherwise.
 */
function isRecord(value) {
    if (value === null || typeof value !== 'object') {
        return false;
    }
    return !Array.isArray(value);
}
14
/**
 * Turns a config path into an absolute one.
 *
 * @param configPath Absolute or relative path.
 * @returns `configPath` unchanged when it is already absolute; otherwise the
 *          path resolved against the current working directory.
 */
function resolveConfigPath(configPath) {
    if (node_path_1.default.isAbsolute(configPath)) {
        return configPath;
    }
    return node_path_1.default.resolve(process.cwd(), configPath);
}
17
/**
 * Reads a config file from disk and parses it.
 *
 * A `.json` extension (case-insensitive) is parsed with `JSON.parse`; any other
 * extension is parsed as YAML.
 *
 * @param configPath Absolute path, or a path relative to the current working directory.
 * @returns The parsed top-level object. Only the top-level shape is checked here;
 *          individual fields are not validated.
 * @throws If the file cannot be read or parsed, or if its top level is not an
 *         object (for example an empty YAML file, a scalar, or a list).
 */
async function loadConfig(configPath) {
    const absolutePath = resolveConfigPath(configPath);
    const content = await promises_1.default.readFile(absolutePath, 'utf8');
    const extension = node_path_1.default.extname(absolutePath).toLowerCase();
    const parsed = extension === '.json'
        ? JSON.parse(content)
        : yaml_1.default.parse(content);
    // An empty YAML document parses to null, and both parsers accept scalars and
    // lists; fail here with the file path instead of letting a non-object config
    // blow up later on a property access.
    if (!isRecord(parsed)) {
        throw new Error(`Invalid config file ${absolutePath}: expected an object at the top level.`);
    }
    return parsed;
}
26
/**
 * Looks up a placeholder name, preferring the explicit variable map over the
 * process environment.
 *
 * Only own properties are consulted. Using `in` / plain indexing would also
 * match inherited members, so a placeholder such as `${toString}` or
 * `${constructor}` would resolve to an `Object.prototype` function.
 *
 * @param name Placeholder name (the text between `${` and `}`).
 * @param variables Explicit variable map.
 * @returns The value stored in `variables` when the key is present there (even
 *          if that value is `undefined`), otherwise the environment variable of
 *          that name, otherwise `undefined`.
 */
function readVariable(name, variables) {
    const hasOwn = Object.prototype.hasOwnProperty;
    if (hasOwn.call(variables, name)) {
        return variables[name];
    }
    return hasOwn.call(process.env, name) ? process.env[name] : undefined;
}
32
/**
 * Substitutes `${NAME}` placeholders in a string.
 *
 * When the whole string is a single placeholder, the looked-up value is
 * returned as-is (keeping its type), or the original string when the lookup
 * yields null/undefined. Otherwise every placeholder is replaced by the
 * stringified value, with null/undefined becoming an empty string.
 *
 * @param template String that may contain placeholders.
 * @param variables Variable map handed to `readVariable`.
 * @returns The resolved value (not necessarily a string in the whole-placeholder case).
 */
function resolveStringTemplate(template, variables) {
    const wholePlaceholder = /^\$\{([A-Z0-9_]+)\}$/i.exec(template);
    if (wholePlaceholder !== null) {
        const value = readVariable(wholePlaceholder[1], variables);
        return value ?? template;
    }
    const substitute = (_placeholder, variableName) => {
        const value = readVariable(variableName, variables);
        if (value === null || value === undefined) {
            return '';
        }
        return String(value);
    };
    return template.replace(/\$\{([A-Z0-9_]+)\}/gi, substitute);
}
43
/**
 * Recursively resolves placeholders inside an arbitrary value.
 *
 * Strings go through `resolveStringTemplate`; arrays and plain objects are
 * rebuilt with each element / property value resolved; anything else is
 * returned unchanged.
 *
 * @param value Value to resolve.
 * @param variables Variable map forwarded to the string resolver.
 * @returns A resolved copy for strings, arrays and records; the input itself otherwise.
 */
function resolveValue(value, variables) {
    const recurse = (nested) => resolveValue(nested, variables);
    if (typeof value === 'string') {
        return resolveStringTemplate(value, variables);
    }
    if (Array.isArray(value)) {
        return value.map((item) => recurse(item));
    }
    if (!isRecord(value)) {
        return value;
    }
    const resolvedEntries = [];
    for (const [key, nestedValue] of Object.entries(value)) {
        resolvedEntries.push([key, recurse(nestedValue)]);
    }
    return Object.fromEntries(resolvedEntries);
}
55
/**
 * Public entry point for placeholder resolution; delegates to `resolveValue`.
 *
 * @param config Value (typically a parsed config) to resolve.
 * @param variables Variable map used for lookups.
 * @returns The resolved value.
 */
function resolveVariables(config, variables) {
    const resolvedConfig = resolveValue(config, variables);
    return resolvedConfig;
}
@@ -0,0 +1,5 @@
1
/** Returns the trimmed `OPENAI_API_KEY` from `env` (falling back to `process.env`); throws an `Error` when it is missing or blank. */
+ export declare function getRequiredOpenAiApiKey(env?: NodeJS.ProcessEnv): string;
2
/** Returns `OPENAI_EVAL_MODEL` when set to a non-blank value, otherwise the built-in default `'gpt-5.4'`. */
+ export declare function getOpenAiEvalModel(env?: NodeJS.ProcessEnv): string;
3
/** Returns `OPENAI_MESSAGE_GEN_MODEL`, else `OPENAI_EVAL_MODEL`, else the built-in default `'gpt-5.4-mini'`. */
+ export declare function getOpenAiMessageGenModel(env?: NodeJS.ProcessEnv): string;
4
/** Returns `OPENAI_BASE_URL` (default `'https://api.openai.com/v1'`) with any trailing slashes removed. */
+ export declare function getOpenAiBaseUrl(env?: NodeJS.ProcessEnv): string;
5
/**
 * Extracts text from a response object: a non-blank `output_text` string is
 * returned directly; otherwise the `text` of every `output[].content[]` entry
 * typed `'output_text'` or `'text'` is joined with newlines and trimmed.
 * Returns an empty string when nothing matches.
 */
+ export declare function extractOutputText(response: Record<string, unknown>): string;
@@ -0,0 +1,54 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getRequiredOpenAiApiKey = getRequiredOpenAiApiKey;
4
+ exports.getOpenAiEvalModel = getOpenAiEvalModel;
5
+ exports.getOpenAiMessageGenModel = getOpenAiMessageGenModel;
6
+ exports.getOpenAiBaseUrl = getOpenAiBaseUrl;
7
+ exports.extractOutputText = extractOutputText;
8
/**
 * Reads a setting from the supplied env object, falling back to `process.env`
 * only when the supplied object has no (non-nullish) value for it.
 *
 * @param name Variable name.
 * @param env Optional env-like object consulted first.
 * @returns The trimmed value, or `undefined` when it is missing, empty or whitespace-only.
 */
function getEnv(name, env) {
    const raw = env?.[name] ?? process.env[name];
    if (!raw) {
        return undefined;
    }
    const trimmed = raw.trim();
    return trimmed ? trimmed : undefined;
}
12
/**
 * Fetches the OpenAI API key used for evaluation.
 *
 * @param env Optional env-like object consulted before `process.env`.
 * @returns The trimmed `OPENAI_API_KEY` value.
 * @throws Error when the key is missing or blank.
 */
function getRequiredOpenAiApiKey(env) {
    const key = getEnv('OPENAI_API_KEY', env);
    if (key) {
        return key;
    }
    throw new Error('Missing OPENAI_API_KEY for evaluation.');
}
19
/**
 * Picks the model name used for evaluation.
 *
 * @param env Optional env-like object consulted before `process.env`.
 * @returns `OPENAI_EVAL_MODEL` when configured, otherwise `'gpt-5.4'`.
 */
function getOpenAiEvalModel(env) {
    const configuredModel = getEnv('OPENAI_EVAL_MODEL', env);
    return configuredModel ?? 'gpt-5.4';
}
22
/**
 * Picks the model name used for message generation.
 *
 * @param env Optional env-like object consulted before `process.env`.
 * @returns `OPENAI_MESSAGE_GEN_MODEL` when configured, else `OPENAI_EVAL_MODEL`
 *          when configured, else `'gpt-5.4-mini'`.
 */
function getOpenAiMessageGenModel(env) {
    const dedicatedModel = getEnv('OPENAI_MESSAGE_GEN_MODEL', env);
    if (dedicatedModel != null) {
        return dedicatedModel;
    }
    const evalModel = getEnv('OPENAI_EVAL_MODEL', env);
    if (evalModel != null) {
        return evalModel;
    }
    return 'gpt-5.4-mini';
}
25
/**
 * Resolves the base URL of the OpenAI-compatible API.
 *
 * @param env Optional env-like object consulted before `process.env`.
 * @returns `OPENAI_BASE_URL` or the default endpoint, without trailing slashes.
 */
function getOpenAiBaseUrl(env) {
    const baseUrl = getEnv('OPENAI_BASE_URL', env) ?? 'https://api.openai.com/v1';
    return baseUrl.replace(/\/+$/, '');
}
28
/**
 * Pulls the textual output out of a response object.
 *
 * A non-blank string `output_text` is returned untouched. Otherwise the
 * function walks `output[].content[]`, keeps entries whose `type` is
 * `'output_text'` or `'text'` and whose `text` is a non-blank string, joins
 * their text with newlines and trims the result.
 *
 * @param response Response object to inspect.
 * @returns The extracted text, or an empty string when none is found.
 */
function extractOutputText(response) {
    const direct = response.output_text;
    if (typeof direct === 'string' && direct.trim()) {
        return direct;
    }
    const isObject = (candidate) => Boolean(candidate) && typeof candidate === 'object';
    const isTextEntry = (entry) => isObject(entry) &&
        (entry.type === 'output_text' || entry.type === 'text') &&
        typeof entry.text === 'string' &&
        entry.text.trim() !== '';
    const outputItems = Array.isArray(response.output) ? response.output : [];
    return outputItems
        .filter(isObject)
        .flatMap((item) => (Array.isArray(item.content) ? item.content : []))
        .filter(isTextEntry)
        .map((entry) => entry.text)
        .join('\n')
        .trim();
}
package/package.json ADDED
@@ -0,0 +1,146 @@
1
+ {
2
+ "name": "@huydao/karrot",
3
+ "version": "0.1.1",
4
+ "description": "Reusable AI scenario execution, assertion, evaluation, and reporting toolkit",
5
+ "license": "ISC",
6
+ "type": "commonjs",
7
+ "main": "dist/index.js",
8
+ "types": "dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "require": "./dist/index.js"
13
+ },
14
+ "./assertion": {
15
+ "types": "./dist/assertions/assertion.d.ts",
16
+ "require": "./dist/assertions/assertion.js"
17
+ },
18
+ "./artifact-files": {
19
+ "types": "./dist/utils/artifact-files.d.ts",
20
+ "require": "./dist/utils/artifact-files.js"
21
+ },
22
+ "./config": {
23
+ "types": "./dist/utils/config.d.ts",
24
+ "require": "./dist/utils/config.js"
25
+ },
26
+ "./execute": {
27
+ "types": "./dist/executors/execute.d.ts",
28
+ "require": "./dist/executors/execute.js"
29
+ },
30
+ "./executor": {
31
+ "types": "./dist/executors/executor.d.ts",
32
+ "require": "./dist/executors/executor.js"
33
+ },
34
+ "./generated-message": {
35
+ "types": "./dist/scenarios/generated-message.d.ts",
36
+ "require": "./dist/scenarios/generated-message.js"
37
+ },
38
+ "./openai-eval": {
39
+ "types": "./dist/utils/openai-eval.d.ts",
40
+ "require": "./dist/utils/openai-eval.js"
41
+ },
42
+ "./report": {
43
+ "types": "./dist/reports/report.d.ts",
44
+ "require": "./dist/reports/report.js"
45
+ },
46
+ "./run-result": {
47
+ "types": "./dist/executors/run-result.d.ts",
48
+ "require": "./dist/executors/run-result.js"
49
+ },
50
+ "./scenario": {
51
+ "types": "./dist/scenarios/scenario.d.ts",
52
+ "require": "./dist/scenarios/scenario.js"
53
+ },
54
+ "./scenario-loader": {
55
+ "types": "./dist/scenarios/scenario-loader.d.ts",
56
+ "require": "./dist/scenarios/scenario-loader.js"
57
+ },
58
+ "./turn-eval": {
59
+ "types": "./dist/assertions/turn-eval.d.ts",
60
+ "require": "./dist/assertions/turn-eval.js"
61
+ },
62
+ "./adapters/ag-ui": {
63
+ "types": "./dist/executors/adapters/ag-ui.d.ts",
64
+ "require": "./dist/executors/adapters/ag-ui.js"
65
+ },
66
+ "./adapters/ag-ui-post": {
67
+ "types": "./dist/executors/adapters/ag-ui-post.d.ts",
68
+ "require": "./dist/executors/adapters/ag-ui-post.js"
69
+ },
70
+ "./assertions/assertion": {
71
+ "types": "./dist/assertions/assertion.d.ts",
72
+ "require": "./dist/assertions/assertion.js"
73
+ },
74
+ "./assertions/turn-eval": {
75
+ "types": "./dist/assertions/turn-eval.d.ts",
76
+ "require": "./dist/assertions/turn-eval.js"
77
+ },
78
+ "./reports/report": {
79
+ "types": "./dist/reports/report.d.ts",
80
+ "require": "./dist/reports/report.js"
81
+ },
82
+ "./executors/execute": {
83
+ "types": "./dist/executors/execute.d.ts",
84
+ "require": "./dist/executors/execute.js"
85
+ },
86
+ "./executors/executor": {
87
+ "types": "./dist/executors/executor.d.ts",
88
+ "require": "./dist/executors/executor.js"
89
+ },
90
+ "./executors/run-result": {
91
+ "types": "./dist/executors/run-result.d.ts",
92
+ "require": "./dist/executors/run-result.js"
93
+ },
94
+ "./executors/adapters/ag-ui": {
95
+ "types": "./dist/executors/adapters/ag-ui.d.ts",
96
+ "require": "./dist/executors/adapters/ag-ui.js"
97
+ },
98
+ "./executors/adapters/ag-ui-post": {
99
+ "types": "./dist/executors/adapters/ag-ui-post.d.ts",
100
+ "require": "./dist/executors/adapters/ag-ui-post.js"
101
+ },
102
+ "./scenarios/scenario": {
103
+ "types": "./dist/scenarios/scenario.d.ts",
104
+ "require": "./dist/scenarios/scenario.js"
105
+ },
106
+ "./scenarios/scenario-loader": {
107
+ "types": "./dist/scenarios/scenario-loader.d.ts",
108
+ "require": "./dist/scenarios/scenario-loader.js"
109
+ },
110
+ "./scenarios/generated-message": {
111
+ "types": "./dist/scenarios/generated-message.d.ts",
112
+ "require": "./dist/scenarios/generated-message.js"
113
+ },
114
+ "./utils/artifact-files": {
115
+ "types": "./dist/utils/artifact-files.d.ts",
116
+ "require": "./dist/utils/artifact-files.js"
117
+ },
118
+ "./utils/config": {
119
+ "types": "./dist/utils/config.d.ts",
120
+ "require": "./dist/utils/config.js"
121
+ },
122
+ "./utils/openai-eval": {
123
+ "types": "./dist/utils/openai-eval.d.ts",
124
+ "require": "./dist/utils/openai-eval.js"
125
+ }
126
+ },
127
+ "files": [
128
+ "dist",
129
+ "README.md",
130
+ "GUIDE.md"
131
+ ],
132
+ "scripts": {
133
+ "build": "rm -rf dist && tsc -p tsconfig.json && mkdir -p dist/prompts && cp prompts/*.md dist/prompts/",
134
+ "prepack": "npm run build"
135
+ },
136
+ "dependencies": {
137
+ "ag-ui-wss": "file:vendor/ag-ui-wss",
138
+ "yaml": "^2.8.1"
139
+ },
140
+ "bundleDependencies": [
141
+ "ag-ui-wss"
142
+ ],
143
+ "publishConfig": {
144
+ "access": "public"
145
+ }
146
+ }