@pauly4010/evalai-sdk 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +289 -0
- package/LICENSE +21 -0
- package/README.md +565 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +596 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +178 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +135 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +181 -0
- package/dist/client.d.ts +358 -0
- package/dist/client.js +802 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +80 -0
- package/dist/errors.js +285 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +334 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +111 -0
- package/dist/integrations/anthropic.d.ts +72 -0
- package/dist/integrations/anthropic.js +159 -0
- package/dist/integrations/openai.d.ts +69 -0
- package/dist/integrations/openai.js +156 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +146 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +135 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +204 -0
- package/dist/testing.js +252 -0
- package/dist/types.d.ts +715 -0
- package/dist/types.js +54 -0
- package/dist/workflows.d.ts +378 -0
- package/dist/workflows.js +628 -0
- package/package.json +102 -0
package/dist/batch.js
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"use strict";
/**
 * Request batching for improved performance
 * Combines multiple API requests into fewer network calls
 */
// Compiled CommonJS module header (typical TypeScript compiler output).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder; overwritten below once the class is declared.
exports.RequestBatcher = void 0;
// Function declarations are hoisted, so these exports are safe here.
exports.canBatch = canBatch;
exports.batchProcess = batchProcess;
|
|
10
|
+
/**
|
|
11
|
+
* Batch processor for API requests
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Batch processor for API requests.
 *
 * Queues individual requests and flushes them through `executeBatch` either
 * when the queue reaches `maxBatchSize` or after `batchDelay` milliseconds,
 * whichever comes first.
 */
class RequestBatcher {
    /**
     * @param {(requests: Array<object>) => Promise<Array<object>>} executeBatch
     *   Callback that performs the combined network call. Receives the queued
     *   request descriptors and must resolve with response objects carrying a
     *   matching `id` plus `status` / `data` / `error` fields.
     * @param {{ maxBatchSize?: number, batchDelay?: number }} [options]
     */
    constructor(executeBatch, options = {}) {
        this.executeBatch = executeBatch;
        this.queue = [];
        this.batchTimer = null;
        this.requestCounter = 0;
        this.maxBatchSize = options.maxBatchSize || 10;
        this.batchDelay = options.batchDelay || 50; // 50ms
    }
    /**
     * Add request to batch queue.
     * Resolves with the response `data` once the batch containing this request
     * has executed; rejects on a non-2xx status, a missing response, or a
     * batch-level failure.
     */
    async enqueue(method, endpoint, body, headers) {
        return new Promise((resolve, reject) => {
            const id = `req_${this.requestCounter++}_${Date.now()}`;
            this.queue.push({
                id,
                resolve,
                reject,
                request: { id, method, endpoint, body, headers },
            });
            if (this.queue.length >= this.maxBatchSize) {
                // Process immediately if batch is full.
                this.processBatch();
            }
            else {
                // Otherwise schedule batch processing after the delay.
                this.scheduleBatch();
            }
        });
    }
    /**
     * Schedule batch processing after `batchDelay` ms.
     * No-op when a flush timer is already pending.
     */
    scheduleBatch() {
        if (this.batchTimer) {
            return;
        }
        this.batchTimer = setTimeout(() => {
            this.processBatch();
        }, this.batchDelay);
    }
    /**
     * Cancel any pending flush timer.
     * Fix: the previous code branched on `typeof this.batchTimer === 'number'`
     * but executed the identical `clearTimeout` call in both branches —
     * `clearTimeout` accepts both numeric browser ids and Node Timeout objects,
     * so the conditional was dead code.
     */
    clearTimer() {
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
    }
    /**
     * Process the current batch: execute up to `maxBatchSize` queued requests
     * and settle each pending promise from the matching response.
     */
    async processBatch() {
        this.clearTimer();
        if (this.queue.length === 0) {
            return;
        }
        // Take items from queue
        const batch = this.queue.splice(0, this.maxBatchSize);
        const requests = batch.map(item => item.request);
        try {
            const responses = await this.executeBatch(requests);
            // Index pending requests by id so matching is O(n), not O(n^2).
            const pendingById = new Map(batch.map(item => [item.id, item]));
            const respondedIds = new Set();
            for (const response of responses) {
                const pendingRequest = pendingById.get(response.id);
                if (pendingRequest) {
                    respondedIds.add(response.id);
                    if (response.status >= 200 && response.status < 300) {
                        pendingRequest.resolve(response.data);
                    }
                    else {
                        pendingRequest.reject(new Error(response.error || `Request failed with status ${response.status}`));
                    }
                }
            }
            // Handle any requests that didn't get a response
            for (const item of batch) {
                if (!respondedIds.has(item.id)) {
                    item.reject(new Error('No response received for request'));
                }
            }
        }
        catch (error) {
            // Reject all requests in batch on error
            for (const item of batch) {
                item.reject(error);
            }
        }
        // If there are more items in queue, schedule next batch
        if (this.queue.length > 0) {
            this.scheduleBatch();
        }
    }
    /**
     * Flush all pending requests immediately, batch by batch.
     */
    async flush() {
        while (this.queue.length > 0) {
            await this.processBatch();
        }
    }
    /**
     * Clear queue without processing; every pending request is rejected.
     */
    clear() {
        this.clearTimer();
        // Reject all pending requests
        for (const item of this.queue) {
            item.reject(new Error('Batch queue cleared'));
        }
        this.queue = [];
    }
    /**
     * Get queue statistics.
     * @returns {{ queueSize: number, maxBatchSize: number }}
     */
    getStats() {
        return {
            queueSize: this.queue.length,
            maxBatchSize: this.maxBatchSize,
        };
    }
}
|
|
143
|
+
// CommonJS export of the class declared above.
exports.RequestBatcher = RequestBatcher;
|
|
144
|
+
/**
 * @internal - Internal SDK logic, not part of public API
 * Check if requests can be batched together.
 * Only GET requests against one of the known read-only endpoints qualify.
 */
function canBatch(method, endpoint) {
    if (method !== 'GET') {
        return false;
    }
    for (const pattern of ['/traces', '/evaluations', '/annotations', '/results']) {
        if (endpoint.includes(pattern)) {
            return true;
        }
    }
    return false;
}
|
|
160
|
+
/**
 * Batch multiple async operations with a concurrency limit.
 *
 * Fixes two defects in the previous implementation:
 * - After `Promise.race(executing)` it removed the MOST RECENTLY ADDED
 *   promise from the in-flight list rather than a settled one, so the real
 *   number of concurrent operations could exceed `concurrency`. Each tracked
 *   promise now removes itself when it settles.
 * - Results were accumulated in completion order; they are now returned in
 *   the same order as `items`.
 *
 * @param {Array} items - Inputs to process.
 * @param {(item: any) => Promise<any>} processor - Async worker per item.
 * @param {number} [concurrency=5] - Maximum operations in flight at once.
 * @returns {Promise<Array>} Results aligned index-for-index with `items`.
 */
async function batchProcess(items, processor, concurrency = 5) {
    const results = [];
    const executing = new Set();
    let index = 0;
    for (const item of items) {
        const slot = index++;
        const promise = Promise.resolve(processor(item)).then(result => {
            results[slot] = result;
        });
        // Track the promise and let it remove itself once settled, so
        // `executing` only ever contains genuinely in-flight work.
        const tracked = promise.finally(() => {
            executing.delete(tracked);
        });
        executing.add(tracked);
        if (executing.size >= concurrency) {
            // Wait until at least one in-flight operation finishes.
            await Promise.race(executing);
        }
    }
    await Promise.all(executing);
    return results;
}
|
package/dist/cache.d.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * Simple in-memory cache with TTL for SDK requests
 * Reduces redundant API calls and improves performance
 */
export declare class RequestCache {
    private cache;
    private maxSize;
    /** @param maxSize - Maximum number of entries kept before eviction. */
    constructor(maxSize?: number);
    /**
     * Generate cache key from request parameters
     * (key combines method, url, and JSON-serialized params)
     */
    private generateKey;
    /**
     * Check if cache entry is still valid
     */
    private isValid;
    /**
     * Get cached response if valid
     * @returns The cached value, or `null` on a miss or an expired entry.
     */
    get<T>(method: string, url: string, params?: any): T | null;
    /**
     * Store response in cache
     * @param ttl - Time-to-live in milliseconds.
     */
    set<T>(method: string, url: string, data: T, ttl: number, params?: any): void;
    /**
     * Invalidate specific cache entry
     */
    invalidate(method: string, url: string, params?: any): void;
    /**
     * Invalidate all cache entries matching a pattern
     * (plain substring match against the cache key, not a regex)
     */
    invalidatePattern(pattern: string): void;
    /**
     * Clear all cache entries
     */
    clear(): void;
    /**
     * Get cache statistics
     * NOTE(review): the implementation returns only `size`/`maxSize`;
     * `hitRate` is declared optional and not currently populated.
     */
    getStats(): {
        size: number;
        maxSize: number;
        hitRate?: number;
    };
}
/**
 * Default cache TTL values (in milliseconds)
 * @internal - Used by SDK internally, exposed for advanced customization only
 */
export declare const CacheTTL: {
    readonly SHORT: number;
    readonly MEDIUM: number;
    readonly LONG: number;
    readonly HOUR: number;
};
/**
 * @internal - Internal SDK logic, not part of public API
 * Determine if a request should be cached based on method and endpoint
 */
export declare function shouldCache(method: string, endpoint: string): boolean;
/**
 * @internal - Internal SDK logic, not part of public API
 * Get appropriate TTL for an endpoint
 */
export declare function getTTL(endpoint: string): number;
|
package/dist/cache.js
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"use strict";
/**
 * Simple in-memory cache with TTL for SDK requests
 * Reduces redundant API calls and improves performance
 */
// Compiled CommonJS module header (typical TypeScript compiler output).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholders; overwritten below once the definitions exist.
exports.CacheTTL = exports.RequestCache = void 0;
// Function declarations are hoisted, so these exports are safe here.
exports.shouldCache = shouldCache;
exports.getTTL = getTTL;
|
|
10
|
+
/**
 * Simple in-memory request cache with per-entry TTL and LRU eviction.
 * Keys combine HTTP method, URL, and JSON-serialized params.
 */
class RequestCache {
    /**
     * @param {number} [maxSize=1000] - Maximum number of cached entries.
     */
    constructor(maxSize = 1000) {
        this.cache = new Map();
        this.maxSize = maxSize;
    }
    /**
     * Generate cache key from request parameters.
     */
    generateKey(method, url, params) {
        const paramString = params ? JSON.stringify(params) : '';
        return `${method}:${url}:${paramString}`;
    }
    /**
     * Check if cache entry is still valid (age strictly below its TTL).
     */
    isValid(entry) {
        return Date.now() - entry.timestamp < entry.ttl;
    }
    /**
     * Get cached response if valid. Expired entries are deleted on access.
     * Fix: a hit now refreshes the entry's recency so eviction is genuinely
     * LRU — previously `get` never touched insertion order, making the
     * "LRU-style" eviction plain FIFO.
     */
    get(method, url, params) {
        const key = this.generateKey(method, url, params);
        const entry = this.cache.get(key);
        if (!entry) {
            return null;
        }
        if (!this.isValid(entry)) {
            this.cache.delete(key);
            return null;
        }
        // Re-insert so Map iteration order reflects recency of use.
        this.cache.delete(key);
        this.cache.set(key, entry);
        return entry.data;
    }
    /**
     * Store response in cache, evicting the least-recently-used entry when
     * at capacity. Fix: no longer evicts another entry when merely
     * overwriting an existing key.
     */
    set(method, url, data, ttl, params) {
        const key = this.generateKey(method, url, params);
        if (!this.cache.has(key) && this.cache.size >= this.maxSize) {
            // Oldest key in iteration order == least recently used.
            const lruKey = this.cache.keys().next().value;
            if (lruKey !== undefined) {
                this.cache.delete(lruKey);
            }
        }
        // Delete first so an overwrite also refreshes recency.
        this.cache.delete(key);
        this.cache.set(key, {
            data,
            timestamp: Date.now(),
            ttl,
        });
    }
    /**
     * Invalidate specific cache entry.
     */
    invalidate(method, url, params) {
        const key = this.generateKey(method, url, params);
        this.cache.delete(key);
    }
    /**
     * Invalidate all cache entries whose key contains `pattern`
     * (substring match, not a regex).
     */
    invalidatePattern(pattern) {
        for (const key of this.cache.keys()) {
            if (key.includes(pattern)) {
                this.cache.delete(key);
            }
        }
    }
    /**
     * Clear all cache entries.
     */
    clear() {
        this.cache.clear();
    }
    /**
     * Get cache statistics.
     * @returns {{ size: number, maxSize: number }}
     */
    getStats() {
        return {
            size: this.cache.size,
            maxSize: this.maxSize,
        };
    }
}
|
|
94
|
+
// CommonJS export of the class declared above.
exports.RequestCache = RequestCache;
|
|
95
|
+
/**
 * Default cache TTL values (in milliseconds)
 * @internal - Used by SDK internally, exposed for advanced customization only
 */
exports.CacheTTL = {
    SHORT: 30 * 1000, // 30 seconds — used by getTTL for /traces and /results
    MEDIUM: 5 * 60 * 1000, // 5 minutes — getTTL's default bucket
    LONG: 30 * 60 * 1000, // 30 minutes — used for /api-keys and /webhooks
    HOUR: 60 * 60 * 1000, // 1 hour — not referenced by getTTL; available to callers
};
|
|
105
|
+
/**
 * @internal - Internal SDK logic, not part of public API
 * Determine if a request should be cached based on method and endpoint.
 * Only GET requests are cacheable, minus a few always-fresh endpoints.
 */
function shouldCache(method, endpoint) {
    if (method !== 'GET') {
        return false;
    }
    for (const pattern of ['/health', '/usage', '/deliveries']) {
        if (endpoint.includes(pattern)) {
            return false;
        }
    }
    return true;
}
|
|
120
|
+
/**
 * @internal - Internal SDK logic, not part of public API
 * Get appropriate TTL for an endpoint: slow-changing resources get a long
 * TTL, volatile ones a short TTL, everything else the medium default.
 */
function getTTL(endpoint) {
    const matches = (...patterns) => patterns.some(p => endpoint.includes(p));
    if (matches('/api-keys', '/webhooks')) {
        return exports.CacheTTL.LONG;
    }
    if (matches('/evaluations', '/configs')) {
        return exports.CacheTTL.MEDIUM;
    }
    if (matches('/traces', '/results')) {
        return exports.CacheTTL.SHORT;
    }
    return exports.CacheTTL.MEDIUM;
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env node
"use strict";
/**
 * CLI for AI Evaluation Platform SDK
 * Tier 2.6: CLI for Everything
 *
 * Commands: init, eval:run, traces, export, dev.
 * NOTE(review): several emoji literals in this compiled file appear
 * mojibake-encoded ('๐…', 'โ…'); preserved as-is since the intended
 * characters cannot be confirmed from this output alone.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const commander_1 = require("commander");
const promises_1 = __importDefault(require("fs/promises"));
const path_1 = __importDefault(require("path"));
const client_1 = require("../client");
const export_1 = require("../export");
const program = new commander_1.Command();
/**
 * Load and parse the CLI config file, exiting with a helpful message on
 * failure. Fix: this logic was copy-pasted in three commands, and a single
 * try/catch reported every failure — including malformed JSON — as
 * "config file not found". Parse errors now get their own message.
 * @param {string} [configPath='./evalai.config.json']
 * @returns {Promise<object>} Parsed configuration object.
 */
async function loadConfig(configPath = './evalai.config.json') {
    const resolved = path_1.default.resolve(configPath);
    let content;
    try {
        content = await promises_1.default.readFile(resolved, 'utf-8');
    }
    catch (error) {
        console.error('โ Config file not found. Run "evalai init" first.');
        process.exit(1);
    }
    try {
        return JSON.parse(content);
    }
    catch (error) {
        console.error(`โ Config file at ${resolved} is not valid JSON.`);
        process.exit(1);
    }
}
program
    .name('evalai')
    .description('AI Evaluation Platform CLI')
    .version('1.0.0');
// Initialize project: scaffolds .evalai/, evaluations/, a config file, and
// an example evaluation script.
program
    .command('init')
    .description('Initialize a new evaluation project')
    .option('-d, --dir <directory>', 'Project directory', '.')
    .action(async (options) => {
    const dir = path_1.default.resolve(options.dir);
    console.log('๐ Initializing EvalAI project...');
    // Create directory structure
    await promises_1.default.mkdir(path_1.default.join(dir, '.evalai'), { recursive: true });
    await promises_1.default.mkdir(path_1.default.join(dir, '.evalai', 'snapshots'), { recursive: true });
    await promises_1.default.mkdir(path_1.default.join(dir, 'evaluations'), { recursive: true });
    // Create config file (API credentials come from the environment)
    const config = {
        apiKey: process.env.EVALAI_API_KEY || '',
        projectId: process.env.EVALAI_PROJECT_ID || '',
        baseUrl: 'http://localhost:3000/api',
        debug: false,
        retry: {
            maxAttempts: 3,
            backoff: 'exponential'
        }
    };
    await promises_1.default.writeFile(path_1.default.join(dir, 'evalai.config.json'), JSON.stringify(config, null, 2));
    // Create example evaluation file
    const exampleEval = `import { AIEvalClient, createTestSuite, expect } from '@pauly4010/evalai-sdk'

const client = AIEvalClient.init()

const suite = createTestSuite('example-evaluation', {
  cases: [
    {
      input: 'What is 2+2?',
      expected: '4',
      name: 'simple-math'
    },
    {
      input: 'Explain AI in simple terms',
      expected: (output) => {
        expect(output).toContainKeywords(['artificial', 'intelligence'])
        expect(output).toHaveLength({ min: 50, max: 500 })
        return true
      },
      name: 'ai-explanation'
    }
  ]
})

// Run the test suite
suite.run().then(results => {
  console.log('Test Results:', results)
  console.log(\`Passed: \${results.passed}/\${results.total}\`)
})
`;
    await promises_1.default.writeFile(path_1.default.join(dir, 'evaluations', 'example.ts'), exampleEval);
    console.log('โ Project initialized successfully!');
    console.log('\nNext steps:');
    console.log('1. Set your API key: export EVALAI_API_KEY=your-key');
    console.log('2. Set your project ID: export EVALAI_PROJECT_ID=your-project');
    console.log('3. Run evaluations: npx evalai eval:run');
});
// Run evaluations
program
    .command('eval:run')
    .description('Run evaluation tests')
    .option('-c, --config <path>', 'Config file path', './evalai.config.json')
    .option('-f, --file <path>', 'Evaluation file to run')
    .action(async (options) => {
    console.log('๐งช Running evaluations...');
    const config = await loadConfig(options.config);
    const client = client_1.AIEvalClient.init(config);
    if (options.file) {
        console.log(`Running ${options.file}...`);
        // Dynamic import of evaluation file would go here
        // This requires compilation step for TS files
    }
    else {
        // Run all evaluations in the evaluations directory
        console.log('Running all evaluations...');
    }
    console.log('โ Evaluations completed!');
});
// List traces
program
    .command('traces')
    .description('List and filter traces')
    .option('-l, --limit <number>', 'Number of traces to show', '10')
    .option('--failed', 'Show only failed traces')
    .option('--slow', 'Show slow traces (>5s)')
    .action(async (options) => {
    const config = await loadConfig();
    // Client is created for the (not yet implemented) trace fetch below.
    const client = client_1.AIEvalClient.init(config);
    console.log('๐ Fetching traces...');
    // API call to get traces would go here
    console.log(`Showing ${options.limit} traces`);
});
// Export data
program
    .command('export')
    .description('Export data from EvalAI')
    .option('-f, --format <format>', 'Export format (json, csv, xlsx)', 'json')
    .option('-o, --output <path>', 'Output file path', './export')
    .option('-t, --type <type>', 'Data type (traces, evaluations, all)', 'all')
    .action(async (options) => {
    const config = await loadConfig();
    const client = client_1.AIEvalClient.init(config);
    console.log(`๐ฅ Exporting data as ${options.format}...`);
    const data = await (0, export_1.exportData)(client, {
        format: options.format,
        includeTraces: true,
        includeEvaluations: true
    });
    // Fix: previously the output was always JSON.stringify'd, corrupting
    // non-JSON formats. If exportData already produced a string (e.g. CSV),
    // write it verbatim; only stringify structured data.
    const serialized = typeof data === 'string' ? data : JSON.stringify(data, null, 2);
    const outputPath = path_1.default.resolve(process.cwd(), options.output);
    await promises_1.default.writeFile(outputPath, serialized);
    console.log(`โ Data exported to ${outputPath}`);
});
// Dev server
program
    .command('dev')
    .description('Start local development server')
    .option('-p, --port <port>', 'Port number', '3001')
    .action(async (options) => {
    console.log(`๐ Starting development server on port ${options.port}...`);
    console.log('๐ Dashboard: http://localhost:' + options.port);
    console.log('๐ API: http://localhost:' + options.port + '/api');
    console.log('\nPress Ctrl+C to stop');
    // This would start an Express server with a simple dashboard
    // For now, just keep the process running
    process.stdin.resume();
});
program.parse();
|