@artemiskit/core 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +127 -0
- package/adapters/openai/dist/index.js +5626 -0
- package/dist/adapters/registry.d.ts.map +1 -1
- package/dist/adapters/types.d.ts +32 -2
- package/dist/adapters/types.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +12 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.js +455 -4
- package/dist/scenario/schema.d.ts +116 -84
- package/dist/scenario/schema.d.ts.map +1 -1
- package/dist/storage/supabase.d.ts +25 -4
- package/dist/storage/supabase.d.ts.map +1 -1
- package/dist/storage/types.d.ts +162 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/adapters/registry.ts +38 -0
- package/src/adapters/types.ts +38 -0
- package/src/artifacts/types.ts +16 -0
- package/src/scenario/schema.ts +10 -0
- package/src/storage/supabase.test.ts +988 -0
- package/src/storage/supabase.ts +599 -5
- package/src/storage/types.ts +196 -0
package/src/storage/types.ts
CHANGED
|
@@ -173,3 +173,199 @@ export interface BaselineStorageAdapter extends StorageAdapter {
|
|
|
173
173
|
regressionThreshold: number;
|
|
174
174
|
} | null>;
|
|
175
175
|
}
|
|
176
|
+
|
|
177
|
+
// ============================================================================
|
|
178
|
+
// Case Results Types (for granular analytics)
|
|
179
|
+
// ============================================================================
|
|
180
|
+
|
|
181
|
+
/**
 * Status recorded for an individual case result.
 *
 * Exactly one of the three string literals below.
 */
export type CaseResultStatus =
  | 'passed'
  | 'failed'
  | 'error';
|
|
185
|
+
|
|
186
|
+
/**
 * Storage record describing the result of one individual case.
 */
export interface CaseResultRecord {
  /** Unique record ID; auto-generated when not provided. */
  id?: string;

  /** ID of the run this case belongs to. */
  runId: string;

  /** ID of the case, taken from the test. */
  caseId: string;

  /** Name of the case, when one is available. */
  caseName?: string;

  /** Result status of the case. */
  status: CaseResultStatus;

  /** Score, documented as 0.0 to 1.0 (the type itself does not enforce the range). */
  score: number;

  /** Type of matcher that was used. */
  matcherType: string;

  /** Reason for the status. */
  reason?: string;

  /** Response returned by the model. */
  response: string;

  /** Latency, in milliseconds. */
  latencyMs: number;

  /** Number of prompt tokens used. */
  promptTokens: number;

  /** Number of completion tokens used. */
  completionTokens: number;

  /** Total number of tokens used. */
  totalTokens: number;

  /** Error message; meant for records whose status is 'error'. */
  error?: string;

  /** Tags used for categorization. */
  tags?: Array<string>;

  /** ISO timestamp of when the record was created. */
  createdAt?: string;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Filters and paging controls accepted when querying case results.
 *
 * Every field is optional.
 */
export interface CaseResultQueryOptions {
  /** Restrict results to this run ID. */
  runId?: string;

  /** Restrict results to this case ID. */
  caseId?: string;

  /** Restrict results to this status. */
  status?: CaseResultStatus;

  /** Restrict results by tags; documented as matching on any of the given tags. */
  tags?: Array<string>;

  /** Maximum number of results to return. */
  limit?: number;

  /** Offset used for pagination. */
  offset?: number;
}
|
|
241
|
+
|
|
242
|
+
// ============================================================================
|
|
243
|
+
// Metrics History Types (for trending)
|
|
244
|
+
// ============================================================================
|
|
245
|
+
|
|
246
|
+
/**
 * Daily metrics snapshot for a project, optionally scoped to one scenario.
 */
export interface MetricsSnapshot {
  /** Unique snapshot ID; auto-generated when not provided. */
  id?: string;

  /** Date the snapshot covers, formatted as YYYY-MM-DD. */
  date: string;

  /** Name of the project. */
  project: string;

  /**
   * Name of the scenario; left unset for a project-wide snapshot.
   *
   * NOTE(review): the declared type admits `undefined` but not `null` —
   * confirm how storage adapters map an empty scenario column.
   */
  scenario?: string;

  /** Total number of runs on this date. */
  totalRuns: number;

  /** Total number of cases across all runs. */
  totalCases: number;

  /** Total number of passed cases. */
  passedCases: number;

  /** Total number of failed cases. */
  failedCases: number;

  /** Average success rate. */
  avgSuccessRate: number;

  /** Average latency, in milliseconds. */
  avgLatencyMs: number;

  /** Average number of tokens per run. */
  avgTokensPerRun: number;

  /** Lowest success rate. */
  minSuccessRate?: number;

  /** Highest success rate. */
  maxSuccessRate?: number;

  /** Lowest latency, in milliseconds. */
  minLatencyMs?: number;

  /** Highest latency, in milliseconds. */
  maxLatencyMs?: number;

  /** Total number of tokens consumed. */
  totalTokens: number;

  /** ISO timestamp of when the snapshot was created. */
  createdAt?: string;

  /** ISO timestamp of when the snapshot was last updated. */
  updatedAt?: string;
}
|
|
287
|
+
|
|
288
|
+
/**
 * Options accepted when querying metrics history.
 *
 * Only `project` is required.
 */
export interface MetricsTrendOptions {
  /** Project whose metrics are queried. */
  project: string;

  /** Scenario to filter by, if any. */
  scenario?: string;

  /** Start date, formatted as YYYY-MM-DD. */
  startDate?: string;

  /** End date, formatted as YYYY-MM-DD. */
  endDate?: string;

  /** Maximum number of results to return. */
  limit?: number;
}
|
|
303
|
+
|
|
304
|
+
/**
 * One data point of a metrics trend, intended for visualization.
 */
export interface TrendDataPoint {
  /** Date of the point — presumably YYYY-MM-DD like `MetricsSnapshot.date`; confirm. */
  date: string;

  /** Success rate for the point. */
  successRate: number;

  /** Latency for the point — presumably milliseconds, per the field name; confirm. */
  latencyMs: number;

  /** Number of runs for the point. */
  totalRuns: number;

  /** Number of tokens for the point. */
  totalTokens: number;
}
|
|
314
|
+
|
|
315
|
+
// ============================================================================
|
|
316
|
+
// Enhanced Storage Adapter with Analytics
|
|
317
|
+
// ============================================================================
|
|
318
|
+
|
|
319
|
+
/**
 * Storage adapter contract extended with analytics capabilities:
 * per-case results and metrics snapshots/trends.
 *
 * All methods are asynchronous. `aggregateDailyMetrics` is optional, so
 * callers must check that it exists before invoking it.
 */
export interface AnalyticsStorageAdapter extends BaselineStorageAdapter {
  /**
   * Save one individual case result.
   *
   * @param result - The case result to save.
   * @returns A string — presumably the ID of the saved record; confirm against the implementation.
   */
  saveCaseResult(result: CaseResultRecord): Promise<string>;

  /**
   * Save several case results in one batch.
   *
   * @param results - The case results to save.
   * @returns An array of strings — presumably one ID per saved record; confirm against the implementation.
   */
  saveCaseResults(results: CaseResultRecord[]): Promise<string[]>;

  /**
   * Get the case results of a run.
   *
   * @param runId - ID of the run.
   */
  getCaseResults(runId: string): Promise<CaseResultRecord[]>;

  /**
   * Query case results using filters.
   *
   * @param options - Filters and paging controls.
   */
  queryCaseResults(options: CaseResultQueryOptions): Promise<CaseResultRecord[]>;

  /**
   * Save a metrics snapshot.
   *
   * @param snapshot - The snapshot to save.
   * @returns A string — presumably the ID of the saved snapshot; confirm against the implementation.
   */
  saveMetricsSnapshot(snapshot: MetricsSnapshot): Promise<string>;

  /**
   * Get metrics trend data.
   *
   * @param options - Project, optional scenario/date range, and limit.
   */
  getMetricsTrend(options: MetricsTrendOptions): Promise<TrendDataPoint[]>;

  /**
   * Get one specific metrics snapshot.
   *
   * @param date - Date of the snapshot.
   * @param project - Project name.
   * @param scenario - Optional scenario name.
   * @returns The snapshot, or `null` — presumably when none matches; confirm.
   */
  getMetricsSnapshot(date: string, project: string, scenario?: string): Promise<MetricsSnapshot | null>;

  /**
   * Aggregate and save daily metrics from runs.
   *
   * Can be called to build or update metrics_history from existing runs.
   * Optional: an adapter may leave this method out.
   *
   * @param date - Date to aggregate.
   * @param project - Project name.
   * @param scenario - Optional scenario name.
   */
  aggregateDailyMetrics?(date: string, project: string, scenario?: string): Promise<MetricsSnapshot>;
}
|