@lov3kaizen/agentsea-evaluate 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @lov3kaizen/agentsea-evaluate might be problematic. Click here for more details.

Files changed (42)
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/dist/annotation/index.d.mts +3 -0
  4. package/dist/annotation/index.d.ts +3 -0
  5. package/dist/annotation/index.js +630 -0
  6. package/dist/annotation/index.mjs +22 -0
  7. package/dist/chunk-5JRYKRSE.mjs +2791 -0
  8. package/dist/chunk-EUXXIZK3.mjs +676 -0
  9. package/dist/chunk-NBMUSATK.mjs +596 -0
  10. package/dist/chunk-PAQ2TTJJ.mjs +1105 -0
  11. package/dist/chunk-TUMNJN2S.mjs +416 -0
  12. package/dist/continuous/index.d.mts +2 -0
  13. package/dist/continuous/index.d.ts +2 -0
  14. package/dist/continuous/index.js +707 -0
  15. package/dist/continuous/index.mjs +16 -0
  16. package/dist/datasets/index.d.mts +1 -0
  17. package/dist/datasets/index.d.ts +1 -0
  18. package/dist/datasets/index.js +456 -0
  19. package/dist/datasets/index.mjs +14 -0
  20. package/dist/evaluation/index.d.mts +1 -0
  21. package/dist/evaluation/index.d.ts +1 -0
  22. package/dist/evaluation/index.js +2853 -0
  23. package/dist/evaluation/index.mjs +78 -0
  24. package/dist/feedback/index.d.mts +2 -0
  25. package/dist/feedback/index.d.ts +2 -0
  26. package/dist/feedback/index.js +1158 -0
  27. package/dist/feedback/index.mjs +40 -0
  28. package/dist/index-6Pbiq7ny.d.mts +234 -0
  29. package/dist/index-6Pbiq7ny.d.ts +234 -0
  30. package/dist/index-BNTycFEA.d.mts +479 -0
  31. package/dist/index-BNTycFEA.d.ts +479 -0
  32. package/dist/index-CTYCfWfH.d.mts +543 -0
  33. package/dist/index-CTYCfWfH.d.ts +543 -0
  34. package/dist/index-Cq5LwG_3.d.mts +322 -0
  35. package/dist/index-Cq5LwG_3.d.ts +322 -0
  36. package/dist/index-bPghFsfP.d.mts +315 -0
  37. package/dist/index-bPghFsfP.d.ts +315 -0
  38. package/dist/index.d.mts +81 -0
  39. package/dist/index.d.ts +81 -0
  40. package/dist/index.js +5962 -0
  41. package/dist/index.mjs +429 -0
  42. package/package.json +102 -0
@@ -0,0 +1,416 @@
1
+ // src/datasets/PreferenceDatasetBuilder.ts
2
+ import { nanoid } from "nanoid";
3
/**
 * Returns a shuffled copy of `items` using the Fisher-Yates algorithm.
 * The input array is not modified.
 *
 * @param {Array} items - Values to permute.
 * @param {() => number} rng - Source of numbers in [0, 1).
 * @returns {Array} A new array holding the same elements in permuted order.
 */
function shufflePreferencePairs(items, rng) {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(rng() * (i + 1));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}

/**
 * In-memory collection of preference pairs.
 *
 * Each pair is read as an object with string-like `prompt`, `chosen` and
 * `rejected` fields and optional `chosenModel` / `rejectedModel` fields.
 * Every transforming method (`filter`, `sample`, `split`, `shuffle`) returns a
 * new dataset and leaves this one untouched.
 */
var PreferenceDataset = class _PreferenceDataset {
  type = "preference";
  pairs;
  _stats;

  /**
   * @param {Array<object>} pairs - Preference pairs. The array is stored by
   *   reference, not copied.
   */
  constructor(pairs) {
    this.pairs = pairs;
  }

  /** Number of pairs in the dataset. */
  get size() {
    return this.pairs.length;
  }

  /**
   * Aggregate statistics. Computed on first access and cached afterwards; the
   * cache is never invalidated.
   */
  get stats() {
    if (!this._stats) {
      this._stats = this.calculateStats();
    }
    return this._stats;
  }

  /**
   * @returns {Array<object>} A shallow copy of the stored pairs.
   */
  getPairs() {
    return [...this.pairs];
  }

  /**
   * @param {(pair: object, index: number, all: Array<object>) => boolean} predicate
   * @returns {PreferenceDataset} Dataset holding only the pairs the predicate accepts.
   */
  filter(predicate) {
    return new _PreferenceDataset(this.pairs.filter(predicate));
  }

  /**
   * Draws up to `count` pairs at random, without replacement.
   *
   * @param {number} count - Number of pairs wanted. Values at or above the
   *   dataset size return every pair in original order; negative values
   *   return an empty dataset.
   * @returns {PreferenceDataset}
   */
  sample(count) {
    if (count >= this.pairs.length) {
      return new _PreferenceDataset([...this.pairs]);
    }
    // A negative end index would make slice() count from the tail and return
    // almost the whole dataset, so clamp it to zero.
    const take = Math.max(0, count);
    const shuffled = shufflePreferencePairs(this.pairs, Math.random);
    return new _PreferenceDataset(shuffled.slice(0, take));
  }

  /**
   * Randomly partitions the dataset in two.
   *
   * @param {number} ratio - Fraction of pairs placed in the first dataset.
   *   Values outside [0, 1] are clamped to that range.
   * @returns {[PreferenceDataset, PreferenceDataset]}
   */
  split(ratio) {
    const shuffled = shufflePreferencePairs(this.pairs, Math.random);
    // Clamp so a negative ratio cannot turn into a tail-relative slice index.
    const fraction = Math.min(1, Math.max(0, ratio));
    const splitIndex = Math.floor(shuffled.length * fraction);
    return [
      new _PreferenceDataset(shuffled.slice(0, splitIndex)),
      new _PreferenceDataset(shuffled.slice(splitIndex))
    ];
  }

  /**
   * @param {number} [seed] - When given, the permutation is reproducible for
   *   that seed; otherwise Math.random is used.
   * @returns {PreferenceDataset} Dataset with the same pairs in shuffled order.
   */
  shuffle(seed) {
    const rng = seed !== void 0 ? this.seededRandom(seed) : Math.random;
    return new _PreferenceDataset(shufflePreferencePairs(this.pairs, rng));
  }

  /**
   * Computes size, average prompt length, average response length (mean of
   * chosen and rejected lengths per pair), number of distinct prompts and how
   * often each model name appears as chosen or rejected.
   */
  calculateStats() {
    const models = /* @__PURE__ */ new Map();
    const uniquePrompts = /* @__PURE__ */ new Set();
    let totalPromptLen = 0;
    let totalResponseLen = 0;
    const bump = (model) => {
      if (model) {
        models.set(model, (models.get(model) ?? 0) + 1);
      }
    };
    for (const pair of this.pairs) {
      uniquePrompts.add(pair.prompt);
      totalPromptLen += pair.prompt.length;
      totalResponseLen += (pair.chosen.length + pair.rejected.length) / 2;
      bump(pair.chosenModel);
      bump(pair.rejectedModel);
    }
    const count = this.pairs.length;
    return {
      size: count,
      type: "preference",
      avgPromptLength: count > 0 ? totalPromptLen / count : 0,
      avgResponseLength: count > 0 ? totalResponseLen / count : 0,
      uniquePrompts: uniquePrompts.size,
      modelDistribution: Object.fromEntries(models)
    };
  }

  /**
   * Builds a small linear congruential generator.
   *
   * @param {number} seed - Initial state; expected to be a finite number.
   * @returns {() => number} Function yielding values in [0, 1).
   */
  seededRandom(seed) {
    const modulus = 233280;
    let state = seed;
    return () => {
      state = (state * 9301 + 49297) % modulus;
      // `%` keeps the sign of the dividend, so a negative seed would yield a
      // negative value and therefore a negative array index in the shuffle.
      if (state < 0) {
        state = (state + modulus) % modulus;
      }
      return state / modulus;
    };
  }
};
92
/**
 * Builds a preference dataset out of "preference" feedback records held in a
 * feedback store.
 */
var PreferenceDatasetBuilder = class {
  feedbackStore;
  sampling;

  /**
   * @param {object} config
   * @param {object} config.feedbackStore - Object exposing an async
   *   `query(filter)` method whose result has an `items` array.
   * @param {object} [config.sampling] - Optional sampling settings: `type`
   *   ("random" | "balanced" | "stratified"), `seed`, `minConfidence`,
   *   `stratifyBy`, `stratifyRatios`.
   */
  constructor(config) {
    this.feedbackStore = config.feedbackStore;
    this.sampling = config.sampling;
  }

  /**
   * Build preference dataset from feedback
   *
   * Steps, in order: query the store, drop ties, map each record to a pair,
   * apply `options.filterFn`, deduplicate, apply the configured sampling, and
   * truncate to `options.maxPairs`. A warning is logged (nothing is thrown)
   * when fewer than `options.minPairs` pairs remain.
   *
   * @param {object} [options]
   * @param {number} [options.minPairs=0]
   * @param {number} [options.maxPairs=Infinity]
   * @param {(pair: object) => boolean} [options.filterFn]
   * @param {string} [options.deduplication] - "none", "prompt", or any other
   *   value for prompt+chosen+rejected matching.
   * @returns {Promise<PreferenceDataset>}
   */
  async build(options = {}) {
    const minPairs = options.minPairs ?? 0;
    const maxPairs = options.maxPairs ?? Infinity;
    const result = await this.feedbackStore.query({
      type: "preference",
      minConfidence: this.sampling?.minConfidence,
      limit: maxPairs * 2
      // Fetch extra to account for filtering
    });
    let pairs = [];
    for (const feedback of result.items) {
      if (feedback.preference === "tie") {
        continue;
      }
      // Anything other than "A" is treated as a preference for response B.
      const preferA = feedback.preference === "A";
      const chosen = preferA ? feedback.responseA : feedback.responseB;
      const rejected = preferA ? feedback.responseB : feedback.responseA;
      pairs.push({
        id: nanoid(),
        prompt: feedback.input,
        chosen: chosen.content,
        rejected: rejected.content,
        chosenModel: chosen.model,
        rejectedModel: rejected.model,
        reason: feedback.reason,
        confidence: feedback.confidence,
        metadata: feedback.metadata
      });
    }
    if (options.filterFn) {
      pairs = pairs.filter(options.filterFn);
    }
    if (options.deduplication && options.deduplication !== "none") {
      pairs = this.deduplicate(pairs, options.deduplication);
    }
    if (this.sampling) {
      pairs = this.applySampling(pairs);
    }
    if (pairs.length > maxPairs) {
      pairs = pairs.slice(0, maxPairs);
    }
    if (pairs.length < minPairs) {
      console.warn(
        `Only ${pairs.length} pairs available, requested minimum ${minPairs}`
      );
    }
    return new PreferenceDataset(pairs);
  }

  /**
   * Deduplicate pairs
   *
   * Keeps the first occurrence of each key and preserves input order.
   *
   * @param {Array<object>} pairs
   * @param {string} mode - "prompt" keys on the prompt only; any other value
   *   keys on the (prompt, chosen, rejected) triple.
   * @returns {Array<object>}
   */
  deduplicate(pairs, mode) {
    const seen = /* @__PURE__ */ new Set();
    const result = [];
    for (const pair of pairs) {
      // JSON-encode the triple: a plain "a|b|c" join is ambiguous when a field
      // itself contains "|", which would merge distinct pairs.
      const key = mode === "prompt" ? pair.prompt : JSON.stringify([pair.prompt, pair.chosen, pair.rejected]);
      if (!seen.has(key)) {
        seen.add(key);
        result.push(pair);
      }
    }
    return result;
  }

  /**
   * Apply sampling strategy
   *
   * Dispatches on `this.sampling.type`; unknown types and a missing sampling
   * config return the input unchanged.
   *
   * @param {Array<object>} pairs
   * @returns {Array<object>}
   */
  applySampling(pairs) {
    if (!this.sampling) return pairs;
    switch (this.sampling.type) {
      case "random":
        return this.randomSample(pairs);
      case "balanced":
        return this.balancedSample(pairs);
      case "stratified":
        return this.stratifiedSample(pairs);
      default:
        return pairs;
    }
  }

  /**
   * Returns a shuffled copy of `pairs` (Fisher-Yates). Uses the configured
   * `sampling.seed` when present, otherwise Math.random.
   *
   * @param {Array<object>} pairs
   * @returns {Array<object>}
   */
  randomSample(pairs) {
    const shuffled = [...pairs];
    const seed = this.sampling?.seed;
    const rng = seed !== void 0 ? this.seededRandom(seed) : Math.random;
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.floor(rng() * (i + 1));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    return shuffled;
  }

  /**
   * Keeps pairs whose confidence is at least `sampling.minConfidence`
   * (default 0). Pairs without a confidence value count as confidence 1.
   * NOTE(review): despite the name, no class balancing happens here.
   *
   * @param {Array<object>} pairs
   * @returns {Array<object>}
   */
  balancedSample(pairs) {
    const minConf = this.sampling?.minConfidence ?? 0;
    return pairs.filter((p) => (p.confidence ?? 1) >= minConf);
  }

  /**
   * Groups pairs by `sampling.stratifyBy` (default "chosenModel") and takes
   * the first `ceil(total * ratio)` pairs of each group, where the ratio comes
   * from `sampling.stratifyRatios[group]` or defaults to an equal share.
   *
   * @param {Array<object>} pairs
   * @returns {Array<object>}
   */
  stratifiedSample(pairs) {
    const field = this.sampling?.stratifyBy ?? "chosenModel";
    const groups = /* @__PURE__ */ new Map();
    for (const pair of pairs) {
      const key = String(pair[field] ?? "unknown");
      if (!groups.has(key)) {
        groups.set(key, []);
      }
      groups.get(key).push(pair);
    }
    const result = [];
    const ratios = this.sampling?.stratifyRatios ?? {};
    const hasOwn = Object.prototype.hasOwnProperty;
    for (const [key, group] of groups) {
      // Only honour ratios the caller actually set: a group named e.g.
      // "constructor" must not pick up an inherited Object.prototype member.
      const configured = hasOwn.call(ratios, key) ? ratios[key] : void 0;
      const ratio = configured ?? 1 / groups.size;
      const count = Math.ceil(pairs.length * ratio);
      result.push(...group.slice(0, count));
    }
    return result;
  }

  /**
   * Builds a small linear congruential generator.
   *
   * @param {number} seed - Initial state; expected to be a finite number.
   * @returns {() => number} Function yielding values in [0, 1).
   */
  seededRandom(seed) {
    const modulus = 233280;
    let state = seed;
    return () => {
      state = (state * 9301 + 49297) % modulus;
      // `%` keeps the sign of the dividend, so a negative seed would yield a
      // negative value and therefore a negative array index in the shuffle.
      if (state < 0) {
        state = (state + modulus) % modulus;
      }
      return state / modulus;
    };
  }
};
224
/**
 * Factory wrapper around the PreferenceDatasetBuilder constructor.
 *
 * @param {object} config - Passed unchanged to the constructor.
 * @returns {PreferenceDatasetBuilder} A freshly constructed builder.
 */
function createPreferenceDatasetBuilder(config) {
  const builder = new PreferenceDatasetBuilder(config);
  return builder;
}
227
+
228
+ // src/datasets/DatasetExporter.ts
229
+ import * as fs from "fs/promises";
230
/**
 * Serialises preference pairs into several text formats and optionally writes
 * the result to disk.
 */
var DatasetExporter = class {
  /**
   * Export preference dataset to file
   *
   * @param {object} dataset - Object exposing `getPairs()`.
   * @param {object} options
   * @param {string} options.format - "jsonl", "json", "csv", "huggingface",
   *   "anthropic" or "openai".
   * @param {string} [options.path] - Destination file. When omitted nothing is
   *   written, but the result still reports the size of the generated content.
   * @param {object} [options.formatOptions] - Format-specific settings.
   * @returns {Promise<object>} Summary with `format`, `path`, `itemCount`,
   *   `bytesWritten` and `warnings`.
   * @throws {Error} When `options.format` is not one of the supported values.
   */
  async exportPreferences(dataset, options) {
    const pairs = dataset.getPairs();
    let content;
    const warnings = [];
    switch (options.format) {
      case "jsonl":
        content = this.toJSONL(pairs, options);
        break;
      case "json":
        content = JSON.stringify(pairs, null, 2);
        break;
      case "csv":
        content = this.toCSV(pairs);
        break;
      case "huggingface":
        return this.exportToHuggingFace(pairs, options);
      case "anthropic":
        content = this.toAnthropicFormat(pairs);
        break;
      case "openai":
        content = this.toOpenAIFormat(pairs);
        break;
      default:
        throw new Error(`Unsupported export format: ${options.format}`);
    }
    if (options.path) {
      await fs.writeFile(options.path, content, "utf-8");
    }
    return {
      format: options.format,
      path: options.path,
      itemCount: pairs.length,
      bytesWritten: Buffer.byteLength(content, "utf-8"),
      warnings: warnings.length > 0 ? warnings : void 0
    };
  }

  /**
   * Convert to JSONL format
   *
   * @param {Array<object>} pairs
   * @param {object} [options] - `options.formatOptions.format` selects the
   *   record shape: "dpo" (default) emits prompt/chosen/rejected, "sft" emits
   *   instruction/output, anything else emits the whole pair.
   * @returns {string} One JSON document per line, without a trailing newline.
   */
  toJSONL(pairs, options) {
    const format = options?.formatOptions?.format ?? "dpo";
    return pairs.map((pair) => {
      switch (format) {
        case "dpo":
          return JSON.stringify({
            prompt: pair.prompt,
            chosen: pair.chosen,
            rejected: pair.rejected
          });
        case "sft":
          return JSON.stringify({
            instruction: pair.prompt,
            output: pair.chosen
          });
        default:
          return JSON.stringify(pair);
      }
    }).join("\n");
  }

  /**
   * Convert to CSV format
   *
   * Every cell, including the model names, goes through `escapeCSV`, so a
   * comma or quote in any column cannot shift the columns of a row.
   *
   * @param {Array<object>} pairs
   * @returns {string} Header line followed by one line per pair.
   */
  toCSV(pairs) {
    const headers = [
      "prompt",
      "chosen",
      "rejected",
      "chosen_model",
      "rejected_model",
      "confidence"
    ];
    const rows = pairs.map(
      (pair) => [
        pair.prompt,
        pair.chosen,
        pair.rejected,
        pair.chosenModel,
        pair.rejectedModel,
        pair.confidence
      ].map((cell) => this.escapeCSV(cell)).join(",")
    );
    return [headers.join(","), ...rows].join("\n");
  }

  /**
   * Convert to Anthropic format
   *
   * @param {Array<object>} pairs
   * @returns {string} JSONL with a Human/Assistant prompt and the chosen
   *   response as the completion.
   */
  toAnthropicFormat(pairs) {
    return pairs.map(
      (pair) => JSON.stringify({
        prompt: `\n\nHuman: ${pair.prompt}\n\nAssistant:`,
        completion: ` ${pair.chosen}`
      })
    ).join("\n");
  }

  /**
   * Convert to OpenAI format
   *
   * @param {Array<object>} pairs
   * @returns {string} JSONL where each record is a user/assistant message pair
   *   built from the prompt and the chosen response.
   */
  toOpenAIFormat(pairs) {
    return pairs.map(
      (pair) => JSON.stringify({
        messages: [
          { role: "user", content: pair.prompt },
          { role: "assistant", content: pair.chosen }
        ]
      })
    ).join("\n");
  }

  /**
   * Export to HuggingFace Hub (stub)
   *
   * Nothing is uploaded: the pairs are written locally as DPO-style JSONL and
   * a warning is logged.
   *
   * @param {Array<object>} pairs
   * @param {object} options - `options.formatOptions.token` is required;
   *   `options.path` or `options.formatOptions.name` picks the output file.
   * @returns {Promise<object>} Summary with `format`, `path`, `itemCount` and
   *   `warnings`.
   * @throws {Error} When no token is supplied.
   */
  async exportToHuggingFace(pairs, options) {
    const hfOptions = options.formatOptions;
    if (!hfOptions?.token) {
      throw new Error("HuggingFace token is required for Hub export");
    }
    console.warn(
      "HuggingFace Hub export not fully implemented. Saving locally instead."
    );
    const localPath = options.path ?? `./${hfOptions.name ?? "dataset"}.jsonl`;
    const content = this.toJSONL(pairs, { formatOptions: { format: "dpo" } });
    await fs.writeFile(localPath, content, "utf-8");
    return {
      format: "huggingface",
      path: localPath,
      itemCount: pairs.length,
      warnings: ["Exported locally. Use @huggingface/hub to push to Hub."]
    };
  }

  /**
   * Export to multiple formats
   *
   * Files are written one after another to `${basePath}.${extension}`. No
   * `formatOptions` are forwarded, so formats that require them (such as
   * "huggingface", which needs a token) will reject.
   *
   * @param {object} dataset - Object exposing `getPairs()`.
   * @param {Array<string>} formats
   * @param {string} basePath - Output path without extension.
   * @returns {Promise<Map<string, object>>} Export summary keyed by format.
   */
  async exportMultiple(dataset, formats, basePath) {
    const results = /* @__PURE__ */ new Map();
    for (const format of formats) {
      const ext = this.getExtension(format);
      const path = `${basePath}.${ext}`;
      const result = await this.exportPreferences(dataset, { format, path });
      results.set(format, result);
    }
    return results;
  }

  /**
   * Get file extension for format
   *
   * @param {string} format
   * @returns {string} "jsonl", "json", "csv" or "parquet"; unknown formats
   *   fall back to "jsonl".
   */
  getExtension(format) {
    switch (format) {
      case "jsonl":
        return "jsonl";
      case "json":
        return "json";
      case "csv":
        return "csv";
      case "parquet":
        return "parquet";
      default:
        return "jsonl";
    }
  }

  /**
   * Escape CSV value
   *
   * Quotes the value when it contains a comma, double quote, CR or LF, and
   * doubles embedded quotes. `null`/`undefined` become an empty cell and other
   * non-string values are converted with `String()`.
   *
   * @param {*} value
   * @returns {string}
   */
  escapeCSV(value) {
    const text = value == null ? "" : String(value);
    if (/[",\r\n]/.test(text)) {
      return `"${text.replace(/"/g, '""')}"`;
    }
    return text;
  }
};
406
/**
 * Factory wrapper around the DatasetExporter constructor.
 *
 * @returns {DatasetExporter} A freshly constructed exporter.
 */
function createDatasetExporter() {
  const exporter = new DatasetExporter();
  return exporter;
}
409
+
410
+ export {
411
+ PreferenceDataset,
412
+ PreferenceDatasetBuilder,
413
+ createPreferenceDatasetBuilder,
414
+ DatasetExporter,
415
+ createDatasetExporter
416
+ };
@@ -0,0 +1,2 @@
1
+ export { G as ABTestRunner, z as AlertManager, x as ContinuousEval, I as createABTestRunner, F as createAlertManager, y as createContinuousEval } from '../index-Cq5LwG_3.mjs';
2
+ import 'eventemitter3';
@@ -0,0 +1,2 @@
1
+ export { G as ABTestRunner, z as AlertManager, x as ContinuousEval, I as createABTestRunner, F as createAlertManager, y as createContinuousEval } from '../index-Cq5LwG_3.js';
2
+ import 'eventemitter3';