@lov3kaizen/agentsea-evaluate 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @lov3kaizen/agentsea-evaluate might be problematic. Click here for more details.

Files changed (42):
  1. package/LICENSE +21 -0
  2. package/README.md +339 -0
  3. package/dist/annotation/index.d.mts +3 -0
  4. package/dist/annotation/index.d.ts +3 -0
  5. package/dist/annotation/index.js +630 -0
  6. package/dist/annotation/index.mjs +22 -0
  7. package/dist/chunk-5JRYKRSE.mjs +2791 -0
  8. package/dist/chunk-EUXXIZK3.mjs +676 -0
  9. package/dist/chunk-NBMUSATK.mjs +596 -0
  10. package/dist/chunk-PAQ2TTJJ.mjs +1105 -0
  11. package/dist/chunk-TUMNJN2S.mjs +416 -0
  12. package/dist/continuous/index.d.mts +2 -0
  13. package/dist/continuous/index.d.ts +2 -0
  14. package/dist/continuous/index.js +707 -0
  15. package/dist/continuous/index.mjs +16 -0
  16. package/dist/datasets/index.d.mts +1 -0
  17. package/dist/datasets/index.d.ts +1 -0
  18. package/dist/datasets/index.js +456 -0
  19. package/dist/datasets/index.mjs +14 -0
  20. package/dist/evaluation/index.d.mts +1 -0
  21. package/dist/evaluation/index.d.ts +1 -0
  22. package/dist/evaluation/index.js +2853 -0
  23. package/dist/evaluation/index.mjs +78 -0
  24. package/dist/feedback/index.d.mts +2 -0
  25. package/dist/feedback/index.d.ts +2 -0
  26. package/dist/feedback/index.js +1158 -0
  27. package/dist/feedback/index.mjs +40 -0
  28. package/dist/index-6Pbiq7ny.d.mts +234 -0
  29. package/dist/index-6Pbiq7ny.d.ts +234 -0
  30. package/dist/index-BNTycFEA.d.mts +479 -0
  31. package/dist/index-BNTycFEA.d.ts +479 -0
  32. package/dist/index-CTYCfWfH.d.mts +543 -0
  33. package/dist/index-CTYCfWfH.d.ts +543 -0
  34. package/dist/index-Cq5LwG_3.d.mts +322 -0
  35. package/dist/index-Cq5LwG_3.d.ts +322 -0
  36. package/dist/index-bPghFsfP.d.mts +315 -0
  37. package/dist/index-bPghFsfP.d.ts +315 -0
  38. package/dist/index.d.mts +81 -0
  39. package/dist/index.d.ts +81 -0
  40. package/dist/index.js +5962 -0
  41. package/dist/index.mjs +429 -0
  42. package/package.json +102 -0
@@ -0,0 +1,596 @@
1
+ // src/annotation/AnnotationTask.ts
2
+ import { nanoid } from "nanoid";
3
+ import { z } from "zod";
4
/**
 * An annotation task: a named piece of labelling work with instructions, a
 * validation schema and a simple lifecycle
 * (draft -> active <-> paused -> completed, or cancelled).
 *
 * All timestamps written by this class are epoch milliseconds (`Date.now()`).
 */
var AnnotationTask = class {
  id;
  name;
  description;
  instructions;
  schema;
  status;
  itemsPerAnnotator;
  annotatorsPerItem;
  deadline;
  createdAt;
  updatedAt;
  completedAt;
  metadata;
  /**
   * @param {object} config - Task configuration.
   * @param {string} config.name
   * @param {string} [config.description]
   * @param {string} [config.instructions]
   * @param {object} config.schema - Object exposing `parse(value)`; used by `validateAnnotation`.
   * @param {number} [config.itemsPerAnnotator=100]
   * @param {number} [config.annotatorsPerItem=1]
   * @param {Date|number|string} [config.deadline]
   * @param {object} [config.metadata]
   */
  constructor(config) {
    // One clock reading so createdAt and updatedAt are guaranteed to match.
    const now = Date.now();
    this.id = nanoid();
    this.name = config.name;
    this.description = config.description;
    this.instructions = config.instructions;
    this.schema = config.schema;
    this.status = "draft";
    this.itemsPerAnnotator = config.itemsPerAnnotator ?? 100;
    this.annotatorsPerItem = config.annotatorsPerItem ?? 1;
    this.deadline = config.deadline;
    this.createdAt = now;
    this.updatedAt = now;
    this.metadata = config.metadata;
  }
  /**
   * Start the task (draft -> active).
   * @throws {Error} If the task is not in "draft" status.
   */
  start() {
    if (this.status !== "draft") {
      throw new Error(`Cannot start task in ${this.status} status`);
    }
    this.status = "active";
    this.updatedAt = Date.now();
  }
  /**
   * Pause the task (active -> paused).
   * @throws {Error} If the task is not in "active" status.
   */
  pause() {
    if (this.status !== "active") {
      throw new Error(`Cannot pause task in ${this.status} status`);
    }
    this.status = "paused";
    this.updatedAt = Date.now();
  }
  /**
   * Resume the task (paused -> active).
   * @throws {Error} If the task is not in "paused" status.
   */
  resume() {
    if (this.status !== "paused") {
      throw new Error(`Cannot resume task in ${this.status} status`);
    }
    this.status = "active";
    this.updatedAt = Date.now();
  }
  /**
   * Complete the task (active -> completed) and record `completedAt`.
   * @throws {Error} If the task is not in "active" status.
   */
  complete() {
    if (this.status !== "active") {
      throw new Error(`Cannot complete task in ${this.status} status`);
    }
    // One clock reading so completedAt and updatedAt are guaranteed to match.
    const now = Date.now();
    this.status = "completed";
    this.completedAt = now;
    this.updatedAt = now;
  }
  /**
   * Cancel the task from any status except "completed".
   * @throws {Error} If the task is already completed.
   */
  cancel() {
    if (this.status === "completed") {
      throw new Error("Cannot cancel completed task");
    }
    this.status = "cancelled";
    this.updatedAt = Date.now();
  }
  /**
   * Validate an annotation against the schema.
   *
   * @param {*} value - Candidate annotation value.
   * @returns {{valid: boolean, error?: string}} `{ valid: true }` on success;
   *   otherwise `valid: false` with the comma-joined issue messages for a
   *   ZodError, or "Unknown validation error" for any other thrown value.
   */
  validateAnnotation(value) {
    try {
      this.schema.parse(value);
      return { valid: true };
    } catch (error) {
      if (error instanceof z.ZodError) {
        // `issues` is the canonical list on ZodError; `errors` is only an
        // alias on some Zod versions, so read `issues` first and never call
        // `.map` on undefined.
        const issues = error.issues ?? error.errors ?? [];
        return {
          valid: false,
          error: issues.map((issue) => issue.message).join(", ")
        };
      }
      return { valid: false, error: "Unknown validation error" };
    }
  }
  /**
   * Check if task is past deadline.
   *
   * @returns {boolean} `false` when no deadline is set; otherwise whether the
   *   current time is later than the deadline.
   */
  isPastDeadline() {
    if (!this.deadline) return false;
    // Accept Date-like objects as before, plus epoch-millisecond numbers or
    // date strings, instead of throwing on a missing getTime().
    const deadlineMs = typeof this.deadline.getTime === "function"
      ? this.deadline.getTime()
      : new Date(this.deadline).getTime();
    return Date.now() > deadlineMs;
  }
  /**
   * Get task configuration for display.
   *
   * @returns {object} The configurable fields only (no id, status or timestamps).
   */
  toConfig() {
    return {
      name: this.name,
      description: this.description,
      instructions: this.instructions,
      schema: this.schema,
      itemsPerAnnotator: this.itemsPerAnnotator,
      annotatorsPerItem: this.annotatorsPerItem,
      deadline: this.deadline,
      metadata: this.metadata
    };
  }
};
123
/**
 * Factory helper: build an AnnotationTask from a configuration object.
 *
 * @param {object} config - Passed unchanged to the AnnotationTask constructor.
 * @returns {AnnotationTask} The newly constructed task.
 */
function createAnnotationTask(config) {
  const task = new AnnotationTask(config);
  return task;
}
126
/**
 * Ready-made schema for binary classification annotations.
 *
 * - `label`: required, one of "positive" or "negative".
 * - `confidence`: optional number constrained with min(0) and max(1).
 * - `notes`: optional free-text string.
 */
var BinaryClassificationSchema = z.object({
  label: z.enum(["positive", "negative"]),
  confidence: z.number().min(0).max(1).optional(),
  notes: z.string().optional()
});
131
/**
 * Ready-made schema for quality-rating annotations.
 *
 * - `accuracy`, `helpfulness`: required numbers constrained with min(1) and
 *   max(5). No integer constraint is applied, so fractional ratings pass.
 * - `safety`: required, one of "pass" or "fail".
 * - `corrections`, `notes`: optional strings.
 */
var QualityRatingSchema = z.object({
  accuracy: z.number().min(1).max(5),
  helpfulness: z.number().min(1).max(5),
  safety: z.enum(["pass", "fail"]),
  corrections: z.string().optional(),
  notes: z.string().optional()
});
138
/**
 * Ready-made schema for text-span annotations.
 *
 * - `spans`: required array of `{ start, end, label, text? }` objects.
 *   `start` and `end` are plain numbers; this schema does not enforce that
 *   they are integers, non-negative, or that `end >= start`.
 * - `notes`: optional free-text string.
 */
var TextSpanSchema = z.object({
  spans: z.array(
    z.object({
      start: z.number(),
      end: z.number(),
      label: z.string(),
      // Optional copy of the covered text.
      text: z.string().optional()
    })
  ),
  notes: z.string().optional()
});
149
+
150
+ // src/annotation/AnnotationQueue.ts
151
+ import { nanoid as nanoid2 } from "nanoid";
152
+ import { EventEmitter } from "eventemitter3";
153
/**
 * Work queue that hands a task's items out to annotators and records the
 * annotations they submit.
 *
 * Items are caller-supplied objects that are mutated in place (`status`,
 * `assignedTo`, `annotations`, `updatedAt`). Events emitted:
 * "item:assigned" (itemId, annotatorId), "item:annotated" (itemId, annotation)
 * and "item:flagged" (itemId, reason).
 */
var AnnotationQueue = class extends EventEmitter {
  task;
  items;
  annotatorAssignments;
  annotatorCounts;
  skippedItems;
  /**
   * @param {object} config
   * @param {object} config.task - Task providing `id`, `annotatorsPerItem`
   *   and `validateAnnotation(value)`.
   * @param {Iterable<object>} [config.items] - Items keyed by their `id`.
   */
  constructor(config) {
    super();
    this.task = config.task;
    this.items = new Map();
    // annotatorId -> Set of item ids currently assigned to that annotator.
    this.annotatorAssignments = new Map();
    // annotatorId -> number of annotations submitted.
    this.annotatorCounts = new Map();
    // annotatorId -> Set of item ids that annotator has skipped.
    this.skippedItems = new Map();
    for (const item of config.items ?? []) {
      this.items.set(item.id, item);
    }
  }
  /**
   * Get next item for annotator and assign it to them.
   *
   * Items nobody holds yet are preferred; otherwise the first item that still
   * has room for another annotator is used. Items the annotator already holds
   * or has skipped, completed items, and items already assigned to
   * `task.annotatorsPerItem` annotators are never returned.
   *
   * @param {string} annotatorId
   * @returns {object|null} The assigned item, or `null` when nothing is available.
   */
  getNextItem(annotatorId) {
    const held = this.annotatorAssignments.get(annotatorId);
    const skipped = this.skippedItems.get(annotatorId);
    let fallback = null;
    for (const item of this.items.values()) {
      if (held?.has(item.id) || skipped?.has(item.id)) continue;
      if (item.status === "completed") continue;
      const assignedCount = item.assignedTo?.length ?? 0;
      if (assignedCount >= this.task.annotatorsPerItem) continue;
      if (assignedCount === 0) {
        this.assignItem(item.id, annotatorId);
        return item;
      }
      // Partially covered item: remember the first one in case no untouched
      // item exists further down the queue.
      fallback ??= item;
    }
    if (fallback) {
      this.assignItem(fallback.id, annotatorId);
      return fallback;
    }
    return null;
  }
  /**
   * Assign item to annotator and emit "item:assigned".
   *
   * @param {string} itemId
   * @param {string} annotatorId
   * @throws {Error} If the item does not exist.
   */
  assignItem(itemId, annotatorId) {
    const item = this.items.get(itemId);
    if (!item) {
      throw new Error(`Item ${itemId} not found`);
    }
    if (!this.annotatorAssignments.has(annotatorId)) {
      this.annotatorAssignments.set(annotatorId, new Set());
    }
    this.annotatorAssignments.get(annotatorId).add(itemId);
    // An explicit assignment overrides an earlier skip by the same annotator.
    this.skippedItems.get(annotatorId)?.delete(itemId);
    if (!item.assignedTo) {
      item.assignedTo = [];
    }
    if (!item.assignedTo.includes(annotatorId)) {
      item.assignedTo.push(annotatorId);
    }
    // Do not regress items that already carry annotations: adding a second
    // annotator to an "in_progress" item must not reset it to "assigned".
    if (item.status !== "in_progress" && item.status !== "completed") {
      item.status = "assigned";
    }
    item.updatedAt = Date.now();
    this.emit("item:assigned", itemId, annotatorId);
  }
  /**
   * Submit annotation for item.
   *
   * The value is validated with `task.validateAnnotation`. The item becomes
   * "completed" once it holds at least `task.annotatorsPerItem` annotations,
   * otherwise "in_progress". Emits "item:annotated".
   *
   * @param {string} itemId
   * @param {string} annotatorId
   * @param {*} value - Annotation payload.
   * @param {number} [duration] - Stored on the annotation unchanged.
   * @returns {object} The stored annotation record.
   * @throws {Error} If the item does not exist or the value fails validation.
   */
  submitAnnotation(itemId, annotatorId, value, duration) {
    const item = this.items.get(itemId);
    if (!item) {
      throw new Error(`Item ${itemId} not found`);
    }
    const validation = this.task.validateAnnotation(value);
    if (!validation.valid) {
      throw new Error(`Invalid annotation: ${validation.error}`);
    }
    const now = Date.now();
    const annotation = {
      id: nanoid2(),
      itemId,
      annotatorId,
      value,
      duration,
      createdAt: now
    };
    // Tolerate items that were supplied without an annotations array.
    if (!item.annotations) {
      item.annotations = [];
    }
    item.annotations.push(annotation);
    item.updatedAt = now;
    this.annotatorCounts.set(
      annotatorId,
      (this.annotatorCounts.get(annotatorId) ?? 0) + 1
    );
    item.status = item.annotations.length >= this.task.annotatorsPerItem
      ? "completed"
      : "in_progress";
    this.emit("item:annotated", itemId, annotation);
    return annotation;
  }
  /**
   * Flag item for review and emit "item:flagged".
   *
   * @param {string} itemId
   * @param {string} [reason] - Forwarded to listeners; not stored on the item.
   * @throws {Error} If the item does not exist.
   */
  flagItem(itemId, reason) {
    const item = this.items.get(itemId);
    if (!item) {
      throw new Error(`Item ${itemId} not found`);
    }
    item.status = "flagged";
    item.updatedAt = Date.now();
    this.emit("item:flagged", itemId, reason);
  }
  /**
   * Skip item: release the annotator's assignment and remember the skip so
   * `getNextItem` does not hand the same item straight back to them.
   *
   * @param {string} itemId
   * @param {string} annotatorId
   * @throws {Error} If the item does not exist.
   */
  skipItem(itemId, annotatorId) {
    const item = this.items.get(itemId);
    if (!item) {
      throw new Error(`Item ${itemId} not found`);
    }
    this.annotatorAssignments.get(annotatorId)?.delete(itemId);
    if (item.assignedTo) {
      const idx = item.assignedTo.indexOf(annotatorId);
      if (idx >= 0) {
        item.assignedTo.splice(idx, 1);
      }
    }
    let skipped = this.skippedItems.get(annotatorId);
    if (!skipped) {
      skipped = new Set();
      this.skippedItems.set(annotatorId, skipped);
    }
    skipped.add(itemId);
    // With nobody holding the item and no annotations recorded it is back to
    // square one, so it must not keep counting as "assigned" in the stats.
    const nobodyAssigned = (item.assignedTo?.length ?? 0) === 0;
    const noAnnotations = (item.annotations?.length ?? 0) === 0;
    if (item.status === "assigned" && nobodyAssigned && noAnnotations) {
      item.status = "pending";
    }
    item.updatedAt = Date.now();
  }
  /**
   * Get batch assignment for annotator: assigns up to `count` items by
   * calling `getNextItem` repeatedly, stopping early when the queue runs dry.
   *
   * @param {string} annotatorId
   * @param {number} count - Maximum number of items to assign.
   * @returns {{annotatorId: string, itemIds: string[]}}
   */
  getBatchAssignment(annotatorId, count) {
    const itemIds = [];
    for (let i = 0; i < count; i++) {
      const item = this.getNextItem(annotatorId);
      if (!item) break;
      itemIds.push(item.id);
    }
    return {
      annotatorId,
      itemIds
    };
  }
  /**
   * Get queue statistics.
   *
   * "assigned" and "in_progress" items are both reported as `assignedItems`.
   * `averageAgreement` is always 0 here; the queue performs no consensus
   * calculation.
   *
   * @returns {object} Counts per status plus the average annotations per item.
   */
  getStats() {
    let pending = 0;
    let assigned = 0;
    let completed = 0;
    let flagged = 0;
    let totalAnnotations = 0;
    for (const item of this.items.values()) {
      switch (item.status) {
        case "pending":
          pending++;
          break;
        case "assigned":
        case "in_progress":
          assigned++;
          break;
        case "completed":
          completed++;
          break;
        case "flagged":
          flagged++;
          break;
      }
      totalAnnotations += item.annotations?.length ?? 0;
    }
    const avgAnnotationsPerItem = this.items.size > 0 ? totalAnnotations / this.items.size : 0;
    return {
      taskId: this.task.id,
      totalItems: this.items.size,
      pendingItems: pending,
      assignedItems: assigned,
      completedItems: completed,
      flaggedItems: flagged,
      averageAnnotationsPerItem: avgAnnotationsPerItem,
      averageAgreement: 0
      // Would need consensus calculation
    };
  }
  /**
   * Get item by ID.
   *
   * @param {string} itemId
   * @returns {object|undefined} The item, or `undefined` when unknown.
   */
  getItem(itemId) {
    return this.items.get(itemId);
  }
  /**
   * Get all items in insertion order.
   *
   * @returns {object[]}
   */
  getItems() {
    return Array.from(this.items.values());
  }
  /**
   * Get items by status.
   *
   * @param {string} status
   * @returns {object[]} Items whose `status` strictly equals the argument.
   */
  getItemsByStatus(status) {
    return Array.from(this.items.values()).filter(
      (item) => item.status === status
    );
  }
  /**
   * Get annotator's completed count (number of annotations they submitted).
   *
   * @param {string} annotatorId
   * @returns {number} 0 for annotators that never submitted.
   */
  getAnnotatorCount(annotatorId) {
    return this.annotatorCounts.get(annotatorId) ?? 0;
  }
};
363
/**
 * Factory helper: build an AnnotationQueue from a configuration object.
 *
 * @param {object} config - Passed unchanged to the AnnotationQueue constructor.
 * @returns {AnnotationQueue} The newly constructed queue.
 */
function createAnnotationQueue(config) {
  const queue = new AnnotationQueue(config);
  return queue;
}
366
+
367
+ // src/annotation/ConsensusManager.ts
368
/**
 * Combines several annotations of the same item into a single consensus
 * result using one of four strategies: "majority", "unanimous", "weighted"
 * or "expert".
 *
 * Values are compared structurally through a canonical JSON key, so two
 * objects holding the same data with a different property order are equal.
 */
var ConsensusManager = class {
  method;
  weights;
  expertAnnotatorId;
  /**
   * @param {object} config
   * @param {string} config.method - "majority" | "unanimous" | "weighted" | "expert".
   * @param {Object<string, number>} [config.weights] - Per-annotator weights
   *   for the "weighted" method; annotators not listed weigh 1.
   * @param {string} [config.expertAnnotatorId] - Annotator whose value wins
   *   under the "expert" method.
   */
  constructor(config) {
    this.method = config.method;
    this.weights = config.weights;
    this.expertAnnotatorId = config.expertAnnotatorId;
  }
  /**
   * Build a comparison key for a value that does not depend on object
   * property order (plain `JSON.stringify` does).
   *
   * @param {*} value
   * @returns {string|undefined} Canonical JSON text; `undefined` for an
   *   `undefined` input, mirroring `JSON.stringify`.
   */
  valueKey(value) {
    return JSON.stringify(value, (_key, nested) => {
      if (nested === null || typeof nested !== "object" || Array.isArray(nested)) {
        return nested;
      }
      // Rebuild plain objects with sorted keys so serialization is stable.
      const sorted = {};
      for (const key of Object.keys(nested).sort()) {
        sorted[key] = nested[key];
      }
      return sorted;
    });
  }
  /**
   * Calculate consensus from annotations.
   *
   * No annotations yield an empty value with zero agreement and confidence.
   * A single annotation is returned as-is with agreement 1 and its own
   * `confidence` (0.5 when absent). Otherwise the configured method is
   * applied; unknown methods fall back to majority.
   *
   * @param {object[]} annotations - Records with `id`, `annotatorId`, `value`
   *   and optionally `confidence`.
   * @returns {object} `{ value, method, agreement, confidence,
   *   contributingAnnotations, disagreements? }`.
   */
  calculateConsensus(annotations) {
    if (annotations.length === 0) {
      return {
        value: {},
        method: this.method,
        agreement: 0,
        confidence: 0,
        contributingAnnotations: []
      };
    }
    if (annotations.length === 1) {
      return {
        value: annotations[0].value,
        method: this.method,
        agreement: 1,
        confidence: annotations[0].confidence ?? 0.5,
        contributingAnnotations: [annotations[0].id]
      };
    }
    switch (this.method) {
      case "majority":
        return this.majorityConsensus(annotations);
      case "unanimous":
        return this.unanimousConsensus(annotations);
      case "weighted":
        return this.weightedConsensus(annotations);
      case "expert":
        return this.expertConsensus(annotations);
      default:
        return this.majorityConsensus(annotations);
    }
  }
  /**
   * Majority vote consensus, decided independently for every field that
   * appears in any annotation value. Agreement is the mean, over fields, of
   * the share of annotators backing the winning value.
   *
   * @param {object[]} annotations
   * @returns {object} Consensus result with `method: "majority"`;
   *   `disagreements` is `undefined` when every field was unanimous.
   */
  majorityConsensus(annotations) {
    const allFields = new Set();
    for (const ann of annotations) {
      // A null/undefined value contributes no fields instead of throwing.
      for (const key of Object.keys(ann.value ?? {})) {
        allFields.add(key);
      }
    }
    const consensusValue = {};
    const disagreements = [];
    let totalAgreement = 0;
    for (const field of allFields) {
      const values = annotations.map((ann) => ann.value?.[field]);
      const { value, agreement, disagrees } = this.findMajority(
        values,
        annotations
      );
      consensusValue[field] = value;
      totalAgreement += agreement;
      if (disagrees) {
        disagreements.push({
          field,
          values: disagrees,
          resolved: true,
          resolution: value
        });
      }
    }
    const avgAgreement = allFields.size > 0 ? totalAgreement / allFields.size : 1;
    return {
      value: consensusValue,
      method: "majority",
      agreement: avgAgreement,
      confidence: avgAgreement,
      contributingAnnotations: annotations.map((a) => a.id),
      disagreements: disagreements.length > 0 ? disagreements : void 0
    };
  }
  /**
   * Find majority value among `values`, where `values[i]` was supplied by
   * `annotations[i]`. Ties are won by the value that appeared first.
   *
   * @param {Array<*>} values
   * @param {object[]} annotations
   * @returns {{value: *, agreement: number, disagrees?: object[]}}
   *   `disagrees` lists every distinct value with its annotators and count,
   *   and is only present when more than one distinct value was seen.
   */
  findMajority(values, annotations) {
    const counts = new Map();
    for (let i = 0; i < values.length; i++) {
      const key = this.valueKey(values[i]);
      let bucket = counts.get(key);
      if (!bucket) {
        bucket = { value: values[i], count: 0, annotatorIds: [] };
        counts.set(key, bucket);
      }
      bucket.count++;
      bucket.annotatorIds.push(annotations[i].annotatorId);
    }
    let maxCount = 0;
    let majorityValue = null;
    for (const { value, count } of counts.values()) {
      if (count > maxCount) {
        maxCount = count;
        majorityValue = value;
      }
    }
    const agreement = maxCount / values.length;
    if (counts.size > 1) {
      const disagrees = Array.from(counts.values(), (v) => ({
        value: v.value,
        annotatorIds: v.annotatorIds,
        count: v.count
      }));
      return { value: majorityValue, agreement, disagrees };
    }
    return { value: majorityValue, agreement };
  }
  /**
   * Unanimous consensus: succeeds only when every annotation value is
   * structurally equal to the first one.
   *
   * @param {object[]} annotations - Must be non-empty.
   * @returns {object} On success the shared value with agreement and
   *   confidence 1; otherwise an empty value, zeros and no contributors.
   */
  unanimousConsensus(annotations) {
    const firstValue = annotations[0].value;
    // Computed once rather than once per annotation.
    const firstKey = this.valueKey(firstValue);
    const allMatch = annotations.every(
      (ann) => this.valueKey(ann.value) === firstKey
    );
    return {
      value: allMatch ? firstValue : {},
      method: "unanimous",
      agreement: allMatch ? 1 : 0,
      confidence: allMatch ? 1 : 0,
      contributingAnnotations: allMatch ? annotations.map((a) => a.id) : []
    };
  }
  /**
   * Weighted consensus: per field, the value backed by the largest total
   * annotator weight wins. Falls back to majority when no weights are set.
   * Agreement is the winning weight summed over fields divided by the total
   * weight summed over fields.
   *
   * @param {object[]} annotations
   * @returns {object} Consensus result with `method: "weighted"` (or the
   *   majority result when falling back).
   */
  weightedConsensus(annotations) {
    if (!this.weights) {
      return this.majorityConsensus(annotations);
    }
    const allFields = new Set();
    for (const ann of annotations) {
      // A null/undefined value contributes no fields instead of throwing.
      for (const key of Object.keys(ann.value ?? {})) {
        allFields.add(key);
      }
    }
    const consensusValue = {};
    let totalWeightedAgreement = 0;
    let totalWeight = 0;
    for (const field of allFields) {
      const tallies = new Map();
      for (const ann of annotations) {
        const fieldValue = ann.value?.[field];
        const key = this.valueKey(fieldValue);
        const weight = this.weights[ann.annotatorId] ?? 1;
        let tally = tallies.get(key);
        if (!tally) {
          tally = { value: fieldValue, weight: 0 };
          tallies.set(key, tally);
        }
        tally.weight += weight;
        totalWeight += weight;
      }
      let maxWeight = 0;
      let bestValue = null;
      for (const { value, weight } of tallies.values()) {
        if (weight > maxWeight) {
          maxWeight = weight;
          bestValue = value;
        }
      }
      consensusValue[field] = bestValue;
      totalWeightedAgreement += maxWeight;
    }
    const avgAgreement = totalWeight > 0 ? totalWeightedAgreement / totalWeight : 1;
    return {
      value: consensusValue,
      method: "weighted",
      agreement: avgAgreement,
      confidence: avgAgreement,
      contributingAnnotations: annotations.map((a) => a.id)
    };
  }
  /**
   * Expert consensus: the configured expert's annotation wins outright.
   * Falls back to majority when no expert is configured or the expert did
   * not annotate.
   *
   * @param {object[]} annotations
   * @returns {object} The expert's value with agreement 1 and the expert's
   *   `confidence` (1 when absent), or the majority result when falling back.
   */
  expertConsensus(annotations) {
    if (!this.expertAnnotatorId) {
      return this.majorityConsensus(annotations);
    }
    const expertAnnotation = annotations.find(
      (a) => a.annotatorId === this.expertAnnotatorId
    );
    if (!expertAnnotation) {
      return this.majorityConsensus(annotations);
    }
    return {
      value: expertAnnotation.value,
      method: "expert",
      agreement: 1,
      confidence: expertAnnotation.confidence ?? 1,
      contributingAnnotations: [expertAnnotation.id]
    };
  }
  /**
   * Calculate inter-annotator agreement as the share of annotations whose
   * whole value equals the most common value.
   *
   * This is a raw proportion with no correction for chance agreement, so it
   * is not a kappa statistic.
   *
   * @param {object[]} annotations
   * @returns {number} 1 for fewer than two annotations, otherwise a value in (0, 1].
   */
  calculateAgreement(annotations) {
    if (annotations.length < 2) return 1;
    const counts = new Map();
    for (const ann of annotations) {
      const key = this.valueKey(ann.value);
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    const maxCount = Math.max(...counts.values());
    return maxCount / annotations.length;
  }
};
582
/**
 * Factory helper: build a ConsensusManager from a configuration object.
 *
 * @param {object} config - Passed unchanged to the ConsensusManager constructor.
 * @returns {ConsensusManager} The newly constructed manager.
 */
function createConsensusManager(config) {
  const manager = new ConsensusManager(config);
  return manager;
}
585
+
586
+ export {
587
+ AnnotationTask,
588
+ createAnnotationTask,
589
+ BinaryClassificationSchema,
590
+ QualityRatingSchema,
591
+ TextSpanSchema,
592
+ AnnotationQueue,
593
+ createAnnotationQueue,
594
+ ConsensusManager,
595
+ createConsensusManager
596
+ };