@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -12,14 +12,14 @@
|
|
|
12
12
|
* - Tracks agent actions for performance evaluation
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
|
-
import type { JsonValue } from
|
|
16
|
-
import { logger } from
|
|
15
|
+
import type { JsonValue } from "../adapter";
|
|
16
|
+
import { logger } from "../utils/logger";
|
|
17
17
|
import type {
|
|
18
18
|
BenchmarkGameSnapshot,
|
|
19
19
|
GameState,
|
|
20
20
|
Tick,
|
|
21
|
-
} from
|
|
22
|
-
import { MetricsValidator } from
|
|
21
|
+
} from "./BenchmarkDataGenerator";
|
|
22
|
+
import { MetricsValidator } from "./MetricsValidator";
|
|
23
23
|
|
|
24
24
|
export interface SimulationConfig {
|
|
25
25
|
/** The benchmark snapshot to replay */
|
|
@@ -52,7 +52,7 @@ export interface AgentAction {
|
|
|
52
52
|
perpCorrect?: boolean;
|
|
53
53
|
sentimentAtTrade?: number;
|
|
54
54
|
priceChange?: number;
|
|
55
|
-
expectedDirection?:
|
|
55
|
+
expectedDirection?: "up" | "down";
|
|
56
56
|
|
|
57
57
|
/** Sentiment analysis accuracy tracking */
|
|
58
58
|
sentimentAccuracy?: number;
|
|
@@ -62,14 +62,14 @@ export interface AgentAction {
|
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
export type AgentActionType =
|
|
65
|
-
|
|
|
66
|
-
|
|
|
67
|
-
|
|
|
68
|
-
|
|
|
69
|
-
|
|
|
70
|
-
|
|
|
71
|
-
|
|
|
72
|
-
|
|
|
65
|
+
| "query_state"
|
|
66
|
+
| "buy_prediction"
|
|
67
|
+
| "sell_prediction"
|
|
68
|
+
| "open_perp"
|
|
69
|
+
| "close_perp"
|
|
70
|
+
| "create_post"
|
|
71
|
+
| "join_group"
|
|
72
|
+
| "send_message";
|
|
73
73
|
|
|
74
74
|
export type AgentActionResult =
|
|
75
75
|
| { positionId: string; shares: number } // buy_prediction
|
|
@@ -197,23 +197,23 @@ export class SimulationEngine {
|
|
|
197
197
|
const validation = MetricsValidator.validate(
|
|
198
198
|
metrics,
|
|
199
199
|
this.actions,
|
|
200
|
-
this.config.snapshot.groundTruth
|
|
200
|
+
this.config.snapshot.groundTruth,
|
|
201
201
|
);
|
|
202
202
|
|
|
203
203
|
if (!validation.valid) {
|
|
204
|
-
logger.error(
|
|
204
|
+
logger.error("Metrics validation failed", {
|
|
205
205
|
errors: validation.errors,
|
|
206
206
|
warnings: validation.warnings,
|
|
207
207
|
});
|
|
208
208
|
}
|
|
209
209
|
if (validation.warnings.length > 0) {
|
|
210
|
-
logger.warn(
|
|
210
|
+
logger.warn("Metrics validation warnings", {
|
|
211
211
|
warnings: validation.warnings,
|
|
212
212
|
});
|
|
213
213
|
}
|
|
214
214
|
const trajectory = this.buildTrajectory();
|
|
215
215
|
|
|
216
|
-
logger.info(
|
|
216
|
+
logger.info("Simulation completed", {
|
|
217
217
|
duration: endTime - this.startTime,
|
|
218
218
|
ticksProcessed: this.currentTick,
|
|
219
219
|
totalPnl: metrics.totalPnl,
|
|
@@ -243,7 +243,7 @@ export class SimulationEngine {
|
|
|
243
243
|
this.currentTick = 0;
|
|
244
244
|
this.pnlHistory = [];
|
|
245
245
|
|
|
246
|
-
logger.info(
|
|
246
|
+
logger.info("Simulation initialized", {
|
|
247
247
|
benchmarkId: this.config.snapshot.id,
|
|
248
248
|
agentId: this.config.agentId,
|
|
249
249
|
totalTicks: this.config.snapshot.ticks.length,
|
|
@@ -308,27 +308,27 @@ export class SimulationEngine {
|
|
|
308
308
|
*/
|
|
309
309
|
async performAction(
|
|
310
310
|
type: AgentActionType,
|
|
311
|
-
data: Record<string, JsonValue
|
|
311
|
+
data: Record<string, JsonValue>,
|
|
312
312
|
): Promise<{ success: boolean; result?: AgentActionResult; error?: string }> {
|
|
313
313
|
const actionStart = Date.now();
|
|
314
314
|
|
|
315
315
|
let result: AgentActionResult;
|
|
316
|
-
let correctness: AgentAction[
|
|
316
|
+
let correctness: AgentAction["correctness"];
|
|
317
317
|
|
|
318
318
|
try {
|
|
319
319
|
switch (type) {
|
|
320
|
-
case
|
|
320
|
+
case "buy_prediction": {
|
|
321
321
|
result = this.handleBuyPrediction(data);
|
|
322
322
|
const { marketId, outcome } = data as {
|
|
323
323
|
marketId: string;
|
|
324
|
-
outcome:
|
|
324
|
+
outcome: "YES" | "NO";
|
|
325
325
|
};
|
|
326
326
|
|
|
327
327
|
// Track correctness for prediction markets
|
|
328
328
|
const marketOutcome =
|
|
329
329
|
this.config.snapshot.groundTruth.marketOutcomes[marketId];
|
|
330
330
|
if (marketOutcome !== undefined) {
|
|
331
|
-
const predictedOutcome = outcome ===
|
|
331
|
+
const predictedOutcome = outcome === "YES";
|
|
332
332
|
const isCorrect = predictedOutcome === marketOutcome;
|
|
333
333
|
|
|
334
334
|
correctness = {
|
|
@@ -340,17 +340,17 @@ export class SimulationEngine {
|
|
|
340
340
|
break;
|
|
341
341
|
}
|
|
342
342
|
|
|
343
|
-
case
|
|
343
|
+
case "open_perp": {
|
|
344
344
|
result = this.handleOpenPerp(data);
|
|
345
345
|
const { ticker, side } = data as {
|
|
346
346
|
ticker: string;
|
|
347
|
-
side:
|
|
347
|
+
side: "LONG" | "SHORT";
|
|
348
348
|
};
|
|
349
349
|
|
|
350
350
|
// Track correctness for perp trades based on sentiment and price movement
|
|
351
351
|
const state = this.getGameState();
|
|
352
352
|
const market = state.perpetualMarkets.find(
|
|
353
|
-
(m: { ticker: string }) => m.ticker === ticker
|
|
353
|
+
(m: { ticker: string }) => m.ticker === ticker,
|
|
354
354
|
);
|
|
355
355
|
|
|
356
356
|
if (market) {
|
|
@@ -370,8 +370,8 @@ export class SimulationEngine {
|
|
|
370
370
|
// Determine if trade was correct
|
|
371
371
|
// If sentiment is negative and we went short, that's correct
|
|
372
372
|
// If sentiment is positive and we went long, that's correct
|
|
373
|
-
const expectedDirection = sentimentAtTrade < 0 ?
|
|
374
|
-
const tradeDirection = side ===
|
|
373
|
+
const expectedDirection = sentimentAtTrade < 0 ? "down" : "up";
|
|
374
|
+
const tradeDirection = side === "SHORT" ? "down" : "up";
|
|
375
375
|
const isCorrect = expectedDirection === tradeDirection;
|
|
376
376
|
|
|
377
377
|
correctness = {
|
|
@@ -385,15 +385,15 @@ export class SimulationEngine {
|
|
|
385
385
|
break;
|
|
386
386
|
}
|
|
387
387
|
|
|
388
|
-
case
|
|
388
|
+
case "close_perp":
|
|
389
389
|
result = this.handleClosePerp(data);
|
|
390
390
|
break;
|
|
391
391
|
|
|
392
|
-
case
|
|
392
|
+
case "join_group":
|
|
393
393
|
result = this.handleJoinGroup(data);
|
|
394
394
|
break;
|
|
395
395
|
|
|
396
|
-
case
|
|
396
|
+
case "create_post":
|
|
397
397
|
result = this.handleCreatePost(data);
|
|
398
398
|
break;
|
|
399
399
|
|
|
@@ -471,13 +471,13 @@ export class SimulationEngine {
|
|
|
471
471
|
} {
|
|
472
472
|
const { marketId, outcome, amount } = data as {
|
|
473
473
|
marketId: string;
|
|
474
|
-
outcome:
|
|
474
|
+
outcome: "YES" | "NO";
|
|
475
475
|
amount: number;
|
|
476
476
|
};
|
|
477
477
|
|
|
478
478
|
const state = this.getGameState();
|
|
479
479
|
const market = state.predictionMarkets.find(
|
|
480
|
-
(m: { id: string }) => m.id === marketId
|
|
480
|
+
(m: { id: string }) => m.id === marketId,
|
|
481
481
|
);
|
|
482
482
|
|
|
483
483
|
if (!market) {
|
|
@@ -485,7 +485,7 @@ export class SimulationEngine {
|
|
|
485
485
|
}
|
|
486
486
|
|
|
487
487
|
// Calculate shares based on current price
|
|
488
|
-
const price = outcome ===
|
|
488
|
+
const price = outcome === "YES" ? market.yesPrice : market.noPrice;
|
|
489
489
|
const shares = amount / price;
|
|
490
490
|
|
|
491
491
|
// Record position
|
|
@@ -510,14 +510,14 @@ export class SimulationEngine {
|
|
|
510
510
|
} {
|
|
511
511
|
const { ticker, side, size, leverage } = data as {
|
|
512
512
|
ticker: string;
|
|
513
|
-
side:
|
|
513
|
+
side: "LONG" | "SHORT";
|
|
514
514
|
size: number;
|
|
515
515
|
leverage: number;
|
|
516
516
|
};
|
|
517
517
|
|
|
518
518
|
const state = this.getGameState();
|
|
519
519
|
const market = state.perpetualMarkets.find(
|
|
520
|
-
(m: { ticker: string }) => m.ticker === ticker
|
|
520
|
+
(m: { ticker: string }) => m.ticker === ticker,
|
|
521
521
|
);
|
|
522
522
|
|
|
523
523
|
if (!market) {
|
|
@@ -551,7 +551,7 @@ export class SimulationEngine {
|
|
|
551
551
|
|
|
552
552
|
const state = this.getGameState();
|
|
553
553
|
const market = state.perpetualMarkets.find(
|
|
554
|
-
(m: { ticker: string }) => m.ticker === position.ticker
|
|
554
|
+
(m: { ticker: string }) => m.ticker === position.ticker,
|
|
555
555
|
);
|
|
556
556
|
|
|
557
557
|
if (!market) {
|
|
@@ -561,7 +561,7 @@ export class SimulationEngine {
|
|
|
561
561
|
// Calculate realized P&L
|
|
562
562
|
const priceChange = market.price - position.entryPrice;
|
|
563
563
|
const pnl =
|
|
564
|
-
position.side ===
|
|
564
|
+
position.side === "LONG"
|
|
565
565
|
? priceChange * position.size * position.leverage
|
|
566
566
|
: -priceChange * position.size * position.leverage;
|
|
567
567
|
|
|
@@ -601,13 +601,13 @@ export class SimulationEngine {
|
|
|
601
601
|
if (position.closedAt) continue; // Skip closed positions
|
|
602
602
|
|
|
603
603
|
const market = tick.state.perpetualMarkets.find(
|
|
604
|
-
(m: { ticker: string; price: number }) => m.ticker === position.ticker
|
|
604
|
+
(m: { ticker: string; price: number }) => m.ticker === position.ticker,
|
|
605
605
|
);
|
|
606
606
|
if (!market) continue;
|
|
607
607
|
|
|
608
608
|
const priceChange = market.price - position.entryPrice;
|
|
609
609
|
position.unrealizedPnl =
|
|
610
|
-
position.side ===
|
|
610
|
+
position.side === "LONG"
|
|
611
611
|
? priceChange * position.size * position.leverage
|
|
612
612
|
: -priceChange * position.size * position.leverage;
|
|
613
613
|
}
|
|
@@ -629,8 +629,8 @@ export class SimulationEngine {
|
|
|
629
629
|
const marketOutcome =
|
|
630
630
|
this.config.snapshot.groundTruth.marketOutcomes[position.marketId];
|
|
631
631
|
const isCorrect =
|
|
632
|
-
(position.outcome ===
|
|
633
|
-
(position.outcome ===
|
|
632
|
+
(position.outcome === "YES" && marketOutcome) ||
|
|
633
|
+
(position.outcome === "NO" && !marketOutcome);
|
|
634
634
|
|
|
635
635
|
if (isCorrect) {
|
|
636
636
|
correctPredictions++;
|
|
@@ -736,12 +736,12 @@ export class SimulationEngine {
|
|
|
736
736
|
|
|
737
737
|
// Match action type and target
|
|
738
738
|
if (
|
|
739
|
-
optimalAction.type ===
|
|
740
|
-
a.type ===
|
|
739
|
+
optimalAction.type === "buy_prediction" &&
|
|
740
|
+
a.type === "buy_prediction"
|
|
741
741
|
) {
|
|
742
742
|
return a.data.marketId === optimalAction.target;
|
|
743
743
|
}
|
|
744
|
-
if (optimalAction.type ===
|
|
744
|
+
if (optimalAction.type === "open_perp" && a.type === "open_perp") {
|
|
745
745
|
return a.data.ticker === optimalAction.target;
|
|
746
746
|
}
|
|
747
747
|
|
|
@@ -772,9 +772,9 @@ export class SimulationEngine {
|
|
|
772
772
|
// Calculate reward based on action outcome
|
|
773
773
|
let reward = 0;
|
|
774
774
|
|
|
775
|
-
if (action.type ===
|
|
775
|
+
if (action.type === "buy_prediction") {
|
|
776
776
|
const positionId = Object.keys(
|
|
777
|
-
Object.fromEntries(this.predictionPositions)
|
|
777
|
+
Object.fromEntries(this.predictionPositions),
|
|
778
778
|
).find((id) => {
|
|
779
779
|
const pos = this.predictionPositions.get(id)!;
|
|
780
780
|
return pos.openedAt === action.tick;
|
|
@@ -787,8 +787,8 @@ export class SimulationEngine {
|
|
|
787
787
|
position.marketId
|
|
788
788
|
];
|
|
789
789
|
const isCorrect =
|
|
790
|
-
(position.outcome ===
|
|
791
|
-
(position.outcome ===
|
|
790
|
+
(position.outcome === "YES" && marketOutcome) ||
|
|
791
|
+
(position.outcome === "NO" && !marketOutcome);
|
|
792
792
|
reward = isCorrect ? 1.0 : -1.0;
|
|
793
793
|
}
|
|
794
794
|
}
|
|
@@ -812,7 +812,7 @@ export class SimulationEngine {
|
|
|
812
812
|
|
|
813
813
|
interface PredictionPosition {
|
|
814
814
|
marketId: string;
|
|
815
|
-
outcome:
|
|
815
|
+
outcome: "YES" | "NO";
|
|
816
816
|
shares: number;
|
|
817
817
|
entryPrice: number;
|
|
818
818
|
amount: number;
|
|
@@ -821,7 +821,7 @@ interface PredictionPosition {
|
|
|
821
821
|
|
|
822
822
|
interface PerpPosition {
|
|
823
823
|
ticker: string;
|
|
824
|
-
side:
|
|
824
|
+
side: "LONG" | "SHORT";
|
|
825
825
|
size: number;
|
|
826
826
|
leverage: number;
|
|
827
827
|
entryPrice: number;
|
|
@@ -1,94 +1,102 @@
|
|
|
1
|
-
|
|
2
1
|
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
} from
|
|
8
|
-
import { logger } from '../utils/logger';
|
|
2
|
+
getAgentRuntimeManager,
|
|
3
|
+
getAgentService,
|
|
4
|
+
getTaskInteractor,
|
|
5
|
+
} from "../dependencies";
|
|
6
|
+
import { logger } from "../utils/logger";
|
|
9
7
|
|
|
10
8
|
export interface TaskRunnerConfig {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
agentName: string;
|
|
10
|
+
taskPrompt: string;
|
|
11
|
+
iterations: number;
|
|
12
|
+
model: string;
|
|
15
13
|
}
|
|
16
14
|
|
|
17
15
|
export interface TaskRunResult {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
iteration: number;
|
|
17
|
+
success: boolean;
|
|
18
|
+
response: string;
|
|
19
|
+
trajectoryId?: string;
|
|
20
|
+
error?: string;
|
|
21
|
+
duration: number;
|
|
24
22
|
}
|
|
25
23
|
|
|
26
24
|
export class TaskRunner {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
private config: TaskRunnerConfig;
|
|
26
|
+
|
|
27
|
+
constructor(config: TaskRunnerConfig) {
|
|
28
|
+
this.config = config;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
async run(): Promise<TaskRunResult[]> {
|
|
32
|
+
logger.info(
|
|
33
|
+
"Starting Task Benchmark",
|
|
34
|
+
{ config: this.config },
|
|
35
|
+
"TaskRunner",
|
|
36
|
+
);
|
|
37
|
+
|
|
38
|
+
const agentService = getAgentService();
|
|
39
|
+
const runtimeManager = getAgentRuntimeManager();
|
|
40
|
+
const taskInteractor = getTaskInteractor();
|
|
41
|
+
|
|
42
|
+
// 1. Create or get agent
|
|
43
|
+
// For simplicity, we assume we create a temp agent or use existing.
|
|
44
|
+
// Let's create a temporary agent for this run to ensure clean state.
|
|
45
|
+
const agentUser = await agentService.createAgent({
|
|
46
|
+
userId: "task-benchmark-manager", // Dummy manager ID
|
|
47
|
+
name: this.config.agentName,
|
|
48
|
+
system: "You are a helpful assistant.", // Base system prompt
|
|
49
|
+
bio: ["Helpful", "Smart"],
|
|
50
|
+
modelTier: "standard", // or whatever maps to config.model internally
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
const runtime = await runtimeManager.getRuntime(agentUser.id);
|
|
54
|
+
if (!runtime) {
|
|
55
|
+
throw new Error(`Failed to get runtime for agent ${agentUser.id}`);
|
|
31
56
|
}
|
|
32
57
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
58
|
+
const results: TaskRunResult[] = [];
|
|
59
|
+
|
|
60
|
+
// 2. Run iterations
|
|
61
|
+
for (let i = 0; i < this.config.iterations; i++) {
|
|
62
|
+
logger.info(
|
|
63
|
+
`Running iteration ${i + 1}/${this.config.iterations}...`,
|
|
64
|
+
{},
|
|
65
|
+
"TaskRunner",
|
|
66
|
+
);
|
|
67
|
+
const start = Date.now();
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
const result = await taskInteractor.executeTask(
|
|
71
|
+
runtime,
|
|
72
|
+
this.config.taskPrompt,
|
|
73
|
+
);
|
|
74
|
+
|
|
75
|
+
results.push({
|
|
76
|
+
iteration: i + 1,
|
|
77
|
+
success: result.success,
|
|
78
|
+
response: result.response,
|
|
79
|
+
trajectoryId: result.trajectoryId,
|
|
80
|
+
error: result.error,
|
|
81
|
+
duration: Date.now() - start,
|
|
49
82
|
});
|
|
83
|
+
} catch (err) {
|
|
84
|
+
logger.error("Iteration failed", { error: err }, "TaskRunner");
|
|
85
|
+
results.push({
|
|
86
|
+
iteration: i + 1,
|
|
87
|
+
success: false,
|
|
88
|
+
response: "",
|
|
89
|
+
error: err instanceof Error ? err.message : String(err),
|
|
90
|
+
duration: Date.now() - start,
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
}
|
|
50
94
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const results: TaskRunResult[] = [];
|
|
57
|
-
|
|
58
|
-
// 2. Run iterations
|
|
59
|
-
for (let i = 0; i < this.config.iterations; i++) {
|
|
60
|
-
logger.info(`Running iteration ${i + 1}/${this.config.iterations}...`, {}, 'TaskRunner');
|
|
61
|
-
const start = Date.now();
|
|
62
|
-
|
|
63
|
-
try {
|
|
64
|
-
const result = await taskInteractor.executeTask(runtime, this.config.taskPrompt);
|
|
65
|
-
|
|
66
|
-
results.push({
|
|
67
|
-
iteration: i + 1,
|
|
68
|
-
success: result.success,
|
|
69
|
-
response: result.response,
|
|
70
|
-
trajectoryId: result.trajectoryId,
|
|
71
|
-
error: result.error,
|
|
72
|
-
duration: Date.now() - start,
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
} catch (err) {
|
|
76
|
-
logger.error('Iteration failed', { error: err }, 'TaskRunner');
|
|
77
|
-
results.push({
|
|
78
|
-
iteration: i + 1,
|
|
79
|
-
success: false,
|
|
80
|
-
response: '',
|
|
81
|
-
error: err instanceof Error ? err.message : String(err),
|
|
82
|
-
duration: Date.now() - start,
|
|
83
|
-
});
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// 3. Cleanup ?
|
|
88
|
-
// AgentService might not have delete method exposed in interface?
|
|
89
|
-
// Dependencies has `resetRuntime` but not deleteAgent.
|
|
90
|
-
// Access adapter if needed, but for now we leave it.
|
|
95
|
+
// 3. Cleanup ?
|
|
96
|
+
// AgentService might not have delete method exposed in interface?
|
|
97
|
+
// Dependencies has `resetRuntime` but not deleteAgent.
|
|
98
|
+
// Access adapter if needed, but for now we leave it.
|
|
91
99
|
|
|
92
|
-
|
|
93
|
-
|
|
100
|
+
return results;
|
|
101
|
+
}
|
|
94
102
|
}
|