@restormel/testing-runner 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/ac-agent-loop.d.ts +10 -0
  2. package/dist/ac-agent-loop.d.ts.map +1 -1
  3. package/dist/ac-agent-loop.js +69 -20
  4. package/dist/ac-agent-loop.js.map +1 -1
  5. package/dist/ac-judge.d.ts +4 -1
  6. package/dist/ac-judge.d.ts.map +1 -1
  7. package/dist/ac-judge.js +69 -74
  8. package/dist/ac-judge.js.map +1 -1
  9. package/dist/ac-llm.d.ts +13 -7
  10. package/dist/ac-llm.d.ts.map +1 -1
  11. package/dist/ac-llm.js +27 -12
  12. package/dist/ac-llm.js.map +1 -1
  13. package/dist/browser-goal.d.ts +5 -0
  14. package/dist/browser-goal.d.ts.map +1 -1
  15. package/dist/browser-goal.js +14 -3
  16. package/dist/browser-goal.js.map +1 -1
  17. package/dist/egress-browser-context.d.ts +23 -0
  18. package/dist/egress-browser-context.d.ts.map +1 -0
  19. package/dist/egress-browser-context.js +46 -0
  20. package/dist/egress-browser-context.js.map +1 -0
  21. package/dist/egress-navigation.d.ts +12 -0
  22. package/dist/egress-navigation.d.ts.map +1 -0
  23. package/dist/egress-navigation.js +70 -0
  24. package/dist/egress-navigation.js.map +1 -0
  25. package/dist/evaluate-criteria.d.ts +5 -0
  26. package/dist/evaluate-criteria.d.ts.map +1 -1
  27. package/dist/evaluate-criteria.js +64 -68
  28. package/dist/evaluate-criteria.js.map +1 -1
  29. package/dist/llm-usage.d.ts +10 -0
  30. package/dist/llm-usage.d.ts.map +1 -0
  31. package/dist/llm-usage.js +22 -0
  32. package/dist/llm-usage.js.map +1 -0
  33. package/dist/run-ac-sequence-goal.d.ts.map +1 -1
  34. package/dist/run-ac-sequence-goal.js +37 -8
  35. package/dist/run-ac-sequence-goal.js.map +1 -1
  36. package/dist/run-suite.d.ts.map +1 -1
  37. package/dist/run-suite.js +140 -65
  38. package/dist/run-suite.js.map +1 -1
  39. package/dist/suite-llm-budget.d.ts +50 -0
  40. package/dist/suite-llm-budget.d.ts.map +1 -0
  41. package/dist/suite-llm-budget.js +154 -0
  42. package/dist/suite-llm-budget.js.map +1 -0
  43. package/package.json +5 -5
@@ -0,0 +1,154 @@
1
/**
 * Returns `value` when it is a finite number, otherwise 0.
 * `Number.isFinite` does not coerce, so strings, `null`, `undefined`, `NaN`
 * and `Infinity` all contribute nothing to a running sum.
 */
function finiteOrZero(value) {
    return Number.isFinite(value) ? value : 0;
}

/** Returns `sum` when it is strictly positive, otherwise `undefined`. */
function positiveOrUndefined(sum) {
    return sum > 0 ? sum : undefined;
}

/**
 * True when at least one of `promptTokens`, `completionTokens` or
 * `totalTokens` on `u` is a finite number (zero counts).
 */
function hasDefinedNumericUsage(u) {
    return (Number.isFinite(u.promptTokens) ||
        Number.isFinite(u.completionTokens) ||
        Number.isFinite(u.totalTokens));
}

/**
 * Enforces suite- and goal-level `llm_budget` and aggregates provider token usage for {@link RunRecord.costEstimate}.
 *
 * Budget checks return `null` when the call may proceed, or a
 * `{ code, summary }` object describing the first exhausted limit.
 */
export class SuiteLlmBudgetTracker {
    suiteBudget;
    suiteStart = Date.now();
    suiteCompletions = 0;
    suiteAcRounds = 0;
    goalCompletions = 0;
    goalAcRounds = 0;
    currentGoalId = "";
    goalBudget;
    promptSum = 0;
    completionSum = 0;
    totalReportedSum = 0;
    /** True when at least one completion returned numeric `usage` fields (including zeros). */
    providerReportedUsage = false;
    /** Number of recorded calls that carried no finite usage field. */
    estimateOnlyCalls = 0;

    /**
     * @param {object} [suiteBudget] Optional suite limits; the fields read are
     *   `maxWallClockMs`, `maxCompletions` and `maxRounds`.
     */
    constructor(suiteBudget) {
        this.suiteBudget = suiteBudget;
    }

    /**
     * Switches to a new goal and resets the per-goal counters.
     *
     * @param {string} goalId Identifier used in goal-level summaries.
     * @param {object} [goalBudget] Optional goal limits; the fields read are
     *   `maxCompletions` and `maxRounds`.
     */
    beginGoal(goalId, goalBudget) {
        this.currentGoalId = goalId;
        this.goalBudget = goalBudget;
        this.goalCompletions = 0;
        this.goalAcRounds = 0;
    }

    /** Milliseconds elapsed since this tracker was constructed. */
    wallClockMs() {
        return Date.now() - this.suiteStart;
    }

    /**
     * Call before each chat completion (judge or AC agent).
     *
     * Checks, in order: suite wall clock, suite completions, suite AC rounds
     * (only for `"ac_round"`), goal completions, goal AC rounds (only for
     * `"ac_round"`). Nothing is counted here; counting happens in
     * {@link SuiteLlmBudgetTracker#recordLlmCall}.
     *
     * @param {string} kind `"ac_round"` is also checked against round limits.
     * @returns {{code: string, summary: string} | null}
     */
    tryConsumeLlm(kind) {
        const isAcRound = kind === "ac_round";
        // Suite-level limits share their implementation with the pre-goal checks.
        const suiteBlock = this.suiteNonAcWouldBlock() ??
            (isAcRound ? this.suiteAcBudgetWouldBlock() : null);
        if (suiteBlock !== null) {
            return suiteBlock;
        }
        const goalBudget = this.goalBudget;
        if (goalBudget?.maxCompletions !== undefined && this.goalCompletions >= goalBudget.maxCompletions) {
            return {
                code: "GOAL_BUDGET_MAX_COMPLETIONS",
                summary: `Goal "${this.currentGoalId}" llm_budget: ${this.goalCompletions} LLM calls >= max_completions ${goalBudget.maxCompletions}`,
            };
        }
        if (isAcRound && goalBudget?.maxRounds !== undefined && this.goalAcRounds >= goalBudget.maxRounds) {
            return {
                code: "GOAL_BUDGET_MAX_ROUNDS",
                summary: `Goal "${this.currentGoalId}" llm_budget: ${this.goalAcRounds} AC rounds >= max_rounds ${goalBudget.maxRounds}`,
            };
        }
        return null;
    }

    /**
     * Suite wall clock or total completion cap — skip starting further goals without consuming a slot.
     *
     * @returns {{code: string, summary: string} | null}
     */
    suiteNonAcWouldBlock() {
        const budget = this.suiteBudget;
        const wall = this.wallClockMs();
        if (budget?.maxWallClockMs !== undefined && wall >= budget.maxWallClockMs) {
            return {
                code: "SUITE_BUDGET_WALL_CLOCK",
                summary: `Suite llm_budget: wall clock ${wall}ms >= max_wall_clock_ms ${budget.maxWallClockMs}`,
            };
        }
        if (budget?.maxCompletions !== undefined && this.suiteCompletions >= budget.maxCompletions) {
            return {
                code: "SUITE_BUDGET_MAX_COMPLETIONS",
                summary: `Suite llm_budget: ${this.suiteCompletions} LLM calls >= max_completions ${budget.maxCompletions}`,
            };
        }
        return null;
    }

    /**
     * Before an ac_sequence goal: suite AC round budget already exhausted.
     *
     * @returns {{code: string, summary: string} | null}
     */
    suiteAcBudgetWouldBlock() {
        const budget = this.suiteBudget;
        if (budget?.maxRounds !== undefined && this.suiteAcRounds >= budget.maxRounds) {
            return {
                code: "SUITE_BUDGET_MAX_ROUNDS",
                summary: `Suite llm_budget: ${this.suiteAcRounds} AC agent rounds >= max_rounds ${budget.maxRounds}`,
            };
        }
        return null;
    }

    /**
     * Call after an LLM request returns (count attempt even if model errored after HTTP 200).
     *
     * @param {string} kind `"ac_round"` also increments the AC round counters.
     * @param {object | null} [usage] Provider usage; the fields read are
     *   `promptTokens`, `completionTokens` and `totalTokens`. A missing or
     *   `null` value, or one with no finite field, is counted as an
     *   estimate-only call.
     */
    recordLlmCall(kind, usage) {
        this.suiteCompletions++;
        this.goalCompletions++;
        if (kind === "ac_round") {
            this.suiteAcRounds++;
            this.goalAcRounds++;
        }
        // `!= null` deliberately matches both null and undefined.
        if (usage != null && hasDefinedNumericUsage(usage)) {
            this.providerReportedUsage = true;
            // Each field is validated on its own: one finite field must not let
            // a NaN/Infinity/string sibling corrupt the running sums.
            this.promptSum += finiteOrZero(usage.promptTokens);
            this.completionSum += finiteOrZero(usage.completionTokens);
            this.totalReportedSum += finiteOrZero(usage.totalTokens);
        }
        else {
            this.estimateOnlyCalls++;
        }
    }

    /**
     * Snapshot of suite-wide execution counters.
     *
     * @returns {{wallClockMs: number, llmCompletions: number, acAgentRounds: number}}
     */
    summarizeExecution() {
        return {
            wallClockMs: this.wallClockMs(),
            llmCompletions: this.suiteCompletions,
            acAgentRounds: this.suiteAcRounds,
        };
    }

    /**
     * Aggregated token usage for cost estimation.
     *
     * `usageSource` is `"estimate"` when no call reported usage, `"provider"`
     * when every recorded call did, and `"mixed"` otherwise. Sums that are
     * not strictly positive are reported as `undefined`.
     *
     * @returns {{tokenUsage?: {prompt: (number|undefined), completion: (number|undefined), total: (number|undefined)}, usageSource: string}}
     */
    usageForCostEstimate() {
        if (!this.providerReportedUsage) {
            return { usageSource: "estimate" };
        }
        return {
            tokenUsage: {
                prompt: positiveOrUndefined(this.promptSum),
                completion: positiveOrUndefined(this.completionSum),
                total: positiveOrUndefined(this.totalReportedSum),
            },
            usageSource: this.estimateOnlyCalls > 0 ? "mixed" : "provider",
        };
    }
}
154
+ //# sourceMappingURL=suite-llm-budget.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"suite-llm-budget.js","sourceRoot":"","sources":["../src/suite-llm-budget.ts"],"names":[],"mappings":"AAQA,SAAS,sBAAsB,CAAC,CAAgB;IAC9C,OAAO,CACL,CAAC,CAAC,CAAC,YAAY,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC,gBAAgB,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QACzE,CAAC,CAAC,CAAC,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAChE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAeH;IAdZ,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACjC,gBAAgB,GAAG,CAAC,CAAC;IACrB,aAAa,GAAG,CAAC,CAAC;IAClB,eAAe,GAAG,CAAC,CAAC;IACpB,YAAY,GAAG,CAAC,CAAC;IACjB,aAAa,GAAG,EAAE,CAAC;IACnB,UAAU,CAAwB;IAClC,SAAS,GAAG,CAAC,CAAC;IACd,aAAa,GAAG,CAAC,CAAC;IAClB,gBAAgB,GAAG,CAAC,CAAC;IAC7B,2FAA2F;IACnF,qBAAqB,GAAG,KAAK,CAAC;IAC9B,iBAAiB,GAAG,CAAC,CAAC;IAE9B,YAA6B,WAAkC;QAAlC,gBAAW,GAAX,WAAW,CAAuB;IAAG,CAAC;IAEnE,SAAS,CAAC,MAAc,EAAE,UAAiC;QACzD,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC;QAC5B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,UAAU,CAAC;IACtC,CAAC;IAED,4DAA4D;IAC5D,aAAa,CAAC,IAAyB;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,IAAI,CAAC,WAAW,EAAE,cAAc,KAAK,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC;YAC9F,OAAO;gBACL,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,gCAAgC,IAAI,2BAA2B,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE;aAC1G,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,WAAW,EAAE,cAAc,KAAK,SAAS,IAAI,IAAI,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC;YAC/G,OAAO;gBACL,IAAI,EAAE,8BAA8B;gBACpC,OAAO,EAAE,qBAAqB,IAAI,CAAC,gBAAgB,iCAAiC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE;aACtH,CAAC;QACJ,CAAC;QACD,IACE,IAAI,KAAK,UAAU;YACnB,IAAI,CAAC,WAAW,EAAE,SAAS,KAAK,SAAS;YACzC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,WAAW,CAAC,SAAS,EAChD,CAAC;YACD,OAAO;gBACL,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,qBAAqB,IAAI,CAAC,aAAa,kCAAkC,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE;aAC/G,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,EAAE,cAAc,KAAK,SAAS,IAAI,IAAI,CAAC,eAAe,I
AAI,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,CAAC;YAC5G,OAAO;gBACL,IAAI,EAAE,6BAA6B;gBACnC,OAAO,EAAE,SAAS,IAAI,CAAC,aAAa,iBAAiB,IAAI,CAAC,eAAe,iCAAiC,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE;aAC3I,CAAC;QACJ,CAAC;QACD,IACE,IAAI,KAAK,UAAU;YACnB,IAAI,CAAC,UAAU,EAAE,SAAS,KAAK,SAAS;YACxC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,EAC9C,CAAC;YACD,OAAO;gBACL,IAAI,EAAE,wBAAwB;gBAC9B,OAAO,EAAE,SAAS,IAAI,CAAC,aAAa,iBAAiB,IAAI,CAAC,YAAY,4BAA4B,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE;aAC9H,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uGAAuG;IACvG,oBAAoB;QAClB,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,IAAI,CAAC,WAAW,EAAE,cAAc,KAAK,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC;YAC9F,OAAO;gBACL,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,gCAAgC,IAAI,2BAA2B,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE;aAC1G,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,WAAW,EAAE,cAAc,KAAK,SAAS,IAAI,IAAI,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC;YAC/G,OAAO;gBACL,IAAI,EAAE,8BAA8B;gBACpC,OAAO,EAAE,qBAAqB,IAAI,CAAC,gBAAgB,iCAAiC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE;aACtH,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,2EAA2E;IAC3E,uBAAuB;QACrB,IAAI,IAAI,CAAC,WAAW,EAAE,SAAS,KAAK,SAAS,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,CAAC;YAClG,OAAO;gBACL,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,qBAAqB,IAAI,CAAC,aAAa,kCAAkC,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE;aAC/G,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,8FAA8F;IAC9F,aAAa,CAAC,IAAyB,EAAE,KAAqB;QAC5D,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACxB,IAAI,CAAC,eAAe,EAAE,CAAC;QACvB,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;YACxB,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,KAAK,KAAK,SAAS,IAAI,sBAAsB,CAAC,KAAK,CAAC,EAAE,CAAC;YACzD,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC;YAClC,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;YAC1C,IAAI,CAAC,aAAa,IAAI,KAAK,CAAC,gBAAgB,IAAI,CAAC,CAAC;YAClD,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,CAAC;YAC5B,IAAI,CAAC,KAAK,SAAS,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAAE,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC;QACxE,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,k
BAAkB;QAChB,OAAO;YACL,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE;YAC/B,cAAc,EAAE,IAAI,CAAC,gBAAgB;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa;SAClC,CAAC;IACJ,CAAC;IAED,oBAAoB;QAIlB,MAAM,WAAW,GAAG,IAAI,CAAC,qBAAqB,CAAC;QAC/C,IAAI,WAAW,IAAI,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,UAAU,EAAE;oBACV,MAAM,EAAE,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;oBACvD,UAAU,EAAE,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;oBACnE,KAAK,EAAE,IAAI,CAAC,gBAAgB,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS;iBACrE;gBACD,WAAW,EAAE,OAAO;aACrB,CAAC;QACJ,CAAC;QACD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO;gBACL,UAAU,EAAE;oBACV,MAAM,EAAE,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;oBACvD,UAAU,EAAE,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;oBACnE,KAAK,EAAE,IAAI,CAAC,gBAAgB,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS;iBACrE;gBACD,WAAW,EAAE,UAAU;aACxB,CAAC;QACJ,CAAC;QACD,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC;IACrC,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@restormel/testing-runner",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "Suite execution: browser goals, Keys-backed judges, retries, artefacts.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -34,10 +34,10 @@
34
34
  "chrome-launcher": "^1.2.1",
35
35
  "lighthouse": "^13.1.0",
36
36
  "playwright": "^1.49.0",
37
- "@restormel/testing-browser-playwright": "0.1.5",
38
- "@restormel/testing-config": "0.1.5",
39
- "@restormel/testing-core": "0.1.5",
40
- "@restormel/testing-keys-adapter": "0.1.5"
37
+ "@restormel/testing-browser-playwright": "0.1.6",
38
+ "@restormel/testing-config": "0.1.6",
39
+ "@restormel/testing-keys-adapter": "0.1.6",
40
+ "@restormel/testing-core": "0.1.6"
41
41
  },
42
42
  "devDependencies": {
43
43
  "typescript": "^5.7.0"