@zhixuan92/multi-model-agent-core 3.9.0 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/model-profiles.json +201 -12
- package/dist/observability/events.d.ts +3 -3
- package/dist/routing/model-profiles.d.ts +73 -0
- package/dist/routing/model-profiles.d.ts.map +1 -1
- package/dist/routing/model-profiles.js +200 -26
- package/dist/routing/model-profiles.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +2 -11
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/telemetry/event-builder.d.ts +4 -24
- package/dist/telemetry/event-builder.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +183 -294
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/normalize.d.ts +18 -0
- package/dist/telemetry/normalize.d.ts.map +1 -0
- package/dist/telemetry/normalize.js +18 -0
- package/dist/telemetry/normalize.js.map +1 -0
- package/dist/telemetry/types.d.ts +969 -1816
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +204 -345
- package/dist/telemetry/types.js.map +1 -1
- package/dist/types.d.ts +40 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +61 -16
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/telemetry/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/telemetry/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,eAAO,MAAM,cAAc,IAAI,CAAC;AAEhC,eAAO,MAAM,eAAe,QAA2C,CAAC;AAWxE,eAAO,MAAM,EAAE;;;;;EAAgD,CAAC;AAEhE,eAAO,MAAM,kBAAkB;;;;;;;;;;;kBAMpB,CAAC;AAIZ,eAAO,MAAM,eAAe;;;;;;;;;;EAU1B,CAAC;AAEH,eAAO,MAAM,SAAS;;;;;;;;;;;;EAYpB,CAAC;AAEH,eAAO,MAAM,WAAW;;;;;EAA6C,CAAC;AAEtE,eAAO,MAAM,wBAAwB;;;;;kBAK1B,CAAC;AAkCZ,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBAMxB,CAAC;AAEZ,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBAGxB,CAAC;AAEZ,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBAIxB,CAAC;AAEZ,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;kBAIxB,CAAC;AAEZ,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;kBAE3B,CAAC;AAEZ,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;4BAM3B,CAAC;AAIH,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBA+C1B,CAAC;AAIZ,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBAOnB,CAAC;AASZ,eAAO,MAAM,iCAAiC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBA+G5C,CAAC;AAIH,MAAM,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,kBAAkB,CAAC,CAAC;AAC9D,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,CAAC;AAC9D,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAAC,CAAC;AAC9E,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAChE,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,eAAe,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,CAAC;AACtD,MAAM,MAAM,kBAAkB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAAC,CAAC"}
|
package/dist/telemetry/types.js
CHANGED
|
@@ -1,111 +1,32 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
* model namespace observed in the wild (Anthropic, OpenAI, Bedrock prefixes,
|
|
7
|
-
* OpenRouter `meta-llama/...`, Ollama `model:tag`). Length cap prevents PII
|
|
8
|
-
* smuggling. The schema validates SHAPE, not VOCABULARY.
|
|
9
|
-
*/
|
|
10
|
-
export const BoundedIdentifier = z
|
|
11
|
-
.string()
|
|
12
|
-
.min(1)
|
|
13
|
-
.max(120)
|
|
14
|
-
.regex(/^[A-Za-z0-9._:/\-]+$/);
|
|
15
|
-
const MAX_STR = 64;
|
|
16
|
-
const MAX_VERSION_STR = 64;
|
|
2
|
+
import { ModelFamilyEnum } from '../routing/model-profiles.js';
|
|
3
|
+
export const SCHEMA_VERSION = 3;
|
|
4
|
+
export const STRICT_ID_REGEX = /^[A-Za-z0-9][-A-Za-z0-9_.:+/@]{0,119}$/;
|
|
5
|
+
// ── Batch wrapper (§3.1) ─────────────────────────────────────────────────
|
|
17
6
|
const VersionString = z
|
|
18
7
|
.string()
|
|
19
8
|
.regex(/^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$/)
|
|
20
|
-
.max(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
'mma-delegate',
|
|
25
|
-
'mma-audit',
|
|
26
|
-
'mma-review',
|
|
27
|
-
'mma-verify',
|
|
28
|
-
'mma-debug',
|
|
29
|
-
'mma-execute-plan',
|
|
30
|
-
'mma-retry',
|
|
31
|
-
'mma-investigate', // present on disk under packages/server/src/skills/
|
|
32
|
-
'mma-context-blocks',
|
|
33
|
-
'mma-clarifications',
|
|
34
|
-
'other', // sentinel for unknown / community skill
|
|
35
|
-
]);
|
|
36
|
-
export const TriggeringSkillId = z.union([InstallableSkillId, z.literal('direct')]);
|
|
37
|
-
export const ClientId = BoundedIdentifier;
|
|
38
|
-
export const ModelFamily = z.enum([
|
|
39
|
-
'claude', // Anthropic
|
|
40
|
-
'openai', // OpenAI
|
|
41
|
-
'gemini', // Google
|
|
42
|
-
'deepseek', // DeepSeek
|
|
43
|
-
'grok', // xAI
|
|
44
|
-
'mistral', // Mistral
|
|
45
|
-
'meta', // Meta (Llama family — covers llama2:7b, meta-llama/..., etc.)
|
|
46
|
-
'qwen', // Alibaba
|
|
47
|
-
'zhipu', // Z.ai (GLM family)
|
|
48
|
-
'kimi', // Moonshot
|
|
49
|
-
'minimax', // MiniMax
|
|
50
|
-
'other', // catch-all — never rejected
|
|
51
|
-
]);
|
|
52
|
-
export const Language = z.enum([
|
|
53
|
-
'en',
|
|
54
|
-
'es',
|
|
55
|
-
'fr',
|
|
56
|
-
'de',
|
|
57
|
-
'zh',
|
|
58
|
-
'ja',
|
|
59
|
-
'ko',
|
|
60
|
-
'pt',
|
|
61
|
-
'ru',
|
|
62
|
-
'it',
|
|
63
|
-
'tr',
|
|
64
|
-
'ar',
|
|
65
|
-
'hi',
|
|
66
|
-
'vi',
|
|
67
|
-
'id',
|
|
68
|
-
'th',
|
|
69
|
-
'pl',
|
|
70
|
-
'nl',
|
|
71
|
-
'sv',
|
|
72
|
-
'other',
|
|
73
|
-
]);
|
|
74
|
-
// Time-zone offset buckets, fully covering UTC-12 through UTC+14, half-open [a, b).
|
|
75
|
-
export const TzOffsetBucket = z.enum([
|
|
76
|
-
'utc_minus_12_to_minus_6', // [-12, -6)
|
|
77
|
-
'utc_minus_6_to_0', // [-6, 0)
|
|
78
|
-
'utc_0_to_plus_6', // [0, +6)
|
|
79
|
-
'utc_plus_6_to_plus_12', // [+6, +12)
|
|
80
|
-
'utc_plus_12_to_plus_15', // [+12, +15) -- covers UTC+12, +12:45, +13, +14
|
|
81
|
-
]);
|
|
82
|
-
export const Os = z.enum(['darwin', 'linux', 'win32', 'other']); // matches process.platform; non-listed → 'other'
|
|
83
|
-
export const InstallMetadata = z.object({
|
|
9
|
+
.max(64);
|
|
10
|
+
export const Os = z.enum(['darwin', 'linux', 'win32', 'other']);
|
|
11
|
+
export const BatchWrapperSchema = z.object({
|
|
12
|
+
schemaVersion: z.literal(3),
|
|
84
13
|
installId: z.string().uuid(),
|
|
85
14
|
mmagentVersion: VersionString,
|
|
86
15
|
os: Os,
|
|
87
|
-
nodeMajor: z.
|
|
88
|
-
language: Language, // bucketed from runtime locale, never raw
|
|
89
|
-
tzOffsetBucket: TzOffsetBucket,
|
|
16
|
+
nodeMajor: z.number().int().min(22).max(99),
|
|
90
17
|
}).strict();
|
|
91
|
-
//
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
'
|
|
100
|
-
'
|
|
101
|
-
'editFile',
|
|
102
|
-
'runShell',
|
|
103
|
-
'listFiles',
|
|
104
|
-
'grep',
|
|
105
|
-
'glob',
|
|
18
|
+
// ── Enums shared across stages and top-level ─────────────────────────────
|
|
19
|
+
export const ConcernCategory = z.enum([
|
|
20
|
+
'missing_test',
|
|
21
|
+
'scope_creep',
|
|
22
|
+
'incomplete_impl',
|
|
23
|
+
'style_lint',
|
|
24
|
+
'security',
|
|
25
|
+
'performance',
|
|
26
|
+
'maintainability',
|
|
27
|
+
'doc_gap',
|
|
106
28
|
'other',
|
|
107
29
|
]);
|
|
108
|
-
// Allowlist of error codes from packages/core/src/types.ts:RunResult.structuredError
|
|
109
30
|
export const ErrorCode = z.enum([
|
|
110
31
|
'verify_command_error',
|
|
111
32
|
'commit_metadata_invalid',
|
|
@@ -119,280 +40,218 @@ export const ErrorCode = z.enum([
|
|
|
119
40
|
'rate_limit_exceeded',
|
|
120
41
|
'other',
|
|
121
42
|
]);
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
'style_lint',
|
|
129
|
-
'security',
|
|
130
|
-
'performance',
|
|
131
|
-
'maintainability',
|
|
132
|
-
'doc_gap',
|
|
133
|
-
'other',
|
|
134
|
-
]);
|
|
135
|
-
// Per-stage breakdown — populated for stages the task actually entered.
|
|
136
|
-
// Each sub-object is null when the stage was not entered.
|
|
137
|
-
export const StageStats = z.object({
|
|
138
|
-
entered: z.boolean(),
|
|
139
|
-
durationBucket: z.enum(['<10s', '10s-1m', '1m-5m', '5m-30m', '30m+']).nullable(),
|
|
140
|
-
costBucket: z.enum(['$0', '<$0.01', '$0.01-$0.10', '$0.10-$1', '$1+']).nullable(),
|
|
141
|
-
agentTier: z.enum(['standard', 'complex']).nullable(),
|
|
142
|
-
modelFamily: ModelFamily.nullable(),
|
|
143
|
-
model: BoundedIdentifier.nullable(),
|
|
43
|
+
export const SeverityBin = z.enum(['high', 'medium', 'low', 'style']);
|
|
44
|
+
export const FindingsBySeveritySchema = z.object({
|
|
45
|
+
high: z.number().int().min(0).max(50),
|
|
46
|
+
medium: z.number().int().min(0).max(50),
|
|
47
|
+
low: z.number().int().min(0).max(50),
|
|
48
|
+
style: z.number().int().min(0).max(50),
|
|
144
49
|
}).strict();
|
|
145
|
-
//
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
50
|
+
// ── Stage entry (§3.3) ───────────────────────────────────────────────────
|
|
51
|
+
const StageNameEnum = z.enum([
|
|
52
|
+
'implementing',
|
|
53
|
+
'spec_review',
|
|
54
|
+
'spec_rework',
|
|
55
|
+
'quality_review',
|
|
56
|
+
'quality_rework',
|
|
57
|
+
'diff_review',
|
|
58
|
+
'verifying',
|
|
59
|
+
'committing',
|
|
60
|
+
]);
|
|
61
|
+
// Base fields shared by all stage variants
|
|
62
|
+
const StageEntryBase = z.object({
|
|
63
|
+
name: StageNameEnum,
|
|
64
|
+
model: z.string().regex(STRICT_ID_REGEX),
|
|
65
|
+
agentTier: z.enum(['standard', 'reasoning']),
|
|
66
|
+
durationMs: z.number().int().min(0).max(3_600_000),
|
|
67
|
+
costUSD: z.number().min(0).max(100),
|
|
68
|
+
inputTokens: z.number().int().min(0).max(5_000_000),
|
|
69
|
+
outputTokens: z.number().int().min(0).max(500_000),
|
|
70
|
+
cachedTokens: z.number().int().min(0).max(5_000_000),
|
|
71
|
+
reasoningTokens: z.number().int().min(0).max(500_000),
|
|
72
|
+
toolCallCount: z.number().int().min(0).max(5000),
|
|
73
|
+
filesReadCount: z.number().int().min(0).max(5000),
|
|
74
|
+
filesWrittenCount: z.number().int().min(0).max(5000),
|
|
75
|
+
turnCount: z.number().int().min(0).max(250),
|
|
76
|
+
maxIdleMs: z.number().int().min(0).max(1_200_000).nullable(),
|
|
77
|
+
totalIdleMs: z.number().int().min(0).max(3_600_000).nullable(),
|
|
155
78
|
});
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
79
|
+
export const ReviewStageEntrySchema = StageEntryBase.extend({
|
|
80
|
+
name: z.enum(['spec_review', 'quality_review', 'diff_review']),
|
|
81
|
+
verdict: z.enum(['approved', 'concerns', 'changes_required', 'error', 'skipped', 'annotated', 'not_applicable']),
|
|
82
|
+
roundsUsed: z.number().int().min(1).max(10),
|
|
83
|
+
concernCategories: z.array(ConcernCategory).max(9),
|
|
84
|
+
findingsBySeverity: FindingsBySeveritySchema,
|
|
85
|
+
}).strict();
|
|
86
|
+
export const ReworkStageEntrySchema = StageEntryBase.extend({
|
|
87
|
+
name: z.enum(['spec_rework', 'quality_rework']),
|
|
88
|
+
triggeringConcernCategories: z.array(ConcernCategory).max(9),
|
|
89
|
+
}).strict();
|
|
90
|
+
export const VerifyStageEntrySchema = StageEntryBase.extend({
|
|
91
|
+
name: z.literal('verifying'),
|
|
92
|
+
outcome: z.enum(['passed', 'failed', 'skipped', 'not_applicable']),
|
|
159
93
|
skipReason: z.enum(['no_command', 'dirty_worktree', 'not_applicable', 'other']).nullable(),
|
|
160
|
-
});
|
|
161
|
-
export const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
94
|
+
}).strict();
|
|
95
|
+
export const CommitStageEntrySchema = StageEntryBase.extend({
|
|
96
|
+
name: z.literal('committing'),
|
|
97
|
+
filesCommittedCount: z.number().int().min(0).max(1000),
|
|
98
|
+
branchCreated: z.boolean(),
|
|
99
|
+
}).strict();
|
|
100
|
+
export const ImplementStageEntrySchema = StageEntryBase.extend({
|
|
101
|
+
name: z.literal('implementing'),
|
|
102
|
+
}).strict();
|
|
103
|
+
export const StageEntrySchema = z.discriminatedUnion('name', [
|
|
104
|
+
ImplementStageEntrySchema,
|
|
105
|
+
ReviewStageEntrySchema,
|
|
106
|
+
ReworkStageEntrySchema,
|
|
107
|
+
VerifyStageEntrySchema,
|
|
108
|
+
CommitStageEntrySchema,
|
|
109
|
+
]);
|
|
110
|
+
// ── Task completed event (§3.2) ──────────────────────────────────────────
|
|
111
|
+
export const TaskCompletedEventSchema = z.object({
|
|
112
|
+
// Identity
|
|
113
|
+
eventId: z.string().uuid(),
|
|
114
|
+
route: z.enum(['delegate', 'audit', 'review', 'verify', 'debug', 'execute-plan', 'retry', 'investigate']),
|
|
115
|
+
client: z.string().regex(STRICT_ID_REGEX),
|
|
116
|
+
// Configuration
|
|
165
117
|
agentType: z.enum(['standard', 'complex']),
|
|
166
|
-
capabilities: z
|
|
167
|
-
.array(z.enum(['web_search', 'web_fetch', 'other']))
|
|
168
|
-
.max(3)
|
|
169
|
-
.refine(xs => new Set(xs).size === xs.length, 'unique'),
|
|
170
118
|
toolMode: z.enum(['none', 'readonly', 'no-shell', 'full']),
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
costBucket: z.enum(['$0', '<$0.01', '$0.01-$0.10', '$0.10-$1', '$1+']),
|
|
177
|
-
savedCostBucket: z.enum(['$0', '<$0.10', '$0.10-$1', '$1+', 'unknown']),
|
|
178
|
-
// Implementer model summary (top-level convenience; per-stage detail lives in `stages`)
|
|
179
|
-
implementerModelFamily: ModelFamily,
|
|
180
|
-
implementerModel: BoundedIdentifier,
|
|
119
|
+
capabilities: z.array(z.enum(['web_search', 'web_fetch', 'other'])).max(3),
|
|
120
|
+
reviewPolicy: z.enum(['full', 'quality_only', 'diff_only', 'none']),
|
|
121
|
+
verifyCommandPresent: z.boolean(),
|
|
122
|
+
// Model
|
|
123
|
+
implementerModel: z.string().regex(STRICT_ID_REGEX),
|
|
181
124
|
// Outcome
|
|
182
|
-
terminalStatus: z.enum([
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
'review_loop_aborted',
|
|
198
|
-
]),
|
|
199
|
-
errorCode: ErrorCode.nullable(), // populated when terminalStatus is a failure mode
|
|
200
|
-
// 3.5.0 lifecycle effectiveness
|
|
201
|
-
escalated: z.boolean(),
|
|
202
|
-
fallbackTriggered: z.boolean(),
|
|
203
|
-
// Tool-call profile — top 5 distinct tool names called during this task by count
|
|
204
|
-
// (allowlisted; non-listed tools become 'other'; never includes args/paths)
|
|
205
|
-
topToolNames: z.array(BoundedIdentifier).max(20),
|
|
206
|
-
// Per-stage breakdown — drives the lifecycle funnel + per-stage panels
|
|
207
|
-
stages: z.object({
|
|
208
|
-
implementing: StageStats,
|
|
209
|
-
verifying: VerifyStageStats,
|
|
210
|
-
spec_review: ReviewStageStats,
|
|
211
|
-
spec_rework: StageStats, // implementer re-runs after spec changes_required
|
|
212
|
-
quality_review: ReviewStageStats,
|
|
213
|
-
quality_rework: StageStats,
|
|
214
|
-
diff_review: ReviewStageStats.optional(), // diff-only policy; not always present
|
|
215
|
-
committing: StageStats,
|
|
216
|
-
}).strict(),
|
|
217
|
-
// v2 fields
|
|
218
|
-
filesWrittenBucket: z.enum(['0', '1-5', '6-20', '21-50', '51+']),
|
|
219
|
-
c2Promoted: z.boolean(),
|
|
220
|
-
workerSelfAssessment: z.enum(['done', 'done_with_concerns', 'needs_context', 'blocked', 'failed', 'review_loop_aborted']).nullable(),
|
|
221
|
-
concernCount: z.number().int().min(0).max(50),
|
|
125
|
+
terminalStatus: z.enum(['ok', 'incomplete', 'timeout', 'error', 'cost_exceeded', 'brief_too_vague', 'unavailable']),
|
|
126
|
+
workerStatus: z.enum(['done', 'done_with_concerns', 'needs_context', 'blocked', 'failed', 'review_loop_aborted']),
|
|
127
|
+
errorCode: ErrorCode.nullable(),
|
|
128
|
+
parentModelFamily: ModelFamilyEnum,
|
|
129
|
+
// Token economics
|
|
130
|
+
inputTokens: z.number().int().min(0).max(5_000_000),
|
|
131
|
+
outputTokens: z.number().int().min(0).max(500_000),
|
|
132
|
+
cachedTokens: z.number().int().min(0).max(5_000_000),
|
|
133
|
+
reasoningTokens: z.number().int().min(0).max(500_000),
|
|
134
|
+
// Run totals
|
|
135
|
+
totalDurationMs: z.number().int().min(0).max(86_400_000),
|
|
136
|
+
totalCostUSD: z.number().min(0).max(800),
|
|
137
|
+
totalSavedCostUSD: z.number().min(-800).max(800).nullable(),
|
|
138
|
+
// Lifecycle counts
|
|
139
|
+
concernCount: z.number().int().min(0).max(150),
|
|
222
140
|
escalationCount: z.number().int().min(0).max(20),
|
|
223
141
|
fallbackCount: z.number().int().min(0).max(20),
|
|
224
|
-
|
|
225
|
-
|
|
142
|
+
// Operational signals
|
|
143
|
+
stallCount: z.number().int().min(0).max(20),
|
|
144
|
+
taskMaxIdleMs: z.number().int().min(0).max(1_200_000).nullable(),
|
|
226
145
|
clarificationRequested: z.boolean(),
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
type: z.literal('session.started'),
|
|
232
|
-
configFlavor: z.object({
|
|
233
|
-
defaultTier: z.enum(['standard', 'complex']),
|
|
234
|
-
diagnosticsEnabled: z.boolean(),
|
|
235
|
-
autoUpdateSkills: z.boolean(),
|
|
236
|
-
}).strict(),
|
|
237
|
-
providersConfigured: z
|
|
238
|
-
.array(z.enum(['claude', 'openai-compatible', 'codex']))
|
|
239
|
-
.max(3)
|
|
240
|
-
.refine(xs => new Set(xs).size === xs.length, 'unique'),
|
|
241
|
-
}).strict();
|
|
242
|
-
export const InstallChangedEvent = z.object({
|
|
243
|
-
type: z.literal('install.changed'),
|
|
244
|
-
fromVersion: VersionString.nullable(),
|
|
245
|
-
toVersion: VersionString,
|
|
246
|
-
trigger: z.enum(['fresh_install', 'upgrade', 'downgrade']),
|
|
146
|
+
briefQualityWarningCount: z.number().int().min(0).max(20),
|
|
147
|
+
sandboxViolationCount: z.number().int().min(0).max(100),
|
|
148
|
+
// Stages array
|
|
149
|
+
stages: z.array(StageEntrySchema).min(0).max(8),
|
|
247
150
|
}).strict();
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
151
|
+
// ── Upload batch ─────────────────────────────────────────────────────────
|
|
152
|
+
export const UploadBatchSchema = z.object({
|
|
153
|
+
schemaVersion: z.literal(3),
|
|
154
|
+
installId: z.string().uuid(),
|
|
155
|
+
mmagentVersion: VersionString,
|
|
156
|
+
os: Os,
|
|
157
|
+
nodeMajor: z.number().int().min(22).max(99),
|
|
158
|
+
events: z.array(TaskCompletedEventSchema).min(1).max(500),
|
|
252
159
|
}).strict();
|
|
253
|
-
//
|
|
254
|
-
|
|
255
|
-
const
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
SessionStartedEvent.merge(TelemetryEventBase),
|
|
260
|
-
InstallChangedEvent.merge(TelemetryEventBase),
|
|
261
|
-
SkillInstalledEvent.merge(TelemetryEventBase),
|
|
262
|
-
])
|
|
263
|
-
.superRefine((event, ctx) => {
|
|
264
|
-
if (event.type !== 'task.completed')
|
|
265
|
-
return;
|
|
160
|
+
// ── Super-refinement: R1–R15 (§3.4) ──────────────────────────────────────
|
|
161
|
+
const qualityOnlyRoutes = new Set(['audit', 'review', 'verify', 'debug', 'investigate']);
|
|
162
|
+
const reviewedRoutes = new Set(['delegate', 'audit', 'review', 'verify', 'debug', 'execute-plan', 'investigate']);
|
|
163
|
+
const reworkStages = new Set(['spec_rework', 'quality_rework']);
|
|
164
|
+
const reviewStages = new Set(['spec_review', 'quality_review', 'diff_review']);
|
|
165
|
+
export const ValidatedTaskCompletedEventSchema = TaskCompletedEventSchema.superRefine((event, ctx) => {
|
|
266
166
|
// R1: ok terminalStatus implies non-failed worker outcome and no errorCode
|
|
267
167
|
if (event.terminalStatus === 'ok') {
|
|
268
168
|
if (!['done', 'done_with_concerns'].includes(event.workerStatus)) {
|
|
269
|
-
ctx.addIssue({
|
|
270
|
-
code: 'custom',
|
|
271
|
-
message: 'terminalStatus=ok requires workerStatus done|done_with_concerns',
|
|
272
|
-
});
|
|
169
|
+
ctx.addIssue({ code: 'custom', message: 'R1: terminalStatus=ok requires workerStatus done|done_with_concerns' });
|
|
273
170
|
}
|
|
274
171
|
if (event.errorCode !== null) {
|
|
275
|
-
ctx.addIssue({
|
|
276
|
-
code: 'custom',
|
|
277
|
-
message: 'terminalStatus=ok requires errorCode=null',
|
|
278
|
-
});
|
|
172
|
+
ctx.addIssue({ code: 'custom', message: 'R1: terminalStatus=ok requires errorCode=null' });
|
|
279
173
|
}
|
|
280
174
|
}
|
|
281
|
-
// R2:
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
verifyOutcome !== null &&
|
|
286
|
-
verifyOutcome !== 'not_applicable') {
|
|
287
|
-
ctx.addIssue({
|
|
288
|
-
code: 'custom',
|
|
289
|
-
message: 'stages.verifying.outcome must be null or not_applicable for non-verify routes',
|
|
290
|
-
});
|
|
175
|
+
// R2: stage count must be > 0 for ok/incomplete (brief_too_vague may have 0)
|
|
176
|
+
// R2.1: empty stages only allowed for brief_too_vague and error
|
|
177
|
+
if (event.stages.length === 0 && !['brief_too_vague', 'error'].includes(event.terminalStatus)) {
|
|
178
|
+
ctx.addIssue({ code: 'custom', message: 'R2.1: empty stages only allowed for brief_too_vague|error' });
|
|
291
179
|
}
|
|
292
|
-
// R3:
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
message: 'stages.spec_review.entered must be false for non-reviewed routes',
|
|
299
|
-
});
|
|
300
|
-
}
|
|
301
|
-
if (event.stages.quality_review.entered) {
|
|
302
|
-
ctx.addIssue({
|
|
303
|
-
code: 'custom',
|
|
304
|
-
message: 'stages.quality_review.entered must be false for non-reviewed routes',
|
|
305
|
-
});
|
|
306
|
-
}
|
|
307
|
-
if (event.stages.diff_review?.entered) {
|
|
308
|
-
ctx.addIssue({
|
|
309
|
-
code: 'custom',
|
|
310
|
-
message: 'stages.diff_review.entered must be false (or stages.diff_review absent) for non-reviewed routes',
|
|
311
|
-
});
|
|
312
|
-
}
|
|
180
|
+
// R3: concernCount must not exceed the sum of findingsBySeverity bins + a tolerance
|
|
181
|
+
// Per plan Task 6, concernCount is capped separately; the invariant is sum(bins) ≤ concernCount
|
|
182
|
+
// R4: totalDurationMs >= sum of stage durationMs (not strictly equal due to overhead)
|
|
183
|
+
const stageDurationSum = event.stages.reduce((s, st) => s + st.durationMs, 0);
|
|
184
|
+
if (stageDurationSum > event.totalDurationMs) {
|
|
185
|
+
ctx.addIssue({ code: 'custom', message: 'R4: sum of stage durationMs must not exceed totalDurationMs' });
|
|
313
186
|
}
|
|
314
|
-
//
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
const reviewDirty = ('verdict' in st && st.verdict !== null) ||
|
|
332
|
-
('roundsUsed' in st && st.roundsUsed !== null) ||
|
|
333
|
-
('concernCategories' in st &&
|
|
334
|
-
st.concernCategories !== null);
|
|
335
|
-
const verifyDirty = ('outcome' in st && st.outcome !== null) ||
|
|
336
|
-
('skipReason' in st && st.skipReason !== null);
|
|
337
|
-
if (baseDirty || reviewDirty || verifyDirty) {
|
|
338
|
-
ctx.addIssue({
|
|
339
|
-
code: 'custom',
|
|
340
|
-
message: `stages.${name} sub-fields must be null when entered=false`,
|
|
341
|
-
});
|
|
342
|
-
}
|
|
187
|
+
// R5: top-level token counts = sum of stage token counts
|
|
188
|
+
const tokenSum = event.stages.reduce((acc, st) => ({
|
|
189
|
+
input: acc.input + st.inputTokens,
|
|
190
|
+
output: acc.output + st.outputTokens,
|
|
191
|
+
cached: acc.cached + st.cachedTokens,
|
|
192
|
+
reasoning: acc.reasoning + st.reasoningTokens,
|
|
193
|
+
}), { input: 0, output: 0, cached: 0, reasoning: 0 });
|
|
194
|
+
if (tokenSum.input !== event.inputTokens ||
|
|
195
|
+
tokenSum.output !== event.outputTokens ||
|
|
196
|
+
tokenSum.cached !== event.cachedTokens ||
|
|
197
|
+
tokenSum.reasoning !== event.reasoningTokens) {
|
|
198
|
+
ctx.addIssue({ code: 'custom', message: 'R5: token sums must equal top-level totals' });
|
|
199
|
+
}
|
|
200
|
+
// R5b: per stage, reasoningTokens ≤ outputTokens (subset semantics)
|
|
201
|
+
for (const st of event.stages) {
|
|
202
|
+
if (st.reasoningTokens > st.outputTokens) {
|
|
203
|
+
ctx.addIssue({ code: 'custom', message: 'R5b: reasoningTokens must not exceed outputTokens per stage' });
|
|
343
204
|
}
|
|
344
205
|
}
|
|
345
|
-
//
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
// verdict legitimately surfaces an empty list.
|
|
350
|
-
for (const [name, st] of Object.entries(event.stages)) {
|
|
351
|
-
if (!st || !st.entered)
|
|
352
|
-
continue;
|
|
353
|
-
const baseMissing = st.durationBucket === null ||
|
|
354
|
-
st.costBucket === null ||
|
|
355
|
-
st.agentTier === null ||
|
|
356
|
-
st.modelFamily === null ||
|
|
357
|
-
st.model === null;
|
|
358
|
-
if (baseMissing) {
|
|
359
|
-
ctx.addIssue({
|
|
360
|
-
code: 'custom',
|
|
361
|
-
message: `stages.${name} base sub-fields must be non-null when entered=true`,
|
|
362
|
-
});
|
|
206
|
+
// R6: per stage, cachedTokens ≤ inputTokens (cached is subset of input)
|
|
207
|
+
for (const st of event.stages) {
|
|
208
|
+
if (st.cachedTokens > st.inputTokens) {
|
|
209
|
+
ctx.addIssue({ code: 'custom', message: 'R6: cachedTokens must not exceed inputTokens per stage' });
|
|
363
210
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
211
|
+
}
|
|
212
|
+
// R7: totalCostUSD = sum of stage costUSD (float comparison with tolerance)
|
|
213
|
+
const costSum = event.stages.reduce((s, st) => s + st.costUSD, 0);
|
|
214
|
+
if (Math.abs(costSum - event.totalCostUSD) > 0.02) {
|
|
215
|
+
ctx.addIssue({ code: 'custom', message: 'R7: totalCostUSD must approximately equal sum of stage costUSD' });
|
|
216
|
+
}
|
|
217
|
+
// R8: verification outcome only on delegate, execute-plan, verify routes
|
|
218
|
+
const verifyRoutes = new Set(['delegate', 'execute-plan', 'verify']);
|
|
219
|
+
for (const st of event.stages) {
|
|
220
|
+
if (st.name === 'verifying' && !verifyRoutes.has(event.route)) {
|
|
221
|
+
ctx.addIssue({ code: 'custom', message: 'R8: verifying stage only allowed on delegate|execute-plan|verify routes' });
|
|
369
222
|
}
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
});
|
|
223
|
+
}
|
|
224
|
+
// R9: review stages only on reviewed routes
|
|
225
|
+
for (const st of event.stages) {
|
|
226
|
+
if (reviewStages.has(st.name) && !reviewedRoutes.has(event.route)) {
|
|
227
|
+
ctx.addIssue({ code: 'custom', message: `R9: ${st.name} stage only allowed on reviewed routes` });
|
|
375
228
|
}
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
229
|
+
}
|
|
230
|
+
// R10: quality_only routes must not have spec_review, diff_review, or rework stages
|
|
231
|
+
// R10b: no rework on quality_only
|
|
232
|
+
// R10c: annotated verdict only on quality_only routes
|
|
233
|
+
for (const st of event.stages) {
|
|
234
|
+
if (qualityOnlyRoutes.has(event.route)) {
|
|
235
|
+
if (reviewStages.has(st.name) && st.name !== 'quality_review') {
|
|
236
|
+
ctx.addIssue({ code: 'custom', message: 'R10: non-quality review stage on quality_only route' });
|
|
237
|
+
}
|
|
238
|
+
if (reworkStages.has(st.name)) {
|
|
239
|
+
ctx.addIssue({ code: 'custom', message: 'R10b: rework stages not allowed on quality_only routes' });
|
|
240
|
+
}
|
|
381
241
|
}
|
|
382
|
-
if ('
|
|
383
|
-
|
|
384
|
-
st.skipReason === null) {
|
|
385
|
-
ctx.addIssue({
|
|
386
|
-
code: 'custom',
|
|
387
|
-
message: `stages.${name}.skipReason must be non-null when outcome='skipped'`,
|
|
388
|
-
});
|
|
242
|
+
if ('verdict' in st && st.verdict === 'annotated' && !qualityOnlyRoutes.has(event.route)) {
|
|
243
|
+
ctx.addIssue({ code: 'custom', message: 'R10c: annotated verdict only allowed on quality_only routes' });
|
|
389
244
|
}
|
|
390
245
|
}
|
|
246
|
+
// R11: concernCount in [0, 150], escalationCount in [0, 20], fallbackCount in [0, 20]
|
|
247
|
+
// (enforced by Zod schema bounds)
|
|
248
|
+
// R12: stallCount in [0, 20], sandboxViolationCount in [0, 100]
|
|
249
|
+
// (enforced by Zod schema bounds)
|
|
250
|
+
// R13: totalDurationMs in [0, 86_400_000]
|
|
251
|
+
// (enforced by Zod schema bounds)
|
|
252
|
+
// R14: totalCostUSD in [0, 800], totalSavedCostUSD in [-800, 800] or null
|
|
253
|
+
// (enforced by Zod schema bounds)
|
|
254
|
+
// R15: costUSD per stage in [0, 100]
|
|
255
|
+
// (enforced by Zod schema bounds)
|
|
391
256
|
});
|
|
392
|
-
// The complete uploadable envelope.
|
|
393
|
-
export const UploadBatch = z.object({
|
|
394
|
-
schemaVersion: z.literal(SCHEMA_VERSION),
|
|
395
|
-
install: InstallMetadata,
|
|
396
|
-
events: z.array(TelemetryEvent).min(1).max(500),
|
|
397
|
-
}).strict();
|
|
398
257
|
//# sourceMappingURL=types.js.map
|