gyoshu 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +363 -0
- package/README.md +1 -0
- package/package.json +1 -1
- package/src/agent/baksa.md +81 -0
- package/src/agent/gyoshu.md +180 -0
- package/src/agent/jogyo.md +55 -0
- package/src/lib/goal-gates.ts +753 -0
- package/src/lib/notebook-frontmatter.ts +307 -40
- package/src/tool/gyoshu-completion.ts +53 -0
|
@@ -55,6 +55,72 @@ export interface RunEntry {
|
|
|
55
55
|
notes?: string;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
+
// =============================================================================
|
|
59
|
+
// GOAL CONTRACT TYPES (Two-Gate System)
|
|
60
|
+
// =============================================================================
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Kind of acceptance criterion for goal completion; it selects which of the optional AcceptanceCriterion fields validateGoalContract requires.
|
|
64
|
+
*/
|
|
65
|
+
export type AcceptanceCriteriaKind =
|
|
66
|
+
| "metric_threshold" // e.g., accuracy >= 0.90 (requires metric, op, target)
|
|
67
|
+
| "marker_required" // e.g., [METRIC:baseline_accuracy] must exist (requires marker)
|
|
68
|
+
| "artifact_exists" // e.g., model.pkl must be created (requires artifactPattern)
|
|
69
|
+
| "finding_count"; // e.g., at least 3 verified findings (requires minCount)
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Comparison operators for metric thresholds; these are the only values accepted for AcceptanceCriterion.op on a "metric_threshold" criterion.
|
|
73
|
+
*/
|
|
74
|
+
export type ComparisonOperator = ">=" | ">" | "<=" | "<" | "==" | "!=";
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Single acceptance criterion for goal completion.
|
|
78
|
+
*
|
|
79
|
+
* Kept flat (no nested objects) for YAML parser compatibility. Because of that, the kind-specific fields are all optional in the type; validateGoalContract checks that the fields matching `kind` are present.
|
|
80
|
+
*/
|
|
81
|
+
export interface AcceptanceCriterion {
|
|
82
|
+
/** Unique identifier, e.g., "AC1", "AC2"; must be a non-empty string to pass validateGoalContract */
|
|
83
|
+
id: string;
|
|
84
|
+
/** Type of acceptance criterion; determines which of the optional fields below are required */
|
|
85
|
+
kind: AcceptanceCriteriaKind;
|
|
86
|
+
/** Human-readable description (not checked by validateGoalContract) */
|
|
87
|
+
description?: string;
|
|
88
|
+
// For metric_threshold (metric, op and target are all required for this kind)
|
|
89
|
+
/** Metric name, e.g., "cv_accuracy_mean" */
|
|
90
|
+
metric?: string;
|
|
91
|
+
/** Comparison operator; must be one of the ComparisonOperator values */
|
|
92
|
+
op?: ComparisonOperator;
|
|
93
|
+
/** Target threshold value the metric is compared against */
|
|
94
|
+
target?: number;
|
|
95
|
+
// For marker_required (marker is required for this kind)
|
|
96
|
+
/** Marker pattern, e.g., "METRIC:baseline_accuracy" */
|
|
97
|
+
marker?: string;
|
|
98
|
+
// For artifact_exists (artifactPattern is required for this kind)
|
|
99
|
+
/** Glob pattern for artifact, e.g., "*.pkl" */
|
|
100
|
+
artifactPattern?: string;
|
|
101
|
+
// For finding_count (minCount is required for this kind)
|
|
102
|
+
/** Minimum required findings; validateGoalContract rejects negative values */
|
|
103
|
+
minCount?: number;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Goal contract defining acceptance criteria for research completion.
|
|
108
|
+
*
|
|
109
|
+
* Stored in notebook frontmatter under gyoshu.goal_contract. Use validateGoalContract to check a contract's structure.
|
|
110
|
+
*/
|
|
111
|
+
export interface GoalContract {
|
|
112
|
+
/** Schema version for goal contracts (currently 1); validateGoalContract rejects any other value */
|
|
113
|
+
version: number;
|
|
114
|
+
/** Original user goal text; must be a non-empty string to pass validateGoalContract */
|
|
115
|
+
goal_text: string;
|
|
116
|
+
/** Goal classification, e.g., "ml_classification", "eda", "hypothesis_test" */
|
|
117
|
+
goal_type?: string;
|
|
118
|
+
/** List of acceptance criteria; an empty list passes validateGoalContract */
|
|
119
|
+
acceptance_criteria: AcceptanceCriterion[];
|
|
120
|
+
/** Maximum attempts before escalation (default: 3); must be >= 1 if provided */
|
|
121
|
+
max_goal_attempts?: number;
|
|
122
|
+
}
|
|
123
|
+
|
|
58
124
|
/**
|
|
59
125
|
* Gyoshu-specific frontmatter stored in the `gyoshu:` namespace.
|
|
60
126
|
*/
|
|
@@ -81,6 +147,8 @@ export interface GyoshuFrontmatter {
|
|
|
81
147
|
outputs_dir?: string;
|
|
82
148
|
/** Run history - bounded to last 10 runs (optional) */
|
|
83
149
|
runs?: RunEntry[];
|
|
150
|
+
/** Goal contract for Two-Gate acceptance criteria (optional) */
|
|
151
|
+
goal_contract?: GoalContract;
|
|
84
152
|
}
|
|
85
153
|
|
|
86
154
|
/**
|
|
@@ -119,30 +187,46 @@ export function parseSimpleYaml(yamlString: string): Record<string, unknown> {
|
|
|
119
187
|
let currentArray: unknown[] | null = null;
|
|
120
188
|
let currentArrayKey: string | null = null;
|
|
121
189
|
let arrayItemBuffer: Record<string, unknown> | null = null;
|
|
190
|
+
let level3Object: Record<string, unknown> | null = null;
|
|
191
|
+
let level3Key: string | null = null;
|
|
192
|
+
let level3Array: unknown[] | null = null;
|
|
193
|
+
let level3ArrayKey: string | null = null;
|
|
194
|
+
let level3ArrayItemBuffer: Record<string, unknown> | null = null;
|
|
122
195
|
|
|
123
196
|
for (let i = 0; i < lines.length; i++) {
|
|
124
197
|
const line = lines[i];
|
|
125
198
|
const trimmed = line.trimEnd();
|
|
126
199
|
|
|
127
|
-
// Skip empty lines and comments
|
|
128
200
|
if (trimmed === "" || trimmed.startsWith("#")) {
|
|
129
201
|
continue;
|
|
130
202
|
}
|
|
131
203
|
|
|
132
|
-
// Detect indentation level
|
|
133
204
|
const indent = line.length - line.trimStart().length;
|
|
134
205
|
|
|
135
|
-
|
|
136
|
-
const arrayItemMatch = trimmed.match(/^(\s*)- (.+)$/);
|
|
206
|
+
const arrayItemMatch = trimmed.match(/^\s*- (.+)$/);
|
|
137
207
|
if (arrayItemMatch) {
|
|
138
|
-
const itemContent = arrayItemMatch[
|
|
139
|
-
|
|
140
|
-
// Check if this is a key-value pair within an array item
|
|
208
|
+
const itemContent = arrayItemMatch[1].trim();
|
|
141
209
|
const kvMatch = itemContent.match(/^([^:]+):\s*(.*)$/);
|
|
210
|
+
|
|
211
|
+
if (level3ArrayKey && level3Array) {
|
|
212
|
+
if (kvMatch) {
|
|
213
|
+
if (level3ArrayItemBuffer) {
|
|
214
|
+
level3Array.push(level3ArrayItemBuffer);
|
|
215
|
+
}
|
|
216
|
+
level3ArrayItemBuffer = {};
|
|
217
|
+
level3ArrayItemBuffer[kvMatch[1].trim()] = parseYamlValue(kvMatch[2].trim());
|
|
218
|
+
} else {
|
|
219
|
+
if (level3ArrayItemBuffer) {
|
|
220
|
+
level3Array.push(level3ArrayItemBuffer);
|
|
221
|
+
level3ArrayItemBuffer = null;
|
|
222
|
+
}
|
|
223
|
+
level3Array.push(parseYamlValue(itemContent));
|
|
224
|
+
}
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
227
|
+
|
|
142
228
|
if (kvMatch && currentArrayKey) {
|
|
143
|
-
// Start of a new object in the array
|
|
144
229
|
if (arrayItemBuffer) {
|
|
145
|
-
// Save previous buffer
|
|
146
230
|
currentArray?.push(arrayItemBuffer);
|
|
147
231
|
}
|
|
148
232
|
arrayItemBuffer = {};
|
|
@@ -150,7 +234,6 @@ export function parseSimpleYaml(yamlString: string): Record<string, unknown> {
|
|
|
150
234
|
const value = parseYamlValue(kvMatch[2].trim());
|
|
151
235
|
arrayItemBuffer[key] = value;
|
|
152
236
|
} else if (currentArrayKey && currentArray) {
|
|
153
|
-
// Simple string array item
|
|
154
237
|
if (arrayItemBuffer) {
|
|
155
238
|
currentArray.push(arrayItemBuffer);
|
|
156
239
|
arrayItemBuffer = null;
|
|
@@ -160,8 +243,15 @@ export function parseSimpleYaml(yamlString: string): Record<string, unknown> {
|
|
|
160
243
|
continue;
|
|
161
244
|
}
|
|
162
245
|
|
|
163
|
-
|
|
164
|
-
|
|
246
|
+
if (level3ArrayItemBuffer && indent >= 8) {
|
|
247
|
+
const kvMatch = trimmed.match(/^([^:]+):\s*(.*)$/);
|
|
248
|
+
if (kvMatch) {
|
|
249
|
+
level3ArrayItemBuffer[kvMatch[1].trim()] = parseYamlValue(kvMatch[2].trim());
|
|
250
|
+
continue;
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if (arrayItemBuffer && indent >= 6 && !level3Object) {
|
|
165
255
|
const kvMatch = trimmed.match(/^([^:]+):\s*(.*)$/);
|
|
166
256
|
if (kvMatch) {
|
|
167
257
|
const key = kvMatch[1].trim();
|
|
@@ -171,62 +261,103 @@ export function parseSimpleYaml(yamlString: string): Record<string, unknown> {
|
|
|
171
261
|
}
|
|
172
262
|
}
|
|
173
263
|
|
|
174
|
-
// Check for key: value pair
|
|
175
264
|
const kvMatch = trimmed.match(/^([^:]+):\s*(.*)$/);
|
|
176
265
|
if (kvMatch) {
|
|
177
266
|
const key = kvMatch[1].trim();
|
|
178
267
|
const value = kvMatch[2].trim();
|
|
179
268
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
arrayItemBuffer = null;
|
|
269
|
+
if (level3Array && level3ArrayKey) {
|
|
270
|
+
if (level3ArrayItemBuffer) {
|
|
271
|
+
level3Array.push(level3ArrayItemBuffer);
|
|
272
|
+
level3ArrayItemBuffer = null;
|
|
185
273
|
}
|
|
186
|
-
if (
|
|
187
|
-
|
|
188
|
-
} else {
|
|
189
|
-
result[currentArrayKey] = currentArray;
|
|
274
|
+
if (level3Object) {
|
|
275
|
+
level3Object[level3ArrayKey] = level3Array;
|
|
190
276
|
}
|
|
191
|
-
|
|
192
|
-
|
|
277
|
+
level3Array = null;
|
|
278
|
+
level3ArrayKey = null;
|
|
193
279
|
}
|
|
194
280
|
|
|
195
|
-
// Handle based on indentation
|
|
196
281
|
if (indent === 0) {
|
|
197
|
-
|
|
282
|
+
if (level3Object && level3Key && currentObject) {
|
|
283
|
+
currentObject[level3Key] = level3Object;
|
|
284
|
+
level3Object = null;
|
|
285
|
+
level3Key = null;
|
|
286
|
+
}
|
|
287
|
+
if (currentArray && currentArrayKey) {
|
|
288
|
+
if (arrayItemBuffer) {
|
|
289
|
+
currentArray.push(arrayItemBuffer);
|
|
290
|
+
arrayItemBuffer = null;
|
|
291
|
+
}
|
|
292
|
+
if (currentObject) {
|
|
293
|
+
currentObject[currentArrayKey] = currentArray;
|
|
294
|
+
} else {
|
|
295
|
+
result[currentArrayKey] = currentArray;
|
|
296
|
+
}
|
|
297
|
+
currentArray = null;
|
|
298
|
+
currentArrayKey = null;
|
|
299
|
+
}
|
|
198
300
|
currentObject = null;
|
|
199
301
|
currentKey = key;
|
|
200
302
|
|
|
201
303
|
if (value === "") {
|
|
202
|
-
// Start of a nested object or array
|
|
203
|
-
// Look ahead to see if it's an array
|
|
204
304
|
if (i + 1 < lines.length && lines[i + 1].trim().startsWith("-")) {
|
|
205
305
|
currentArray = [];
|
|
206
306
|
currentArrayKey = key;
|
|
207
307
|
} else {
|
|
208
|
-
// Nested object
|
|
209
308
|
currentObject = {};
|
|
210
309
|
result[key] = currentObject;
|
|
211
310
|
}
|
|
212
311
|
} else {
|
|
213
312
|
result[key] = parseYamlValue(value);
|
|
214
313
|
}
|
|
215
|
-
} else if (indent
|
|
216
|
-
|
|
314
|
+
} else if (indent === 2 && currentObject) {
|
|
315
|
+
if (level3Object && level3Key) {
|
|
316
|
+
currentObject[level3Key] = level3Object;
|
|
317
|
+
level3Object = null;
|
|
318
|
+
level3Key = null;
|
|
319
|
+
}
|
|
320
|
+
if (currentArray && currentArrayKey) {
|
|
321
|
+
if (arrayItemBuffer) {
|
|
322
|
+
currentArray.push(arrayItemBuffer);
|
|
323
|
+
arrayItemBuffer = null;
|
|
324
|
+
}
|
|
325
|
+
currentObject[currentArrayKey] = currentArray;
|
|
326
|
+
currentArray = null;
|
|
327
|
+
currentArrayKey = null;
|
|
328
|
+
}
|
|
217
329
|
if (value === "") {
|
|
218
|
-
// Check if it's an array
|
|
219
330
|
if (i + 1 < lines.length && lines[i + 1].trim().startsWith("-")) {
|
|
220
331
|
currentArray = [];
|
|
221
332
|
currentArrayKey = key;
|
|
222
333
|
} else {
|
|
223
|
-
|
|
334
|
+
level3Object = {};
|
|
335
|
+
level3Key = key;
|
|
224
336
|
}
|
|
225
337
|
} else {
|
|
226
338
|
currentObject[key] = parseYamlValue(value);
|
|
227
339
|
}
|
|
340
|
+
} else if (indent === 4 && level3Object) {
|
|
341
|
+
if (level3Array && level3ArrayKey) {
|
|
342
|
+
if (level3ArrayItemBuffer) {
|
|
343
|
+
level3Array.push(level3ArrayItemBuffer);
|
|
344
|
+
level3ArrayItemBuffer = null;
|
|
345
|
+
}
|
|
346
|
+
level3Object[level3ArrayKey] = level3Array;
|
|
347
|
+
level3Array = null;
|
|
348
|
+
level3ArrayKey = null;
|
|
349
|
+
}
|
|
350
|
+
if (value === "") {
|
|
351
|
+
if (i + 1 < lines.length && lines[i + 1].trim().startsWith("-")) {
|
|
352
|
+
level3Array = [];
|
|
353
|
+
level3ArrayKey = key;
|
|
354
|
+
} else {
|
|
355
|
+
level3Object[key] = null;
|
|
356
|
+
}
|
|
357
|
+
} else {
|
|
358
|
+
level3Object[key] = parseYamlValue(value);
|
|
359
|
+
}
|
|
228
360
|
} else if (indent >= 2 && !currentObject && value === "") {
|
|
229
|
-
// This might be a top-level array
|
|
230
361
|
if (i + 1 < lines.length && lines[i + 1].trim().startsWith("-")) {
|
|
231
362
|
currentArray = [];
|
|
232
363
|
currentArrayKey = key;
|
|
@@ -235,7 +366,17 @@ export function parseSimpleYaml(yamlString: string): Record<string, unknown> {
|
|
|
235
366
|
}
|
|
236
367
|
}
|
|
237
368
|
|
|
238
|
-
|
|
369
|
+
if (level3Array && level3ArrayKey) {
|
|
370
|
+
if (level3ArrayItemBuffer) {
|
|
371
|
+
level3Array.push(level3ArrayItemBuffer);
|
|
372
|
+
}
|
|
373
|
+
if (level3Object) {
|
|
374
|
+
level3Object[level3ArrayKey] = level3Array;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
if (level3Object && level3Key && currentObject) {
|
|
378
|
+
currentObject[level3Key] = level3Object;
|
|
379
|
+
}
|
|
239
380
|
if (currentArray && currentArrayKey) {
|
|
240
381
|
if (arrayItemBuffer) {
|
|
241
382
|
currentArray.push(arrayItemBuffer);
|
|
@@ -296,7 +437,6 @@ export function serializeToYaml(obj: Record<string, unknown>): string {
|
|
|
296
437
|
}
|
|
297
438
|
|
|
298
439
|
if (typeof value === "object" && !Array.isArray(value)) {
|
|
299
|
-
// Nested object
|
|
300
440
|
lines.push(`${key}:`);
|
|
301
441
|
const nested = value as Record<string, unknown>;
|
|
302
442
|
for (const [nestedKey, nestedValue] of Object.entries(nested)) {
|
|
@@ -305,11 +445,9 @@ export function serializeToYaml(obj: Record<string, unknown>): string {
|
|
|
305
445
|
}
|
|
306
446
|
|
|
307
447
|
if (Array.isArray(nestedValue)) {
|
|
308
|
-
// Array within nested object
|
|
309
448
|
lines.push(` ${nestedKey}:`);
|
|
310
449
|
for (const item of nestedValue) {
|
|
311
450
|
if (typeof item === "object" && item !== null) {
|
|
312
|
-
// Object array item
|
|
313
451
|
const objItem = item as Record<string, unknown>;
|
|
314
452
|
const entries = Object.entries(objItem);
|
|
315
453
|
if (entries.length > 0) {
|
|
@@ -323,16 +461,45 @@ export function serializeToYaml(obj: Record<string, unknown>): string {
|
|
|
323
461
|
}
|
|
324
462
|
}
|
|
325
463
|
} else {
|
|
326
|
-
// Simple array item
|
|
327
464
|
lines.push(` - ${formatYamlValue(item)}`);
|
|
328
465
|
}
|
|
329
466
|
}
|
|
467
|
+
} else if (typeof nestedValue === "object") {
|
|
468
|
+
lines.push(` ${nestedKey}:`);
|
|
469
|
+
const level3Obj = nestedValue as Record<string, unknown>;
|
|
470
|
+
for (const [l3Key, l3Value] of Object.entries(level3Obj)) {
|
|
471
|
+
if (l3Value === null || l3Value === undefined) {
|
|
472
|
+
continue;
|
|
473
|
+
}
|
|
474
|
+
if (Array.isArray(l3Value)) {
|
|
475
|
+
lines.push(` ${l3Key}:`);
|
|
476
|
+
for (const item of l3Value) {
|
|
477
|
+
if (typeof item === "object" && item !== null) {
|
|
478
|
+
const objItem = item as Record<string, unknown>;
|
|
479
|
+
const entries = Object.entries(objItem);
|
|
480
|
+
if (entries.length > 0) {
|
|
481
|
+
const [firstKey, firstValue] = entries[0];
|
|
482
|
+
lines.push(` - ${firstKey}: ${formatYamlValue(firstValue)}`);
|
|
483
|
+
for (let i = 1; i < entries.length; i++) {
|
|
484
|
+
const [k, v] = entries[i];
|
|
485
|
+
if (v !== null && v !== undefined) {
|
|
486
|
+
lines.push(` ${k}: ${formatYamlValue(v)}`);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
} else {
|
|
491
|
+
lines.push(` - ${formatYamlValue(item)}`);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
} else {
|
|
495
|
+
lines.push(` ${l3Key}: ${formatYamlValue(l3Value)}`);
|
|
496
|
+
}
|
|
497
|
+
}
|
|
330
498
|
} else {
|
|
331
499
|
lines.push(` ${nestedKey}: ${formatYamlValue(nestedValue)}`);
|
|
332
500
|
}
|
|
333
501
|
}
|
|
334
502
|
} else if (Array.isArray(value)) {
|
|
335
|
-
// Top-level array
|
|
336
503
|
lines.push(`${key}:`);
|
|
337
504
|
for (const item of value) {
|
|
338
505
|
if (typeof item === "object" && item !== null) {
|
|
@@ -542,6 +709,21 @@ export function extractFrontmatter(notebook: Notebook): GyoshuFrontmatter | null
|
|
|
542
709
|
frontmatter.runs = gyoshu.runs as RunEntry[];
|
|
543
710
|
}
|
|
544
711
|
|
|
712
|
+
if (gyoshu.goal_contract && typeof gyoshu.goal_contract === "object") {
|
|
713
|
+
const gc = gyoshu.goal_contract as Record<string, unknown>;
|
|
714
|
+
if (typeof gc.version === "number" && typeof gc.goal_text === "string") {
|
|
715
|
+
frontmatter.goal_contract = {
|
|
716
|
+
version: gc.version,
|
|
717
|
+
goal_text: gc.goal_text,
|
|
718
|
+
goal_type: typeof gc.goal_type === "string" ? gc.goal_type : undefined,
|
|
719
|
+
acceptance_criteria: Array.isArray(gc.acceptance_criteria)
|
|
720
|
+
? (gc.acceptance_criteria as AcceptanceCriterion[])
|
|
721
|
+
: [],
|
|
722
|
+
max_goal_attempts: typeof gc.max_goal_attempts === "number" ? gc.max_goal_attempts : undefined,
|
|
723
|
+
};
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
|
|
545
727
|
return frontmatter;
|
|
546
728
|
} catch (error) {
|
|
547
729
|
console.debug(`[notebook-frontmatter] Failed to parse YAML in extractFrontmatter: ${error}`);
|
|
@@ -760,6 +942,91 @@ export function validateFrontmatter(
|
|
|
760
942
|
};
|
|
761
943
|
}
|
|
762
944
|
|
|
945
|
+
/** Acceptance-criterion kinds that validateGoalContract accepts. */
const VALID_CRITERIA_KINDS: AcceptanceCriteriaKind[] = [
  "metric_threshold",
  "marker_required",
  "artifact_exists",
  "finding_count",
];

/** Comparison operators that a metric_threshold criterion may use in `op`. */
const VALID_OPERATORS: ComparisonOperator[] = [">=", ">", "<=", "<", "==", "!="];

/**
 * Validate the structure of a goal contract.
 *
 * The checks are structural only: schema version, field types, and the
 * presence of the fields each criterion kind needs. Nothing is evaluated
 * against actual research output here.
 *
 * The static `GoalContract` type is not enforced at runtime, so this function
 * never assumes the shape it is handed: a non-object contract or a non-object
 * entry in `acceptance_criteria` is reported as an error instead of throwing.
 *
 * @param contract - Goal contract to check.
 * @returns `errors` lists every problem found (empty when none); `isValid`
 *   is true exactly when `errors` is empty.
 */
export function validateGoalContract(
  contract: GoalContract
): { isValid: boolean; errors: string[] } {
  const errors: string[] = [];

  // `typeof NaN === "number"`, so a bare typeof check would let NaN through.
  const isRealNumber = (value: unknown): value is number =>
    typeof value === "number" && !Number.isNaN(value);

  // Nothing else can be checked on a non-object, so report and stop.
  if (contract === null || typeof contract !== "object") {
    return { isValid: false, errors: ["Goal contract must be an object"] };
  }

  if (contract.version !== 1) {
    errors.push(`Unsupported goal contract version: ${contract.version}. Expected: 1`);
  }

  if (!contract.goal_text || typeof contract.goal_text !== "string") {
    errors.push("Missing or invalid goal_text");
  }

  if (contract.goal_type !== undefined && typeof contract.goal_type !== "string") {
    errors.push("Invalid goal_type (must be string if provided)");
  }

  if (contract.max_goal_attempts !== undefined) {
    if (!isRealNumber(contract.max_goal_attempts) || contract.max_goal_attempts < 1) {
      errors.push("Invalid max_goal_attempts (must be positive number if provided)");
    }
  }

  if (!Array.isArray(contract.acceptance_criteria)) {
    errors.push("acceptance_criteria must be an array");
  } else {
    for (let i = 0; i < contract.acceptance_criteria.length; i++) {
      const criterion: AcceptanceCriterion | null | undefined = contract.acceptance_criteria[i];
      const prefix = `Criterion ${i}`;

      // A null, missing, or scalar entry has no fields to inspect; reading
      // `.id` on null/undefined would throw, so report it and move on.
      if (criterion === null || typeof criterion !== "object") {
        errors.push(`${prefix}: must be an object`);
        continue;
      }

      if (!criterion.id || typeof criterion.id !== "string") {
        errors.push(`${prefix}: missing or invalid id`);
      }

      if (!VALID_CRITERIA_KINDS.includes(criterion.kind)) {
        errors.push(`${prefix}: invalid kind '${criterion.kind}'`);
      }

      // Kind-specific required fields. An unknown kind was already reported
      // above and has no further requirements.
      switch (criterion.kind) {
        case "metric_threshold":
          if (!criterion.metric || typeof criterion.metric !== "string") {
            errors.push(`${prefix}: metric_threshold requires 'metric' field`);
          }
          if (!criterion.op || !VALID_OPERATORS.includes(criterion.op)) {
            errors.push(`${prefix}: metric_threshold requires valid 'op' field`);
          }
          if (!isRealNumber(criterion.target)) {
            errors.push(`${prefix}: metric_threshold requires 'target' number`);
          }
          break;
        case "marker_required":
          if (!criterion.marker || typeof criterion.marker !== "string") {
            errors.push(`${prefix}: marker_required requires 'marker' field`);
          }
          break;
        case "artifact_exists":
          if (!criterion.artifactPattern || typeof criterion.artifactPattern !== "string") {
            errors.push(`${prefix}: artifact_exists requires 'artifactPattern' field`);
          }
          break;
        case "finding_count":
          if (!isRealNumber(criterion.minCount) || criterion.minCount < 0) {
            errors.push(`${prefix}: finding_count requires 'minCount' non-negative number`);
          }
          break;
        default:
          break;
      }
    }
  }

  return {
    isValid: errors.length === 0,
    errors,
  };
}
|
|
1029
|
+
|
|
763
1030
|
/**
|
|
764
1031
|
* Check if a notebook has valid Gyoshu frontmatter.
|
|
765
1032
|
*
|
|
@@ -11,6 +11,8 @@ import { getLegacyManifestPath, getNotebookPath } from "../lib/paths";
|
|
|
11
11
|
import { gatherReportContext, ReportContext, generateReport } from "../lib/report-markdown";
|
|
12
12
|
import { exportToPdf, PdfExportResult } from "../lib/pdf-export";
|
|
13
13
|
import { runQualityGates, QualityGateResult } from "../lib/quality-gates";
|
|
14
|
+
import { evaluateGoalGate, recommendPivot, GoalGateResult } from "../lib/goal-gates";
|
|
15
|
+
import { extractFrontmatter, GyoshuFrontmatter } from "../lib/notebook-frontmatter";
|
|
14
16
|
import type { Notebook } from "../lib/cell-identity";
|
|
15
17
|
|
|
16
18
|
interface KeyResult {
|
|
@@ -326,6 +328,8 @@ export default tool({
|
|
|
326
328
|
const valid = !hasErrors(warnings);
|
|
327
329
|
|
|
328
330
|
let qualityGateResult: QualityGateResult | undefined;
|
|
331
|
+
let goalGateResult: GoalGateResult | undefined;
|
|
332
|
+
let frontmatter: GyoshuFrontmatter | null = null;
|
|
329
333
|
let adjustedStatus = status;
|
|
330
334
|
|
|
331
335
|
// Fix 1: SUCCESS status requires reportTitle for quality gate validation
|
|
@@ -358,11 +362,37 @@ export default tool({
|
|
|
358
362
|
}
|
|
359
363
|
}
|
|
360
364
|
|
|
365
|
+
// Run Quality Gates (Trust Gate)
|
|
361
366
|
qualityGateResult = runQualityGates(allOutput.join("\n"));
|
|
362
367
|
|
|
363
368
|
if (!qualityGateResult.passed) {
|
|
364
369
|
adjustedStatus = "PARTIAL";
|
|
365
370
|
}
|
|
371
|
+
|
|
372
|
+
// Extract frontmatter for Goal Gate evaluation
|
|
373
|
+
frontmatter = extractFrontmatter(notebook);
|
|
374
|
+
|
|
375
|
+
// Evaluate Goal Gate (in addition to Trust Gate / quality gates)
|
|
376
|
+
// Only evaluate if frontmatter has a goal_contract (backward compatibility)
|
|
377
|
+
if (adjustedStatus === "SUCCESS" && frontmatter?.goal_contract) {
|
|
378
|
+
goalGateResult = evaluateGoalGate(
|
|
379
|
+
frontmatter.goal_contract,
|
|
380
|
+
allOutput.join("\n"),
|
|
381
|
+
typedEvidence?.artifactPaths || []
|
|
382
|
+
);
|
|
383
|
+
|
|
384
|
+
// Apply Two-Gate Decision Matrix
|
|
385
|
+
// If Trust Gate passed (quality gates) but Goal Gate failed
|
|
386
|
+
if (!goalGateResult.passed) {
|
|
387
|
+
adjustedStatus = "PARTIAL";
|
|
388
|
+
warnings.push({
|
|
389
|
+
code: "GOAL_NOT_MET",
|
|
390
|
+
message: `Goal criteria not met: ${goalGateResult.metCount}/${goalGateResult.totalCount} criteria passed`,
|
|
391
|
+
severity: "warning",
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
// If no goal_contract, skip Goal Gate (backward compatibility)
|
|
366
396
|
} catch (e) {
|
|
367
397
|
// Fix 2: Don't swallow quality gate errors - downgrade to PARTIAL
|
|
368
398
|
adjustedStatus = "PARTIAL";
|
|
@@ -491,6 +521,29 @@ export default tool({
|
|
|
491
521
|
}
|
|
492
522
|
}
|
|
493
523
|
|
|
524
|
+
if (goalGateResult) {
|
|
525
|
+
response.goalGates = {
|
|
526
|
+
passed: goalGateResult.passed,
|
|
527
|
+
overallStatus: goalGateResult.overallStatus,
|
|
528
|
+
metCount: goalGateResult.metCount,
|
|
529
|
+
totalCount: goalGateResult.totalCount,
|
|
530
|
+
criteriaResults: goalGateResult.criteriaResults,
|
|
531
|
+
};
|
|
532
|
+
|
|
533
|
+
if (!goalGateResult.passed && goalGateResult.overallStatus === "NOT_MET") {
|
|
534
|
+
const pivot = recommendPivot(
|
|
535
|
+
goalGateResult,
|
|
536
|
+
1,
|
|
537
|
+
frontmatter?.goal_contract?.max_goal_attempts || 3
|
|
538
|
+
);
|
|
539
|
+
response.pivotRecommendation = pivot;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
if (!goalGateResult.passed) {
|
|
543
|
+
response.message = `Completion signal recorded: ${adjustedStatus} (Goal Gate failed: ${goalGateResult.metCount}/${goalGateResult.totalCount} criteria met)`;
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
494
547
|
return JSON.stringify(response, null, 2);
|
|
495
548
|
},
|
|
496
549
|
});
|