@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -0,0 +1,664 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ import { readFileSync } from 'node:fs';
5
+ import { resolve, dirname } from 'node:path';
6
+ import { fileURLToPath } from 'node:url';
7
+
8
+ const __filename = fileURLToPath(import.meta.url);
9
+ const __dirname = dirname(__filename);
10
+
11
+ /**
12
+ * Path Prover Brain
13
+ *
14
+ * Implements the intelligence layer for the Path Prover agent mode.
15
+ * This module identifies coverage gaps, finds nearest substitutions,
16
+ * classifies failures, gates tune/adapter stages, and builds
17
+ * Athena-compatible records with run_type='path_prove'.
18
+ *
19
+ * Feature: ci-benchmark-pipeline
20
+ * Requirements: 8.1–8.12
21
+ */
22
+
23
+ // ── Configuration Dimensions ─────────────────────────────────────────────────
24
+
25
/**
 * The ordered vector of config dimensions used for Hamming distance calculation.
 *
 * Order matters: config signatures and dimension-by-dimension comparisons in
 * this module walk the list in exactly this sequence. The array is frozen so
 * an importer cannot reorder, extend, or truncate it at runtime and silently
 * change every distance and signature computed afterwards.
 */
export const CONFIG_DIMENSIONS = Object.freeze([
    'deployment_config',
    'model_family',
    'instance_family',
    'quantization',
    'tp_degree',
    'deployment_target'
]);
36
+
37
+ // ── Failure Classification ───────────────────────────────────────────────────
38
+
39
/**
 * Valid failure categories for Path Prover classification.
 *
 * Frozen so that the exported list cannot be mutated by importers; it is
 * reference data, not state.
 */
export const FAILURE_CATEGORIES = Object.freeze([
    'capacity',
    'timeout',
    'oom',
    'code_bug',
    'model_incompatibility',
    'service_limitation'
]);
50
+
51
/**
 * Error pattern matchers for failure classification.
 *
 * Each entry maps a regex pattern to a category and retryable flag. The list
 * is ordered: classifyFailure() walks it from the top and returns on the
 * first pattern that matches, so earlier entries take precedence.
 *
 * The bare "OOM" abbreviation is anchored to a word start and must either end
 * the word or be followed by "kill"/"error" (OOMKilled, oom-killer, OOMError).
 * Without that anchor the case-insensitive pattern also matched ordinary words
 * and model names such as "bloom", "room" or "zoom", misclassifying unrelated
 * failures as out-of-memory.
 */
const ERROR_PATTERNS = [
    // Capacity — transient, worth retrying
    { pattern: /InsufficientInstanceCapacity/i, category: 'capacity', retryable: true },
    { pattern: /CapacityError/i, category: 'capacity', retryable: true },
    { pattern: /no capacity/i, category: 'capacity', retryable: true },
    // Timeouts — transient, worth retrying
    { pattern: /timed?\s*out/i, category: 'timeout', retryable: true },
    { pattern: /timeout/i, category: 'timeout', retryable: true },
    { pattern: /deadline exceeded/i, category: 'timeout', retryable: true },
    // Out of memory — the same config will fail again
    { pattern: /OutOfMemory/i, category: 'oom', retryable: false },
    { pattern: /\bOOM(?:\b|(?=[-_]?(?:kill|error)))/i, category: 'oom', retryable: false },
    { pattern: /CUDA out of memory/i, category: 'oom', retryable: false },
    { pattern: /Cannot allocate memory/i, category: 'oom', retryable: false },
    { pattern: /killed.*memory/i, category: 'oom', retryable: false },
    // Code bugs in templates or scripts
    { pattern: /template.*error/i, category: 'code_bug', retryable: false },
    { pattern: /SyntaxError/i, category: 'code_bug', retryable: false },
    { pattern: /ReferenceError/i, category: 'code_bug', retryable: false },
    { pattern: /TypeError/i, category: 'code_bug', retryable: false },
    { pattern: /script crash/i, category: 'code_bug', retryable: false },
    { pattern: /rendering failed/i, category: 'code_bug', retryable: false },
    // Model / server incompatibility
    { pattern: /not supported.*model/i, category: 'model_incompatibility', retryable: false },
    { pattern: /model.*incompatible/i, category: 'model_incompatibility', retryable: false },
    { pattern: /unsupported.*architecture/i, category: 'model_incompatibility', retryable: false },
    { pattern: /LoRA.*not supported/i, category: 'model_incompatibility', retryable: false },
    { pattern: /adapter.*not compatible/i, category: 'model_incompatibility', retryable: false },
    // Service-side limitations
    { pattern: /not available.*region/i, category: 'service_limitation', retryable: false },
    { pattern: /service.*not supported/i, category: 'service_limitation', retryable: false },
    { pattern: /API.*not available/i, category: 'service_limitation', retryable: false },
    { pattern: /feature.*not.*region/i, category: 'service_limitation', retryable: false },
    { pattern: /ValidationException/i, category: 'service_limitation', retryable: false }
];
84
+
85
+ // ── Gap Identification (Task 5.1) ────────────────────────────────────────────
86
+
87
/**
 * Identify coverage gaps given a set of proven configurations.
 *
 * A "gap" is a combination of dimension values that has no record with
 * status 'completed'. The candidate space is the cartesian product of every
 * distinct (non-null) value observed per dimension across ALL supplied
 * records, regardless of status; values are compared as strings.
 *
 * Gaps are ranked by how many distinct completed configs differ from them in
 * exactly one dimension (more proven neighbors = higher priority). Ties keep
 * generation order.
 *
 * Configs are compared as value vectors rather than by splitting a joined
 * signature string, so a value containing the separator cannot skew the
 * neighbor count, and the vector for each candidate is built once instead of
 * once per proven config.
 *
 * @param {object[]} provenConfigs - Array of config records from Athena.
 *   Each object carries keys matching CONFIG_DIMENSIONS plus `status`.
 * @returns {object[]} Ordered list of gap configs (dimension values as
 *   strings), highest priority first. Empty when no records are supplied.
 */
export function identifyGaps(provenConfigs) {
    if (!provenConfigs || provenConfigs.length === 0) {
        return [];
    }

    // Missing dimensions are represented by '' so vectors always have one
    // slot per entry in CONFIG_DIMENSIONS.
    const toVector = (config) => CONFIG_DIMENSIONS.map((dim) => String(config[dim] ?? ''));

    // Distinct observed values per dimension, in first-seen order.
    const dimensionValues = {};
    for (const dim of CONFIG_DIMENSIONS) {
        const seen = new Set();
        for (const config of provenConfigs) {
            if (config[dim] !== undefined && config[dim] !== null) {
                seen.add(String(config[dim]));
            }
        }
        dimensionValues[dim] = [...seen];
    }

    // Completed configs, de-duplicated by an unambiguous JSON key.
    const provenVectors = new Map();
    for (const config of provenConfigs) {
        if (config.status === 'completed') {
            const vector = toVector(config);
            provenVectors.set(JSON.stringify(vector), vector);
        }
    }

    const ranked = [];
    for (const combo of cartesianProduct(dimensionValues)) {
        const vector = toVector(combo);
        if (provenVectors.has(JSON.stringify(vector))) {
            continue;
        }

        // Count proven configs at Hamming distance exactly 1.
        let neighborCount = 0;
        for (const provenVector of provenVectors.values()) {
            let diff = 0;
            // Stop comparing as soon as the distance exceeds 1.
            for (let i = 0; i < vector.length && diff < 2; i++) {
                if (provenVector[i] !== vector[i]) {
                    diff++;
                }
            }
            if (diff === 1) {
                neighborCount++;
            }
        }
        ranked.push({ config: combo, neighborCount });
    }

    // Most-surrounded gaps first; Array.prototype.sort is stable, so ties
    // retain cartesian-product order.
    ranked.sort((a, b) => b.neighborCount - a.neighborCount);

    return ranked.map(({ config }) => config);
}
154
+
155
/**
 * Build every combination of the supplied per-dimension values.
 *
 * Dimensions are expanded in CONFIG_DIMENSIONS order, so the first dimension
 * varies slowest. A dimension with no values is left out of the generated
 * objects instead of collapsing the product to nothing.
 *
 * @param {object} dimensionValues - Map of dimension name to array of values
 * @returns {object[]} Array of config objects representing all combinations
 */
function cartesianProduct(dimensionValues) {
    // Start from a single empty partial config and widen it one dimension
    // at a time.
    let partials = [{}];

    for (const dim of CONFIG_DIMENSIONS) {
        const choices = dimensionValues[dim] || [];
        if (choices.length === 0) {
            continue;
        }
        partials = partials.flatMap((partial) =>
            Array.from(choices, (choice) => ({ ...partial, [dim]: choice }))
        );
    }

    return partials;
}
184
+
185
+ // ── Substitution Algorithm (Task 5.2) ────────────────────────────────────────
186
+
187
/**
 * Find the nearest proven substitution for a requested configuration.
 *
 * Uses Hamming distance on the config dimension vector. Only considers
 * configs with status='completed'. Never crosses the model_family boundary:
 * completed configs from other families are used solely to report how far
 * away the nearest coverage is.
 *
 * Ranking is by distance ascending, then by `run_timestamp` descending.
 * Timestamps are compared as strings (missing values sort last), which gives
 * chronological order for ISO-8601 values; non-string timestamps are coerced
 * rather than causing a TypeError.
 *
 * @param {object} requestedConfig - The requested config with dimension fields
 * @param {object[]} provenConfigs - Array of proven configs from Athena
 * @returns {object} Result object:
 *   - If matches found: { substitutions: [{config, distance, explanation}...] } (top 3)
 *   - If no matches: { noMatch: true, message: string }
 */
export function findNearestSubstitution(requestedConfig, provenConfigs) {
    if (!requestedConfig || !provenConfigs || provenConfigs.length === 0) {
        return { noMatch: true, message: 'no coverage — no proven configs available' };
    }

    const completed = provenConfigs.filter((c) => c.status === 'completed');
    if (completed.length === 0) {
        return { noMatch: true, message: 'no coverage — no proven configs available' };
    }

    const requestedFamily = requestedConfig.model_family;
    const candidates = completed.filter((c) => c.model_family === requestedFamily);

    if (candidates.length === 0) {
        // Nothing in this family: report the nearest config across families.
        // A plain loop avoids spreading a potentially huge array into
        // Math.min(), which is subject to the engine's argument-count limit.
        let minDistance = Infinity;
        for (const proven of completed) {
            const distance = hammingDistance(requestedConfig, proven);
            if (distance < minDistance) {
                minDistance = distance;
            }
        }
        return {
            noMatch: true,
            message: `no coverage — nearest proven config is ${minDistance} dimensions away`
        };
    }

    const scored = candidates.map((config) => ({
        config,
        distance: hammingDistance(requestedConfig, config),
        explanation: buildExplanation(requestedConfig, config)
    }));

    scored.sort((a, b) => {
        if (a.distance !== b.distance) {
            return a.distance - b.distance;
        }
        // Tie-break: prefer more recent configs.
        const aTime = String(a.config.run_timestamp || '');
        const bTime = String(b.config.run_timestamp || '');
        return bTime.localeCompare(aTime);
    });

    // Return top 3
    return { substitutions: scored.slice(0, 3) };
}
251
+
252
/**
 * Hamming distance between two configs over CONFIG_DIMENSIONS.
 *
 * Each dimension value is compared in string form, with null/undefined
 * treated as the empty string, so 4 and '4' count as equal and a missing
 * value equals an explicitly empty one.
 *
 * @param {object} configA - First config
 * @param {object} configB - Second config
 * @returns {number} Number of dimensions that differ
 */
export function hammingDistance(configA, configB) {
    const asText = (value) => String(value ?? '');

    return CONFIG_DIMENSIONS.reduce(
        (mismatches, dim) =>
            (asText(configA[dim]) === asText(configB[dim]) ? mismatches : mismatches + 1),
        0
    );
}
271
+
272
/**
 * Describe, dimension by dimension, how a suggested config departs from the
 * requested one.
 *
 * Values are compared in string form with null/undefined shown as ''.
 * Dimensions that agree produce no entry; the rest appear in
 * CONFIG_DIMENSIONS order.
 *
 * @param {object} requested - The requested config
 * @param {object} suggested - The suggested substitution
 * @returns {string[]} Array of dimension difference explanations
 */
function buildExplanation(requested, suggested) {
    const asText = (value) => String(value ?? '');

    return CONFIG_DIMENSIONS.flatMap((dim) => {
        const reqVal = asText(requested[dim]);
        const sugVal = asText(suggested[dim]);
        return reqVal === sugVal ? [] : [`${dim}: '${reqVal}' → '${sugVal}'`];
    });
}
290
+
291
+ // ── Tune/Adapter Stage Gating (Task 5.3) ─────────────────────────────────────
292
+
293
/**
 * Decide whether the tune/adapter stages run for a prove request.
 *
 * The stages are opt-in. They run only when the request asks for them in one
 * of three ways: `include_tuning` is exactly true, `enable_lora` is exactly
 * true, or `tune_technique` is a non-empty value other than 'none'.
 *
 * @param {object} proveRequest - The prove request object
 * @param {boolean} [proveRequest.include_tuning] - Explicitly request tuning
 * @param {boolean} [proveRequest.enable_lora] - Whether LoRA is enabled
 * @param {string} [proveRequest.tune_technique] - Tune technique (sft, dpo, etc.)
 * @returns {boolean} True if tune stages should execute
 */
export function shouldExecuteTuneStages(proveRequest) {
    if (!proveRequest) {
        return false;
    }

    const {
        include_tuning: explicitTuning,
        enable_lora: loraServing,
        tune_technique: technique
    } = proveRequest;

    const namesTechnique = Boolean(technique) && technique !== 'none';

    return explicitTuning === true || loraServing === true || namesTechnique;
}
320
+
321
+ // ── Failure Classification (Task 5.4) ────────────────────────────────────────
322
+
323
/**
 * Render a non-string error detail as text for pattern matching.
 *
 * Falls back to String() when the value cannot be serialised (for example a
 * cyclic structure) or serialises to nothing.
 *
 * @param {*} detail - Any non-string error payload
 * @returns {string} Text form of the payload
 */
function stringifyErrorDetail(detail) {
    try {
        return JSON.stringify(detail) ?? String(detail);
    } catch {
        return String(detail);
    }
}

/**
 * Classify a failure from error output.
 *
 * Parses error output for known patterns and returns a structured
 * classification with stage, category, and retryable flag. Patterns are
 * tried in ERROR_PATTERNS order and the first match wins; anything
 * unrecognised (including empty input) is reported as a non-retryable
 * 'code_bug'.
 *
 * For object input the message is taken from `error`, then `message`, then
 * the whole object. Whatever is selected is always reduced to a string
 * before matching, so a nested error object is searched by its serialised
 * content instead of being coerced to "[object Object]".
 *
 * @param {string|object} errorOutput - Error output (string or structured object)
 * @param {string} [errorOutput.error] - Error message (if object)
 * @param {string} [errorOutput.stage] - Stage that failed (if object)
 * @returns {object} Classification: { stage, category, retryable }
 */
export function classifyFailure(errorOutput) {
    if (!errorOutput) {
        return { stage: 'unknown', category: 'code_bug', retryable: false };
    }

    // Extract error message and stage
    let errorMsg = '';
    let stage = 'unknown';

    if (typeof errorOutput === 'string') {
        errorMsg = errorOutput;
        stage = detectStage(errorMsg);
    } else if (typeof errorOutput === 'object') {
        const detail = errorOutput.error || errorOutput.message || errorOutput;
        errorMsg = typeof detail === 'string' ? detail : stringifyErrorDetail(detail);
        // An explicit stage from the caller beats one guessed from the text.
        stage = errorOutput.stage || detectStage(errorMsg);
    }

    // Match against known patterns
    for (const { pattern, category, retryable } of ERROR_PATTERNS) {
        if (pattern.test(errorMsg)) {
            return { stage, category, retryable };
        }
    }

    // Default: unrecognized errors are classified as code_bug (non-retryable)
    return { stage, category: 'code_bug', retryable: false };
}
361
+
362
/**
 * Guess the lifecycle stage an error came from by scanning its text.
 *
 * Matchers are tried from top to bottom and the first hit decides the stage,
 * so an earlier stage wins when a message mentions several (for example a
 * message containing both "generation" and "test" resolves to 'generate').
 *
 * @param {string} errorMsg - The error message
 * @returns {string} The detected stage name, or 'unknown' when nothing matches
 */
function detectStage(errorMsg) {
    // [stage, matcher] pairs in precedence order.
    const orderedMatchers = [
        ['generate', /\b(generate|generation)\b/i],
        ['stage', /\bdo\/stage\b|model.staging|pre-stage|s3.*stag/i],
        ['build', /\b(build|docker)\b/i],
        ['push', /\b(push|ecr|registry)\b/i],
        ['deploy', /\b(deploy|endpoint|CreateEndpoint|InferenceComponent)\b/i],
        ['test', /\b(test|invoke|invocation|inference)\b/i],
        ['tune', /\b(tune|fine-?tun|customization)\b/i],
        ['adapter', /\b(adapter|lora)\b/i],
        ['benchmark', /\b(benchmark|bench)\b/i],
        ['register', /\b(register|dynamo)\b/i],
        ['clean', /\b(clean|delete)\b/i]
    ];

    const hit = orderedMatchers.find(([, matcher]) => matcher.test(errorMsg));
    return hit ? hit[0] : 'unknown';
}
391
+
392
+ // ── Result Writing (Task 5.5) ────────────────────────────────────────────────
393
+
394
/**
 * Build a Path Prover Athena record from execution result and classification.
 *
 * All records have run_type='path_prove'. On success, status='completed'.
 * On non-retryable failure, status='unfeasible' with failure_reason populated.
 * On retryable failure (or when no classification is supplied),
 * status='failed' with failure_reason populated.
 *
 * Metrics are merged into successful records, but any `run_type` or `status`
 * key inside the metrics is ignored: those two fields define what kind of
 * record this is and must not be overwritten by values carried over from a
 * benchmark payload. Other metric keys are merged as-is.
 *
 * A missing `result` is treated as a failed run with no details rather than
 * throwing.
 *
 * @param {object} result - The execution result
 * @param {boolean} result.success - Whether the prove run succeeded
 * @param {object} [result.metrics] - Benchmark metrics (on success)
 * @param {object} [result.config] - The config that was proven
 * @param {string} [result.error] - Error message (on failure)
 * @param {object|null} [classification] - Failure classification (from classifyFailure)
 * @param {string} [classification.stage] - Stage that failed
 * @param {string} [classification.category] - Error category
 * @param {boolean} [classification.retryable] - Whether failure is retryable
 * @returns {object} Athena-compatible record with run_type='path_prove'
 */
export function buildPathProverRecord(result, classification) {
    const outcome = result ?? {};
    const { config } = outcome;

    const record = {
        run_type: 'path_prove',
        run_timestamp: new Date().toISOString()
    };

    // Merge config dimensions if provided
    if (config) {
        for (const dim of CONFIG_DIMENSIONS) {
            if (config[dim] !== undefined) {
                record[dim] = config[dim];
            }
        }
        // Also copy non-dimension config fields
        if (config.config_id) record.config_id = config.config_id;
        if (config.model_name) record.model_name = config.model_name;
        if (config.instance_type) record.instance_type = config.instance_type;
    }

    if (outcome.success) {
        record.status = 'completed';
        if (outcome.metrics) {
            // Strip the two identity fields so metrics cannot relabel the record.
            const { run_type: _metricRunType, status: _metricStatus, ...safeMetrics } = outcome.metrics;
            Object.assign(record, safeMetrics);
        }
        return record;
    }

    // Failure case: only an explicit retryable === false marks the config
    // as permanently unfeasible.
    record.status = classification && classification.retryable === false
        ? 'unfeasible'
        : 'failed';

    // Populate failure details
    record.failure_reason = outcome.error || 'Unknown failure';

    if (classification) {
        record.failure_stage = classification.stage;
        record.failure_category = classification.category;
        record.failure_retryable = classification.retryable;
    }

    return record;
}
457
+
458
/**
 * Look up an earlier 'unfeasible' verdict for a config so the same
 * combination is not attempted again.
 *
 * Only records with status 'unfeasible' and run_type 'path_prove' qualify,
 * and every dimension in CONFIG_DIMENSIONS must match when compared in
 * string form (null/undefined treated as ''). The first qualifying record
 * in array order is returned.
 *
 * @param {object} config - The config to check
 * @param {object[]} existingRecords - Existing Athena records
 * @returns {object|null} The unfeasible record if found, null otherwise
 */
export function findUnfeasibleRecord(config, existingRecords) {
    const nothingToSearch = !config || !existingRecords || existingRecords.length === 0;
    if (nothingToSearch) {
        return null;
    }

    const sameDimensions = (record) => CONFIG_DIMENSIONS.every(
        (dim) => String(record[dim] ?? '') === String(config[dim] ?? '')
    );

    const verdict = existingRecords.find(
        (record) =>
            record.status === 'unfeasible' &&
            record.run_type === 'path_prove' &&
            sameDimensions(record)
    );

    return verdict ?? null;
}
484
+
485
+ // ── Priority Queue (v1 Validation Mode) ──────────────────────────────────────
486
+
487
/**
 * Get the next unproven config from the priority queue.
 *
 * Checks the priority targets list and returns the first target whose
 * status is 'pending' and whose model_name is neither in the `proven` list
 * nor among the successful `previousResults` of this run. If no such target
 * exists, returns null to fall through to gap-finding mode.
 *
 * Priority data is resolved in this order: the `priorityData` argument,
 * then `event._priorityData`, then the file at `event.priorityConfigPath`.
 * The event itself is optional, so callers that pass pre-loaded data (Lambda,
 * tests) may omit it. The input data is not modified.
 *
 * @param {object} event - The Step Functions event object
 * @param {string} [event.priorityConfigPath] - Path to priority targets JSON
 * @param {object[]} [event.previousResults] - Previously proven configs in this run
 * @param {object|null} priorityData - Pre-loaded priority data (for Lambda/testing).
 *   If null, attempts to load from event.priorityConfigPath.
 * @returns {object|null} Next config to prove (defaults overlaid with the
 *   target's fields, minus `status`), or null if priority queue exhausted
 */
export function getNextPriorityConfig(event, priorityData = null) {
    // Tolerate a missing event when the data is supplied directly.
    const source = event ?? {};

    // Resolve priority data: explicit param > event._priorityData > load from file
    const data = priorityData || source._priorityData || (
        source.priorityConfigPath ? loadPriorityTargets(source.priorityConfigPath) : null
    );

    if (!data || !Array.isArray(data.targets)) {
        return null;
    }

    const provenNames = new Set((data.proven || []).map((p) => p.model_name));

    // Also consider previousResults from this run as proven
    for (const result of source.previousResults || []) {
        const provenName = result?.success ? result.config?.model_name : undefined;
        if (provenName) {
            provenNames.add(provenName);
        }
    }

    // Find first pending target not yet proven
    const next = data.targets.find(
        (target) => target?.status === 'pending' && !provenNames.has(target.model_name)
    );
    if (!next) {
        // All priority targets are proven or non-pending
        return null;
    }

    // Build full config from defaults + target overrides; status is queue
    // metadata, not a config field, so it is dropped.
    const { status: _queueStatus, ...config } = { ...(data.defaults || {}), ...next };
    return config;
}
538
+
539
/**
 * Record the outcome of a prove attempt against the priority queue.
 *
 * The first target whose model_name matches is updated. A 'proven' outcome
 * removes it from `targets` and appends an entry (model_name, today's date
 * as YYYY-MM-DD, plus any details) to `proven`. Any other status is written
 * onto the target in place together with a `last_attempt` timestamp and the
 * error details that were supplied. Unknown model names and data without a
 * `targets` list are returned untouched.
 *
 * @param {object} priorityData - The loaded priority targets data (mutated in place)
 * @param {string} modelName - The model_name to update
 * @param {string} newStatus - New status: 'proven', 'failed', or 'unfeasible'
 * @param {object} [details] - Additional details (error_category, error_message)
 * @returns {object} Updated priority data (same reference, mutated)
 */
export function updatePriorityStatus(priorityData, modelName, newStatus, details = {}) {
    const queue = priorityData?.targets;
    if (!queue) {
        return priorityData;
    }

    const position = queue.findIndex((entry) => entry.model_name === modelName);
    if (position < 0) {
        return priorityData;
    }

    if (newStatus !== 'proven') {
        // Keep the target in the queue and annotate it (failed, unfeasible).
        const entry = queue[position];
        entry.status = newStatus;
        if (details.error_category) {
            entry.error_category = details.error_category;
        }
        if (details.error_message) {
            entry.error_message = details.error_message;
        }
        entry.last_attempt = new Date().toISOString();
        return priorityData;
    }

    // Proven: move the target out of the queue into the proven list.
    queue.splice(position, 1);
    priorityData.proven = priorityData.proven || [];
    const [provenDate] = new Date().toISOString().split('T');
    priorityData.proven.push({
        model_name: modelName,
        proven_date: provenDate,
        ...details
    });

    return priorityData;
}
574
+
575
/**
 * Summarise the priority queue as a set of counters.
 *
 * `proven` is the length of the proven list; `pending`, `failed` and
 * `unfeasible` count targets carrying exactly that status; `total` is all
 * targets (whatever their status) plus all proven entries.
 *
 * @param {object} priorityData - The loaded priority targets data
 * @returns {object} Summary with counts: { total, pending, proven, failed, unfeasible }
 */
export function getPriorityQueueStatus(priorityData) {
    const summary = { total: 0, pending: 0, proven: 0, failed: 0, unfeasible: 0 };
    if (!priorityData) {
        return summary;
    }

    const queue = priorityData.targets || [];
    const provenCount = (priorityData.proven || []).length;

    // Tally the three tracked statuses in a single pass.
    queue.forEach((target) => {
        const state = target.status;
        if (state === 'pending' || state === 'failed' || state === 'unfeasible') {
            summary[state] += 1;
        }
    });

    summary.total = queue.length + provenCount;
    summary.proven = provenCount;
    return summary;
}
597
+
598
/**
 * Read and parse a priority-targets JSON file synchronously.
 *
 * This is a best-effort loader: any failure while resolving the path,
 * reading the file, or parsing its contents yields null instead of an
 * exception, so callers can treat "no priority file" and "bad priority
 * file" alike.
 *
 * @param {string} configPath - Absolute or relative path to the JSON file
 * @returns {object|null} Parsed priority data, or null if not found/invalid
 */
export function loadPriorityTargets(configPath) {
    let parsed = null;
    try {
        parsed = JSON.parse(readFileSync(resolve(configPath), 'utf8'));
    } catch {
        parsed = null;
    }
    return parsed;
}
613
+
614
+ // ── TP Degree Auto-Resolution at Prove-Time (Task 6.5) ──────────────────────
615
+
616
/**
 * Fill in a missing tp_degree from the instance catalog's GPU count.
 *
 * Nothing happens when the config is absent, already carries a tp_degree
 * (anything other than null/undefined, including 0), or has no
 * instance_type. Otherwise the catalog is consulted: the `catalogOverride`
 * argument if given, else servers/lib/catalogs/instances.json resolved
 * relative to this module. If the catalog cannot be read or parsed, or the
 * instance has no positive `gpus` entry, the config is returned unchanged.
 *
 * On success the config gains `tp_degree`, `_tpAutoResolved: true` and
 * `_tpAutoResolvedFrom` (the instance type used).
 *
 * Requirements: FTP-1 (extension) — task 6.5
 *
 * @param {object} config - The prove config (from getNextPriorityConfig or similar)
 * @param {string} [config.instance_type] - Instance type (e.g. 'ml.g5.48xlarge')
 * @param {number|string|undefined} [config.tp_degree] - TP degree if already set
 * @param {object|null} [catalogOverride] - Optional catalog data for testing (skips file load)
 * @returns {object} The config object (mutated in place) with tp_degree resolved
 */
export function resolveProveTpDegree(config, catalogOverride = null) {
    // `!= null` is the deliberate loose check covering both null and undefined.
    const nothingToResolve = !config || config.tp_degree != null || !config.instance_type;
    if (nothingToResolve) {
        return config;
    }

    let catalog = catalogOverride;
    if (!catalog) {
        try {
            const catalogFile = resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
            catalog = JSON.parse(readFileSync(catalogFile, 'utf8'));
        } catch {
            // No usable catalog: leave the config as it was.
            return config;
        }
    }

    const gpuCount = catalog?.catalog?.[config.instance_type]?.gpus;
    if (gpuCount && gpuCount > 0) {
        config.tp_degree = gpuCount;
        config._tpAutoResolved = true;
        config._tpAutoResolvedFrom = config.instance_type;
    }

    return config;
}
@@ -252,7 +252,7 @@ const infraInstancePrompts = [
252
252
  if (!input || input.trim() === '') {
253
253
  return 'At least one instance type is required';
254
254
  }
255
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
255
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
256
256
  const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
257
257
  if (instances.length === 0) {
258
258
  return 'At least one instance type is required';
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
384
384
  if (!input || input.trim() === '') {
385
385
  return 'Instance type is required';
386
386
  }
387
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
387
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
388
388
  if (!instancePattern.test(input.trim())) {
389
389
  return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
390
390
  }
@@ -459,6 +459,12 @@ const hfTokenPrompts = [
459
459
  return false;
460
460
  }
461
461
 
462
+ // Skip HF token when model name is an S3 URI (no HF download needed)
463
+ const modelName = answers.customModelName || answers.modelName;
464
+ if (modelName && modelName.startsWith('s3://')) {
465
+ return false;
466
+ }
467
+
462
468
  // Display security warning before prompting
463
469
  console.log('\n🔐 HuggingFace Authentication');
464
470
  console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');