@simulatte/doppler 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199):
  1. package/README.md +26 -10
  2. package/package.json +30 -6
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.js +1 -1
  6. package/src/client/doppler-provider/types.js +1 -1
  7. package/src/config/execution-contract-check.d.ts +33 -0
  8. package/src/config/execution-contract-check.js +72 -0
  9. package/src/config/execution-v0-contract-check.d.ts +94 -0
  10. package/src/config/execution-v0-contract-check.js +251 -0
  11. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  12. package/src/config/execution-v0-graph-contract-check.js +64 -0
  13. package/src/config/kernel-path-contract-check.d.ts +76 -0
  14. package/src/config/kernel-path-contract-check.js +479 -0
  15. package/src/config/kernel-path-loader.d.ts +16 -0
  16. package/src/config/kernel-path-loader.js +54 -0
  17. package/src/config/kernels/kernel-ref-digests.js +39 -27
  18. package/src/config/kernels/registry.json +598 -2
  19. package/src/config/loader.js +81 -48
  20. package/src/config/merge-contract-check.d.ts +16 -0
  21. package/src/config/merge-contract-check.js +321 -0
  22. package/src/config/merge-helpers.d.ts +58 -0
  23. package/src/config/merge-helpers.js +54 -0
  24. package/src/config/merge.js +21 -6
  25. package/src/config/presets/models/janus-text.json +2 -0
  26. package/src/config/presets/models/qwen3.json +9 -2
  27. package/src/config/presets/models/transformer.json +5 -0
  28. package/src/config/quantization-contract-check.d.ts +12 -0
  29. package/src/config/quantization-contract-check.js +91 -0
  30. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  31. package/src/config/required-inference-fields-contract-check.js +237 -0
  32. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  33. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  34. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  35. package/src/config/schema/conversion-report.schema.js +108 -0
  36. package/src/config/schema/doppler.schema.js +12 -18
  37. package/src/config/schema/index.d.ts +22 -0
  38. package/src/config/schema/index.js +18 -0
  39. package/src/config/schema/inference-defaults.schema.js +3 -0
  40. package/src/config/schema/inference.schema.d.ts +9 -0
  41. package/src/config/schema/kernel-path.schema.d.ts +6 -0
  42. package/src/config/schema/manifest.schema.d.ts +6 -0
  43. package/src/config/schema/manifest.schema.js +3 -0
  44. package/src/converter/core.d.ts +10 -0
  45. package/src/converter/core.js +27 -2
  46. package/src/converter/parsers/diffusion.js +63 -3
  47. package/src/converter/rope-config.js +42 -0
  48. package/src/gpu/device.js +58 -0
  49. package/src/gpu/kernels/attention.js +98 -0
  50. package/src/gpu/kernels/bias_add.wgsl +8 -6
  51. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  52. package/src/gpu/kernels/conv2d.js +1 -1
  53. package/src/gpu/kernels/conv2d.wgsl +7 -8
  54. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  55. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  56. package/src/gpu/kernels/depthwise_conv2d.js +99 -0
  57. package/src/gpu/kernels/depthwise_conv2d.wgsl +55 -0
  58. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +59 -0
  59. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  60. package/src/gpu/kernels/grouped_pointwise_conv2d.js +93 -0
  61. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +44 -0
  62. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +48 -0
  63. package/src/gpu/kernels/index.d.ts +30 -0
  64. package/src/gpu/kernels/index.js +25 -0
  65. package/src/gpu/kernels/matmul.js +25 -0
  66. package/src/gpu/kernels/pixel_shuffle.js +1 -1
  67. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  68. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  69. package/src/gpu/kernels/relu.d.ts +18 -0
  70. package/src/gpu/kernels/relu.js +58 -0
  71. package/src/gpu/kernels/relu.wgsl +22 -0
  72. package/src/gpu/kernels/relu_f16.wgsl +24 -0
  73. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  74. package/src/gpu/kernels/repeat_channels.js +60 -0
  75. package/src/gpu/kernels/repeat_channels.wgsl +28 -0
  76. package/src/gpu/kernels/repeat_channels_f16.wgsl +30 -0
  77. package/src/gpu/kernels/residual.js +44 -8
  78. package/src/gpu/kernels/residual.wgsl +6 -3
  79. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  80. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  81. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  82. package/src/gpu/kernels/rmsnorm.js +58 -6
  83. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  84. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  85. package/src/gpu/kernels/rope.d.ts +2 -0
  86. package/src/gpu/kernels/rope.js +11 -1
  87. package/src/gpu/kernels/rope.wgsl +56 -40
  88. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  89. package/src/gpu/kernels/sana_linear_attention.js +121 -0
  90. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +43 -0
  91. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +46 -0
  92. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +51 -0
  93. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +53 -0
  94. package/src/gpu/kernels/silu.d.ts +1 -0
  95. package/src/gpu/kernels/silu.js +32 -14
  96. package/src/gpu/kernels/silu.wgsl +19 -9
  97. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  98. package/src/gpu/kernels/transpose.js +15 -2
  99. package/src/gpu/kernels/transpose.wgsl +5 -6
  100. package/src/gpu/kernels/upsample2d.js +2 -1
  101. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  102. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  103. package/src/gpu/kernels/utils.js +16 -1
  104. package/src/index-browser.d.ts +1 -1
  105. package/src/index-browser.js +2 -2
  106. package/src/index.js +1 -1
  107. package/src/inference/browser-harness.js +109 -23
  108. package/src/inference/pipelines/diffusion/init.js +14 -0
  109. package/src/inference/pipelines/diffusion/pipeline.js +215 -77
  110. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  111. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  112. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  113. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  114. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +11 -4
  115. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +282 -0
  116. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  117. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  118. package/src/inference/pipelines/diffusion/vae.js +782 -78
  119. package/src/inference/pipelines/text/attention/record.js +11 -2
  120. package/src/inference/pipelines/text/attention/run.js +11 -2
  121. package/src/inference/pipelines/text/chat-format.js +25 -1
  122. package/src/inference/pipelines/text/config.d.ts +9 -0
  123. package/src/inference/pipelines/text/config.js +69 -2
  124. package/src/inference/pipelines/text/execution-plan.js +23 -31
  125. package/src/inference/pipelines/text/execution-v0.js +43 -95
  126. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  127. package/src/inference/pipelines/text/init.d.ts +4 -0
  128. package/src/inference/pipelines/text/init.js +56 -9
  129. package/src/inference/pipelines/text/layer.js +11 -0
  130. package/src/inference/pipelines/text.js +4 -0
  131. package/src/inference/tokenizers/bundled.js +156 -33
  132. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  133. package/src/rules/execution-rules-contract-check.js +245 -0
  134. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  135. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  136. package/src/rules/kernels/relu.rules.json +6 -0
  137. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  138. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  139. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  140. package/src/rules/layer-pattern-contract-check.js +231 -0
  141. package/src/rules/rule-registry.d.ts +28 -0
  142. package/src/rules/rule-registry.js +38 -0
  143. package/src/rules/tooling/command-runtime.rules.json +18 -0
  144. package/src/tooling/command-api.d.ts +27 -1
  145. package/src/tooling/command-api.js +142 -3
  146. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  147. package/src/tooling/conversion-config-materializer.js +99 -0
  148. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  149. package/src/tooling/lean-execution-contract-runner.js +158 -0
  150. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  151. package/src/tooling/node-browser-command-runner.js +58 -3
  152. package/src/tooling/node-command-runner.js +15 -0
  153. package/src/tooling/node-convert.d.ts +10 -0
  154. package/src/tooling/node-converter.js +59 -0
  155. package/src/tooling/node-webgpu.js +11 -89
  156. package/src/training/checkpoint-watch.d.ts +7 -0
  157. package/src/training/checkpoint-watch.js +106 -0
  158. package/src/training/checkpoint.d.ts +6 -1
  159. package/src/training/checkpoint.js +12 -2
  160. package/src/training/distillation/artifacts.d.ts +71 -0
  161. package/src/training/distillation/artifacts.js +132 -0
  162. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  163. package/src/training/distillation/checkpoint-watch.js +57 -0
  164. package/src/training/distillation/dataset.d.ts +59 -0
  165. package/src/training/distillation/dataset.js +337 -0
  166. package/src/training/distillation/eval.d.ts +34 -0
  167. package/src/training/distillation/eval.js +310 -0
  168. package/src/training/distillation/index.d.ts +29 -0
  169. package/src/training/distillation/index.js +29 -0
  170. package/src/training/distillation/runtime.d.ts +20 -0
  171. package/src/training/distillation/runtime.js +121 -0
  172. package/src/training/distillation/scoreboard.d.ts +6 -0
  173. package/src/training/distillation/scoreboard.js +8 -0
  174. package/src/training/distillation/stage-a.d.ts +45 -0
  175. package/src/training/distillation/stage-a.js +338 -0
  176. package/src/training/distillation/stage-b.d.ts +24 -0
  177. package/src/training/distillation/stage-b.js +20 -0
  178. package/src/training/index.d.ts +10 -0
  179. package/src/training/index.js +10 -0
  180. package/src/training/lora-pipeline.d.ts +40 -0
  181. package/src/training/lora-pipeline.js +796 -0
  182. package/src/training/operator-artifacts.d.ts +62 -0
  183. package/src/training/operator-artifacts.js +140 -0
  184. package/src/training/operator-command.d.ts +5 -0
  185. package/src/training/operator-command.js +453 -0
  186. package/src/training/operator-eval.d.ts +48 -0
  187. package/src/training/operator-eval.js +230 -0
  188. package/src/training/operator-scoreboard.d.ts +5 -0
  189. package/src/training/operator-scoreboard.js +44 -0
  190. package/src/training/runner.d.ts +52 -0
  191. package/src/training/runner.js +29 -4
  192. package/src/training/suite.d.ts +112 -0
  193. package/src/training/suite.js +9 -9
  194. package/src/training/workloads.d.ts +164 -0
  195. package/src/training/workloads.js +539 -0
  196. package/src/version.d.ts +2 -0
  197. package/src/version.js +2 -0
  198. package/tools/convert-safetensors-node.js +47 -0
  199. package/tools/doppler-cli.js +252 -41
@@ -0,0 +1,164 @@
/** Discriminator stored in `TrainingWorkloadPack.kind`. */
export type TrainingWorkloadKind = 'lora' | 'distill' | 'ul';

/** Allowed values for `TrainingWorkloadPack.surfaceSupport`. */
export type TrainingSurfaceSupport = 'node' | 'browser' | 'both';

/** Direction in which a selection metric is optimised. */
export type TrainingSelectionGoal = 'max' | 'min';

/** Allowed values for `TrainingEvalDataset.evalKind`. */
export type TrainingEvalKind =
  | 'translation'
  | 'text_generation'
  | 'classification'
  | 'retrieval'
  | 'custom';
10
+
/** One evaluation dataset entry of a training workload pack. */
export interface TrainingEvalDataset {
  /** Identifier of the eval dataset. */
  id: string;
  /** Location of the dataset. */
  datasetPath: string;
  /** Evaluation category. */
  evalKind: TrainingEvalKind;
  /** Metric names; may be empty. */
  metrics: string[];
  /** Decoding settings, or `null` when no policy is configured. */
  decodePolicy: {
    /** Token cap, or `null` when unset. */
    maxTokens: number | null;
    stopOnEos: boolean;
  } | null;
  /** Scoreboard column names; may be empty. */
  scoreboardColumns: string[];
  sourceLangs: string[] | null;
  targetLangs: string[] | null;
  pairAllowlist: string[] | null;
}
25
+
/** Optimizer hyper-parameters together with the learning-rate scheduler settings. */
export interface TrainingOptimizerConfig {
  /** Optimizer name. */
  type: string;
  /** Learning rate. */
  lr: number;
  beta1: number;
  beta2: number;
  eps: number;
  weightDecay: number;
  /** Learning-rate scheduler settings. */
  scheduler: {
    enabled: boolean;
    /** Scheduler name. */
    type: string;
    warmupSteps: number;
    stepSize: number;
    gamma: number;
    totalSteps: number;
    minLr: number;
  };
}
43
+
/** Training-loop settings shared by every workload kind. */
export interface TrainingWorkloadTrainingConfig {
  optimizer: TrainingOptimizerConfig;
  batchSize: number;
  /** Gradient-accumulation step count. */
  accumSteps: number;
  /** Number of training steps. */
  steps: number;
  /** Precision names per tensor category. */
  precision: {
    activations: string;
    gradients: string;
    loraParams: string;
  };
  gradientClipping: {
    maxNorm: number;
  };
}
58
+
/** Pipeline section of a workload pack whose kind is `'lora'`. */
export interface LoRAWorkloadPipelineConfig {
  datasetFormat: string;
  taskType: string;
  /** LoRA adapter shape and the modules it attaches to. */
  adapter: {
    rank: number;
    alpha: number;
    dropout: number;
    targetModules: string[];
  };
  /** Per-component freeze flags. */
  freeze: {
    encoder: boolean;
    prior: boolean;
    decoder: boolean;
    base: boolean;
    lora: boolean;
  };
  /** Adapter export settings, or `null` when no export section is configured. */
  export: {
    enabled: boolean;
    atCheckpoints: boolean;
    select: string;
    id: string | null;
    name: string | null;
    format: string;
  } | null;
  /** Post-training activation settings, or `null` when not configured. */
  activation: {
    enabled: boolean;
    autoActivate: boolean;
    smokePrompt: string | null;
  } | null;
}
89
+
/** A single stage in a distillation stage plan. */
export interface DistillStagePlanEntry {
  id: string;
  trainingStage: string;
  objective: string;
  /** Number of steps for this stage. */
  steps: number;
  checkpointEvery: number;
  /** Metric name used for selection. */
  selectionMetric: string;
  /** Whether `selectionMetric` is maximised or minimised. */
  selectionGoal: TrainingSelectionGoal;
  evalSchedule: string;
}
100
+
/** Pipeline section of a workload pack whose kind is `'distill'`. */
export interface DistillWorkloadPipelineConfig {
  /** Ordered list of distillation stages. */
  stagePlan: DistillStagePlanEntry[];
  studentGraphMode: string;
  temperature: number;
  alphaKd: number;
  alphaCe: number;
  tripletMargin: number;
  sourceLangs: string[] | null;
  targetLangs: string[] | null;
  pairAllowlist: string[] | null;
  strictPairContract: boolean;
  /** Free-form subset specification, or `null` when absent. */
  subsetSpec: Record<string, unknown> | null;
}
114
+
/** Normalized training workload pack. */
export interface TrainingWorkloadPack {
  schemaVersion: number;
  /** Workload discriminator. */
  kind: TrainingWorkloadKind;
  id: string;
  description: string;
  claimBoundary: string;
  seed: number;
  baseModelId: string;
  studentModelId: string | null;
  teacherModelId: string | null;
  datasetId: string;
  datasetPath: string | null;
  evalDatasets: TrainingEvalDataset[];
  trainingSchemaVersion: number;
  checkpointEvery: number;
  selectionMetric: string;
  selectionGoal: TrainingSelectionGoal;
  surfaceSupport: TrainingSurfaceSupport;
  training: TrainingWorkloadTrainingConfig;
  /** Kind-specific pipeline section; other kinds fall back to an untyped record. */
  pipeline: LoRAWorkloadPipelineConfig | DistillWorkloadPipelineConfig | Record<string, unknown>;
  configHash: string;
}
137
+
/** Result of `loadTrainingWorkloadPack`: the normalized pack plus details of the loaded source. */
export interface LoadedTrainingWorkload {
  absolutePath: string;
  path: string;
  /** NOTE(review): presumably the unparsed file contents; confirm against the loader. */
  raw: string;
  workloadSha256: string;
  /** The normalized workload pack. */
  workload: TrainingWorkloadPack;
}
145
+
/** Schema version constant for training workload packs. */
export declare const TRAINING_WORKLOAD_SCHEMA_VERSION: number;

/** Every value of `TrainingWorkloadKind`. */
export declare const TRAINING_WORKLOAD_KINDS: readonly TrainingWorkloadKind[];

/** Every value of `TrainingSurfaceSupport`. */
export declare const TRAINING_WORKLOAD_SURFACE_SUPPORT: readonly TrainingSurfaceSupport[];

/** Every value of `TrainingSelectionGoal`. */
export declare const TRAINING_SELECTION_GOALS: readonly TrainingSelectionGoal[];

/** Every value of `TrainingEvalKind`. */
export declare const TRAINING_EVAL_KINDS: readonly TrainingEvalKind[];
151
+
/**
 * Converts a raw workload payload into a `TrainingWorkloadPack`.
 *
 * @param payload - Raw workload object.
 * @param context - Optional context; NOTE(review): `label` is presumably used
 *   as the prefix of validation error messages, confirm in the implementation.
 * @returns The normalized workload pack.
 */
export declare function normalizeTrainingWorkloadPack(
  payload: Record<string, unknown>,
  context?: { label?: string }
): TrainingWorkloadPack;
156
+
/**
 * Loads a training workload pack.
 *
 * @param input - NOTE(review): presumably a workload file path or registry id;
 *   confirm in the implementation.
 * @param options - Optional settings; `registryPath` may be `null` or omitted.
 * @returns A promise for the loaded workload.
 */
export declare function loadTrainingWorkloadPack(
  input: string,
  options?: { registryPath?: string | null }
): Promise<LoadedTrainingWorkload>;
161
+
/**
 * Serializes a loaded workload into its lock representation.
 *
 * @param loadedWorkload - Workload previously returned by `loadTrainingWorkloadPack`.
 * @returns The lock contents as a string.
 */
export declare function serializeTrainingWorkloadLock(
  loadedWorkload: LoadedTrainingWorkload
): string;
@@ -0,0 +1,539 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import { resolve } from 'node:path';
3
+
4
+ import { isPlainObject } from '../utils/plain-object.js';
5
+ import { sha256Hex } from '../utils/sha256.js';
6
+ import { VALID_LORA_TARGET_MODULES } from '../config/schema/adapter.schema.js';
7
+
/** Schema version constant for training workload packs. */
export const TRAINING_WORKLOAD_SCHEMA_VERSION = 1;

/** Accepted workload kinds. */
export const TRAINING_WORKLOAD_KINDS = Object.freeze([
  'lora',
  'distill',
  'ul',
]);

/** Accepted `surfaceSupport` values. */
export const TRAINING_WORKLOAD_SURFACE_SUPPORT = Object.freeze([
  'node',
  'browser',
  'both',
]);

/** Accepted `selectionGoal` values. */
export const TRAINING_SELECTION_GOALS = Object.freeze([
  'max',
  'min',
]);

/** Accepted `evalKind` values. */
export const TRAINING_EVAL_KINDS = Object.freeze([
  'translation',
  'text_generation',
  'classification',
  'retrieval',
  'custom',
]);

/** Legacy test ids that mark a payload without an explicit kind as a distill workload. */
const LEGACY_DISTILL_TEST_IDS = Object.freeze([
  'distill-stage-a',
  'distill-stage-b',
]);
21
+
/**
 * Returns a copy of `value` in which every plain object has its keys in
 * sorted order, recursing through arrays and nested plain objects.
 * Anything that is neither an array nor a plain object is returned as-is.
 */
function stableSortObject(value) {
  if (Array.isArray(value)) {
    return value.map((item) => stableSortObject(item));
  }
  if (isPlainObject(value)) {
    const ordered = {};
    Object.keys(value)
      .sort()
      .forEach((name) => {
        ordered[name] = stableSortObject(value[name]);
      });
    return ordered;
  }
  return value;
}
35
+
/** Serializes `value` to JSON after putting object keys into sorted order. */
function stableJson(value) {
  const ordered = stableSortObject(value);
  return JSON.stringify(ordered);
}
39
+
/**
 * Validates that `value` is a string with non-whitespace content and returns
 * it trimmed.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing or blank value into `null`.
 * @returns The trimmed string, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional), is not a string,
 *   or is blank (and not optional).
 */
function asNonEmptyString(value, label, options = {}) {
  // Resolved lazily so `options` is only read when the value is absent or blank.
  const absent = (message) => {
    if (options.optional === true) return null;
    throw new Error(message);
  };

  if (value === undefined || value === null) {
    return absent(`${label} is required.`);
  }
  if (typeof value !== 'string') {
    throw new Error(`${label} must be a string.`);
  }
  const text = value.trim();
  return text ? text : absent(`${label} must not be empty.`);
}
55
+
/**
 * Validates that `value` is an integer >= 1 and returns it as a number.
 *
 * Numbers and numeric strings are accepted. Other types (booleans, arrays,
 * objects) are rejected rather than passed through `Number()`, which would
 * otherwise turn `true` into 1 and `[5]` into 5. Blank strings, including
 * whitespace-only ones, are treated as "not provided".
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing value into `null`.
 * @returns The parsed integer, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional) or is not a
 *   positive integer.
 */
function asPositiveInteger(value, label, options = {}) {
  const isBlankString = typeof value === 'string' && value.trim() === '';
  if (value === undefined || value === null || isBlankString) {
    if (options.optional === true) return null;
    throw new Error(`${label} is required.`);
  }
  // Only numbers and strings carry a numeric meaning here; everything else
  // is forced to NaN so it fails the integer check below.
  const isNumeric = typeof value === 'number' || typeof value === 'string';
  const parsed = isNumeric ? Number(value) : Number.NaN;
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`${label} must be a positive integer.`);
  }
  return parsed;
}
67
+
/**
 * Validates that `value` is an integer >= 0 and returns it as a number.
 *
 * Numbers and numeric strings are accepted. Other types (booleans, arrays,
 * objects) are rejected rather than passed through `Number()`, which would
 * otherwise turn `false` and `[]` into 0. Blank strings, including
 * whitespace-only ones, are treated as "not provided" rather than parsed as 0.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing value into `null`.
 * @returns The parsed integer, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional) or is not a
 *   non-negative integer.
 */
function asNonNegativeInteger(value, label, options = {}) {
  const isBlankString = typeof value === 'string' && value.trim() === '';
  if (value === undefined || value === null || isBlankString) {
    if (options.optional === true) return null;
    throw new Error(`${label} is required.`);
  }
  // Only numbers and strings carry a numeric meaning here; everything else
  // is forced to NaN so it fails the integer check below.
  const isNumeric = typeof value === 'number' || typeof value === 'string';
  const parsed = isNumeric ? Number(value) : Number.NaN;
  if (!Number.isInteger(parsed) || parsed < 0) {
    throw new Error(`${label} must be a non-negative integer.`);
  }
  return parsed;
}
79
+
/**
 * Validates that `value` is a finite number and returns it as a number.
 *
 * Numbers and numeric strings are accepted. Other types (booleans, arrays,
 * objects) are rejected rather than passed through `Number()`, which would
 * otherwise turn `true` into 1 and `[]` into 0. Blank strings, including
 * whitespace-only ones, are treated as "not provided" rather than parsed as 0.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing value into `null`.
 * @returns The parsed number, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional) or is not finite.
 */
function asFiniteNumber(value, label, options = {}) {
  const isBlankString = typeof value === 'string' && value.trim() === '';
  if (value === undefined || value === null || isBlankString) {
    if (options.optional === true) return null;
    throw new Error(`${label} is required.`);
  }
  // Only numbers and strings carry a numeric meaning here; everything else
  // is forced to NaN so it fails the finiteness check below.
  const isNumeric = typeof value === 'number' || typeof value === 'string';
  const parsed = isNumeric ? Number(value) : Number.NaN;
  if (!Number.isFinite(parsed)) {
    throw new Error(`${label} must be a finite number.`);
  }
  return parsed;
}
91
+
/**
 * Validates that `value` is a real boolean; no truthiness coercion is applied.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing value into `null`.
 * @returns The boolean, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional) or is not a boolean.
 */
function asBoolean(value, label, options = {}) {
  const provided = value !== undefined && value !== null;
  if (provided) {
    if (typeof value === 'boolean') {
      return value;
    }
    throw new Error(`${label} must be boolean.`);
  }
  if (options.optional === true) {
    return null;
  }
  throw new Error(`${label} is required.`);
}
102
+
/**
 * Validates that `value` is an array of non-empty strings and returns a new
 * array of the trimmed entries.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages; entries are reported as
 *   `label[index]`.
 * @param options - `optional: true` turns a missing value into `null`;
 *   `allowEmpty: true` permits an empty array.
 * @returns The normalized array, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional), is not an
 *   array, contains an invalid entry, or is empty without `allowEmpty`.
 */
function asStringArray(value, label, options = {}) {
  const provided = value !== undefined && value !== null;
  if (!provided) {
    if (options.optional === true) {
      return null;
    }
    throw new Error(`${label} is required.`);
  }
  if (!Array.isArray(value)) {
    throw new Error(`${label} must be an array of strings.`);
  }
  const items = [];
  for (let position = 0; position < value.length; position += 1) {
    items.push(asNonEmptyString(value[position], `${label}[${position}]`));
  }
  const emptyAllowed = options.allowEmpty === true;
  if (!emptyAllowed && items.length === 0) {
    throw new Error(`${label} must not be empty.`);
  }
  return items;
}
117
+
/**
 * Validates that `value` is a non-empty string that appears in `allowed`.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param allowed - List of accepted strings.
 * @param options - Forwarded to `asNonEmptyString` (`optional: true` yields `null`).
 * @returns The trimmed value, or `null` for an absent optional value.
 * @throws Error when the string checks fail or the value is not in `allowed`.
 */
function asEnum(value, label, allowed, options = {}) {
  const candidate = asNonEmptyString(value, label, options);
  if (candidate !== null && !allowed.includes(candidate)) {
    throw new Error(`${label} must be one of ${allowed.join(', ')}.`);
  }
  return candidate;
}
126
+
/**
 * Validates that `value` is a plain object and returns it unchanged.
 *
 * @param value - Candidate value.
 * @param label - Field name used in error messages.
 * @param options - `optional: true` turns a missing value into `null`.
 * @returns The same object, or `null` for an absent optional value.
 * @throws Error when the value is missing (and not optional) or is not a
 *   plain object.
 */
function asObject(value, label, options = {}) {
  const provided = value !== undefined && value !== null;
  if (provided) {
    if (isPlainObject(value)) {
      return value;
    }
    throw new Error(`${label} must be an object.`);
  }
  if (options.optional === true) {
    return null;
  }
  throw new Error(`${label} is required.`);
}
137
+
/**
 * Determines the workload kind of a payload, falling back to legacy hints
 * when no explicit kind is present.
 *
 * Resolution order:
 *   1. `payload.kind` (trimmed, if a non-blank string)
 *   2. `payload.workloadKind` (trimmed, if a non-blank string)
 *   3. the prefix of `payload.id` (`distill-`, `lora-`, `ul-`, `ul_training`)
 *   4. a non-empty `payload.trainingTests` array made up only of legacy
 *      distill test ids
 *
 * The explicit values are returned without checking them against the list of
 * known kinds.
 *
 * @param payload - Raw workload payload; may be null or undefined.
 * @param contextLabel - Prefix used in the error message.
 * @returns The inferred kind string.
 * @throws Error when none of the hints identify a kind.
 */
function inferLegacyKind(payload, contextLabel) {
  const explicitKind = typeof payload?.kind === 'string' ? payload.kind.trim() : '';
  if (explicitKind) return explicitKind;

  const workloadKind = typeof payload?.workloadKind === 'string' ? payload.workloadKind.trim() : '';
  if (workloadKind) return workloadKind;

  const id = String(payload?.id || '').trim();
  if (id.startsWith('distill-')) return 'distill';
  if (id.startsWith('lora-')) return 'lora';
  // 'ul-' already covers ids that start with 'ul-training'.
  if (id.startsWith('ul-') || id.startsWith('ul_training')) return 'ul';

  const tests = payload?.trainingTests;
  // An empty list must not count: `[].every(...)` is vacuously true and
  // would classify a payload with no tests at all as a distill workload.
  if (
    Array.isArray(tests)
    && tests.length > 0
    && tests.every((entry) => LEGACY_DISTILL_TEST_IDS.includes(String(entry)))
  ) {
    return 'distill';
  }
  throw new Error(`${contextLabel}.kind is required.`);
}
154
+
/**
 * Normalizes the optimizer scheduler section, filling a default for every
 * field that is absent.
 *
 * @param value - Raw scheduler object; may be null or undefined.
 * @param label - Field path used in error messages.
 * @returns A scheduler object with all fields populated.
 * @throws Error when a provided field fails validation.
 */
function normalizeScheduler(value, label) {
  const source = asObject(value, label, { optional: true }) || {};
  const lenient = { optional: true };

  // Validation runs in the same order as the keys of the returned object.
  const enabled = source.enabled === true;
  const type = asNonEmptyString(source.type ?? 'constant', `${label}.type`);
  const warmupSteps = asNonNegativeInteger(source.warmupSteps ?? 0, `${label}.warmupSteps`, lenient) ?? 0;
  const stepSize = asPositiveInteger(source.stepSize ?? 1, `${label}.stepSize`, lenient) ?? 1;
  const gamma = asFiniteNumber(source.gamma ?? 1, `${label}.gamma`, lenient) ?? 1;
  const totalSteps = asPositiveInteger(source.totalSteps ?? 1, `${label}.totalSteps`, lenient) ?? 1;
  const minLr = asFiniteNumber(source.minLr ?? 0, `${label}.minLr`, lenient) ?? 0;

  return { enabled, type, warmupSteps, stepSize, gamma, totalSteps, minLr };
}
171
+
/**
 * Normalizes the `training` section of a workload.
 *
 * `precision` and `gradientClipping` may each be given in shorthand:
 * a non-object `precision` is used as the activations precision, and a
 * non-object `gradientClipping` is used as `maxNorm`.
 *
 * @param value - Raw training object (required).
 * @param label - Field path used in error messages.
 * @returns The normalized training configuration.
 * @throws Error when a required field is missing or a field fails validation.
 */
function normalizeTrainingConfig(value, label) {
  const source = asObject(value, label);
  const rawOptimizer = asObject(source.optimizer, `${label}.optimizer`);
  const lenient = { optional: true };

  let rawPrecision = source.precision;
  if (!isPlainObject(rawPrecision)) {
    rawPrecision = { activations: source.precision ?? 'f16' };
  }
  let rawClipping = source.gradientClipping;
  if (!isPlainObject(rawClipping)) {
    rawClipping = { maxNorm: source.gradientClipping ?? 1 };
  }

  const optimizerLabel = `${label}.optimizer`;
  const optimizer = {
    type: asNonEmptyString(rawOptimizer.type ?? 'adam', `${optimizerLabel}.type`),
    lr: asFiniteNumber(rawOptimizer.lr, `${optimizerLabel}.lr`),
    beta1: asFiniteNumber(rawOptimizer.beta1 ?? 0.9, `${optimizerLabel}.beta1`, lenient) ?? 0.9,
    beta2: asFiniteNumber(rawOptimizer.beta2 ?? 0.999, `${optimizerLabel}.beta2`, lenient) ?? 0.999,
    eps: asFiniteNumber(rawOptimizer.eps ?? 1e-8, `${optimizerLabel}.eps`, lenient) ?? 1e-8,
    weightDecay: asFiniteNumber(rawOptimizer.weightDecay ?? 0, `${optimizerLabel}.weightDecay`, lenient) ?? 0,
    scheduler: normalizeScheduler(rawOptimizer.scheduler, `${optimizerLabel}.scheduler`),
  };

  const batchSize = asPositiveInteger(source.batchSize, `${label}.batchSize`);
  const accumSteps = asPositiveInteger(source.accumSteps ?? 1, `${label}.accumSteps`, lenient) ?? 1;
  const steps = asPositiveInteger(source.steps, `${label}.steps`);

  const precisionLabel = `${label}.precision`;
  const precision = {
    activations: asNonEmptyString(rawPrecision.activations ?? 'f16', `${precisionLabel}.activations`),
    gradients: asNonEmptyString(rawPrecision.gradients ?? 'f32', `${precisionLabel}.gradients`),
    loraParams: asNonEmptyString(rawPrecision.loraParams ?? 'f32', `${precisionLabel}.loraParams`),
  };

  const gradientClipping = {
    maxNorm: asFiniteNumber(rawClipping.maxNorm ?? 1, `${label}.gradientClipping.maxNorm`, lenient) ?? 1,
  };

  return { optimizer, batchSize, accumSteps, steps, precision, gradientClipping };
}
204
+
/**
 * Normalizes the list of eval dataset entries.
 *
 * A non-array `value` yields an empty list. Each entry accepts the legacy
 * aliases `path` (for `datasetPath`) and `kind` (for `evalKind`).
 *
 * @param value - Raw list of eval dataset objects.
 * @param label - Field path used in error messages; entries are reported as
 *   `label[index]`.
 * @returns A new array of normalized eval dataset entries.
 * @throws Error when an entry or one of its fields fails validation.
 */
function normalizeEvalDatasets(value, label) {
  if (!Array.isArray(value)) {
    return [];
  }
  const lenient = { optional: true };
  const lenientList = { optional: true, allowEmpty: true };

  return value.map((entry, index) => {
    const at = `${label}[${index}]`;
    const dataset = asObject(entry, at);
    const rawPolicy = asObject(dataset.decodePolicy, `${at}.decodePolicy`, lenient);

    // Validation runs in the same order as the keys of the returned object.
    const id = asNonEmptyString(dataset.id, `${at}.id`);
    const datasetPath = asNonEmptyString(dataset.datasetPath ?? dataset.path, `${at}.datasetPath`);
    const evalKind = asEnum(
      dataset.evalKind ?? dataset.kind ?? 'text_generation',
      `${at}.evalKind`,
      TRAINING_EVAL_KINDS
    );
    const metrics = asStringArray(dataset.metrics ?? [], `${at}.metrics`, lenientList) ?? [];

    let decodePolicy = null;
    if (rawPolicy) {
      const maxTokens = asPositiveInteger(rawPolicy.maxTokens, `${at}.decodePolicy.maxTokens`, lenient);
      const stopOnEos = asBoolean(rawPolicy.stopOnEos ?? true, `${at}.decodePolicy.stopOnEos`, lenient) ?? true;
      decodePolicy = { maxTokens, stopOnEos };
    }

    const scoreboardColumns = asStringArray(
      dataset.scoreboardColumns ?? [],
      `${at}.scoreboardColumns`,
      lenientList
    ) ?? [];
    const sourceLangs = asStringArray(dataset.sourceLangs, `${at}.sourceLangs`, lenientList);
    const targetLangs = asStringArray(dataset.targetLangs, `${at}.targetLangs`, lenientList);
    const pairAllowlist = asStringArray(dataset.pairAllowlist, `${at}.pairAllowlist`, lenientList);

    return {
      id,
      datasetPath,
      evalKind,
      metrics,
      decodePolicy,
      scoreboardColumns,
      sourceLangs,
      targetLangs,
      pairAllowlist,
    };
  });
}
251
+
/**
 * Normalizes the freeze section into five boolean flags. A flag is `true`
 * only when the source value is exactly `true`; a missing section yields all
 * `false`.
 *
 * @param value - Raw freeze object; may be null or undefined.
 * @param label - Field path used in error messages.
 * @returns An object with `encoder`, `prior`, `decoder`, `base` and `lora` flags.
 * @throws Error when `value` is provided but is not a plain object.
 */
function normalizeFreezeConfig(value, label) {
  const source = asObject(value, label, { optional: true }) || {};
  const flags = {};
  for (const part of ['encoder', 'prior', 'decoder', 'base', 'lora']) {
    flags[part] = source[part] === true;
  }
  return flags;
}
262
+
/**
 * Normalizes a distillation stage plan.
 *
 * Each stage accepts the legacy aliases `metric`, `goal` and `name` for
 * `selectionMetric`, `selectionGoal` and `id`. `checkpointEvery` falls back
 * to the stage's own `steps` when absent.
 *
 * @param value - Raw stage list; must be a non-empty array.
 * @param label - Field path used in error messages; stages are reported as
 *   `label[index]`.
 * @returns A new array of normalized stage entries.
 * @throws Error when the list is missing or empty, or a stage field fails
 *   validation.
 */
function normalizeStagePlan(value, label) {
  const hasStages = Array.isArray(value) && value.length > 0;
  if (!hasStages) {
    throw new Error(`${label} must be a non-empty array.`);
  }
  return value.map((entry, index) => {
    const at = `${label}[${index}]`;
    const stage = asObject(entry, at);

    // The selection fields are validated before the remaining fields.
    const selectionMetric = asNonEmptyString(
      stage.selectionMetric ?? stage.metric ?? 'bleu',
      `${at}.selectionMetric`
    );
    const selectionGoal = asEnum(
      stage.selectionGoal ?? stage.goal ?? 'max',
      `${at}.selectionGoal`,
      TRAINING_SELECTION_GOALS
    );

    const id = asNonEmptyString(stage.id ?? stage.name, `${at}.id`);
    const trainingStage = asNonEmptyString(stage.trainingStage, `${at}.trainingStage`);
    const objective = asNonEmptyString(stage.objective, `${at}.objective`);
    const steps = asPositiveInteger(stage.steps, `${at}.steps`);
    const checkpointEvery = asPositiveInteger(stage.checkpointEvery ?? stage.steps, `${at}.checkpointEvery`);
    const evalSchedule = asNonEmptyString(stage.evalSchedule ?? 'on_checkpoint', `${at}.evalSchedule`);

    return {
      id,
      trainingStage,
      objective,
      steps,
      checkpointEvery,
      selectionMetric,
      selectionGoal,
      evalSchedule,
    };
  });
}
296
+
/**
 * Normalizes the LoRA pipeline section of a workload.
 *
 * Every entry of `adapter.targetModules` must be listed in
 * `VALID_LORA_TARGET_MODULES`. The `export` and `activation` sections are
 * optional and come back as `null` when absent.
 *
 * @param value - Raw LoRA pipeline object (required).
 * @param label - Field path used in error messages.
 * @returns The normalized LoRA pipeline configuration.
 * @throws Error when a required field is missing, a field fails validation,
 *   or a target module is unsupported.
 */
function normalizeLoraConfig(value, label) {
  const source = asObject(value, label);
  const rawAdapter = asObject(source.adapter, `${label}.adapter`);
  const rawExport = asObject(source.export, `${label}.export`, { optional: true });
  const rawActivation = asObject(source.activation, `${label}.activation`, { optional: true });

  const targetModules = asStringArray(rawAdapter.targetModules, `${label}.adapter.targetModules`);
  // Entries are non-empty strings, so `undefined` reliably means "none found".
  const unsupported = targetModules.find((moduleName) => !VALID_LORA_TARGET_MODULES.includes(moduleName));
  if (unsupported !== undefined) {
    throw new Error(`${label}.adapter.targetModules contains unsupported module "${unsupported}".`);
  }

  const datasetFormat = asNonEmptyString(source.datasetFormat ?? 'prompt_completion_jsonl', `${label}.datasetFormat`);
  const taskType = asNonEmptyString(source.taskType ?? 'text_generation', `${label}.taskType`);

  const adapter = {
    rank: asPositiveInteger(rawAdapter.rank, `${label}.adapter.rank`),
    alpha: asFiniteNumber(rawAdapter.alpha, `${label}.adapter.alpha`),
    dropout: asFiniteNumber(rawAdapter.dropout ?? 0, `${label}.adapter.dropout`, { optional: true }) ?? 0,
    targetModules,
  };

  const freeze = normalizeFreezeConfig(source.freeze, `${label}.freeze`);

  let exportSection = null;
  if (rawExport) {
    exportSection = {
      // Export is on unless it is explicitly switched off.
      enabled: rawExport.enabled !== false,
      atCheckpoints: rawExport.atCheckpoints === true,
      select: asNonEmptyString(rawExport.select ?? 'best', `${label}.export.select`),
      id: asNonEmptyString(rawExport.id, `${label}.export.id`, { optional: true }),
      name: asNonEmptyString(rawExport.name, `${label}.export.name`, { optional: true }),
      format: asNonEmptyString(rawExport.format ?? 'manifest_json', `${label}.export.format`),
    };
  }

  let activationSection = null;
  if (rawActivation) {
    activationSection = {
      enabled: rawActivation.enabled === true,
      autoActivate: rawActivation.autoActivate === true,
      smokePrompt: asNonEmptyString(rawActivation.smokePrompt, `${label}.activation.smokePrompt`, { optional: true }),
    };
  }

  return {
    datasetFormat,
    taskType,
    adapter,
    freeze,
    export: exportSection,
    activation: activationSection,
  };
}
337
+
/**
 * Normalizes the distillation pipeline section of a workload.
 *
 * `stagePlan` is required; every other field has a default or may be `null`.
 *
 * @param value - Raw distillation pipeline object (required).
 * @param label - Field path used in error messages.
 * @returns The normalized distillation pipeline configuration.
 * @throws Error when the stage plan is invalid or a field fails validation.
 */
function normalizeDistillConfig(value, label) {
  const source = asObject(value, label);
  const lenient = { optional: true };
  const lenientList = { optional: true, allowEmpty: true };

  // Validation runs in the same order as the keys of the returned object.
  const stagePlan = normalizeStagePlan(source.stagePlan, `${label}.stagePlan`);
  const studentGraphMode = asNonEmptyString(
    source.studentGraphMode ?? 'transformer_full',
    `${label}.studentGraphMode`
  );
  const temperature = asFiniteNumber(source.temperature ?? 1, `${label}.temperature`, lenient) ?? 1;
  const alphaKd = asFiniteNumber(source.alphaKd ?? 1, `${label}.alphaKd`, lenient) ?? 1;
  const alphaCe = asFiniteNumber(source.alphaCe ?? 0, `${label}.alphaCe`, lenient) ?? 0;
  const tripletMargin = asFiniteNumber(source.tripletMargin ?? 0.2, `${label}.tripletMargin`, lenient) ?? 0.2;
  const sourceLangs = asStringArray(source.sourceLangs, `${label}.sourceLangs`, lenientList);
  const targetLangs = asStringArray(source.targetLangs, `${label}.targetLangs`, lenientList);
  const pairAllowlist = asStringArray(source.pairAllowlist, `${label}.pairAllowlist`, lenientList);
  const strictPairContract = asBoolean(
    source.strictPairContract ?? false,
    `${label}.strictPairContract`,
    lenient
  ) ?? false;
  const subsetSpec = asObject(source.subsetSpec, `${label}.subsetSpec`, lenient);

  return {
    stagePlan,
    studentGraphMode,
    temperature,
    alphaKd,
    alphaCe,
    tripletMargin,
    sourceLangs,
    targetLangs,
    pairAllowlist,
    strictPairContract,
    subsetSpec,
  };
}
361
+
362
/**
 * Converts a legacy `ul` workload payload into the normalized workload shape.
 *
 * `kind` is fixed to 'ul' and `studentModelId` / `teacherModelId` are always
 * `null`. Several fields that the payload may omit receive defaults here
 * (claim boundary text, `baseModelId` 'training', `datasetId` from
 * `ulDatasetId` or 'ul', `checkpointEvery` 1, `selectionMetric` 'total_loss',
 * `selectionGoal` 'min', `surfaceSupport` 'node', and a default training
 * block). `schemaVersion`, `id`, `description`, `seed`,
 * `trainingSchemaVersion` and `trainingTests` get no default and are handed
 * straight to their validators.
 *
 * @param payload Parsed legacy workload payload.
 * @param contextLabel Path prefix used to build the label handed to each validator.
 * @returns The normalized workload (without hashes).
 */
function normalizeLegacyUlPayload(payload, contextLabel) {
  // Label for a single field, e.g. `<contextLabel>.seed`.
  const at = (field) => `${contextLabel}.${field}`;

  // Validation runs top to bottom in the same order as the output keys.
  const schemaVersion = asPositiveInteger(payload.schemaVersion, at('schemaVersion'));
  const id = asNonEmptyString(payload.id, at('id'));
  const description = asNonEmptyString(payload.description, at('description'));
  const claimBoundary = asNonEmptyString(
    payload.claimBoundary ?? 'Practical UL workflow quality traceability.',
    at('claimBoundary')
  );
  const seed = asPositiveInteger(payload.seed, at('seed'));
  const baseModelId = asNonEmptyString(payload.baseModelId ?? 'training', at('baseModelId'));
  const datasetId = asNonEmptyString(
    payload.datasetId ?? payload.ulDatasetId ?? 'ul',
    at('datasetId')
  );
  const datasetPath = asNonEmptyString(payload.datasetPath ?? null, at('datasetPath'), { optional: true });
  const evalDatasets = normalizeEvalDatasets(payload.evalDatasets ?? [], at('evalDatasets'));
  const trainingSchemaVersion = asPositiveInteger(
    payload.trainingSchemaVersion,
    at('trainingSchemaVersion')
  );
  const checkpointEvery = asPositiveInteger(payload.checkpointEvery ?? 1, at('checkpointEvery'));
  const selectionMetric = asNonEmptyString(payload.selectionMetric ?? 'total_loss', at('selectionMetric'));
  const selectionGoal = asEnum(payload.selectionGoal ?? 'min', at('selectionGoal'), TRAINING_SELECTION_GOALS);
  const surfaceSupport = asEnum(
    payload.surfaceSupport ?? 'node',
    at('surfaceSupport'),
    TRAINING_WORKLOAD_SURFACE_SUPPORT
  );

  // Legacy payloads may omit `training` entirely or in part; fill every gap
  // before handing the block to the shared training normalizer.
  const given = payload.training;
  const trainingInput = {
    optimizer: given?.optimizer ?? { type: 'adam', lr: 2e-4 },
    batchSize: given?.batchSize ?? 1,
    accumSteps: given?.accumSteps ?? 1,
    // `trainingBenchSteps` is the top-level fallback for the step count.
    steps: given?.steps ?? payload.trainingBenchSteps ?? 1,
    precision: given?.precision ?? {
      activations: 'f16',
      gradients: 'f32',
      loraParams: 'f32',
    },
    gradientClipping: given?.gradientClipping ?? { maxNorm: 1 },
  };
  const training = normalizeTrainingConfig(trainingInput, at('training'));

  // Note: the label is `<contextLabel>.trainingTests`, matching where the
  // field lives in the legacy payload rather than in the output.
  const trainingTests = asStringArray(payload.trainingTests, at('trainingTests'), { allowEmpty: false });

  return {
    schemaVersion,
    kind: 'ul',
    id,
    description,
    claimBoundary,
    seed,
    baseModelId,
    studentModelId: null,
    teacherModelId: null,
    datasetId,
    datasetPath,
    evalDatasets,
    trainingSchemaVersion,
    checkpointEvery,
    selectionMetric,
    selectionGoal,
    surfaceSupport,
    training,
    pipeline: {
      legacyWorkloadType: 'ul',
      trainingTests,
    },
  };
}
420
+
421
/**
 * Validates and normalizes a parsed training workload payload.
 *
 * Flow:
 *  1. The kind comes from `inferLegacyKind`; 'ul' payloads take the legacy
 *     path and skip the schema-version equality check below.
 *  2. Any other kind must be listed in `TRAINING_WORKLOAD_KINDS`, and
 *     `schemaVersion` must equal `TRAINING_WORKLOAD_SCHEMA_VERSION`.
 *  3. Common fields are validated, then the kind-specific pipeline:
 *     'lora' reads `payload.lora ?? payload.pipeline`; 'distill' reads
 *     `payload.distill ?? payload.pipeline` and additionally requires
 *     `studentModelId`, plus `teacherModelId` when any stage objective is
 *     not 'sft'. Other kinds keep `pipeline: null`.
 *
 * @param payload Parsed workload payload.
 * @param context Optional settings; `context.label` prefixes validation
 *   labels and error messages (defaults to 'training workload').
 * @returns The normalized workload with `configHash` added by `withHashes`.
 * @throws {Error} On an unsupported kind, a schema-version mismatch, or a
 *   missing student/teacher model id for distill workloads.
 */
export function normalizeTrainingWorkloadPack(payload, context = {}) {
  const contextLabel = context.label || 'training workload';
  // Label for a single field, e.g. `<contextLabel>.seed`.
  const at = (field) => `${contextLabel}.${field}`;

  const kind = inferLegacyKind(payload, contextLabel);
  if (kind === 'ul') {
    return withHashes(normalizeLegacyUlPayload(payload, contextLabel));
  }

  const kindIsSupported = TRAINING_WORKLOAD_KINDS.includes(kind);
  if (!kindIsSupported) {
    throw new Error(`${contextLabel}.kind must be one of ${TRAINING_WORKLOAD_KINDS.join(', ')}.`);
  }

  const schemaVersion = asPositiveInteger(payload.schemaVersion, at('schemaVersion'));
  if (schemaVersion !== TRAINING_WORKLOAD_SCHEMA_VERSION) {
    throw new Error(`${contextLabel}.schemaVersion must be ${TRAINING_WORKLOAD_SCHEMA_VERSION}.`);
  }

  // Fields shared by every non-legacy kind, validated in output order.
  const fields = {
    schemaVersion,
    kind,
    id: asNonEmptyString(payload.id, at('id')),
    description: asNonEmptyString(payload.description, at('description')),
    claimBoundary: asNonEmptyString(payload.claimBoundary, at('claimBoundary')),
    seed: asPositiveInteger(payload.seed, at('seed')),
    baseModelId: asNonEmptyString(payload.baseModelId, at('baseModelId')),
    studentModelId: asNonEmptyString(payload.studentModelId, at('studentModelId'), { optional: true }),
    teacherModelId: asNonEmptyString(payload.teacherModelId, at('teacherModelId'), { optional: true }),
    datasetId: asNonEmptyString(payload.datasetId, at('datasetId')),
    datasetPath: asNonEmptyString(payload.datasetPath, at('datasetPath')),
    evalDatasets: normalizeEvalDatasets(payload.evalDatasets ?? [], at('evalDatasets')),
    trainingSchemaVersion: asPositiveInteger(payload.trainingSchemaVersion, at('trainingSchemaVersion')),
    checkpointEvery: asPositiveInteger(payload.checkpointEvery, at('checkpointEvery')),
    selectionMetric: asNonEmptyString(payload.selectionMetric, at('selectionMetric')),
    selectionGoal: asEnum(payload.selectionGoal, at('selectionGoal'), TRAINING_SELECTION_GOALS),
    surfaceSupport: asEnum(payload.surfaceSupport, at('surfaceSupport'), TRAINING_WORKLOAD_SURFACE_SUPPORT),
    training: normalizeTrainingConfig(payload.training, at('training')),
  };

  // Kind-specific pipeline; stays null for kinds without one.
  let pipeline = null;
  switch (kind) {
    case 'lora':
      pipeline = normalizeLoraConfig(payload.lora ?? payload.pipeline, at('lora'));
      break;
    case 'distill': {
      pipeline = normalizeDistillConfig(payload.distill ?? payload.pipeline, at('distill'));
      // A teacher is only needed once at least one stage is not plain SFT.
      const sftOnly = pipeline.stagePlan.every((stage) => stage.objective === 'sft');
      if (!sftOnly && !fields.teacherModelId) {
        throw new Error(`${contextLabel}.teacherModelId is required when stagePlan includes non-SFT stages.`);
      }
      if (!fields.studentModelId) {
        throw new Error(`${contextLabel}.studentModelId is required for distill workloads.`);
      }
      break;
    }
    default:
      break;
  }

  // `pipeline` is spread last so it remains the final key of the workload.
  return withHashes({ ...fields, pipeline });
}
485
+
486
/**
 * Returns a shallow copy of `workload` with a `configHash` property.
 *
 * The hash is `sha256Hex` over the `stableJson` serialization of the workload
 * as passed in, i.e. computed before `configHash` is attached.
 *
 * @param workload Normalized workload object.
 * @returns A new object with the workload's own properties plus `configHash`.
 */
function withHashes(workload) {
  const canonicalJson = stableJson(workload);
  const hashed = { ...workload };
  hashed.configHash = sha256Hex(canonicalJson);
  return hashed;
}
493
+
494
/**
 * Looks up a workload id in a registry JSON file and returns the resolved
 * path of the matching workload file.
 *
 * The registry is expected to hold a `workloads` array of `{ id, path }`
 * entries; a missing or non-array `workloads` is treated as empty. Entry ids
 * are stringified and trimmed before comparison with `workloadId`.
 *
 * @param registryPath Path of the registry file (stringified, then resolved).
 * @param workloadId Workload id to look up; compared as-is.
 * @returns The resolved path of the matching entry's `path`.
 * @throws {SyntaxError} When the registry file is not valid JSON. The message
 *   names the registry file and the original parse error is kept as `cause`.
 * @throws {Error} When no entry matches `workloadId`.
 */
async function readRegistryEntryById(registryPath, workloadId) {
  const absoluteRegistryPath = resolve(String(registryPath));
  const raw = await readFile(absoluteRegistryPath, 'utf8');

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // A bare JSON.parse error does not say which file is broken; add the
    // registry path while keeping the SyntaxError type and the original error.
    const reason = error instanceof Error ? error.message : String(error);
    throw Object.assign(
      new SyntaxError(`training workload registry ${absoluteRegistryPath} is not valid JSON: ${reason}`),
      { cause: error }
    );
  }

  const workloads = Array.isArray(parsed?.workloads) ? parsed.workloads : [];
  const match = workloads.find((entry) => String(entry?.id || '').trim() === workloadId);
  if (!match) {
    throw new Error(`training workload id "${workloadId}" not found in registry ${absoluteRegistryPath}.`);
  }
  const relativePath = asNonEmptyString(match.path, `${absoluteRegistryPath}.workloads[].path`);
  // NOTE(review): `resolve` is presumably node:path's, so a relative entry
  // path is resolved against the process working directory, not against the
  // registry file's directory — confirm this is intended.
  return resolve(relativePath);
}
506
+
507
/**
 * Loads a training workload pack from a file path or a registry id, then
 * normalizes it.
 *
 * `input` is treated as a path when it ends with '.json' or contains '/' or
 * '\\'; otherwise it is looked up as an id in the registry file
 * (`options.registryPath`, default
 * 'tools/configs/training-workloads/registry.json').
 *
 * @param input Workload file path or registry workload id.
 * @param options Optional settings; `registryPath` overrides the registry
 *   file. `null` is tolerated and treated like an empty object.
 * @returns `{ absolutePath, path, raw, workloadSha256, workload }`, where
 *   `path` equals `absolutePath`, `raw` is the file text, `workloadSha256` is
 *   `sha256Hex(raw)` and `workload` is the normalized pack.
 * @throws {SyntaxError} When the workload file is not valid JSON. The message
 *   names the file and the original parse error is kept as `cause`.
 */
export async function loadTrainingWorkloadPack(input, options = {}) {
  const normalizedInput = asNonEmptyString(input, 'workload input');
  const looksLikePath = normalizedInput.endsWith('.json')
    || normalizedInput.includes('/')
    || normalizedInput.includes('\\');
  const absolutePath = looksLikePath
    ? resolve(normalizedInput)
    : await readRegistryEntryById(
      // `?.` keeps an explicit `null` options argument from crashing here.
      options?.registryPath || 'tools/configs/training-workloads/registry.json',
      normalizedInput
    );
  const raw = await readFile(absolutePath, 'utf8');

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // A bare JSON.parse error does not say which file is broken; add the
    // workload path while keeping the SyntaxError type and the original error.
    const reason = error instanceof Error ? error.message : String(error);
    throw Object.assign(
      new SyntaxError(`training workload ${absolutePath} is not valid JSON: ${reason}`),
      { cause: error }
    );
  }

  const normalized = normalizeTrainingWorkloadPack(parsed, {
    label: absolutePath,
  });
  return {
    absolutePath,
    path: absolutePath,
    raw,
    // Hash of the exact file text, independent of the normalized configHash.
    workloadSha256: sha256Hex(raw),
    workload: normalized,
  };
}
529
+
530
/**
 * Serializes a loaded workload into a lock artifact using `stableJson`.
 *
 * The lock records the current workload schema version, the fixed artifact
 * type 'training_workload_lock', the workload id, the absolute path it was
 * loaded from, the SHA-256 of the raw file, and the normalized workload.
 *
 * @param loadedWorkload Object with `workload`, `absolutePath` and
 *   `workloadSha256`, as returned by `loadTrainingWorkloadPack`.
 * @returns The `stableJson` serialization of the lock object.
 */
export function serializeTrainingWorkloadLock(loadedWorkload) {
  const { workload, absolutePath, workloadSha256 } = loadedWorkload;
  const lock = {
    schemaVersion: TRAINING_WORKLOAD_SCHEMA_VERSION,
    artifactType: 'training_workload_lock',
    workloadId: workload.id,
    workloadPath: absolutePath,
    workloadSha256,
    workload,
  };
  return stableJson(lock);
}
@@ -0,0 +1,2 @@
1
+ export declare const DOPPLER_VERSION: string; // Ambient string constant; its value lives in the implementation module, not in this hunk. Presumably the package version — TODO confirm.
2
+ export declare const DOPPLER_PROVIDER_VERSION: string; // Ambient string constant; value not visible here. Presumably the provider interface version — TODO confirm.