@metaharness/weight-eft 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -0
- package/dist/cli.d.ts +14 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +187 -0
- package/dist/cli.js.map +1 -0
- package/dist/eval.d.ts +50 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/eval.js +96 -0
- package/dist/eval.js.map +1 -0
- package/dist/export.d.ts +28 -0
- package/dist/export.d.ts.map +1 -0
- package/dist/export.js +249 -0
- package/dist/export.js.map +1 -0
- package/dist/genome.d.ts +38 -0
- package/dist/genome.d.ts.map +1 -0
- package/dist/genome.js +75 -0
- package/dist/genome.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/reward-hack.d.ts +17 -0
- package/dist/reward-hack.d.ts.map +1 -0
- package/dist/reward-hack.js +105 -0
- package/dist/reward-hack.js.map +1 -0
- package/dist/train.d.ts +112 -0
- package/dist/train.d.ts.map +1 -0
- package/dist/train.js +166 -0
- package/dist/train.js.map +1 -0
- package/dist/types.d.ts +144 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +20 -0
- package/dist/types.js.map +1 -0
- package/package.json +64 -0
package/dist/train.d.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { DpoRow, SftRow } from './types.js';
|
|
2
|
+
/** A model class we are willing to tune. Intended for the 7-14B cheap tier; the size gate accepts [1, 14]B. */
|
|
3
|
+
export interface BaseModelSpec {
|
|
4
|
+
/** HF/registry id, e.g. "Qwen/Qwen2.5-Coder-7B-Instruct". */
|
|
5
|
+
id: string;
|
|
6
|
+
/** Parameter count in billions — gated to [1, 14]. */
|
|
7
|
+
paramsB: number;
|
|
8
|
+
}
|
|
9
|
+
export type TrainStage = 'sft' | 'dpo';
|
|
10
|
+
export interface LoraConfig {
|
|
11
|
+
/** Low-rank dimension. */
|
|
12
|
+
r: number;
|
|
13
|
+
/** LoRA alpha scaling. */
|
|
14
|
+
alpha: number;
|
|
15
|
+
/** Dropout on the LoRA path. */
|
|
16
|
+
dropout: number;
|
|
17
|
+
/** Modules to adapt (attention proj by default). */
|
|
18
|
+
targetModules: string[];
|
|
19
|
+
}
|
|
20
|
+
export interface TrainConfig {
|
|
21
|
+
base: BaseModelSpec;
|
|
22
|
+
stage: TrainStage;
|
|
23
|
+
lora: LoraConfig;
|
|
24
|
+
/** Path to the JSONL training set for this stage. */
|
|
25
|
+
dataPath: string;
|
|
26
|
+
/** Output adapter directory / id. */
|
|
27
|
+
outputAdapter: string;
|
|
28
|
+
/** For DPO: the SFT checkpoint to start from (the on-policy reference). */
|
|
29
|
+
initFromAdapter?: string;
|
|
30
|
+
epochs: number;
|
|
31
|
+
learningRate: number;
|
|
32
|
+
/** Max sequence length — must fit the 7-14B context window. */
|
|
33
|
+
maxSeqLen: number;
|
|
34
|
+
batchSize: number;
|
|
35
|
+
}
|
|
36
|
+
export interface TrainRunOptions {
|
|
37
|
+
/** Hard gate: must be true to actually train. Default false → dry-run. */
|
|
38
|
+
train?: boolean;
|
|
39
|
+
/**
|
|
40
|
+
* GPU/endpoint detector. Injected for testability; defaults to a probe that
|
|
41
|
+
* checks for an env-declared endpoint or `CUDA_VISIBLE_DEVICES`. Returns a reason
|
|
42
|
+
* string when unavailable.
|
|
43
|
+
*/
|
|
44
|
+
detectGpu?: () => {
|
|
45
|
+
available: boolean;
|
|
46
|
+
detail: string;
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
/** The result of a (dry or real) train invocation. */
|
|
50
|
+
export interface TrainRunResult {
|
|
51
|
+
/** 'plan' for a dry-run, 'trained' when both gates pass (the command is reported, not spawned), 'refused' when gated out. */
|
|
52
|
+
status: 'plan' | 'trained' | 'refused';
|
|
53
|
+
/** The training plan (always emitted, even on refusal — it's the artifact). */
|
|
54
|
+
plan: TrainingPlan;
|
|
55
|
+
/** Why a real run was refused (status === 'refused'). */
|
|
56
|
+
reason?: string;
|
|
57
|
+
}
|
|
58
|
+
/** A fully-resolved, serializable training plan + the command to run it. */
|
|
59
|
+
export interface TrainingPlan {
|
|
60
|
+
config: TrainConfig;
|
|
61
|
+
/** The exact CLI command a GPU host runs to execute this plan. */
|
|
62
|
+
command: string;
|
|
63
|
+
/** A one-line human summary. */
|
|
64
|
+
summary: string;
|
|
65
|
+
}
|
|
66
|
+
export declare const DEFAULT_LORA: LoraConfig;
|
|
67
|
+
/** Default GPU/endpoint probe. Reads env only (an endpoint URL or CUDA_VISIBLE_DEVICES); it does not run nvidia-smi. */
|
|
68
|
+
export declare function defaultDetectGpu(): {
|
|
69
|
+
available: boolean;
|
|
70
|
+
detail: string;
|
|
71
|
+
};
|
|
72
|
+
/** Validate that a base model is within the tunable [1, 14]B band (7-14B is the target; 32B is refused). Throws otherwise. */
|
|
73
|
+
export declare function assertTunableSize(base: BaseModelSpec): void;
|
|
74
|
+
/** Build a default SFT config for a base model + data path. */
|
|
75
|
+
export declare function sftConfig(base: BaseModelSpec, dataPath: string, outputAdapter: string): TrainConfig;
|
|
76
|
+
/** Build a default on-policy DPO config that starts from the SFT checkpoint. */
|
|
77
|
+
export declare function dpoConfig(base: BaseModelSpec, dataPath: string, outputAdapter: string, initFromAdapter: string): TrainConfig;
|
|
78
|
+
/** Render the exact command a GPU host runs (ruvllm/MicroLoRA CLI form). */
|
|
79
|
+
export declare function buildCommand(c: TrainConfig): string;
|
|
80
|
+
/** Build the full plan (config + command + summary) for a stage. */
|
|
81
|
+
export declare function buildPlan(c: TrainConfig): TrainingPlan;
|
|
82
|
+
/**
|
|
83
|
+
* Run (or dry-run) a training stage. The hard gate: a real run requires BOTH
|
|
84
|
+
* an explicit `train:true` AND a detected GPU/endpoint. Otherwise it returns
|
|
85
|
+
* the plan (dry-run) or refuses (train requested but no GPU).
|
|
86
|
+
*/
|
|
87
|
+
export declare function runTraining(c: TrainConfig, opts?: TrainRunOptions): TrainRunResult;
|
|
88
|
+
/**
|
|
89
|
+
* Convenience: emit the full two-stage plan (SFT then on-policy DPO) for a
|
|
90
|
+
* base model and a pair of data paths. The DPO stage starts from the SFT
|
|
91
|
+
* adapter (the on-policy reference policy).
|
|
92
|
+
*/
|
|
93
|
+
export declare function twoStagePlan(base: BaseModelSpec, sftDataPath: string, dpoDataPath: string, adapterPrefix: string): {
|
|
94
|
+
sft: TrainingPlan;
|
|
95
|
+
dpo: TrainingPlan;
|
|
96
|
+
};
|
|
97
|
+
/**
|
|
98
|
+
* Thin runner-adapter: canonical-standard JSONL rows → the record shape
|
|
99
|
+
* ruvllm/MicroLoRA ingests. Kept at the runner boundary so the EXPORTED files
|
|
100
|
+
* stay in the portable standard schema (trl/axolotl/unsloth-compatible). This
|
|
101
|
+
* is a structural pass-through today (ruvllm consumes OpenAI-chat + TRL-pref
|
|
102
|
+
* directly); the seam exists so a future ingest-format change is local here.
|
|
103
|
+
*/
|
|
104
|
+
export declare function adaptSftForRunner(rows: SftRow[]): Array<{
|
|
105
|
+
messages: SftRow['messages'];
|
|
106
|
+
}>;
|
|
107
|
+
export declare function adaptDpoForRunner(rows: DpoRow[]): Array<{
|
|
108
|
+
prompt: DpoRow['prompt'];
|
|
109
|
+
chosen: DpoRow['chosen'];
|
|
110
|
+
rejected: DpoRow['rejected'];
|
|
111
|
+
}>;
|
|
112
|
+
//# sourceMappingURL=train.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train.d.ts","sourceRoot":"","sources":["../src/train.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAEjD,gFAAgF;AAChF,MAAM,WAAW,aAAa;IAC5B,6DAA6D;IAC7D,EAAE,EAAE,MAAM,CAAC;IACX,sDAAsD;IACtD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,UAAU,GAAG,KAAK,GAAG,KAAK,CAAC;AAEvC,MAAM,WAAW,UAAU;IACzB,0BAA0B;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,0BAA0B;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,gCAAgC;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,oDAAoD;IACpD,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,aAAa,CAAC;IACpB,KAAK,EAAE,UAAU,CAAC;IAClB,IAAI,EAAE,UAAU,CAAC;IACjB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,2EAA2E;IAC3E,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,0EAA0E;IAC1E,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM;QAAE,SAAS,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;CAC1D;AAED,sDAAsD;AACtD,MAAM,WAAW,cAAc;IAC7B,gFAAgF;IAChF,MAAM,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,CAAC;IACvC,+EAA+E;IAC/E,IAAI,EAAE,YAAY,CAAC;IACnB,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,4EAA4E;AAC5E,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,WAAW,CAAC;IACpB,kEAAkE;IAClE,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,OAAO,EAAE,MAAM,CAAC;CACjB;AAKD,eAAO,MAAM,YAAY,EAAE,UAK1B,CAAC;AAEF,mFAAmF;AACnF,wBAAgB,gBAAgB,IAAI;IAAE,SAAS,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAazE;AAED,uEAAuE;AACvE,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,aAAa,GAAG,IAAI,CAO3D;AAED,+DAA+D;AAC/D,wBAAgB,SAAS,CAAC,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,GAAG,WAAW,CAanG;AAED,gFAAgF;AAChF,wBAAgB,SAAS,CACvB,IAAI,EAAE,aAAa,EACnB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,EACrB,eAAe,EAAE,MAAM,GACtB,WAAW,CAcb;AAED,4EAA4E;AAC5E,wBAAgB,YAAY,CAAC,CAAC,EAAE,WAAW,GAAG,MAAM,CAmBnD;AAED,oEAAoE;AACpE,wBAAgB,SAAS,CAAC,CAAC,EAAE,WAAW,GAAG,YAAY,CAOtD;AAED;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,GAAE,eAAoB,GAAG,cAAc,CA6BtF;
AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,aAAa,EACnB,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,aAAa,EAAE,MAAM,GACpB;IAAE,GAAG,EAAE,YAAY,CAAC;IAAC,GAAG,EAAE,YAAY,CAAA;CAAE,CAO1C;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC,CAAA;CAAE,CAAC,CAEzF;AAED,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EAAE,GACb,KAAK,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC,CAAA;CAAE,CAAC,CAE7F"}
|
package/dist/train.js
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// train.ts — LoRA training runner (GPU-gated; dry-run by default).
|
|
4
|
+
//
|
|
5
|
+
// Wraps ruvllm/MicroLoRA. When the binding isn't importable (the common case in
|
|
6
|
+
// this repo — ruvllm is a separate Rust artifact) we EMIT a config + the exact
|
|
7
|
+
// command instead of importing it. The runner refuses to actually train unless
|
|
8
|
+
// BOTH (a) an explicit `--train` / `train:true` flag is passed AND (b) a GPU /
|
|
9
|
+
// inference endpoint is detected. Default is a dry-run that emits the plan.
|
|
10
|
+
//
|
|
11
|
+
// Target: 7-14B (Qwen2.5-Coder-7B / GLM-4-9B class) — NOT 32B. §59 showed a 32B
|
|
12
|
+
// q4 model spills the 16GB GPU; the cheap-tier distillation target is a model
|
|
13
|
+
// that actually fits a single commodity GPU.
|
|
14
|
+
//
|
|
15
|
+
// Stages: SFT first (distill the archive), then OPTIONAL on-policy DPO on the
|
|
16
|
+
// SFT checkpoint (preference-sharpen on cheap-vs-cheap pairs).
|
|
17
|
+
/** Smallest base-model size, in billions of parameters, accepted by the size gate. */
const MIN_PARAMS_B = 1;
/** Largest base-model size, in billions of parameters, accepted by the size gate. */
const MAX_PARAMS_B = 14;

/**
 * Default LoRA hyper-parameters: rank 16, alpha 32, 5% dropout, applied to the
 * four attention projection modules.
 */
export const DEFAULT_LORA = {
    r: 16,
    alpha: 32,
    dropout: 0.05,
    // q/k/v/o attention projections.
    targetModules: ['q', 'k', 'v', 'o'].map((prefix) => `${prefix}_proj`),
};
|
|
25
|
+
/**
 * Default GPU/endpoint probe. Reads environment variables only — it does not
 * spawn `nvidia-smi` or load any native binding.
 *
 * Resolution order:
 *  1. `WEIGHT_EFT_BASE_URL`, then `OPENAI_BASE_URL`: any non-empty value counts
 *     as an available endpoint.
 *  2. `CUDA_VISIBLE_DEVICES`: counts as available unless it is empty/blank or
 *     its first entry is empty or `-1` (the conventional way to hide every GPU).
 *
 * @returns `{ available, detail }`. `detail` names the signal that was found,
 *          or explains what to set when nothing usable was found.
 */
export function defaultDetectGpu() {
    // An OpenAI-compatible local endpoint (e.g. ruv-mac-mini / ruvultra) counts.
    const endpoint = process.env.WEIGHT_EFT_BASE_URL || process.env.OPENAI_BASE_URL;
    if (endpoint) {
        return { available: true, detail: `endpoint ${endpoint}` };
    }
    // CUDA env hint without importing anything heavy.
    const rawDevices = process.env.CUDA_VISIBLE_DEVICES ?? '';
    const devices = rawDevices.trim();
    if (devices !== '') {
        // An invalid first entry ("-1" or an empty slot) exposes no device at all,
        // so it must not open the training gate.
        const firstDevice = devices.split(',')[0].trim();
        if (firstDevice !== '' && firstDevice !== '-1') {
            return { available: true, detail: `CUDA_VISIBLE_DEVICES=${rawDevices}` };
        }
        return {
            available: false,
            detail: `no GPU/endpoint detected (CUDA_VISIBLE_DEVICES=${rawDevices} exposes no device; set WEIGHT_EFT_BASE_URL or expose a CUDA device)`,
        };
    }
    return {
        available: false,
        detail: 'no GPU/endpoint detected (set WEIGHT_EFT_BASE_URL or CUDA_VISIBLE_DEVICES, or run on a CUDA host)',
    };
}
|
|
40
|
+
/**
 * Validate that a base model falls inside the tunable
 * [MIN_PARAMS_B, MAX_PARAMS_B] band (7-14B is the intended target; 32B is
 * refused).
 *
 * @param base Base model spec; `paramsB` is the parameter count in billions.
 * @throws Error when `paramsB` is outside the band, or is missing/NaN.
 */
export function assertTunableSize(base) {
    // Phrase the gate as a positive in-band check: every comparison against NaN
    // or undefined is false, so an "is it out of range?" test would wave such
    // values through the gate.
    const inBand = base.paramsB >= MIN_PARAMS_B && base.paramsB <= MAX_PARAMS_B;
    if (inBand) {
        return;
    }
    throw new Error(`weight-eft: base model ${base.id} is ${base.paramsB}B — outside the tunable [${MIN_PARAMS_B}, ${MAX_PARAMS_B}]B band. ` +
        `Pick a 7-14B class model (Qwen2.5-Coder-7B / GLM-4-9B); 32B q4 spills a 16GB GPU (§59).`);
}
|
|
47
|
+
/**
 * Build a default SFT config for a base model + data path.
 *
 * @param base Base model spec; size-checked with `assertTunableSize` first.
 * @param dataPath Path to the SFT JSONL training set.
 * @param outputAdapter Output adapter directory / id.
 * @returns A fresh config: 2 epochs, learning rate 1e-4, 32768-token sequences,
 *          batch size 1, with its own copy of the default LoRA settings.
 * @throws Error when the base model is outside the tunable size band.
 */
export function sftConfig(base, dataPath, outputAdapter) {
    assertTunableSize(base);
    return {
        base,
        stage: 'sft',
        // Copy the array as well as the object: a shallow spread alone would share
        // DEFAULT_LORA.targetModules, so editing one config's modules would
        // silently change the exported default and every later config.
        lora: { ...DEFAULT_LORA, targetModules: [...DEFAULT_LORA.targetModules] },
        dataPath,
        outputAdapter,
        epochs: 2,
        learningRate: 1e-4,
        maxSeqLen: 32768,
        batchSize: 1,
    };
}
|
|
62
|
+
/**
 * Build a default on-policy DPO config that starts from the SFT checkpoint.
 *
 * @param base Base model spec; size-checked with `assertTunableSize` first.
 * @param dataPath Path to the DPO (preference) JSONL training set.
 * @param outputAdapter Output adapter directory / id.
 * @param initFromAdapter SFT adapter to initialise from.
 * @returns A fresh config: 1 epoch, learning rate 5e-6, 32768-token sequences,
 *          batch size 1, with its own copy of the default LoRA settings.
 * @throws Error when the base model is outside the tunable size band.
 */
export function dpoConfig(base, dataPath, outputAdapter, initFromAdapter) {
    assertTunableSize(base);
    return {
        base,
        stage: 'dpo',
        // Copy the array as well as the object: a shallow spread alone would share
        // DEFAULT_LORA.targetModules, so editing one config's modules would
        // silently change the exported default and every later config.
        lora: { ...DEFAULT_LORA, targetModules: [...DEFAULT_LORA.targetModules] },
        dataPath,
        outputAdapter,
        initFromAdapter,
        epochs: 1,
        learningRate: 5e-6, // DPO wants a much smaller LR than SFT
        maxSeqLen: 32768,
        batchSize: 1,
    };
}
|
|
78
|
+
/**
 * Render the command line a GPU host runs (ruvllm/MicroLoRA CLI form).
 *
 * Each value is quoted for a POSIX shell when — and only when — it contains a
 * character outside a conservative safe set. Ordinary ids, paths and numbers
 * are therefore emitted exactly as before, while a path with spaces or shell
 * metacharacters stays a single argument instead of splitting the command or
 * injecting into it. Quoted values are taken literally by the shell, so
 * `$VAR`-style expansion is suppressed in them.
 *
 * @param c Training config. `--init-from` is appended only when
 *          `c.initFromAdapter` is truthy.
 * @returns The space-joined command string.
 */
export function buildCommand(c) {
    // Characters that never need quoting inside a POSIX shell word.
    const safeWord = /^[A-Za-z0-9_@%+=:,.\/~-]+$/;
    // Wrap unsafe values in single quotes; an embedded single quote is written
    // as '\'' (close the quote, escaped quote, reopen).
    const quote = (value) => {
        const text = String(value);
        return safeWord.test(text) ? text : `'${text.replace(/'/g, "'\\''")}'`;
    };
    const parts = [
        'ruvllm',
        'microlora',
        quote(c.stage), // sft | dpo
        `--base ${quote(c.base.id)}`,
        `--data ${quote(c.dataPath)}`,
        `--out ${quote(c.outputAdapter)}`,
        `--lora-r ${quote(c.lora.r)}`,
        `--lora-alpha ${quote(c.lora.alpha)}`,
        `--lora-dropout ${quote(c.lora.dropout)}`,
        `--target-modules ${quote(c.lora.targetModules.join(','))}`,
        `--epochs ${quote(c.epochs)}`,
        `--lr ${quote(c.learningRate)}`,
        `--max-seq-len ${quote(c.maxSeqLen)}`,
        `--batch-size ${quote(c.batchSize)}`,
    ];
    if (c.initFromAdapter) {
        parts.push(`--init-from ${quote(c.initFromAdapter)}`);
    }
    return parts.join(' ');
}
|
|
100
|
+
/**
 * Assemble the full plan for one training stage.
 *
 * @param c Training config for the stage.
 * @returns `{ config, command, summary }`: the config itself (same reference),
 *          the command rendered by `buildCommand`, and a one-line human
 *          summary of stage, LoRA rank, base model, data path and output
 *          adapter (plus the init adapter when one is set).
 */
export function buildPlan(c) {
    const command = buildCommand(c);
    const { stage, lora, base, dataPath, outputAdapter, initFromAdapter } = c;
    let summary = `${stage.toUpperCase()} LoRA r=${lora.r} on ${base.id} (${base.paramsB}B) ← ${dataPath} → ${outputAdapter}`;
    if (initFromAdapter) {
        summary += ` (from ${initFromAdapter})`;
    }
    return { config: c, command, summary };
}
|
|
108
|
+
/**
 * Run (or dry-run) a training stage behind two gates.
 *
 * - Without `opts.train`, nothing is probed and the plan is returned with
 *   status `'plan'`.
 * - With `opts.train` but no detected GPU/endpoint, the plan is returned with
 *   status `'refused'` and a reason.
 * - With both gates satisfied, status is `'trained'`. Note that this function
 *   does NOT spawn the command itself: this is the integration point with
 *   ruvllm/MicroLoRA, and a GPU host is expected to execute `plan.command`
 *   (echoed in `reason`).
 *
 * @param c Training config; its base model is size-checked first.
 * @param opts Optional `train` flag and injectable `detectGpu` probe
 *             (falls back to `defaultDetectGpu`).
 * @returns The run result; `plan` is always present.
 * @throws Error when the base model is outside the tunable size band.
 */
export function runTraining(c, opts = {}) {
    assertTunableSize(c.base);
    const plan = buildPlan(c);
    // Gate 1 — explicit opt-in. The default is a dry-run that touches nothing.
    if (!opts.train) {
        return { status: 'plan', plan };
    }
    // Gate 2 — a GPU or inference endpoint must be detected.
    const probe = opts.detectGpu ?? defaultDetectGpu;
    const { available, detail } = probe();
    return available
        ? {
            status: 'trained',
            plan,
            reason: `GPU/endpoint available (${detail}). Execute: ${plan.command}`,
        }
        : {
            status: 'refused',
            plan,
            reason: `--train requested but ${detail}. Refusing to train. Re-run on a GPU host or set an endpoint.`,
        };
}
|
|
140
|
+
/**
 * Convenience wrapper that emits both stage plans for one base model: SFT
 * first, then on-policy DPO initialised from the SFT adapter.
 *
 * Adapter ids are derived from `adapterPrefix`: `<prefix>-sft` for the SFT
 * stage and `<prefix>-sft-dpo` for the DPO stage.
 *
 * @param base Base model spec.
 * @param sftDataPath Path to the SFT training set.
 * @param dpoDataPath Path to the DPO training set.
 * @param adapterPrefix Prefix for the generated adapter ids.
 * @returns `{ sft, dpo }` training plans.
 */
export function twoStagePlan(base, sftDataPath, dpoDataPath, adapterPrefix) {
    const sftAdapter = `${adapterPrefix}-sft`;
    // The DPO adapter id extends the SFT one, giving "<prefix>-sft-dpo".
    const dpoAdapter = `${sftAdapter}-dpo`;
    const sft = buildPlan(sftConfig(base, sftDataPath, sftAdapter));
    const dpo = buildPlan(dpoConfig(base, dpoDataPath, dpoAdapter, sftAdapter));
    return { sft, dpo };
}
|
|
153
|
+
/**
|
|
154
|
+
* Thin runner-adapter: canonical-standard JSONL rows → the record shape
|
|
155
|
+
* ruvllm/MicroLoRA ingests. Kept at the runner boundary so the EXPORTED files
|
|
156
|
+
* stay in the portable standard schema (trl/axolotl/unsloth-compatible). This
|
|
157
|
+
* is a structural pass-through today (ruvllm consumes OpenAI-chat + TRL-pref
|
|
158
|
+
* directly); the seam exists so a future ingest-format change is local here.
|
|
159
|
+
*/
|
|
160
|
+
export function adaptSftForRunner(rows) {
|
|
161
|
+
return rows.map((r) => ({ messages: r.messages }));
|
|
162
|
+
}
|
|
163
|
+
export function adaptDpoForRunner(rows) {
|
|
164
|
+
return rows.map((r) => ({ prompt: r.prompt, chosen: r.chosen, rejected: r.rejected }));
|
|
165
|
+
}
|
|
166
|
+
//# sourceMappingURL=train.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train.js","sourceRoot":"","sources":["../src/train.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,mEAAmE;AACnE,EAAE;AACF,gFAAgF;AAChF,+EAA+E;AAC/E,+EAA+E;AAC/E,+EAA+E;AAC/E,4EAA4E;AAC5E,EAAE;AACF,gFAAgF;AAChF,8EAA8E;AAC9E,6CAA6C;AAC7C,EAAE;AACF,8EAA8E;AAC9E,+DAA+D;AAwE/D,MAAM,YAAY,GAAG,EAAE,CAAC;AACxB,MAAM,YAAY,GAAG,CAAC,CAAC;AAEvB,MAAM,CAAC,MAAM,YAAY,GAAe;IACtC,CAAC,EAAE,EAAE;IACL,KAAK,EAAE,EAAE;IACT,OAAO,EAAE,IAAI;IACb,aAAa,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC;CACxD,CAAC;AAEF,mFAAmF;AACnF,MAAM,UAAU,gBAAgB;IAC9B,6EAA6E;IAC7E,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;IAChF,IAAI,QAAQ;QAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,QAAQ,EAAE,EAAE,CAAC;IACzE,kDAAkD;IAClD,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,KAAK,EAAE,EAAE,CAAC;QAChF,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,wBAAwB,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,EAAE,CAAC;IACjG,CAAC;IACD,OAAO;QACL,SAAS,EAAE,KAAK;QAChB,MAAM,EACJ,mGAAmG;KACtG,CAAC;AACJ,CAAC;AAED,uEAAuE;AACvE,MAAM,UAAU,iBAAiB,CAAC,IAAmB;IACnD,IAAI,IAAI,CAAC,OAAO,GAAG,YAAY,IAAI,IAAI,CAAC,OAAO,GAAG,YAAY,EAAE,CAAC;QAC/D,MAAM,IAAI,KAAK,CACb,0BAA0B,IAAI,CAAC,EAAE,OAAO,IAAI,CAAC,OAAO,4BAA4B,YAAY,KAAK,YAAY,WAAW;YACtH,yFAAyF,CAC5F,CAAC;IACJ,CAAC;AACH,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,SAAS,CAAC,IAAmB,EAAE,QAAgB,EAAE,aAAqB;IACpF,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO;QACL,IAAI;QACJ,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,EAAE,GAAG,YAAY,EAAE;QACzB,QAAQ;QACR,aAAa;QACb,MAAM,EAAE,CAAC;QACT,YAAY,EAAE,IAAI;QAClB,SAAS,EAAE,KAAK;QAChB,SAAS,EAAE,CAAC;KACb,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,MAAM,UAAU,SAAS,CACvB,IAAmB,EACnB,QAAgB,EAChB,aAAqB,EACrB,eAAuB;IAEvB,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO;QACL,IAAI;QACJ,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,EAAE,GAAG,YAAY,EAAE;QACzB,QAAQ;QACR,aAAa;QACb,eAAe;QACf,MAAM,EAAE,CAAC;QACT,YAAY,EAAE,IAAI,EAAE,uCAAuC;QAC3D,SAAS,EAAE,KAAK;QAChB,SAAS,EAAE,CAAC;KACb,CAAC;AACJ,CAAC;AAED,4EAA4E;AAC5E,MAAM,UAAU,YAAY,CAAC,CAAc;IACzC,MAAM,KAAK,GAAG;QACZ,QAAQ;QACR,WAAW;Q
ACX,CAAC,CAAC,KAAK,EAAE,YAAY;QACrB,UAAU,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE;QACrB,UAAU,CAAC,CAAC,QAAQ,EAAE;QACtB,SAAS,CAAC,CAAC,aAAa,EAAE;QAC1B,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE;QACtB,gBAAgB,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE;QAC9B,kBAAkB,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE;QAClC,oBAAoB,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;QACpD,YAAY,CAAC,CAAC,MAAM,EAAE;QACtB,QAAQ,CAAC,CAAC,YAAY,EAAE;QACxB,iBAAiB,CAAC,CAAC,SAAS,EAAE;QAC9B,gBAAgB,CAAC,CAAC,SAAS,EAAE;KAC9B,CAAC;IACF,IAAI,CAAC,CAAC,eAAe;QAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IACtE,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED,oEAAoE;AACpE,MAAM,UAAU,SAAS,CAAC,CAAc;IACtC,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;IAChC,MAAM,OAAO,GACX,GAAG,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,OAAO,KAAK;QACnF,KAAK,CAAC,CAAC,QAAQ,MAAM,CAAC,CAAC,aAAa,EAAE;QACtC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC5D,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;AACzC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,CAAc,EAAE,OAAwB,EAAE;IACpE,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IAE1B,4DAA4D;IAC5D,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IAClC,CAAC;IAED,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,IAAI,gBAAgB,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;QACnB,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,IAAI;YACJ,MAAM,EAAE,yBAAyB,GAAG,CAAC,MAAM,+DAA+D;SAC3G,CAAC;IACJ,CAAC;IAED,yEAAyE;IACzE,2EAA2E;IAC3E,2EAA2E;IAC3E,yDAAyD;IACzD,OAAO;QACL,MAAM,EAAE,SAAS;QACjB,IAAI;QACJ,MAAM,EAAE,2BAA2B,GAAG,CAAC,MAAM,eAAe,IAAI,CAAC,OAAO,EAAE;KAC3E,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAC1B,IAAmB,EACnB,WAAmB,EACnB,WAAmB,EACnB,aAAqB;IAErB,MAAM,UAAU,GAAG,GAAG,aAAa,MAAM,CAAC;IAC1C,MAAM,UAAU,GAAG,GAAG,aAAa,UAAU,CAAC;IAC9C,OAAO;QACL,GAAG,EAAE,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;
QACxD,GAAG,EAAE,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;KACrE,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAc;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,MAAM,UAAU,iBAAiB,CAC/B,IAAc;IAEd,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;AACzF,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/** A chat message in an OpenAI-compatible trajectory. */
|
|
2
|
+
export interface ChatMessage {
|
|
3
|
+
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
4
|
+
/** Text content. May be null on an assistant turn that only emits tool_calls. */
|
|
5
|
+
content: string | null;
|
|
6
|
+
/**
|
|
7
|
+
* Assistant tool-call requests (the ReAct action step). PRESERVED verbatim
|
|
8
|
+
* into the SFT set — the model must learn real tool-use trajectories, so this
|
|
9
|
+
* structure is NEVER flattened to plain text.
|
|
10
|
+
*/
|
|
11
|
+
tool_calls?: ToolCall[];
|
|
12
|
+
/** On a `role:'tool'` message, the id of the tool_call it answers. */
|
|
13
|
+
tool_call_id?: string;
|
|
14
|
+
/** Optional name (tool name on a tool message; function name otherwise). */
|
|
15
|
+
name?: string;
|
|
16
|
+
}
|
|
17
|
+
/** An OpenAI-style tool call (the ReAct action). */
|
|
18
|
+
export interface ToolCall {
|
|
19
|
+
id: string;
|
|
20
|
+
type: 'function';
|
|
21
|
+
function: {
|
|
22
|
+
name: string;
|
|
23
|
+
arguments: string;
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* The tier that produced a trajectory. 'cheap' = the open model the cascade
|
|
28
|
+
* runs first (GLM/Qwen/DeepSeek). 'frontier' = the escalation tier
|
|
29
|
+
* (Opus/GPT/Sonnet). This is the on/off-policy discriminator: only cheap-tier
|
|
30
|
+
* trajectories are on-policy for DPO.
|
|
31
|
+
*/
|
|
32
|
+
export type PolicyTier = 'cheap' | 'frontier';
|
|
33
|
+
/**
|
|
34
|
+
* One trajectory in the Darwin archive: a single solve attempt on a single
|
|
35
|
+
* SWE-bench instance by a single model. This is the clear, documented input
|
|
36
|
+
* contract the exporter codes against. It is reconstructable from the
|
|
37
|
+
* Firestore `darwin_runs` docs + the local prediction/trajectory artifacts
|
|
38
|
+
* (predictions-*.jsonl rows carry instance_id + model_patch; the agentic loop
|
|
39
|
+
* carries the `messages` array with tool_calls — see solve-agentic.mjs).
|
|
40
|
+
*/
|
|
41
|
+
export interface DarwinTrajectory {
|
|
42
|
+
/** SWE-bench instance id, e.g. "astropy__astropy-14182". The contamination key. */
|
|
43
|
+
instance_id: string;
|
|
44
|
+
/** The model that produced this trajectory, e.g. "z-ai/glm-5.2". */
|
|
45
|
+
model: string;
|
|
46
|
+
/** Which cascade tier this model belongs to (on/off-policy discriminator). */
|
|
47
|
+
tier: PolicyTier;
|
|
48
|
+
/**
|
|
49
|
+
* Gold-resolved status from the OFFICIAL swebench harness (resolved_ids).
|
|
50
|
+
* NEVER from in-loop oracle signals — only conformant gold eval counts as a
|
|
51
|
+
* success for SFT distillation.
|
|
52
|
+
*/
|
|
53
|
+
resolved: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* The ReAct message trajectory: system + user(issue) + (assistant tool_calls
|
|
56
|
+
* → tool results)* + final assistant(patch). May be empty for a failed/empty
|
|
57
|
+
* attempt (those become DPO `rejected` candidates).
|
|
58
|
+
*/
|
|
59
|
+
messages: ChatMessage[];
|
|
60
|
+
/** The unified diff the attempt produced (may be '' for an empty/failed attempt). */
|
|
61
|
+
model_patch: string;
|
|
62
|
+
/**
|
|
63
|
+
* Best-of-N sample index on this instance (BoN-derived). Lets the DPO pairer
|
|
64
|
+
* find a chosen (resolved) and a rejected (empty/failed) sample from the SAME
|
|
65
|
+
* model on the SAME instance. Default 0 when single-sample.
|
|
66
|
+
*/
|
|
67
|
+
sample?: number;
|
|
68
|
+
/** Optional source tag (e.g. "darwin_runs", "predictions-mm25"). Provenance only. */
|
|
69
|
+
source?: string;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* One SFT row — OpenAI chat JSONL. The tool_calls structure is preserved so
|
|
73
|
+
* the model learns real tool-use trajectories (not a flattened transcript).
|
|
74
|
+
*/
|
|
75
|
+
export interface SftRow {
|
|
76
|
+
messages: ChatMessage[];
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* One DPO row — TRL/HF CONVERSATIONAL preference schema. ReAct diverges from
|
|
80
|
+
* the first action, so `prompt` = the shared system+issue messages, and
|
|
81
|
+
* chosen/rejected are FULL trajectories from that divergence point.
|
|
82
|
+
*/
|
|
83
|
+
export interface DpoRow {
|
|
84
|
+
prompt: ChatMessage[];
|
|
85
|
+
chosen: ChatMessage[];
|
|
86
|
+
rejected: ChatMessage[];
|
|
87
|
+
}
|
|
88
|
+
export interface ExportOptions {
|
|
89
|
+
/**
|
|
90
|
+
* THE CONTAMINATION GUARD (non-negotiable). Instance ids reserved for
|
|
91
|
+
* evaluation. ANY trajectory whose instance_id is in this set is excluded,
|
|
92
|
+
* and an overlap is asserted against — training on eval instances is fake
|
|
93
|
+
* lift, the exact contamination we debunk elsewhere. Required (may be []).
|
|
94
|
+
*/
|
|
95
|
+
evalHoldout: string[];
|
|
96
|
+
/**
|
|
97
|
+
* Max token budget per trajectory (rough word/char heuristic — see
|
|
98
|
+
* `estimateTokens`). Trajectories over budget are DROPPED (or truncated when
|
|
99
|
+
* `truncateOverLength` is set) and REPORTED — never silently lost. Targets a
|
|
100
|
+
* 7-14B context window. Default 28000 (headroom under a 32k window).
|
|
101
|
+
*/
|
|
102
|
+
maxTokens?: number;
|
|
103
|
+
/**
|
|
104
|
+
* When true, over-length trajectories are TRUNCATED (oldest tool round-trips
|
|
105
|
+
* dropped, keeping system+issue+final) instead of dropped entirely. The truncation
|
|
106
|
+
* is still reported. Default false (drop, the safe default).
|
|
107
|
+
*/
|
|
108
|
+
truncateOverLength?: boolean;
|
|
109
|
+
/**
|
|
110
|
+
* THE REWARD-HACKING FILTER (Ornith-1.0 borrow). When true (the DEFAULT), a
|
|
111
|
+
* deterministic monitor runs over each trajectory and DROPS any that read a
|
|
112
|
+
* withheld gold/test path, modified the verification harness, or escaped the
|
|
113
|
+
* sandbox — an archived "success" that secretly reward-hacked would teach the
|
|
114
|
+
* model to reward-hack. This is the training-data analog of the conformance
|
|
115
|
+
* firewall. Set false ONLY for debugging; the count is always reported.
|
|
116
|
+
*/
|
|
117
|
+
dropRewardHacked?: boolean;
|
|
118
|
+
}
|
|
119
|
+
/** What the exporter produced + an honest accounting of what it dropped. */
|
|
120
|
+
export interface ExportResult {
|
|
121
|
+
sft: SftRow[];
|
|
122
|
+
dpo: DpoRow[];
|
|
123
|
+
report: ExportReport;
|
|
124
|
+
}
|
|
125
|
+
export interface ExportReport {
|
|
126
|
+
totalTrajectories: number;
|
|
127
|
+
/** Excluded because their instance_id is in the eval holdout (contamination guard). */
|
|
128
|
+
excludedByHoldout: number;
|
|
129
|
+
/** Dropped because over the token budget (the long-context filter). */
|
|
130
|
+
droppedOverLength: number;
|
|
131
|
+
/** Truncated to fit the token budget (only when truncateOverLength). */
|
|
132
|
+
truncatedOverLength: number;
|
|
133
|
+
/** Dropped by the reward-hacking monitor (gold-read / verification-tamper / sandbox-escape). */
|
|
134
|
+
droppedRewardHacked: number;
|
|
135
|
+
sftRows: number;
|
|
136
|
+
dpoRows: number;
|
|
137
|
+
/** Per-instance ids that ended up in the SFT set (for an audit trail). */
|
|
138
|
+
sftInstanceIds: string[];
|
|
139
|
+
/** Per-instance ids that ended up in the DPO set. */
|
|
140
|
+
dpoInstanceIds: string[];
|
|
141
|
+
/** Human-readable notes (e.g. which trajectories were dropped + why). */
|
|
142
|
+
notes: string[];
|
|
143
|
+
}
|
|
144
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAuBA,yDAAyD;AACzD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,MAAM,CAAC;IAC/C,iFAAiF;IACjF,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB;;;;OAIG;IACH,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,sEAAsE;IACtE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,4EAA4E;IAC5E,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,oDAAoD;AACpD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC;CAC/C;AAED;;;;;GAKG;AACH,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,UAAU,CAAC;AAE9C;;;;;;;GAOG;AACH,MAAM,WAAW,gBAAgB;IAC/B,mFAAmF;IACnF,WAAW,EAAE,MAAM,CAAC;IACpB,oEAAoE;IACpE,KAAK,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,IAAI,EAAE,UAAU,CAAC;IACjB;;;;OAIG;IACH,QAAQ,EAAE,OAAO,CAAC;IAClB;;;;OAIG;IACH,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,qFAAqF;IACrF,WAAW,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qFAAqF;IACrF,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAMD;;;GAGG;AACH,MAAM,WAAW,MAAM;IACrB,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB;AAED;;;;GAIG;AACH,MAAM,WAAW,MAAM;IACrB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB;AAMD,MAAM,WAAW,aAAa;IAC5B;;;;;OAKG;IACH,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;;;;;OAOG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,4EAA4E;AAC5E,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,MAAM,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,uFAAuF;IACvF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,uEAAuE;IACvE,iBAAiB,EAAE,MAAM,CAAC;IAC1B,wEAAwE;IACxE,mBAAmB,EAAE,MAAM,CAAC;IAC5B,gGAAgG;IAChG,mBAAmB,EAAE,MAAM,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,0EAA0E;IAC1E,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,qDAAqD;IACrD,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,yEAAyE;IACzE,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// @metaharness/weight-eft — shared types (the integration contract).
|
|
4
|
+
//
|
|
5
|
+
// This package is the bridge from Darwin's gradient-FREE policy evolution
|
|
6
|
+
// (evolve the harness, freeze the model) to gradient/weight self-learning on
|
|
7
|
+
// the OPEN cheap tier. The exporter reads Darwin's trajectory archive and
|
|
8
|
+
// emits standard SFT/DPO training sets; the runner wraps a LoRA tune.
|
|
9
|
+
//
|
|
10
|
+
// THESIS (honest, bounded): we attack the COST-Pareto axis, NOT the frontier
|
|
11
|
+
// ceiling. Distilling the archive's successes into GLM/Qwen via LoRA makes the
|
|
12
|
+
// cheap tier resolve more issues on its own, so the cascade escalates to a
|
|
13
|
+
// frontier model (Opus/GPT) less often. A 7-14B local tune does NOT crack the
|
|
14
|
+
// hard tail (frontier reasoning ceiling). The win is fewer escalations →
|
|
15
|
+
// lower $/resolved. Telemetry stays honest about that.
|
|
16
|
+
//
|
|
17
|
+
// See ADR-198 (weight-EFT), ADR-073 (Darwin archive), ADR-179 (cost-Pareto),
|
|
18
|
+
// ADR-182 (cost-cascade), ADR-195 (Phase-2 capability genes).
|
|
19
|
+
export {};
|
|
20
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,qEAAqE;AACrE,EAAE;AACF,0EAA0E;AAC1E,6EAA6E;AAC7E,0EAA0E;AAC1E,sEAAsE;AACtE,EAAE;AACF,6EAA6E;AAC7E,+EAA+E;AAC/E,2EAA2E;AAC3E,8EAA8E;AAC9E,yEAAyE;AACzE,uDAAuD;AACvD,EAAE;AACF,6EAA6E;AAC7E,8DAA8D"}
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@metaharness/weight-eft",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Evolutionary fine-tuning — distill the harness's archival success into the open cheap tier (GLM/Qwen) via LoRA so the cost-cascade escalates to a frontier model less often. SFT-distills ALL gold-resolved trajectories; on-policy DPO on GLM-vs-GLM pairs only. Attacks the cost-Pareto axis (fewer escalations), NOT the frontier reasoning ceiling. Strict train/eval instance-ID disjointness (the contamination guard).",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"bin": {
|
|
9
|
+
"weight-eft": "./dist/cli.js"
|
|
10
|
+
},
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"import": "./dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"./cli": {
|
|
17
|
+
"types": "./dist/cli.d.ts",
|
|
18
|
+
"import": "./dist/cli.js"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"files": [
|
|
22
|
+
"dist/**",
|
|
23
|
+
"README.md",
|
|
24
|
+
"LICENSE"
|
|
25
|
+
],
|
|
26
|
+
"scripts": {
|
|
27
|
+
"build": "tsc",
|
|
28
|
+
"test": "vitest run",
|
|
29
|
+
"lint": "tsc --noEmit"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"llm",
|
|
33
|
+
"lora",
|
|
34
|
+
"fine-tuning",
|
|
35
|
+
"evolutionary-fine-tuning",
|
|
36
|
+
"weight-eft",
|
|
37
|
+
"sft",
|
|
38
|
+
"dpo",
|
|
39
|
+
"distillation",
|
|
40
|
+
"cost-optimization",
|
|
41
|
+
"swe-bench",
|
|
42
|
+
"metaharness",
|
|
43
|
+
"darwin-mode",
|
|
44
|
+
"contamination-guard"
|
|
45
|
+
],
|
|
46
|
+
"author": "rUv <ruv@ruv.net>",
|
|
47
|
+
"license": "MIT",
|
|
48
|
+
"homepage": "https://github.com/ruvnet/agent-harness-generator",
|
|
49
|
+
"repository": {
|
|
50
|
+
"type": "git",
|
|
51
|
+
"url": "https://github.com/ruvnet/agent-harness-generator",
|
|
52
|
+
"directory": "packages/weight-eft"
|
|
53
|
+
},
|
|
54
|
+
"engines": {
|
|
55
|
+
"node": ">=20.0.0"
|
|
56
|
+
},
|
|
57
|
+
"devDependencies": {
|
|
58
|
+
"typescript": "^5.4.0",
|
|
59
|
+
"vitest": "^2.0.0"
|
|
60
|
+
},
|
|
61
|
+
"publishConfig": {
|
|
62
|
+
"access": "public"
|
|
63
|
+
}
|
|
64
|
+
}
|