npm - @tangle-network/agent-eval - Versions diffs - 0.70.0 → 0.72.0 - Mend

@tangle-network/agent-eval 0.70.0 → 0.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/CHANGELOG.md +24 -0
package/dist/adapters/http.js +1 -1
package/dist/adapters/http.js.map +1 -1
package/dist/campaign/index.d.ts +10 -0
package/dist/campaign/index.js +48 -11
package/dist/campaign/index.js.map +1 -1
package/dist/{chunk-ZZCQQHW7.js → chunk-4QJN7RDX.js} +4 -4
package/dist/chunk-4QJN7RDX.js.map +1 -0
package/dist/{chunk-3B7Y5AUR.js → chunk-GWGO2K6Y.js} +3 -2
package/dist/chunk-GWGO2K6Y.js.map +1 -0
package/dist/{chunk-Z4ZCBC7M.js → chunk-ODGETRTM.js} +4 -3
package/dist/chunk-ODGETRTM.js.map +1 -0
package/dist/chunk-SL55X4VN.js +186 -0
package/dist/chunk-SL55X4VN.js.map +1 -0
package/dist/{chunk-GYELOWB6.js → chunk-UD6EF73X.js} +3 -3
package/dist/{chunk-6XQIEUQ2.js → chunk-ZPSKPT3V.js} +5 -3
package/dist/{chunk-6XQIEUQ2.js.map → chunk-ZPSKPT3V.js.map} +1 -1
package/dist/contract/index.js +3 -3
package/dist/index.js +31 -171
package/dist/index.js.map +1 -1
package/dist/openapi.json +1 -1
package/dist/pipelines/index.js +1 -1
package/dist/rl.d.ts +155 -1
package/dist/rl.js +195 -6
package/dist/rl.js.map +1 -1
package/dist/{run-campaign-BVY3RGAZ.js → run-campaign-OVEZF24D.js} +2 -2
package/dist/traces.js +1 -1
package/package.json +1 -1
package/dist/chunk-3B7Y5AUR.js.map +0 -1
package/dist/chunk-PQV2TKC3.js +0 -27
package/dist/chunk-PQV2TKC3.js.map +0 -1
package/dist/chunk-Z4ZCBC7M.js.map +0 -1
package/dist/chunk-ZZCQQHW7.js.map +0 -1
package/dist/{chunk-GYELOWB6.js.map → chunk-UD6EF73X.js.map} +0 -0
package/dist/{run-campaign-BVY3RGAZ.js.map → run-campaign-OVEZF24D.js.map} +0 -0

package/dist/openapi.json CHANGED

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "@tangle-network/agent-eval — wire protocol",
-    "version": "0.70.0",
+    "version": "0.72.0",
     "description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
     "contact": {
       "name": "Tangle Network",

package/dist/pipelines/index.js CHANGED

@@ -3,7 +3,7 @@ import {
   classifyFailure,
   compareToBaseline,
   computeToolUseMetrics
-} from "../chunk-3B7Y5AUR.js";
+} from "../chunk-GWGO2K6Y.js";
 import {
   buildTrajectory
 } from "../chunk-RZTMDUO7.js";

package/dist/rl.d.ts CHANGED

@@ -1380,6 +1380,160 @@ interface StepRewardJsonlRow {
 }
 declare function stepRewardsToJsonl(stepRewards: StepReward[]): string;
+/**
+ * RL dataset packaging + datasheet — the publishable, sellable bundle.
+ *
+ * The format exporters (`toGrpoRows` / `toSftRows` / `toDpoRows`) already
+ * produce trainer-ready shapes (prime-rl GRPO, TRL DPO, conversational SFT).
+ * What turns that into a dataset someone can PUBLISH or BUY is the provenance
+ * + a datasheet: which models produced it, which prompt/agent versions, how the
+ * reward was derived (deterministic verifiable vs probabilistic judge — the
+ * credibility axis a buyer checks first), the split discipline, the reward
+ * distribution, the quality gates, the license, and the intended/out-of-scope
+ * uses. This module computes those facts from the `RunRecord[]` and renders a
+ * "Datasheet for Datasets" (Gebru et al. 2018) card alongside the format files.
+ *
+ * It composes the existing `rl/exporters` — it does not reimplement any trainer
+ * format. The renderers token-identity step (DeepSeek/Kimi/Qwen tokenization
+ * with per-token loss masks) is a downstream Python stage that consumes the
+ * `messages`/`completions` this bundle emits.
+ */
+type RewardKind = 'deterministic' | 'probabilistic' | 'mixed';
+type DatasetFormat = 'grpo' | 'sft' | 'dpo';
+/** Caller-declared context — the qualitative half of the datasheet that can't
+ *  be computed from records. */
+interface RlDatasetConfig {
+    name: string;
+    version: string;
+    /** Product/task domain, e.g. 'legal-m&a', 'tax-1040'. */
+    domain: string;
+    /** SPDX id or a named commercial license. Required — an unlicensed dataset
+     *  cannot be published or sold. */
+    license: string;
+    /** How the reward was produced. `kind: 'deterministic'` (a test/schema/XPath
+     *  decided it) is the credibility signal; 'probabilistic' = LLM-judge. */
+    reward: {
+        kind: RewardKind;
+        source: string;
+        description: string;
+    };
+    intendedUse: string;
+    outOfScope: string;
+    limitations: string;
+    /** ISO timestamp — passed in (the substrate forbids Date.now()). */
+    createdAtIso: string;
+    /** Default: ['grpo', 'sft']. */
+    formats?: DatasetFormat[];
+    /** Quality gates already run, recorded on the card for the buyer. */
+    qualityGates?: {
+        contaminationProbe?: 'passed' | 'failed' | 'not-run';
+        dedup?: boolean;
+        verifiableRewardFilter?: boolean;
+    };
+}
+interface RewardStats {
+    n: number;
+    mean: number;
+    median: number;
+    min: number;
+    max: number;
+    std: number;
+}
+interface RlDatasetStats {
+    records: number;
+    /** Record count per split — a publishable dataset must declare its holdout. */
+    splits: Record<RunSplitTag, number>;
+    reward: RewardStats;
+    /** Distinct snapshot-pinned models that produced the trajectories. */
+    models: string[];
+    /** Distinct effective-prompt hashes (the agent profile/prompt versions). */
+    promptHashes: string[];
+    commitShas: string[];
+    totalTokens: {
+        input: number;
+        output: number;
+    };
+    totalCostUsd: number;
+}
+interface RlDatasetManifest extends RlDatasetConfig {
+    formats: DatasetFormat[];
+    rowCounts: Partial<Record<DatasetFormat, number>>;
+    stats: RlDatasetStats;
+}
+interface RlDatasetBundle {
+    manifest: RlDatasetManifest;
+    /** Relative filename -> contents. Write these to a directory to publish. */
+    files: Record<string, string>;
+}
+/**
+ * Package graded `RunRecord[]` into a publishable RL dataset bundle: the
+ * trainer-format JSONL files + a manifest + a datasheet. DPO requires
+ * pre-extracted preference triples (pass `preferences`); GRPO/SFT derive from
+ * the records directly via the supplied lookups. Throws on an empty corpus —
+ * an empty dataset must never be published.
+ */
+declare function buildRlDataset(records: RunRecord[], lookups: GrpoLookups & SftLookups, config: RlDatasetConfig, preferences?: {
+    triples: PreferenceTriple[];
+    lookups: DpoLookups;
+}): Promise<RlDatasetBundle>;
+/** Render the "Datasheet for Datasets" card — the artifact a buyer reads. */
+declare function datasheetToMarkdown(m: RlDatasetManifest): string;
+/**
+ * RL corpus — the durable, append-only accumulation of graded RunRecords that
+ * every eval run deposits BY DEFAULT.
+ *
+ * The dataset is the free exhaust of the normal eval process: we run evals
+ * constantly to get an agent production-ready, and those runs already produce
+ * graded trajectories. Instead of writing them to an ephemeral run dir and
+ * throwing them away, `appendToCorpus` accumulates them into a durable corpus;
+ * `buildDatasetFromCorpus` later harvests the whole corpus into a publishable
+ * bundle. No separate data-collection campaign — the data accrues from work we
+ * do anyway. This is the "best things for free by our process" layer.
+ *
+ * Trajectory text rides on the record as top-level `prompt` / `completion`
+ * (what the eval harnesses capture; the RunRecord validator ignores the extra
+ * keys). The harvest reads them directly — no trace store round-trip needed.
+ */
+/** A corpus record is a RunRecord carrying the trajectory text the harness
+ *  captured. `prompt`/`completion` are top-level (the validator ignores extras). */
+type CorpusRecord = RunRecord & {
+    prompt?: string;
+    completion?: string;
+};
+interface CorpusAppendResult {
+    appended: number;
+    /** Skipped because a record with the same runId was already in the corpus
+     *  (idempotent appends — NOT re-run collapsing; re-runs get fresh runIds). */
+    skipped: number;
+    total: number;
+}
+/**
+ * Append graded records to the corpus (append-only JSONL). Deduplicates by
+ * `runId` against what's already on disk so re-running the same harness is
+ * idempotent. Creates the file and parent dir. This is the call every eval
+ * harness makes by default after producing its records.
+ */
+declare function appendToCorpus(records: CorpusRecord[], corpusPath: string): CorpusAppendResult;
+/** Read the full corpus. Returns [] if the corpus does not exist yet. */
+declare function readCorpus(corpusPath: string): CorpusRecord[];
+interface HarvestOptions {
+    /** Keep only records scoring >= this (rejection-sampling for SFT). */
+    minScore?: number;
+    /** Keep only these splits (e.g. ['holdout'] for an eval-only dataset). */
+    splits?: RunRecord['splitTag'][];
+}
+/**
+ * Harvest the accumulated corpus into a publishable RL dataset bundle. Reads
+ * trajectory text from each record's top-level `prompt`/`completion`; records
+ * missing either are excluded (a graded score with no trajectory can't train).
+ * Optionally filters by score / split. Throws (via buildRlDataset) if nothing
+ * survives — an empty dataset must never be published.
+ */
+declare function buildDatasetFromCorpus(corpusPath: string, config: RlDatasetConfig, opts?: HarvestOptions): Promise<RlDatasetBundle>;
 /**
  * `PredictiveValidityResearcher` — concrete `Researcher` implementation
  * that drives selection from outcome-anchored predictive validity.
@@ -1626,4 +1780,4 @@ interface RLCampaignResult<V> {
 }
 declare function runRLCampaign<V>(opts: RunRLCampaignOptions<V>): Promise<RLCampaignResult<V>>;
-export { type AdaptationCurve, type AdaptationPoint, type AdaptationRunner, type AdapterContext, type AdversarialMutation, type AdversarialScenario, type AdversarialSearchOptions, type AdversarialSearchReport, type BradleyTerryFit, type BradleyTerryRating, type BuildPairwiseFromCampaignInput, type CellObservation, type CompareCurvesResult, type ComputeBestOfNOptions, type ComputeBestOfNResult, type ComputeCurve, type ComputeCurveBudget, type ComputeCurvePoint, type ContaminationProbeInput, type ContaminationProbeOptions, type ContaminationProbeReport, type CurriculumAllocation, type DetectRewardHackingInput, type DpoExportRow, type DpoLookups, type EloOptions, type ExtractPreferencesOptions, type ExtractStepRewardsOptions, type GrpoExportRow, type GrpoLookups, type OffPolicyEstimate, type OffPolicyOptions, type OffPolicyTrajectory, OutcomeStore, type PairwiseOutcome, type ParetoPointInput, PredictiveValidityResearcher, type PredictiveValidityResearcherOptions, type PreferenceExtractionReport, type PreferenceStrategy, type PreferenceTriple, type PrmExportRow, type PrmLookups, type PrmTrainingTriple, type RLCampaignResult, type RewardHackingFinding, type RewardHackingReport, type RewardHackingSignal, type RunAdaptationCurveOptions, type RunComputeCurveOptions, type RunRLCampaignOptions, type RunwiseStepSummary, type ScenarioPerturbation, type ScenarioPerturbationKind, type SelfConsistencyOptions, type SelfConsistencyResult, type SftExportRow, type SftLookups, type StepReward, type StepRewardJsonlRow, type StepScorer, type ThompsonCurriculumOptions, type VarianceCurriculumOptions, type VerifiableReward, type VerifiableRewardExtractionOptions, type VerifiableRewardSource, adversarialScenarioSearch, applyEloUpdate, bestOfN, buildPairwiseFromCampaign, campaignToRunRecords, compareAdaptationCurves, detectRewardHacking, doublyRobust, extractPreferences, extractStepRewards, extractVerifiableReward, extractVerifiableRewardsFromRecords, filterDeterministicallyRewarded, 
firstPassK, fitBradleyTerry, injectIrrelevantClause, inverseProbabilityWeighting, observationsFromRunRecords, offPolicyEstimateAll, paretoFrontier, prmTrainingPairs, renameVariables, runAdaptationCurve, runComputeCurve, runContaminationProbe, runRLCampaign, runwiseStepRewardSummary, selfConsistency, selfNormalizedImportanceWeighting, shuffleOrder, stepRewardsToJsonl, thompsonCurriculum, toAnthropicFormat, toDpoJsonl, toDpoRows, toGrpoJsonl, toGrpoRows, toPrmJsonl, toPrmRows, toSftJsonl, toSftRows, toTRLFormat, varianceBasedCurriculum, verificationReportToRunRecord };
+export { type AdaptationCurve, type AdaptationPoint, type AdaptationRunner, type AdapterContext, type AdversarialMutation, type AdversarialScenario, type AdversarialSearchOptions, type AdversarialSearchReport, type BradleyTerryFit, type BradleyTerryRating, type BuildPairwiseFromCampaignInput, type CellObservation, type CompareCurvesResult, type ComputeBestOfNOptions, type ComputeBestOfNResult, type ComputeCurve, type ComputeCurveBudget, type ComputeCurvePoint, type ContaminationProbeInput, type ContaminationProbeOptions, type ContaminationProbeReport, type CorpusAppendResult, type CorpusRecord, type CurriculumAllocation, type DatasetFormat, type DetectRewardHackingInput, type DpoExportRow, type DpoLookups, type EloOptions, type ExtractPreferencesOptions, type ExtractStepRewardsOptions, type GrpoExportRow, type GrpoLookups, type HarvestOptions, type OffPolicyEstimate, type OffPolicyOptions, type OffPolicyTrajectory, OutcomeStore, type PairwiseOutcome, type ParetoPointInput, PredictiveValidityResearcher, type PredictiveValidityResearcherOptions, type PreferenceExtractionReport, type PreferenceStrategy, type PreferenceTriple, type PrmExportRow, type PrmLookups, type PrmTrainingTriple, type RLCampaignResult, type RewardHackingFinding, type RewardHackingReport, type RewardHackingSignal, type RewardKind, type RewardStats, type RlDatasetBundle, type RlDatasetConfig, type RlDatasetManifest, type RlDatasetStats, type RunAdaptationCurveOptions, type RunComputeCurveOptions, type RunRLCampaignOptions, type RunwiseStepSummary, type ScenarioPerturbation, type ScenarioPerturbationKind, type SelfConsistencyOptions, type SelfConsistencyResult, type SftExportRow, type SftLookups, type StepReward, type StepRewardJsonlRow, type StepScorer, type ThompsonCurriculumOptions, type VarianceCurriculumOptions, type VerifiableReward, type VerifiableRewardExtractionOptions, type VerifiableRewardSource, adversarialScenarioSearch, appendToCorpus, applyEloUpdate, bestOfN, buildDatasetFromCorpus, 
buildPairwiseFromCampaign, buildRlDataset, campaignToRunRecords, compareAdaptationCurves, datasheetToMarkdown, detectRewardHacking, doublyRobust, extractPreferences, extractStepRewards, extractVerifiableReward, extractVerifiableRewardsFromRecords, filterDeterministicallyRewarded, firstPassK, fitBradleyTerry, injectIrrelevantClause, inverseProbabilityWeighting, observationsFromRunRecords, offPolicyEstimateAll, paretoFrontier, prmTrainingPairs, readCorpus, renameVariables, runAdaptationCurve, runComputeCurve, runContaminationProbe, runRLCampaign, runwiseStepRewardSummary, selfConsistency, selfNormalizedImportanceWeighting, shuffleOrder, stepRewardsToJsonl, thompsonCurriculum, toAnthropicFormat, toDpoJsonl, toDpoRows, toGrpoJsonl, toGrpoRows, toPrmJsonl, toPrmRows, toSftJsonl, toSftRows, toTRLFormat, varianceBasedCurriculum, verificationReportToRunRecord };

package/dist/rl.js CHANGED

@@ -361,11 +361,11 @@ function extractPreferences(runs, opts = {}) {
   const strategy = opts.strategy ?? "paired-by-scenario-and-seed";
   const minMargin = opts.minMargin ?? 0.05;
   const splitTag = opts.splitTag ?? SPLIT_TAG_DEFAULT;
-  const rewardOf = opts.rewardOf ?? DEFAULT_REWARD;
+  const rewardOf2 = opts.rewardOf ?? DEFAULT_REWARD;
   const filtered = runs.filter((r) => r.splitTag === splitTag);
   const scoredEntries = [];
   for (const run of filtered) {
-    const s = rewardOf(run);
+    const s = rewardOf2(run);
     if (s === null) continue;
     scoredEntries.push({ run, score: s });
   }
@@ -1060,6 +1060,10 @@ function mulberry32(seed) {
   };
 }
+// src/rl/corpus.ts
+import { appendFileSync, existsSync, mkdirSync, readFileSync } from "fs";
+import { dirname } from "path";
 // src/rl/exporters.ts
 async function toDpoRows(triples, lookups) {
   const out = [];
@@ -1091,7 +1095,7 @@ function toDpoJsonl(rows) {
   return rows.map((r) => JSON.stringify(r)).join("\n") + (rows.length > 0 ? "\n" : "");
 }
 async function toGrpoRows(runs, lookups) {
-  const rewardOf = lookups.rewardOf ?? defaultReward;
+  const rewardOf2 = lookups.rewardOf ?? defaultReward;
   const grouped = /* @__PURE__ */ new Map();
   for (const r of runs) {
     const sid = r.scenarioId ?? r.experimentId;
@@ -1107,11 +1111,11 @@ async function toGrpoRows(runs, lookups) {
     const rewards = [];
     const runIds = [];
     for (const r of group) {
-      const reward = rewardOf(r);
-      if (reward === null) continue;
+      const reward2 = rewardOf2(r);
+      if (reward2 === null) continue;
       const completion = await Promise.resolve(lookups.completionOf(r.runId));
       completions.push(completion);
-      rewards.push(reward);
+      rewards.push(reward2);
       runIds.push(r.runId);
     }
     if (completions.length === 0) continue;
@@ -1212,6 +1216,186 @@ function defaultReward(run) {
   return typeof v === "number" && Number.isFinite(v) ? v : null;
 }
+// src/rl/dataset.ts
+function reward(r) {
+  const v = r.outcome.holdoutScore ?? r.outcome.searchScore;
+  return typeof v === "number" && Number.isFinite(v) ? v : null;
+}
+function distinct(xs) {
+  return [...new Set(xs)].sort();
+}
+function computeRewardStats(values) {
+  if (values.length === 0) return { n: 0, mean: 0, median: 0, min: 0, max: 0, std: 0 };
+  const sorted = [...values].sort((a, b) => a - b);
+  const n = sorted.length;
+  const mean = sorted.reduce((s, x) => s + x, 0) / n;
+  const mid = Math.floor(n / 2);
+  const median = n % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
+  const variance = sorted.reduce((s, x) => s + (x - mean) ** 2, 0) / n;
+  return { n, mean, median, min: sorted[0], max: sorted[n - 1], std: Math.sqrt(variance) };
+}
+function computeStats(records) {
+  const splits = { search: 0, dev: 0, holdout: 0 };
+  let inTok = 0;
+  let outTok = 0;
+  let cost = 0;
+  const rewards = [];
+  for (const r of records) {
+    splits[r.splitTag] = (splits[r.splitTag] ?? 0) + 1;
+    inTok += r.tokenUsage.input;
+    outTok += r.tokenUsage.output;
+    cost += r.costUsd;
+    const rw = reward(r);
+    if (rw !== null) rewards.push(rw);
+  }
+  return {
+    records: records.length,
+    splits,
+    reward: computeRewardStats(rewards),
+    models: distinct(records.map((r) => r.model)),
+    promptHashes: distinct(records.map((r) => r.promptHash)),
+    commitShas: distinct(records.map((r) => r.commitSha)),
+    totalTokens: { input: inTok, output: outTok },
+    totalCostUsd: cost
+  };
+}
+async function buildRlDataset(records, lookups, config, preferences) {
+  if (records.length === 0) {
+    throw new Error("buildRlDataset: no records \u2014 refusing to package an empty dataset");
+  }
+  const formats = config.formats ?? ["grpo", "sft"];
+  const files = {};
+  const rowCounts = {};
+  if (formats.includes("grpo")) {
+    const rows = await toGrpoRows(records, lookups);
+    files["train.grpo.jsonl"] = toGrpoJsonl(rows);
+    rowCounts.grpo = rows.length;
+  }
+  if (formats.includes("sft")) {
+    const rows = await toSftRows(records, lookups);
+    files["train.sft.jsonl"] = toSftJsonl(rows);
+    rowCounts.sft = rows.length;
+  }
+  if (formats.includes("dpo")) {
+    if (!preferences) {
+      throw new Error("buildRlDataset: format 'dpo' requires `preferences` (triples + lookups)");
+    }
+    const rows = await toDpoRows(preferences.triples, preferences.lookups);
+    files["train.dpo.jsonl"] = toDpoJsonl(rows);
+    rowCounts.dpo = rows.length;
+  }
+  const manifest = {
+    ...config,
+    formats,
+    rowCounts,
+    stats: computeStats(records)
+  };
+  files["manifest.json"] = `${JSON.stringify(manifest, null, 2)}
+`;
+  files["DATASHEET.md"] = datasheetToMarkdown(manifest);
+  return { manifest, files };
+}
+function pct(x) {
+  return `${(x * 100).toFixed(1)}%`;
+}
+function datasheetToMarkdown(m) {
+  const s = m.stats;
+  const total = s.records || 1;
+  const splitLines = ["search", "dev", "holdout"].map((k) => `  - \`${k}\`: ${s.splits[k]} (${pct(s.splits[k] / total)})`).join("\n");
+  const deterministic = m.reward.kind === "deterministic";
+  return [
+    `# Dataset: ${m.name} \`v${m.version}\``,
+    "",
+    `**Domain:** ${m.domain} \xB7 **Created:** ${m.createdAtIso} \xB7 **License:** ${m.license}`,
+    "",
+    "## Reward provenance",
+    `- **Kind:** ${m.reward.kind}${deterministic ? " \u2705 (decidable \u2014 not judge-noise)" : ""}`,
+    `- **Source:** ${m.reward.source}`,
+    `- **Description:** ${m.reward.description}`,
+    "",
+    "## Composition",
+    `- **Records (trajectories):** ${s.records}`,
+    `- **Formats:** ${m.formats.map((f) => `${f} (${m.rowCounts[f] ?? 0} rows)`).join(", ")}`,
+    "- **Splits:**",
+    splitLines,
+    "",
+    "## Reward distribution",
+    `- n=${s.reward.n} \xB7 mean=${s.reward.mean.toFixed(3)} \xB7 median=${s.reward.median.toFixed(3)} \xB7 min=${s.reward.min.toFixed(3)} \xB7 max=${s.reward.max.toFixed(3)} \xB7 std=${s.reward.std.toFixed(3)}`,
+    "",
+    "## Provenance",
+    `- **Models:** ${s.models.join(", ")}`,
+    `- **Prompt/agent versions (sha256):** ${s.promptHashes.length} distinct`,
+    `- **Commits:** ${s.commitShas.join(", ")}`,
+    `- **Tokens:** ${s.totalTokens.input} in / ${s.totalTokens.output} out \xB7 **Cost:** $${s.totalCostUsd.toFixed(2)}`,
+    "",
+    "## Quality gates",
+    `- Contamination probe: ${m.qualityGates?.contaminationProbe ?? "not-run"}`,
+    `- Dedup: ${m.qualityGates?.dedup ? "yes" : "no"} \xB7 Verifiable-reward filter: ${m.qualityGates?.verifiableRewardFilter ? "yes" : "no"}`,
+    "",
+    "## Recommended uses",
+    m.intendedUse,
+    "",
+    "## Out of scope",
+    m.outOfScope,
+    "",
+    "## Limitations",
+    m.limitations,
+    "",
+    "## Token rendering",
+    "For RL/SFT training, tokenize with the per-model renderer (DeepSeek-V3 / Kimi-K2 / Qwen3) to preserve token identity and per-token loss masks across tool-call turns \u2014 see `renderers` (PrimeIntellect). The `messages` / `completions` here are the renderer input.",
+    ""
+  ].join("\n");
+}
+// src/rl/corpus.ts
+function appendToCorpus(records, corpusPath) {
+  mkdirSync(dirname(corpusPath), { recursive: true });
+  const existing = existsSync(corpusPath) ? readCorpus(corpusPath) : [];
+  const seen = new Set(existing.map((r) => r.runId));
+  const lines = [];
+  let appended = 0;
+  let skipped = 0;
+  for (const r of records) {
+    if (seen.has(r.runId)) {
+      skipped++;
+      continue;
+    }
+    seen.add(r.runId);
+    lines.push(JSON.stringify(r));
+    appended++;
+  }
+  if (lines.length > 0) appendFileSync(corpusPath, `${lines.join("\n")}
+`);
+  return { appended, skipped, total: existing.length + appended };
+}
+function readCorpus(corpusPath) {
+  if (!existsSync(corpusPath)) return [];
+  const out = [];
+  for (const line of readFileSync(corpusPath, "utf8").split("\n")) {
+    if (line.trim()) out.push(JSON.parse(line));
+  }
+  return out;
+}
+function rewardOf(r) {
+  const v = r.outcome.holdoutScore ?? r.outcome.searchScore;
+  return typeof v === "number" && Number.isFinite(v) ? v : 0;
+}
+async function buildDatasetFromCorpus(corpusPath, config, opts = {}) {
+  let records = readCorpus(corpusPath).filter(
+    (r) => typeof r.prompt === "string" && typeof r.completion === "string"
+  );
+  if (opts.splits) records = records.filter((r) => opts.splits.includes(r.splitTag));
+  if (opts.minScore != null) records = records.filter((r) => rewardOf(r) >= opts.minScore);
+  const text = new Map(
+    records.map((r) => [r.runId, { prompt: r.prompt, completion: r.completion }])
+  );
+  const lookups = {
+    promptOf: (id) => text.get(id)?.prompt ?? "",
+    completionOf: (id) => text.get(id)?.completion ?? ""
+  };
+  return buildRlDataset(records, lookups, config);
+}
 // src/rl/predictive-validity-researcher.ts
 var PredictiveValidityResearcher = class {
   opts;
@@ -1596,11 +1780,15 @@ export {
   InMemoryOutcomeStore,
   PredictiveValidityResearcher,
   adversarialScenarioSearch,
+  appendToCorpus,
   applyEloUpdate,
   bestOfN,
+  buildDatasetFromCorpus,
   buildPairwiseFromCampaign,
+  buildRlDataset,
   campaignToRunRecords,
   compareAdaptationCurves,
+  datasheetToMarkdown,
   detectRewardHacking,
   doublyRobust,
   extractPreferences,
@@ -1616,6 +1804,7 @@ export {
   offPolicyEstimateAll,
   paretoFrontier,
   prmTrainingPairs,
+  readCorpus,
   renameVariables,
   runAdaptationCurve,
   runComputeCurve,