@mlx-node/trl 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +389 -0
  2. package/package.json +16 -5
  3. package/dist/data/dataset.d.ts +0 -22
  4. package/dist/data/dataset.d.ts.map +0 -1
  5. package/dist/data/dataset.js +0 -142
  6. package/dist/data/sft-dataset.d.ts +0 -156
  7. package/dist/data/sft-dataset.d.ts.map +0 -1
  8. package/dist/data/sft-dataset.js +0 -415
  9. package/dist/index.d.ts +0 -33
  10. package/dist/index.d.ts.map +0 -1
  11. package/dist/index.js +0 -47
  12. package/dist/trainers/grpo-config.d.ts +0 -42
  13. package/dist/trainers/grpo-config.d.ts.map +0 -1
  14. package/dist/trainers/grpo-config.js +0 -220
  15. package/dist/trainers/grpo-entropy.d.ts +0 -33
  16. package/dist/trainers/grpo-entropy.d.ts.map +0 -1
  17. package/dist/trainers/grpo-entropy.js +0 -18
  18. package/dist/trainers/grpo-trainer.d.ts +0 -602
  19. package/dist/trainers/grpo-trainer.d.ts.map +0 -1
  20. package/dist/trainers/grpo-trainer.js +0 -1439
  21. package/dist/trainers/sft-config.d.ts +0 -32
  22. package/dist/trainers/sft-config.d.ts.map +0 -1
  23. package/dist/trainers/sft-config.js +0 -186
  24. package/dist/trainers/sft-trainer.d.ts +0 -141
  25. package/dist/trainers/sft-trainer.d.ts.map +0 -1
  26. package/dist/trainers/sft-trainer.js +0 -502
  27. package/dist/trainers/training-logger.d.ts +0 -375
  28. package/dist/trainers/training-logger.d.ts.map +0 -1
  29. package/dist/trainers/training-logger.js +0 -542
  30. package/dist/types.d.ts +0 -54
  31. package/dist/types.d.ts.map +0 -1
  32. package/dist/types.js +0 -1
  33. package/dist/utils/path-security.d.ts +0 -51
  34. package/dist/utils/path-security.d.ts.map +0 -1
  35. package/dist/utils/path-security.js +0 -69
  36. package/dist/utils/xml-parser.d.ts +0 -6
  37. package/dist/utils/xml-parser.d.ts.map +0 -1
  38. package/dist/utils/xml-parser.js +0 -184
package/dist/data/sft-dataset.d.ts DELETED
@@ -1,156 +0,0 @@
- /**
- * SFT Dataset handling for Supervised Fine-Tuning
- *
- * Supports two data formats (auto-detected):
- * 1. Prompt-Completion: { prompt: ChatMessage[], completion: ChatMessage }
- * 2. Full Conversation: { messages: ChatMessage[] }
- *
- * Both formats produce tokenized batches with labels masked appropriately.
- */
- import type { Qwen3Tokenizer } from '@mlx-node/core';
- import type { ChatMessage } from '../types';
- import { type PathValidationOptions } from '../utils/path-security';
- /**
- * Special token IDs for SFT label masking
- *
- * These are used to detect assistant message boundaries in tokenized conversations.
- * The IDs can be derived from the tokenizer or provided explicitly.
- */
- export interface SpecialTokenIds {
- /** Token ID for <|im_start|> */
- imStart: number;
- /** Token ID for <|im_end|> */
- imEnd: number;
- /** Token IDs that represent newlines (for detecting end of role header) */
- newlineTokens: number[];
- }
- /**
- * Prompt-Completion format for tool-use training
- */
- export interface SFTPromptCompletionExample {
- prompt: ChatMessage[];
- completion: ChatMessage;
- }
- /**
- * Full conversation format for multi-turn dialogue
- */
- export interface SFTConversationExample {
- messages: ChatMessage[];
- }
- /**
- * Union type for SFT examples
- */
- export type SFTExample = SFTPromptCompletionExample | SFTConversationExample;
- /**
- * A tokenized batch ready for SFT training
- */
- export interface SFTBatch {
- inputIds: Int32Array;
- labels: Int32Array;
- shape: [number, number];
- }
- /**
- * Configuration for SFT dataset
- */
- export interface SFTDatasetConfig {
- maxSeqLength?: number;
- completionOnly?: boolean;
- enableThinking?: boolean;
- seed?: number;
- /**
- * Special token IDs for label masking.
- *
- * If not provided, these are automatically derived from the tokenizer.
- * This option allows explicit overriding for custom tokenizers or
- * non-standard vocabularies.
- */
- specialTokenIds?: Partial<SpecialTokenIds>;
- }
- /**
- * SFT Dataset class for handling SFT training data
- */
- export declare class SFTDataset {
- private examples;
- private tokenizer;
- private config;
- private format;
- private shuffledIndices;
- private rng;
- /** Cached special token IDs for label masking */
- private specialTokenIds;
- constructor(examples: SFTExample[], tokenizer: Qwen3Tokenizer, config?: SFTDatasetConfig);
- /**
- * Get the number of examples in the dataset
- */
- get length(): number;
- /**
- * Shuffle dataset for a specific epoch using epoch-based seeding.
- * This ensures reproducible shuffles across training resumes.
- * Each epoch gets a deterministic shuffle based on (baseSeed + epoch).
- *
- * @param epoch - The epoch number (used as seed offset)
- */
- shuffleForEpoch(epoch: number): void;
- /**
- * Create a seeded pseudo-random number generator (Linear Congruential Generator)
- */
- private createSeededRandom;
- /**
- * Find length of common prefix between two token arrays
- * Handles chat template quirks where prompt tokens may not be exact prefix of full tokens
- */
- private findCommonPrefixLength;
- /**
- * Tokenize a prompt-completion example
- */
- private tokenizePromptCompletion;
- /**
- * Check if a token ID is a newline token
- */
- private isNewlineToken;
- /**
- * Tokenize a conversation example
- *
- * For conversations, we train on all assistant turns.
- * Non-assistant tokens (system, user) are masked with -100.
- *
- * Uses single-pass tokenization with token-based boundary detection.
- * Token IDs are derived from the tokenizer for portability across models.
- */
- private tokenizeConversation;
- /**
- * Tokenize a single example based on its format
- */
- private tokenizeExample;
- /**
- * Collate multiple examples into a padded batch
- */
- collateBatch(indices: number[]): Promise<SFTBatch>;
- /**
- * Generate batches for training
- */
- batches(batchSize: number): AsyncGenerator<SFTBatch>;
- /**
- * Get total number of batches for a given batch size
- */
- numBatches(batchSize: number): number;
- }
- /**
- * Load SFT dataset from a JSONL file
- *
- * Supports two formats:
- * 1. Prompt-Completion: {"prompt": [...], "completion": {...}}
- * 2. Conversation: {"messages": [...]}
- *
- * @param path - Path to the JSONL file (relative to cwd or allowedRoot)
- * @param tokenizer - Qwen3 tokenizer instance
- * @param config - Optional configuration including path validation options
- */
- export declare function loadSFTDataset(path: string, tokenizer: Qwen3Tokenizer, config?: SFTDatasetConfig & {
- limit?: number;
- } & PathValidationOptions): Promise<SFTDataset>;
- /**
- * Create SFT dataset from examples directly
- */
- export declare function createSFTDataset(examples: SFTExample[], tokenizer: Qwen3Tokenizer, config?: SFTDatasetConfig): SFTDataset;
- //# sourceMappingURL=sft-dataset.d.ts.map
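
Note: the two auto-detected record shapes referenced in the removed declarations above look roughly like the following. This is an illustrative sketch, not code from the package; only the `role`/`content` fields of `ChatMessage` are assumed here (the full type lived in `dist/types.d.ts`).

```ts
// Illustrative sketch of the two SFT example shapes (SFTPromptCompletionExample
// and SFTConversationExample). The inline ChatMessage type is an assumption;
// the real one was exported from './types'.
type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

// 1. Prompt-Completion: with completionOnly set, only the completion turn is
//    trainable; prompt tokens are masked with -100.
const promptCompletion: { prompt: ChatMessage[]; completion: ChatMessage } = {
  prompt: [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'What is 2 + 2?' },
  ],
  completion: { role: 'assistant', content: 'The answer is 4.' },
};

// 2. Full conversation: with completionOnly set, every assistant turn is
//    trainable and system/user tokens are masked.
const conversation: { messages: ChatMessage[] } = {
  messages: [
    { role: 'user', content: 'Hi!' },
    { role: 'assistant', content: 'Hello! How can I help?' },
  ],
};
```

In a JSONL file, each such record occupies one line; `loadSFTDataset` rejects a completion whose `role` is not `'assistant'` and a `messages` array with no assistant turn (see the validation logic in the removed `sft-dataset.js` below).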
package/dist/data/sft-dataset.d.ts.map DELETED
@@ -1 +0,0 @@
- {"version":3,"file":"sft-dataset.d.ts","sourceRoot":"","sources":["../../src/data/sft-dataset.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAA2C,KAAK,qBAAqB,EAAE,MAAM,wBAAwB,CAAC;AAK7G;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,gCAAgC;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,2EAA2E;IAC3E,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAgDD;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,UAAU,EAAE,WAAW,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,0BAA0B,GAAG,sBAAsB,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,UAAU,CAAC;IACrB,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;CAC5C;AAeD;;GAEG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAe;IAC/B,OAAO,CAAC,SAAS,CAAiB;IAClC,OAAO,CAAC,MAAM,CAAkF;IAChG,OAAO,CAAC,MAAM,CAAuC;IACrD,OAAO,CAAC,eAAe,CAAW;IAClC,OAAO,CAAC,GAAG,CAAe;IAC1B,iDAAiD;IACjD,OAAO,CAAC,eAAe,CAAkB;gBAE7B,QAAQ,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,GAAE,gBAAqB;IAsC5F;;OAEG;IACH,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED;;;;;;OAMG;IACH,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAYpC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAS9B;;OAEG;YACW,wBAAwB;IA8CtC;;OAEG;IACH,OAAO,CAAC,cAAc;IAItB;;;;;;;;OAQG;YACW,oBAAoB;IA2DlC;;OAEG;YACW,eAAe;IAQ7B;;OAEG;IACG,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,QAAQ,CAAC;IA8CxD;;OAEG;IACI,OAAO,CAAC,SAAS,EAAE,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC;IAQ3D;;OAEG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;CAGtC;AAoFD;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,cAAc,EACzB,MAAM,CAAC,EAAE,gBAAgB,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,qBAAqB,GACrE,OAAO,CAAC,UAAU,CAAC,CAarB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,QAAQ,EAAE,UAAU,EAAE,EACtB,SAAS,EAAE,cAAc,EACzB,MAAM,CAAC,EAAE,gBAAgB,GACxB,UAAU,CAEZ"}
package/dist/data/sft-dataset.js DELETED
@@ -1,415 +0,0 @@
- /**
- * SFT Dataset handling for Supervised Fine-Tuning
- *
- * Supports two data formats (auto-detected):
- * 1. Prompt-Completion: { prompt: ChatMessage[], completion: ChatMessage }
- * 2. Full Conversation: { messages: ChatMessage[] }
- *
- * Both formats produce tokenized batches with labels masked appropriately.
- */
- import { readFileSync } from 'node:fs';
- import { resolve as resolvePath } from 'node:path';
- import { validatePathContainment, getAllowedRoot } from '../utils/path-security';
- // -100 is the standard ignore index for cross-entropy loss
- const IGNORE_INDEX = -100;
- /**
- * Get special token IDs from a tokenizer
- *
- * Queries the tokenizer to get the actual token IDs for special tokens.
- * This ensures portability across different tokenizers/vocabularies.
- *
- * @param tokenizer - The tokenizer instance
- * @returns Special token IDs derived from the tokenizer
- * @throws Error if required special tokens are not found
- */
- function getSpecialTokenIds(tokenizer) {
- // Get im_start and im_end tokens using the tokenizer's special token getters
- const imStartToken = tokenizer.getImStartToken(); // "<|im_start|>"
- const imEndToken = tokenizer.getImEndToken(); // "<|im_end|>"
- const imStart = tokenizer.tokenToId(imStartToken);
- const imEnd = tokenizer.tokenToId(imEndToken);
- // Validate that we got valid IDs (tokenToId returns null for unknown tokens)
- if (imStart === null || imEnd === null) {
- throw new Error(`Tokenizer does not have required special tokens for ChatML format. ` +
- `Got im_start=${imStart}, im_end=${imEnd}. ` +
- `This tokenizer may not be compatible with ChatML format.`);
- }
- // Get newline token IDs - these vary by tokenizer
- // Try common newline representations
- const newlineTokens = [];
- const potentialNewlines = ['\n', ' \n', '\r\n', '\n\n'];
- for (const nl of potentialNewlines) {
- const id = tokenizer.tokenToId(nl);
- if (id !== null && !newlineTokens.includes(id)) {
- newlineTokens.push(id);
- }
- }
- // If no newline tokens found, we'll rely on the fallback in tokenizeConversation
- return {
- imStart,
- imEnd,
- newlineTokens,
- };
- }
- /**
- * Detect the format of an SFT example
- */
- function detectFormat(example) {
- if ('prompt' in example && 'completion' in example) {
- return 'prompt-completion';
- }
- if ('messages' in example) {
- return 'conversation';
- }
- throw new Error('Invalid SFT example format. Expected either {prompt, completion} or {messages}');
- }
- /**
- * SFT Dataset class for handling SFT training data
- */
- export class SFTDataset {
- examples;
- tokenizer;
- config;
- format;
- shuffledIndices;
- rng;
- /** Cached special token IDs for label masking */
- specialTokenIds;
- constructor(examples, tokenizer, config = {}) {
- if (examples.length === 0) {
- throw new Error('SFT dataset must contain at least one example');
- }
- this.examples = examples;
- this.tokenizer = tokenizer;
- this.config = {
- maxSeqLength: config.maxSeqLength ?? 2048,
- completionOnly: config.completionOnly ?? false, // Changed to false for TRL parity
- enableThinking: config.enableThinking ?? false,
- seed: config.seed ?? 42,
- };
- this.rng = this.createSeededRandom(this.config.seed);
- // Get special token IDs from tokenizer, with optional overrides
- const derivedTokenIds = getSpecialTokenIds(tokenizer);
- this.specialTokenIds = {
- imStart: config.specialTokenIds?.imStart ?? derivedTokenIds.imStart,
- imEnd: config.specialTokenIds?.imEnd ?? derivedTokenIds.imEnd,
- newlineTokens: config.specialTokenIds?.newlineTokens ?? derivedTokenIds.newlineTokens,
- };
- // Detect format from first example
- this.format = detectFormat(examples[0]);
- // Validate all examples have the same format
- for (let i = 1; i < examples.length; i++) {
- const fmt = detectFormat(examples[i]);
- if (fmt !== this.format) {
- throw new Error(`Inconsistent SFT data format: example 0 is ${this.format}, example ${i} is ${fmt}`);
- }
- }
- // Initialize indices
- this.shuffledIndices = Array.from({ length: examples.length }, (_, i) => i);
- }
- /**
- * Get the number of examples in the dataset
- */
- get length() {
- return this.examples.length;
- }
- /**
- * Shuffle dataset for a specific epoch using epoch-based seeding.
- * This ensures reproducible shuffles across training resumes.
- * Each epoch gets a deterministic shuffle based on (baseSeed + epoch).
- *
- * @param epoch - The epoch number (used as seed offset)
- */
- shuffleForEpoch(epoch) {
- // Reset RNG with epoch-specific seed for reproducibility
- this.rng = this.createSeededRandom(this.config.seed + epoch);
- // Reset indices to original order
- this.shuffledIndices = Array.from({ length: this.examples.length }, (_, i) => i);
- // Fisher-Yates shuffle
- for (let i = this.shuffledIndices.length - 1; i > 0; i--) {
- const j = Math.floor(this.rng() * (i + 1));
- [this.shuffledIndices[i], this.shuffledIndices[j]] = [this.shuffledIndices[j], this.shuffledIndices[i]];
- }
- }
- /**
- * Create a seeded pseudo-random number generator (Linear Congruential Generator)
- */
- createSeededRandom(seed) {
- let s = seed;
- return () => {
- s = (s * 1103515245 + 12345) & 0x7fffffff;
- return s / 0x7fffffff;
- };
- }
- /**
- * Find length of common prefix between two token arrays
- * Handles chat template quirks where prompt tokens may not be exact prefix of full tokens
- */
- findCommonPrefixLength(prompt, full) {
- let i = 0;
- const maxLen = Math.min(prompt.length, full.length);
- while (i < maxLen && prompt[i] === full[i]) {
- i++;
- }
- return i;
- }
- /**
- * Tokenize a prompt-completion example
- */
- async tokenizePromptCompletion(example) {
- // Tokenize prompt with generation prompt (so the model learns to continue)
- const promptTokens = await this.tokenizer.applyChatTemplate(example.prompt, true, // add generation prompt
- null, this.config.enableThinking);
- // Create full messages for tokenization
- const fullMessages = [...example.prompt, example.completion];
- const fullTokens = await this.tokenizer.applyChatTemplate(fullMessages, false, // no generation prompt at the end
- null, this.config.enableThinking);
- // Convert to regular arrays for manipulation
- const promptArr = Array.from(promptTokens, Number);
- const inputIds = Array.from(fullTokens, Number);
- // Use common prefix detection to handle chat template quirks
- // (some templates may not produce prompt tokens as exact prefix of full tokens)
- const promptLen = this.findCommonPrefixLength(promptArr, inputIds);
- if (promptLen !== promptArr.length) {
- console.warn(`[SFT Dataset] Prompt tokens differ from prefix of full sequence ` +
- `(${promptArr.length} vs ${promptLen}). Using common prefix for masking.`);
- }
- // Create labels: -100 for prompt tokens, actual tokens for completion
- const labels = inputIds.map((id, i) => {
- if (this.config.completionOnly && i < promptLen) {
- return IGNORE_INDEX;
- }
- return id;
- });
- return { inputIds, labels };
- }
- /**
- * Check if a token ID is a newline token
- */
- isNewlineToken(tokenId) {
- return this.specialTokenIds.newlineTokens.includes(tokenId);
- }
- /**
- * Tokenize a conversation example
- *
- * For conversations, we train on all assistant turns.
- * Non-assistant tokens (system, user) are masked with -100.
- *
- * Uses single-pass tokenization with token-based boundary detection.
- * Token IDs are derived from the tokenizer for portability across models.
- */
- async tokenizeConversation(example) {
- const messages = example.messages;
- // Single tokenization pass
- const fullTokens = await this.tokenizer.applyChatTemplate(messages, false, null, this.config.enableThinking);
- const inputIds = Array.from(fullTokens, Number);
- // If not masking prompts, all tokens are trainable
- if (!this.config.completionOnly) {
- return { inputIds, labels: inputIds.slice() };
- }
- // Token-based boundary detection using special tokens (derived from tokenizer)
- const { imStart, imEnd } = this.specialTokenIds;
- // Get "assistant" token ID (it's a single token in Qwen3)
- const assistantTokenId = this.tokenizer.tokenToId('assistant');
- const labels = Array.from({ length: inputIds.length }, () => IGNORE_INDEX);
- let inAssistant = false;
- for (let i = 0; i < inputIds.length; i++) {
- // Detect assistant region: <|im_start|> followed by "assistant" token
- if (inputIds[i] === imStart && i + 1 < inputIds.length && inputIds[i + 1] === assistantTokenId) {
- // Skip the <|im_start|>assistant\n header, start training from content
- // Find the newline after "assistant"
- let j = i + 2;
- while (j < inputIds.length && inputIds[j] !== imEnd) {
- // Look for newline token (dynamically derived from tokenizer)
- if (this.isNewlineToken(inputIds[j])) {
- inAssistant = true;
- i = j; // Skip to after header
- break;
- }
- j++;
- }
- if (!inAssistant) {
- // Fallback: just start after assistant token
- inAssistant = true;
- i = i + 1;
- }
- continue;
- }
- if (inAssistant && inputIds[i] !== imEnd) {
- labels[i] = inputIds[i];
- }
- if (inputIds[i] === imEnd) {
- inAssistant = false;
- }
- }
- return { inputIds, labels };
- }
- /**
- * Tokenize a single example based on its format
- */
- async tokenizeExample(example) {
- if (this.format === 'prompt-completion') {
- return this.tokenizePromptCompletion(example);
- }
- else {
- return this.tokenizeConversation(example);
- }
- }
- /**
- * Collate multiple examples into a padded batch
- */
- async collateBatch(indices) {
- const examples = indices.map((i) => this.examples[this.shuffledIndices[i]]);
- // Tokenize all examples
- const tokenized = [];
- for (const example of examples) {
- tokenized.push(await this.tokenizeExample(example));
- }
- // Find max length (capped at maxSeqLength)
- const maxLen = Math.min(this.config.maxSeqLength, Math.max(...tokenized.map((t) => t.inputIds.length)));
- // Pad and truncate
- const batchSize = examples.length;
- const paddedInputIds = new Int32Array(batchSize * maxLen);
- const paddedLabels = new Int32Array(batchSize * maxLen);
- const padTokenId = this.tokenizer.getPadTokenId();
- for (let b = 0; b < batchSize; b++) {
- const { inputIds, labels } = tokenized[b];
- const seqLen = Math.min(inputIds.length, maxLen);
- // Truncate from the left if necessary (keep the end of the sequence)
- const startIdx = Math.max(0, inputIds.length - maxLen);
- for (let s = 0; s < maxLen; s++) {
- const offset = b * maxLen + s;
- if (s < seqLen) {
- paddedInputIds[offset] = inputIds[startIdx + s];
- paddedLabels[offset] = labels[startIdx + s];
- }
- else {
- // Pad
- paddedInputIds[offset] = padTokenId;
- paddedLabels[offset] = IGNORE_INDEX;
- }
- }
- }
- return {
- inputIds: paddedInputIds,
- labels: paddedLabels,
- shape: [batchSize, maxLen],
- };
- }
- /**
- * Generate batches for training
- */
- async *batches(batchSize) {
- for (let i = 0; i < this.examples.length; i += batchSize) {
- const end = Math.min(i + batchSize, this.examples.length);
- const indices = Array.from({ length: end - i }, (_, j) => i + j);
- yield await this.collateBatch(indices);
- }
- }
- /**
- * Get total number of batches for a given batch size
- */
- numBatches(batchSize) {
- return Math.ceil(this.examples.length / batchSize);
- }
- }
- /**
- * Read JSONL file and parse into records
- */
- function readJsonl(path, limit) {
- let fileContents;
- try {
- fileContents = readFileSync(path, 'utf8');
- }
- catch (error) {
- const message = error instanceof Error ? error.message : String(error);
- throw new Error(`Failed to read SFT dataset at ${path}: ${message}`);
- }
- const lines = fileContents.split(/\r?\n/).filter((line) => line.trim().length > 0);
- const records = [];
- const max = typeof limit === 'number' && limit > 0 ? limit : Number.POSITIVE_INFINITY;
- for (let i = 0; i < lines.length && records.length < max; i++) {
- const line = lines[i];
- try {
- const parsed = JSON.parse(line);
- records.push(parsed);
- }
- catch (error) {
- const message = error instanceof Error ? error.message : String(error);
- throw new Error(`Failed to parse JSONL at ${path}:${i + 1} - ${message}`);
- }
- }
- return records;
- }
- /**
- * Validate an SFT example
- */
- function validateSFTExample(example, index) {
- if (typeof example !== 'object' || example === null) {
- throw new Error(`SFT example ${index} must be an object`);
- }
- const obj = example;
- // Check for prompt-completion format
- if ('prompt' in obj && 'completion' in obj) {
- if (!Array.isArray(obj.prompt)) {
- throw new Error(`SFT example ${index}: prompt must be an array of messages`);
- }
- if (typeof obj.completion !== 'object' || obj.completion === null) {
- throw new Error(`SFT example ${index}: completion must be a message object`);
- }
- const completion = obj.completion;
- if (completion.role !== 'assistant') {
- throw new Error(`SFT example ${index}: completion.role must be 'assistant'`);
- }
- if (typeof completion.content !== 'string') {
- throw new Error(`SFT example ${index}: completion.content must be a string`);
- }
- return {
- prompt: obj.prompt,
- completion: obj.completion,
- };
- }
- // Check for conversation format
- if ('messages' in obj) {
- if (!Array.isArray(obj.messages)) {
- throw new Error(`SFT example ${index}: messages must be an array`);
- }
- if (obj.messages.length === 0) {
- throw new Error(`SFT example ${index}: messages cannot be empty`);
- }
- // Check that at least one message is from assistant
- const hasAssistant = obj.messages.some((m) => typeof m === 'object' && m !== null && m.role === 'assistant');
- if (!hasAssistant) {
- throw new Error(`SFT example ${index}: messages must contain at least one assistant message`);
- }
- return { messages: obj.messages };
- }
- throw new Error(`SFT example ${index}: must have either {prompt, completion} or {messages}`);
- }
- /**
- * Load SFT dataset from a JSONL file
- *
- * Supports two formats:
- * 1. Prompt-Completion: {"prompt": [...], "completion": {...}}
- * 2. Conversation: {"messages": [...]}
- *
- * @param path - Path to the JSONL file (relative to cwd or allowedRoot)
- * @param tokenizer - Qwen3 tokenizer instance
- * @param config - Optional configuration including path validation options
- */
- export async function loadSFTDataset(path, tokenizer, config) {
- const allowedRoot = getAllowedRoot(config);
- const absolutePath = resolvePath(allowedRoot, path);
- // Validate the path stays within allowed root to prevent directory traversal
- validatePathContainment(absolutePath, allowedRoot);
- const rawRecords = readJsonl(absolutePath, config?.limit);
- // Validate and convert
- const examples = rawRecords.map((record, i) => validateSFTExample(record, i));
- return new SFTDataset(examples, tokenizer, config);
- }
- /**
- * Create SFT dataset from examples directly
- */
- export function createSFTDataset(examples, tokenizer, config) {
- return new SFTDataset(examples, tokenizer, config);
- }
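
For clarity, the completion-only labelling rule implemented above (prompt and padding positions receive the ignore index -100, so cross-entropy loss only covers completion tokens) reduces to something like this minimal sketch; the token IDs below are made up for illustration and are not from the package.

```ts
// Minimal sketch of the labelling rule from tokenizePromptCompletion above;
// the token IDs are hypothetical and only illustrate the masking.
const IGNORE_INDEX = -100;

function maskPromptTokens(inputIds: number[], promptLen: number): number[] {
  // Positions before promptLen are ignored by the loss; the rest are trained on.
  return inputIds.map((id, i) => (i < promptLen ? IGNORE_INDEX : id));
}

// A 4-token prompt followed by a 3-token completion:
console.log(maskPromptTokens([101, 5, 6, 7, 42, 43, 44], 4));
// -> [-100, -100, -100, -100, 42, 43, 44]
```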
package/dist/index.d.ts DELETED
@@ -1,33 +0,0 @@
- /**
- * @mlx-node/trl - Training utilities for MLX models
- *
- * This package provides everything needed for training ML models,
- * aligned with Python's TRL (Transformer Reinforcement Learning) library.
- *
- * For model loading and inference, import from @mlx-node/lm.
- *
- * @example
- * ```typescript
- * import { GRPOTrainer, GRPOConfig, loadLocalGsm8kDataset } from '@mlx-node/trl';
- * import { ModelLoader } from '@mlx-node/lm';
- *
- * const model = await ModelLoader.loadPretrained('./models/qwen3-0.6b');
- * const trainer = await GRPOTrainer.create({ modelPath: './models/qwen3-0.6b' });
- * ```
- */
- export type { ToolDefinition, FunctionDefinition, FunctionParameters } from '@mlx-node/core';
- export { MxArray } from '@mlx-node/core';
- export { convertModel, convertParquetToJsonl } from '@mlx-node/core';
- export type { ConversionOptions, ConversionResult } from '@mlx-node/core';
- export { type MLXGRPOConfig, ConfigError, getDefaultConfig, mergeConfig, loadTomlConfig, applyOverrides, } from './trainers/grpo-config';
- export { GRPOTrainer, type GRPOTrainerConfig, DEFAULT_GRPO_CONFIG, createRewardRegistry, computeDatasetHash, RewardTimeoutError, type GenerateBatchResult, type TrainStepMetrics, type TrainingMetrics, type TrainingState, type DatasetMetadata, GrpoTrainingEngine, NativeRewardRegistry, type GrpoEngineConfig, type EngineStepMetrics, type EngineEpochMetrics, type BuiltinRewardConfig, } from './trainers/grpo-trainer';
- export { TrainingLogger, createTrainingLogger, type TrainingLoggerConfig, type TrainingMetrics as TrainingLoggerMetrics, type GenerationSample, type TrainingConfigFields, type TuiMessage, type LogEvent, type PromptChoice, type PromptOptions, } from './trainers/training-logger';
- export { type EntropyFilteringConfig, DEFAULT_ENTROPY_CONFIG } from './trainers/grpo-entropy';
- export { SFTTrainer, SftTrainingEngine, type SFTTrainStepResult, type SFTTrainingState, type SftEngineConfig, type SftStepMetrics, type SftEpochMetrics, } from './trainers/sft-trainer';
- export { type SFTTrainerConfig, SFTConfigError, getDefaultSFTConfig, mergeSFTConfig, loadSFTTomlConfig, applySFTOverrides, DEFAULT_SFT_CONFIG, } from './trainers/sft-config';
- export * from './data/dataset';
- export { SFTDataset, loadSFTDataset, createSFTDataset, type SFTExample, type SFTPromptCompletionExample, type SFTConversationExample, type SFTBatch, type SFTDatasetConfig, type SpecialTokenIds, } from './data/sft-dataset';
- export * from './utils/xml-parser';
- export { validatePathContainment, resolveAndValidatePath, getAllowedRoot, PathTraversalError, type PathValidationOptions, } from './utils/path-security';
- export type { ChatRole, ChatMessage, CompletionMessage, Completion, DatasetSplit, DatasetExample, XmlParseResult, RewardComputationInput, PromptFormatterOptions, PromptTemplate, DatasetLoader, RewardFunction, PromptFormatter, CompletionInfo, RewardOutput, } from './types';
- //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map DELETED
@@ -1 +0,0 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAOH,YAAY,EAAE,cAAc,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAG7F,OAAO,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAWzC,OAAO,EAAE,YAAY,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACrE,YAAY,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAO1E,OAAO,EACL,KAAK,aAAa,EAClB,WAAW,EACX,gBAAgB,EAChB,WAAW,EACX,cAAc,EACd,cAAc,GACf,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EACL,WAAW,EACX,KAAK,iBAAiB,EACtB,mBAAmB,EACnB,oBAAoB,EACpB,kBAAkB,EAClB,kBAAkB,EAClB,KAAK,mBAAmB,EACxB,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,eAAe,EAEpB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,gBAAgB,EACrB,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,KAAK,mBAAmB,GACzB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,cAAc,EACd,oBAAoB,EACpB,KAAK,oBAAoB,EACzB,KAAK,eAAe,IAAI,qBAAqB,EAC7C,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,UAAU,EACf,KAAK,QAAQ,EACb,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,4BAA4B,CAAC;AAGpC,OAAO,EAAE,KAAK,sBAAsB,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAG9F,OAAO,EACL,UAAU,EACV,iBAAiB,EACjB,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,cAAc,EACnB,KAAK,eAAe,GACrB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EACL,KAAK,gBAAgB,EACrB,cAAc,EACd,mBAAmB,EACnB,cAAc,EACd,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,GACnB,MAAM,uBAAuB,CAAC;AAG/B,cAAc,gBAAgB,CAAC;AAC/B,OAAO,EACL,UAAU,EACV,cAAc,EACd,gBAAgB,EAChB,KAAK,UAAU,EACf,KAAK,0BAA0B,EAC/B,KAAK,sBAAsB,EAC3B,KAAK,QAAQ,EACb,KAAK,gBAAgB,EACrB,KAAK,eAAe,GACrB,MAAM,oBAAoB,CAAC;AAG5B,cAAc,oBAAoB,CAAC;AACnC,OAAO,EACL,uBAAuB,EACvB,sBAAsB,EACtB,cAAc,EACd,kBAAkB,EAClB,KAAK,qBAAqB,GAC3B,MAAM,uBAAuB,CAAC;AAG/B,YAAY,EACV,QAAQ,EACR,WAAW,EACX,iBAAiB,EACjB,UAAU,EACV,YAAY,EACZ,cAAc,EACd,cAAc,EACd,sBAAsB,EACtB,sBAAsB,EACtB,cAAc,EACd,aAAa,EACb,cAAc,EACd,eAAe,EAEf,cAAc,EACd,YAAY,GACb,MAAM,SAAS,CAAC"}
package/dist/index.js DELETED
@@ -1,47 +0,0 @@
- /**
- * @mlx-node/trl - Training utilities for MLX models
- *
- * This package provides everything needed for training ML models,
- * aligned with Python's TRL (Transformer Reinforcement Learning) library.
- *
- * For model loading and inference, import from @mlx-node/lm.
- *
- * @example
- * ```typescript
- * import { GRPOTrainer, GRPOConfig, loadLocalGsm8kDataset } from '@mlx-node/trl';
- * import { ModelLoader } from '@mlx-node/lm';
- *
- * const model = await ModelLoader.loadPretrained('./models/qwen3-0.6b');
- * const trainer = await GRPOTrainer.create({ modelPath: './models/qwen3-0.6b' });
- * ```
- */
- // Core tensor (for custom rewards/models)
- export { MxArray } from '@mlx-node/core';
- // Activations are internal-only (Rust) - used by transformers, sampling, GRPO
- // Transformer components are now internal-only (Rust)
- // Use model.chat() or model.generate() instead
- // GRPO utilities (computeAdvantages, computeEntropy, getHighEntropyMask) are internal-only
- // They are used by GRPOTrainingEngine in Rust
- // Model conversion
- export { convertModel, convertParquetToJsonl } from '@mlx-node/core';
- // =============================================================================
- // TRL-specific exports
- // =============================================================================
- // Trainers
- export { ConfigError, getDefaultConfig, mergeConfig, loadTomlConfig, applyOverrides, } from './trainers/grpo-config';
- export { GRPOTrainer, DEFAULT_GRPO_CONFIG, createRewardRegistry, computeDatasetHash, RewardTimeoutError,
- // Re-export native types from trainer
- GrpoTrainingEngine, NativeRewardRegistry, } from './trainers/grpo-trainer';
- // Unified Training Logger (recommended)
- export { TrainingLogger, createTrainingLogger, } from './trainers/training-logger';
- // Entropy configuration
- export { DEFAULT_ENTROPY_CONFIG } from './trainers/grpo-entropy';
- // SFT Trainer
- export { SFTTrainer, SftTrainingEngine, } from './trainers/sft-trainer';
- export { SFTConfigError, getDefaultSFTConfig, mergeSFTConfig, loadSFTTomlConfig, applySFTOverrides, DEFAULT_SFT_CONFIG, } from './trainers/sft-config';
- // Data
- export * from './data/dataset';
- export { SFTDataset, loadSFTDataset, createSFTDataset, } from './data/sft-dataset';
- // Utils
- export * from './utils/xml-parser';
- export { validatePathContainment, resolveAndValidatePath, getAllowedRoot, PathTraversalError, } from './utils/path-security';