npm - node-llama-cpp - Versions diffs - 2.8.4 → 3.0.0-beta.2 - Mend

node-llama-cpp 2.8.4 → 3.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (185) hide show

package/dist/llamaEvaluator/LlamaContext/types.d.ts ADDED Viewed

@@ -0,0 +1,86 @@
+import { Token } from "../../types.js";
+import { LlamaModel } from "../LlamaModel.js";
+import { LlamaContextSequence } from "./LlamaContext.js";
+/**
+ * Options for creating a llama context.
+ */
+export type LlamaContextOptions = {
+    model: LlamaModel;
+    /**
+     * Number of sequences for the context.
+     * Each sequence is a different "text generation process" that can run in parallel to other sequences in the same context.
+     * Although a single context has multiple sequences, the sequences are separate from each other and do not share data with each other.
+     * This is beneficial for performance, as multiple sequences can be evaluated in parallel (on the same batch).
+     */
+    sequences?: number;
+    /** If null, a random seed will be used */
+    seed?: number | null;
+    /** Text context size */
+    contextSize?: number;
+    /** Prompt processing batch size */
+    batchSize?: number;
+    /** The llama_eval() call computes all logits, not just the last one */
+    logitsAll?: boolean;
+    /** Embedding mode only */
+    embedding?: boolean;
+    /**
+     * Number of threads to use to evaluate tokens.
+     * Set to 0 to use the maximum threads supported by the current machine hardware.
+     */
+    threads?: number;
+    /** Control the parallel sequences processing behavior */
+    batching?: BatchingOptions;
+};
+/**
+ * Repeat-penalty settings: which tokens to penalize and by how much.
+ */
+export type LlamaContextSequenceRepeatPenalty = {
+    /**
+     * Tokens whose probability of being the next predicted token should be lowered.
+     * Either an array of tokens or a function that returns one.
+     */
+    punishTokens: Token[] | (() => Token[]);
+    /**
+     * The relative amount to lower the probability of the tokens in `punishTokens` by.
+     * Defaults to `1.1`.
+     * Set to `1` to disable.
+     */
+    penalty?: number;
+    /**
+     * For every `n` times a token appears in the `punishTokens` array, lower its probability by `n * frequencyPenalty`.
+     * Disabled by default (`0`).
+     * Set to a value between `0` and `1` to enable.
+     */
+    frequencyPenalty?: number;
+    /**
+     * Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`.
+     * Disabled by default (`0`).
+     * Set to a value between `0` and `1` to enable.
+     */
+    presencePenalty?: number;
+};
+/**
+ * Options that control the parallel sequences processing behavior
+ * (the type of the `batching` option of `LlamaContextOptions`).
+ */
+export type BatchingOptions = {
+    /** Either `"nextTick"` or a custom function that receives a `dispatch` callback */
+    dispatchSchedule?: "nextTick" | CustomBatchingDispatchSchedule;
+    /**
+     * Either the name of a built-in strategy (`"maximumParallelism"` or `"firstInFirstOut"`)
+     * or a custom strategy function
+     */
+    itemsPrioritizingStrategy?: "maximumParallelism" | "firstInFirstOut" | CustomBatchingPrioritizeStrategy;
+};
+/**
+ * Custom dispatch schedule: a function that is handed a `dispatch` callback.
+ * NOTE(review): presumably the function decides when to invoke `dispatch` so the pending batch gets processed -
+ * confirm against the batch dispatcher implementation.
+ */
+export type CustomBatchingDispatchSchedule = (dispatch: () => void) => void;
+/**
+ * Custom batch items prioritization strategy.
+ * Receives the pending `items` and a `size`, and returns the items it selected together with
+ * how much of each one to process (`processAmount`).
+ * The built-in strategies treat `size` as the total number of tokens that can be handed out across all items.
+ */
+export type CustomBatchingPrioritizeStrategy = (options: {
+    items: readonly BatchItem[];
+    size: number;
+}) => PrioritizedBatchItem[];
+/**
+ * Context shift options.
+ * NOTE(review): presumably used to free space when a sequence's context fills up - confirm against the
+ * context sequence implementation.
+ */
+export type ContextShiftOptions = {
+    /** A fixed number, or a function (sync or async) that computes a number from the given sequence */
+    size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
+    /**
+     * Either `"eraseBeginning"` or a custom function (sync or async) that receives the sequence and the size
+     * and returns the token ranges to delete
+     */
+    strategy?: "eraseBeginning" | ((options: {
+        sequence: LlamaContextSequence;
+        size: number;
+    }) => ContextTokensDeleteRange[] | Promise<ContextTokensDeleteRange[]>);
+};
+/**
+ * A range of context tokens to delete, as returned by a custom context shift strategy.
+ * NOTE(review): whether `end` is inclusive or exclusive is not visible from this declaration - confirm before relying on it.
+ */
+export type ContextTokensDeleteRange = {
+    start: number;
+    end: number;
+};
+/**
+ * Evaluation priority of a batch item.
+ *
+ * 1 - low
+ *
+ * 5 - high
+ */
+export type EvaluationPriority = 1 | 2 | 3 | 4 | 5;
+/**
+ * An item handed to a batch items prioritization strategy: its tokens and its evaluation priority.
+ */
+export type BatchItem = {
+    readonly tokens: readonly Token[];
+    readonly evaluationPriority: EvaluationPriority;
+};
+/**
+ * The outcome of prioritizing a single batch item: the item and how much of it to process.
+ * The built-in strategies express `processAmount` as a number of tokens, capped at `item.tokens.length`.
+ */
+export type PrioritizedBatchItem = {
+    item: BatchItem;
+    processAmount: number;
+};

package/dist/llamaEvaluator/LlamaContext/types.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export {};
2	+ //# sourceMappingURL=types.js.map

package/dist/llamaEvaluator/LlamaContext/types.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/llamaEvaluator/LlamaContext/types.ts"],"names":[],"mappings":""}

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import { BatchItem, PrioritizedBatchItem } from "../../types.js";
+/**
+ * Hands out the `size` token budget to the items in descending `evaluationPriority` order,
+ * giving each item as many of its tokens as still fit, and stops as soon as the budget is used up.
+ * The `items` array itself is not reordered; a copy is sorted.
+ * @returns the selected items, each with the number of tokens to process
+ */
+export declare function firstInFirstOutStrategy({ items, size }: {
+    items: readonly BatchItem[];
+    size: number;
+}): PrioritizedBatchItem[];

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js ADDED Viewed

@@ -0,0 +1,16 @@
+/**
+ * Serve items by descending `evaluationPriority`, giving each one as many
+ * of its tokens as the remaining budget allows, until the budget hits zero.
+ * The caller's `items` array is left untouched (a copy is sorted).
+ */
+export function firstInFirstOutStrategy({ items, size }) {
+    const byPriorityDesc = items.slice();
+    byPriorityDesc.sort((first, second) => second.evaluationPriority - first.evaluationPriority);
+    const prioritized = [];
+    let remainingBudget = size;
+    for (let index = 0; index < byPriorityDesc.length; index++) {
+        const item = byPriorityDesc[index];
+        const processAmount = Math.min(item.tokens.length, remainingBudget);
+        prioritized.push({ item, processAmount });
+        remainingBudget -= processAmount;
+        // Budget exhausted: the remaining items are left out of the result
+        if (remainingBudget === 0)
+            break;
+    }
+    return prioritized;
+}
+//# sourceMappingURL=firstInFirstOutStrategy.js.map

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"firstInFirstOutStrategy.js","sourceRoot":"","sources":["../../../../../src/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,uBAAuB,CAAC,EAAC,KAAK,EAAE,IAAI,EAAgD;IAChG,MAAM,GAAG,GAA2B,EAAE,CAAC;IAEvC,MAAM,WAAW,GAAG,KAAK;SACpB,KAAK,EAAE;SACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,kBAAkB,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC;IAEjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE;QAC5B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACnE,GAAG,CAAC,IAAI,CAAC,EAAC,IAAI,EAAE,aAAa,EAAC,CAAC,CAAC;QAChC,cAAc,IAAI,aAAa,CAAC;QAEhC,IAAI,cAAc,KAAK,CAAC;YACpB,MAAM;KACb;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import { BatchItem, PrioritizedBatchItem } from "../../types.js";
+/**
+ * Splits the `size` token budget evenly between all the items first, then redistributes what is left
+ * to the items that still have unprocessed tokens; the final leftovers go to those items in
+ * descending `evaluationPriority` order.
+ * @returns the selected items, each with the number of tokens to process
+ */
+export declare function maximumParallelismStrategy({ items, size }: {
+    items: readonly BatchItem[];
+    size: number;
+}): PrioritizedBatchItem[];

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Distribute a token budget between batch items so that as many items as possible make progress.
+ *
+ * 1. Every item first gets an equal share of `size`, capped by its own token count.
+ * 2. Up to 3 passes hand the unused budget to the items that were clipped, in equal increments.
+ * 3. Whatever is still left goes to the clipped items in descending `evaluationPriority` order.
+ *
+ * The sum of the returned `processAmount` values never exceeds `size`.
+ * @param options - `items` to prioritize and the total token budget (`size`)
+ * @returns the selected items, each with the number of tokens to process
+ */
+export function maximumParallelismStrategy({ items, size }) {
+    // Nothing to prioritize; also avoids dividing the budget by zero below
+    if (items.length === 0)
+        return [];
+    let leftFreeTokens = size;
+    const minTokensForEachItem = Math.floor(leftFreeTokens / items.length);
+    const res = [];
+    // Items that got fewer tokens than they have; they share the same objects as `res`,
+    // so growing an entry here also updates the returned result
+    const clippedItems = [];
+    for (const item of items) {
+        const processAmount = Math.min(item.tokens.length, leftFreeTokens, minTokensForEachItem);
+        const prioritizeItem = { item, processAmount };
+        res.push(prioritizeItem);
+        leftFreeTokens -= processAmount;
+        if (processAmount < item.tokens.length)
+            clippedItems.push(prioritizeItem);
+        if (leftFreeTokens === 0)
+            break;
+    }
+    for (let passesLeft = 3; leftFreeTokens > 0 && clippedItems.length > 0 && passesLeft > 0; passesLeft--) {
+        const minIncreaseAmount = Math.ceil(leftFreeTokens / clippedItems.length);
+        for (let i = 0; i < clippedItems.length && leftFreeTokens > 0; i++) {
+            const prioritizeItem = clippedItems[i];
+            const unprocessedAmount = prioritizeItem.item.tokens.length - prioritizeItem.processAmount;
+            const increaseAmount = Math.min(unprocessedAmount, leftFreeTokens, minIncreaseAmount);
+            prioritizeItem.processAmount += increaseAmount;
+            // Account for the tokens just handed out; without this the budget could be spent more than once
+            leftFreeTokens -= increaseAmount;
+            if (increaseAmount === unprocessedAmount) {
+                // Item is now fully covered; drop it and re-visit the index that shifted into its place
+                clippedItems.splice(i, 1);
+                i--;
+            }
+        }
+    }
+    clippedItems.sort((a, b) => b.item.evaluationPriority - a.item.evaluationPriority);
+    for (let i = 0; i < clippedItems.length && leftFreeTokens > 0; i++) {
+        const prioritizeItem = clippedItems[i];
+        const unprocessedAmount = prioritizeItem.item.tokens.length - prioritizeItem.processAmount;
+        const increaseAmount = Math.min(unprocessedAmount, leftFreeTokens);
+        prioritizeItem.processAmount += increaseAmount;
+        leftFreeTokens -= increaseAmount;
+        if (increaseAmount === unprocessedAmount) {
+            clippedItems.splice(i, 1);
+            i--;
+        }
+    }
+    return res;
+}
+//# sourceMappingURL=maximumParallelismStrategy.js.map

package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"maximumParallelismStrategy.js","sourceRoot":"","sources":["../../../../../src/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,0BAA0B,CAAC,EAAC,KAAK,EAAE,IAAI,EAAgD;IACnG,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,MAAM,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;IAEvE,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,MAAM,YAAY,GAA2B,EAAE,CAAC;IAEhD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE;QACtB,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,EAAE,oBAAoB,CAAC,CAAC;QACzF,MAAM,cAAc,GAAG,EAAC,IAAI,EAAE,aAAa,EAAC,CAAC;QAE7C,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACzB,cAAc,IAAI,aAAa,CAAC;QAEhC,IAAI,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YAClC,YAAY,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEtC,IAAI,cAAc,KAAK,CAAC;YACpB,MAAM;KACb;IAED,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,cAAc,GAAG,CAAC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,EAAE,EAAE;QACpG,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QAE1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;YAChE,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACvC,MAAM,iBAAiB,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC,aAAa,CAAC;YAC3F,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,iBAAiB,EAAE,cAAc,EAAE,iBAAiB,CAAC,CAAC;YACtF,cAAc,CAAC,aAAa,IAAI,cAAc,CAAC;YAE/C,IAAI,cAAc,KAAK,iBAAiB,EAAE;gBACtC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1B,CAAC,EAAE,CAAC;aACP;SACJ;KACJ;IAED,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAEnF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;QAChE,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,iBAAiB,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC,aAAa,CAAC;QAC3F,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QACnE,cAAc,CAAC,aAAa,IAAI,cAAc,CAAC;QAE/C,IAAI,cAAc,KAAK,iBAAiB,EAAE;YACtC,YA
AY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAC1B,CAAC,EAAE,CAAC;SACP;KACJ;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}

package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { BatchingOptions } from "../types.js";
2	+ export declare function resolveBatchItemsPrioritizingStrategy(strategy: Required<BatchingOptions>["itemsPrioritizingStrategy"]): import("../types.js").CustomBatchingPrioritizeStrategy;

package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js ADDED Viewed

@@ -0,0 +1,13 @@
+import { maximumParallelismStrategy } from "./batchItemsPrioritizingStrategies/maximumParallelismStrategy.js";
+import { firstInFirstOutStrategy } from "./batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js";
+/**
+ * Resolve the `itemsPrioritizingStrategy` batching option to a strategy function.
+ * A function is returned as-is; `"maximumParallelism"` and `"firstInFirstOut"` map to the built-in strategies.
+ * @param strategy - a custom strategy function or the name of a built-in strategy
+ * @returns the strategy function to use
+ * @throws {Error} when `strategy` is neither a function nor a known strategy name
+ */
+export function resolveBatchItemsPrioritizingStrategy(strategy) {
+    // `typeof` also recognizes functions created in another realm (e.g. a `vm` context),
+    // which `instanceof Function` rejects
+    if (typeof strategy === "function")
+        return strategy;
+    else if (strategy === "maximumParallelism")
+        return maximumParallelismStrategy;
+    else if (strategy === "firstInFirstOut")
+        return firstInFirstOutStrategy;
+    throw new Error(`Unknown batch items prioritize strategy: ${strategy}`);
+}
+//# sourceMappingURL=resolveBatchItemsPrioritizingStrategy.js.map

package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"resolveBatchItemsPrioritizingStrategy.js","sourceRoot":"","sources":["../../../../src/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,0BAA0B,EAAC,MAAM,kEAAkE,CAAC;AAC5G,OAAO,EAAC,uBAAuB,EAAC,MAAM,+DAA+D,CAAC;AAEtG,MAAM,UAAU,qCAAqC,CAAC,QAAgE;IAClH,IAAI,QAAQ,YAAY,QAAQ;QAC5B,OAAO,QAAQ,CAAC;SACf,IAAI,QAAQ,KAAK,oBAAoB;QACtC,OAAO,0BAA0B,CAAC;SACjC,IAAI,QAAQ,KAAK,iBAAiB;QACnC,OAAO,uBAAuB,CAAC;IAEnC,KAAK,CAAC,QAAwB,CAAC,CAAC;IAEhC,MAAM,IAAI,KAAK,CAAC,4CAA4C,QAAQ,EAAE,CAAC,CAAC;AAC5E,CAAC"}

package/dist/llamaEvaluator/LlamaGrammar.d.ts CHANGED Viewed

@@ -1,32 +1,28 @@
+import { LlamaText } from "../utils/LlamaText.js";
+import { StopGenerationTrigger } from "../utils/StopGenerationDetector.js";
 export type LlamaGrammarOptions = {
     /** GBNF grammar */
     grammar: string;
     /** print the grammar to stdout */
     printGrammar?: boolean;
-    /** Consider any of these texts as EOS for the generated out. Only supported by `LlamaChatSession` */
-    stopStrings?: string[];
-    /** Trim whitespace from the end of the generated text. Only supported by `LlamaChatSession` */
+    /** Consider any of these as EOS for the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
+    stopGenerationTriggers?: readonly (StopGenerationTrigger | LlamaText)[];
+    /** Trim whitespace from the end of the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
     trimWhitespaceSuffix?: boolean;
 };
 export declare class LlamaGrammar {
-    private readonly _stopStrings;
+    private readonly _stopGenerationTriggers;
     private readonly _trimWhitespaceSuffix;
     private readonly _grammarText;
     /**
      * > GBNF files are supported.
      * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
      * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
-     * @param {object} options
-     * @param {string} options.grammar - GBNF grammar
-     * @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
-     * Only supported by `LlamaChatSession`
-     * @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
-     * Only supported by `LlamaChatSession`
-     * @param {boolean} [options.printGrammar] - print the grammar to stdout
+     * @param options
      */
-    constructor({ grammar, stopStrings, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
+    constructor({ grammar, stopGenerationTriggers, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
     get grammar(): string;
-    get stopStrings(): readonly string[];
+    get stopGenerationTriggers(): readonly (StopGenerationTrigger | LlamaText)[];
     get trimWhitespaceSuffix(): boolean;
     static getFor(type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
 }

package/dist/llamaEvaluator/LlamaGrammar.js CHANGED Viewed

@@ -1,38 +1,33 @@
 import path from "path";
 import fs from "fs-extra";
 import { getGrammarsFolder } from "../utils/getGrammarsFolder.js";
-import { LLAMAGrammar } from "./LlamaBins.js";
+import { LlamaText } from "../utils/LlamaText.js";
+import { AddonGrammar } from "./LlamaBins.js";
 export class LlamaGrammar {
     /** @internal */
     _grammar;
-    _stopStrings;
+    _stopGenerationTriggers;
     _trimWhitespaceSuffix;
     _grammarText;
     /**
      * > GBNF files are supported.
      * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
      * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
-     * @param {object} options
-     * @param {string} options.grammar - GBNF grammar
-     * @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
-     * Only supported by `LlamaChatSession`
-     * @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
-     * Only supported by `LlamaChatSession`
-     * @param {boolean} [options.printGrammar] - print the grammar to stdout
+     * @param options
      */
-    constructor({ grammar, stopStrings = [], trimWhitespaceSuffix = false, printGrammar = false }) {
-        this._grammar = new LLAMAGrammar(grammar, {
+    constructor({ grammar, stopGenerationTriggers = [], trimWhitespaceSuffix = false, printGrammar = false }) {
+        this._grammar = new AddonGrammar(grammar, {
             printGrammar
         });
-        this._stopStrings = stopStrings ?? [];
+        this._stopGenerationTriggers = stopGenerationTriggers ?? [];
         this._trimWhitespaceSuffix = trimWhitespaceSuffix;
         this._grammarText = grammar;
     }
     get grammar() {
         return this._grammarText;
     }
-    get stopStrings() {
-        return this._stopStrings;
+    get stopGenerationTriggers() {
+        return this._stopGenerationTriggers;
     }
     get trimWhitespaceSuffix() {
         return this._trimWhitespaceSuffix;
@@ -44,7 +39,7 @@ export class LlamaGrammar {
             const grammar = await fs.readFile(grammarFile, "utf8");
             return new LlamaGrammar({
                 grammar,
-                stopStrings: ["\n".repeat(10)],
+                stopGenerationTriggers: [LlamaText(["\n".repeat(10)])],
                 trimWhitespaceSuffix: true
             });
         }

package/dist/llamaEvaluator/LlamaGrammar.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiB5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IACtB,~~YAAY~~,~~CAAoB~~;~~IAChC~~,qBAAqB,CAAU;IAC/B,YAAY,CAAS;IAEtC~~;;;;;;;;;;;OAWG~~;IACH,YAAmB,EACf,OAAO,EAAE,~~WAAW~~,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,~~EAC3D~~;QAClB,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,~~YAAY~~,GAAG,~~WAAW~~,IAAI,EAAE,CAAC;~~QACtC~~,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,~~WAAW~~;~~QAClB~~,OAAO,IAAI,CAAC,~~YAAY~~,CAAC;~~IAC7B~~,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,OAAO;gBACP,~~WAAW~~,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;~~gBAC9B~~,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
1	+ {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAEhD,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiB5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IACtB,uBAAuB,CAAiD;IACxE,qBAAqB,CAAU;IAC/B,YAAY,CAAS;IAEtC;;;;;OAKG;IACH,YAAmB,EACf,OAAO,EAAE,sBAAsB,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,EACtE;QAClB,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,uBAAuB,GAAG,sBAAsB,IAAI,EAAE,CAAC;QAC5D,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,sBAAsB;QAC7B,OAAO,IAAI,CAAC,uBAAuB,CAAC;IACxC,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,OAAO;gBACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBACtD,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}

package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts CHANGED Viewed

@@ -2,13 +2,14 @@ import { LlamaGrammar } from "./LlamaGrammar.js";
 export type LlamaGrammarEvaluationStateOptions = {
     grammar: LlamaGrammar;
 };
+/**
+ * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
+ * Create a new grammar evaluation state for every response you generate with the model.
+ * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
+ */
 export declare class LlamaGrammarEvaluationState {
     /**
-     * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
-     * Create a new grammar evaluation state for every response you generate with the model.
-     * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
-     * @param {object} options
-     * @param {LlamaGrammar} options.grammar
+     * @param options
      */
     constructor({ grammar }: LlamaGrammarEvaluationStateOptions);
 }

package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js CHANGED Viewed

@@ -1,16 +1,17 @@
-import { LLAMAGrammarEvaluationState } from "./LlamaBins.js";
+import { AddonGrammarEvaluationState } from "./LlamaBins.js";
+/**
+ * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
+ * Create a new grammar evaluation state for every response you generate with the model.
+ * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
+ */
 export class LlamaGrammarEvaluationState {
     /** @internal */
     _state;
     /**
-     * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
-     * Create a new grammar evaluation state for every response you generate with the model.
-     * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
-     * @param {object} options
-     * @param {LlamaGrammar} options.grammar
+     * @param options
      */
     constructor({ grammar }) {
-        this._state = new LLAMAGrammarEvaluationState(grammar._grammar);
+        this._state = new AddonGrammarEvaluationState(grammar._grammar);
     }
 }
 //# sourceMappingURL=LlamaGrammarEvaluationState.js.map

package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,2BAA2B,EAAC,MAAM,gBAAgB,CAAC;AAQ3D,MAAM,OAAO,2BAA2B;IACpC,gBAAgB;IACA,MAAM,CAA8B;IAEpD~~;;;;;;OAMG~~;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,IAAI,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpE,CAAC;CACJ"}
1	+ {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,2BAA2B,EAAC,MAAM,gBAAgB,CAAC;AAQ3D;;;;GAIG;AACH,MAAM,OAAO,2BAA2B;IACpC,gBAAgB;IACA,MAAM,CAA8B;IAEpD;;OAEG;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,IAAI,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpE,CAAC;CACJ"}

package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { getGbnfGrammarForGbnfJsonSchema } from "../utils/getGbnfGrammarForGbnfJsonSchema.js";
 import { validateObjectAgainstGbnfSchema } from "../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
+import { LlamaText } from "../utils/LlamaText.js";
 import { LlamaGrammar } from "./LlamaGrammar.js";
 export class LlamaJsonSchemaGrammar extends LlamaGrammar {
     _schema;
@@ -7,7 +8,7 @@ export class LlamaJsonSchemaGrammar extends LlamaGrammar {
         const grammar = getGbnfGrammarForGbnfJsonSchema(schema);
         super({
             grammar,
-            stopStrings: ["\n".repeat(4)],
+            stopGenerationTriggers: [LlamaText(["\n".repeat(4)])],
             trimWhitespaceSuffix: true
         });
         this._schema = schema;

package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,6CAA6C,CAAC;AAC5F,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B,YAAmB,MAAS;QACxB,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC;YACF,OAAO;YACP,~~WAAW~~,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;~~YAC7B~~,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}
1	+ {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,6CAA6C,CAAC;AAC5F,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B,YAAmB,MAAS;QACxB,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC;YACF,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}

package/dist/llamaEvaluator/LlamaModel.d.ts CHANGED Viewed

@@ -1,123 +1,119 @@
+import { EventRelay } from "lifecycle-utils";
+import { Token } from "../types.js";
+import { ModelTypeDescription } from "../utils/getBin.js";
+import type { BuiltinSpecialTokenValue } from "../utils/LlamaText.js";
 export type LlamaModelOptions = {
     /** path to the model on the filesystem */
     modelPath: string;
-    /**
-     * If null, a random seed will be used
-     * @deprecated use the `seed` option on `LlamaContext` instead
-     * @hidden
-     * */
-    seed?: number | null;
-    /**
-     * text context size
-     * @deprecated use the `contextSize` option on `LlamaContext` instead
-     * @hidden
-     * */
-    contextSize?: number;
-    /**
-     * prompt processing batch size
-     * @deprecated use the `batchSize` option on `LlamaContext` instead
-     * @hidden
-     * */
-    batchSize?: number;
     /** number of layers to store in VRAM */
     gpuLayers?: number;
-    /**
-     * number of threads to use to evaluate tokens
-     * @deprecated use the `threads` option on `LlamaContext` instead
-     * @hidden
-     * */
-    threads?: number;
-    /**
-     * Temperature is a hyperparameter that controls the randomness of the generated text.
-     * It affects the probability distribution of the model's output tokens.
-     * A higher temperature (e.g., 1.5) makes the output more random and creative,
-     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
-     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
-     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
-     *
-     * Set to `0` to disable.
-     * @deprecated use the `temperature` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
-     * @hidden
-     */
-    temperature?: number;
-    /**
-     * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
-     * An integer number between `1` and the size of the vocabulary.
-     * Set to `0` to disable (which uses the full vocabulary).
-     *
-     * Only relevant when `temperature` is set to a value greater than 0.
-     * @deprecated use the `topK` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
-     * @hidden
-     * */
-    topK?: number;
-    /**
-     * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
-     * and samples the next token only from this set.
-     * A float number between `0` and `1`.
-     * Set to `1` to disable.
-     *
-     * Only relevant when `temperature` is set to a value greater than `0`.
-     * @deprecated use the `topP` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
-     * @hidden
-     */
-    topP?: number;
-    /**
-     * the llama_eval() call computes all logits, not just the last one
-     * @deprecated use the `logitsAll` option on `LlamaContext` instead
-     * @hidden
-     */
-    logitsAll?: boolean;
     /** only load the vocabulary, no weights */
     vocabOnly?: boolean;
     /** use mmap if possible */
     useMmap?: boolean;
     /** force system to keep model in RAM */
     useMlock?: boolean;
-    /**
-     * embedding mode only
-     * @deprecated use the `embedding` option on `LlamaContext` instead
-     * @hidden
-     */
-    embedding?: boolean;
 };
 export declare class LlamaModel {
+    readonly onDispose: EventRelay<void>;
     /**
      * > options source:
      * > [github:ggerganov/llama.cpp/llama.h](
-     * > https://github.com/ggerganov/llama.cpp/blob/b5ffb2849d23afe73647f68eec7b68187af09be6/llama.h#L102) (`struct llama_context_params`)
-     * @param {object} options
-     * @param {string} options.modelPath - path to the model on the filesystem
-     * @param {number | null} [options.seed] - If null, a random seed will be used
-     * @param {number} [options.contextSize] - text context size
-     * @param {number} [options.batchSize] - prompt processing batch size
-     * @param {number} [options.gpuLayers] - number of layers to store in VRAM
-     * @param {number} [options.threads] - number of threads to use to evaluate tokens
-     * @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
-     * It affects the probability distribution of the model's output tokens.
-     * A higher temperature (e.g., 1.5) makes the output more random and creative,
-     * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
-     * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
-     * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
-     *
-     * Set to `0` to disable.
-     * @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
-     * sequence generation.
-     * An integer number between `1` and the size of the vocabulary.
-     * Set to `0` to disable (which uses the full vocabulary).
-     *
-     * Only relevant when `temperature` is set to a value greater than 0.
-     * @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
-     * and samples the next token only from this set.
-     * A float number between `0` and `1`.
-     * Set to `1` to disable.
-     *
-     * Only relevant when `temperature` is set to a value greater than `0`.
-     * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
-     * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
-     * @param {boolean} [options.useMmap] - use mmap if possible
-     * @param {boolean} [options.useMlock] - force system to keep model in RAM
-     * @param {boolean} [options.embedding] - embedding mode only
-     */
-    constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, threads, temperature, topK, topP, logitsAll, vocabOnly, useMmap, useMlock, embedding }: LlamaModelOptions);
+     * > https://github.com/ggerganov/llama.cpp/blob/05816027d649f977468fc804cdb54e99eac246d1/llama.h#L161) (`struct llama_model_params`)
+     * @param options
+     * @param options.modelPath - path to the model on the filesystem
+     * @param [options.gpuLayers] - number of layers to store in VRAM
+     * @param [options.vocabOnly] - only load the vocabulary, no weights
+     * @param [options.useMmap] - use mmap if possible
+     * @param [options.useMlock] - force system to keep model in RAM
+     */
+    constructor({ modelPath, gpuLayers, vocabOnly, useMmap, useMlock }: LlamaModelOptions);
+    dispose(): void;
+    /** @hidden */
+    [Symbol.dispose](): void;
+    get disposed(): boolean;
+    get tokens(): LlamaModelTokens;
+    get filename(): string | undefined;
+    /**
+     * Transform text into tokens that can be fed to the model
+     * @param text - the text to tokenize
+     * @param [specialTokens] - if set to true, text that corresponds to special tokens will be tokenized to those tokens.
+     * For example, `<s>` will be tokenized to the BOS token if `specialTokens` is set to `true`,
+     * otherwise it will be tokenized to tokens that correspond to the plaintext `<s>` string.
+     */
+    tokenize(text: string, specialTokens?: boolean): Token[];
+    tokenize(text: BuiltinSpecialTokenValue, specialTokens: "builtin"): Token[];
+    /** Transform tokens into text */
+    detokenize(tokens: readonly Token[]): string;
+    /** @hidden `ModelTypeDescription` type alias is too long in the documentation */
+    get typeDescription(): ModelTypeDescription;
+    /** The context size the model was trained on */
+    get trainContextSize(): number;
     static get systemInfo(): string;
 }
+export declare class LlamaModelTokens {
+    private constructor();
+    /**
+     * @returns infill tokens
+     */
+    get infill(): LlamaModelInfillTokens;
+    /**
+     * @returns The BOS (Beginning Of Sequence) token.
+     */
+    get bos(): Token | null;
+    /**
+     * @returns The EOS (End Of Sequence) token.
+     */
+    get eos(): Token | null;
+    /**
+     * @returns The NL (New Line) token.
+     */
+    get nl(): Token | null;
+    /**
+     * @returns The BOS (Beginning Of Sequence) token as a string.
+     */
+    get bosString(): string | null;
+    /**
+     * @returns The EOS (End Of Sequence) token as a string.
+     */
+    get eosString(): string | null;
+    /**
+     * @returns The NL (New Line) token as a string.
+     */
+    get nlString(): string | null;
+}
+export declare class LlamaModelInfillTokens {
+    private constructor();
+    /**
+     * @returns The beginning of infill prefix token.
+     */
+    get prefix(): Token | null;
+    /**
+     * @returns The beginning of infill middle token.
+     */
+    get middle(): Token | null;
+    /**
+     * @returns The beginning of infill suffix token.
+     */
+    get suffix(): Token | null;
+    /**
+     * @returns End of infill middle token (End Of Text).
+     */
+    get eot(): Token | null;
+    /**
+     * @returns The beginning of infill prefix token as a string.
+     */
+    get prefixString(): string | null;
+    /**
+     * @returns The beginning of infill middle token as a string.
+     */
+    get middleString(): string | null;
+    /**
+     * @returns The beginning of infill suffix token as a string.
+     */
+    get suffixString(): string | null;
+    /**
+     * @returns End of infill middle token (End Of Text) as a string.
+     */
+    get eotString(): string | null;
+}