@tryhamster/gerbil 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -34
- package/dist/browser/index.d.ts +12 -2
- package/dist/browser/index.d.ts.map +1 -1
- package/dist/browser/index.js +12 -5
- package/dist/browser/index.js.map +1 -1
- package/dist/cli.mjs +7 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +1 -1
- package/dist/frameworks/next.d.mts +3 -3
- package/dist/frameworks/next.mjs +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +1 -1
- package/dist/{gerbil-BT9fCydo.d.mts → gerbil-BetB5xb0.d.mts} +3 -3
- package/dist/{gerbil-BT9fCydo.d.mts.map → gerbil-BetB5xb0.d.mts.map} +1 -1
- package/dist/gerbil-CTZUa8EZ.mjs +4 -0
- package/dist/{gerbil-BHrJJIa4.mjs → gerbil-DNniplr4.mjs} +2 -2
- package/dist/{gerbil-BHrJJIa4.mjs.map → gerbil-DNniplr4.mjs.map} +1 -1
- package/dist/gpu/hooks.d.mts +122 -2
- package/dist/gpu/hooks.d.mts.map +1 -1
- package/dist/gpu/hooks.mjs +187 -6
- package/dist/gpu/hooks.mjs.map +1 -1
- package/dist/gpu/index.d.mts +2 -2
- package/dist/gpu/index.mjs +2 -2
- package/dist/{gpu-33qCAtHW.mjs → gpu-DFuglcEx.mjs} +178 -3
- package/dist/gpu-DFuglcEx.mjs.map +1 -0
- package/dist/{index-jEAL2s-A.d.mts → index-DukkJRMj.d.mts} +104 -12
- package/dist/{index-jEAL2s-A.d.mts.map → index-DukkJRMj.d.mts.map} +1 -1
- package/dist/index.d.mts +3 -3
- package/dist/index.mjs +4 -4
- package/dist/integrations/ai-sdk.d.mts +1 -1
- package/dist/integrations/ai-sdk.mjs +1 -1
- package/dist/integrations/langchain.d.mts +1 -1
- package/dist/integrations/langchain.mjs +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +1 -1
- package/dist/integrations/mcp.d.mts +3 -3
- package/dist/integrations/mcp.mjs +4 -4
- package/dist/{mcp-1DaMsaBc.mjs → mcp-D2vvH1Xc.mjs} +3 -3
- package/dist/{mcp-1DaMsaBc.mjs.map → mcp-D2vvH1Xc.mjs.map} +1 -1
- package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
- package/dist/{moonshine-stt-v_P_Ci_m.mjs → moonshine-stt-4ojLtMq7.mjs} +30 -4
- package/dist/{moonshine-stt-v_P_Ci_m.mjs.map → moonshine-stt-4ojLtMq7.mjs.map} +1 -1
- package/dist/{one-liner-DnQn7HJK.mjs → one-liner-JhdIPxzF.mjs} +2 -2
- package/dist/{one-liner-DnQn7HJK.mjs.map → one-liner-JhdIPxzF.mjs.map} +1 -1
- package/dist/{repl-jV5gcJFA.mjs → repl-BDRkwPGX.mjs} +3 -3
- package/dist/skills/index.d.mts +3 -3
- package/dist/skills/index.mjs +3 -3
- package/dist/{skills-DX8D59UH.mjs → skills-CU694Dc8.mjs} +2 -2
- package/dist/{skills-DX8D59UH.mjs.map → skills-CU694Dc8.mjs.map} +1 -1
- package/dist/{types-D6FiR_oh.d.mts → types-LlyYILII.d.mts} +7 -3
- package/dist/types-LlyYILII.d.mts.map +1 -0
- package/package.json +1 -1
- package/dist/gerbil-DomNfIr1.mjs +0 -4
- package/dist/gpu-33qCAtHW.mjs.map +0 -1
- package/dist/moonshine-stt-BLyVoRpB.mjs +0 -4
- package/dist/types-D6FiR_oh.d.mts.map +0 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { n as resolveDefaultRepo } from "./defaults-9komdrbY.mjs";
|
|
2
2
|
import { C as parseKaniConfig, E as GEMMA4_VIS_KEYS, T as DTYPE_BYTES, _ as generateNanoCodecDecoderGraph, b as kaniLayerAlpha, d as KANI_START_OF_HUMAN, f as audioTokensToCodes, g as generateKaniTtsGraph, m as computeKaniPositions, p as buildKaniLayerCosSin, u as KANI_END_OF_HUMAN, v as kaniAttentionLayerIndices, w as CANONICAL_KEYS, x as kaniSinTensor, y as kaniCosTensor } from "./architectures-C1I5V3Dt.mjs";
|
|
3
|
-
import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-
|
|
3
|
+
import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-4ojLtMq7.mjs";
|
|
4
4
|
|
|
5
5
|
//#region src/gpu/architectures/gemma4_vision.ts
|
|
6
6
|
/**
|
|
@@ -2342,6 +2342,62 @@ function extractJson(text) {
|
|
|
2342
2342
|
* console.log(result.text);
|
|
2343
2343
|
* engine.destroy();
|
|
2344
2344
|
*/
|
|
2345
|
+
/**
 * System prompt used for inline autocomplete. It instructs the model to act as
 * a "continue the text" engine: extend the user's sentence, emit only the
 * continuation, and never answer questions. Sentences are separated by a
 * single space.
 */
const AUTOCOMPLETE_SYSTEM =
  "You are an inline autocomplete engine. " +
  "Continue the user's text with a brief, natural continuation of the SAME sentence or thought. " +
  "Output ONLY the continuation text — no preamble, no quotes, no explanations, no assistant voice. " +
  "Do not answer questions; just continue the writing. " +
  "Example — input: \"The quick brown fox\" → continuation: \" jumps over the lazy dog.\"";
|
|
2353
|
+
/**
 * Turn raw model output into a clean inline continuation.
 *
 * Steps, in order:
 *  1. drop leading whitespace/newlines from the raw output;
 *  2. in single-line mode, cut everything from the first line break on;
 *  3. strip wrapping quote characters;
 *  4. drop an echoed copy of the typed text;
 *  5. add a single leading space unless the suggestion starts with
 *     punctuation or the typed text is empty / already ends with whitespace.
 *
 * @param {string} raw - Text produced by the model.
 * @param {string} typed - Text the user has typed so far.
 * @param {boolean} singleLine - When true, keep only the first line of output.
 * @returns {string} The continuation, or "" when nothing usable remains.
 */
function normalizeContinuation(raw, typed, singleLine) {
  // Trim the front first: a leading newline would otherwise make the
  // single-line cut below erase the whole suggestion, and leading spaces would
  // hide wrapping quotes and an echoed prefix from the steps that follow.
  let s = raw.replace(/^\s+/, "");
  // `\r?` so CRLF output does not leave a dangling carriage return.
  if (singleLine) s = s.replace(/\r?\n[\s\S]*$/, "");
  s = s.replace(/^["'“”]+/, "").replace(/["'“”]+$/, "");
  // `s` has no leading whitespace anymore, so compare against the typed text
  // without its own leading whitespace.
  const echoed = typed.trimStart();
  if (s.startsWith(echoed)) s = s.slice(echoed.length);
  s = s.replace(/^\s+/, "");
  if (!s) return "";
  const startsWithPunct = /^[.,;:!?)\]}'"”’%]/.test(s);
  const typedEndsWithSpace = /\s$/.test(typed) || typed.length === 0;
  return startsWithPunct || typedEndsWithSpace ? s : ` ${s}`;
}
|
|
2369
|
+
/**
 * Build the system prompt for the tool-calling loop: a "# Tools" section with
 * one entry per tool (name, description, JSON parameter schema), followed by
 * the `<tool_call>` reply format the model is asked to use.
 *
 * @param {Array<{name: string, description?: string, parameters?: object}>} tools
 *   Tools to advertise. A tool without `parameters` is listed with an empty
 *   object schema; a tool without `description` is listed without a
 *   "Description:" line (rather than the literal text "undefined").
 * @returns {string} The complete system prompt.
 */
function formatAgentToolsPrompt(tools) {
  const emptySchema = {
    type: "object",
    properties: {}
  };
  const toolSections = tools.map((t) => {
    const lines = [`## ${t.name}`];
    // Interpolating a missing description would put "undefined" in the prompt.
    if (t.description != null) lines.push(`Description: ${t.description}`);
    lines.push(`Parameters: ${JSON.stringify(t.parameters ?? emptySchema)}`);
    return lines.join("\n");
  });
  return [
    "You are a helpful assistant with access to tools.",
    "",
    "# Tools",
    "",
    toolSections.join("\n\n"),
    "",
    "## How to call a tool",
    "",
    "Reply with ONLY:",
    "<tool_call>",
    "{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}",
    "</tool_call>",
    "",
    "When you have the final answer, reply normally with no tool_call."
  ].join("\n");
}
|
|
2388
|
+
/**
 * Extract a tool call from model output.
 *
 * Two candidates are tried in order: the content of the first
 * `<tool_call>…</tool_call>` pair, then the first bare `{"name": …}` object
 * found anywhere in the text. The first candidate that parses as JSON and has
 * a string `name` wins.
 *
 * @param {string} text - Raw model output.
 * @returns {{name: string, args: any} | null} The parsed call (`args` comes
 *   from `arguments`, then `args`, then `{}`), or null when no valid call is
 *   found.
 */
function parseAgentToolCall(text) {
  const candidates = [];
  const tagged = text.match(/<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/);
  if (tagged) candidates.push(tagged[1]);
  // Also used when tags are present but their content is not bare JSON
  // (e.g. the model wrapped the object in a Markdown code fence).
  const bare = text.match(/\{\s*"name"\s*:[\s\S]*\}/);
  if (bare) candidates.push(bare[0]);
  for (const candidate of candidates) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON: expected for free-form model output, try the next one.
      continue;
    }
    if (typeof parsed?.name === "string") return {
      name: parsed.name,
      args: parsed.arguments ?? parsed.args ?? {}
    };
  }
  return null;
}
|
|
2345
2401
|
var WebGPUEngine = class WebGPUEngine {
|
|
2346
2402
|
ctx;
|
|
2347
2403
|
executor;
|
|
@@ -2747,6 +2803,7 @@ var WebGPUEngine = class WebGPUEngine {
|
|
|
2747
2803
|
let finishReason = "max_tokens";
|
|
2748
2804
|
let generatedText = "";
|
|
2749
2805
|
const eosId = this.tokenizer.config.eosTokenId;
|
|
2806
|
+
const decodeStart = performance.now();
|
|
2750
2807
|
const consumeToken = (nextToken) => {
|
|
2751
2808
|
generatedIds.push(nextToken);
|
|
2752
2809
|
if (eosId !== null && nextToken === eosId) {
|
|
@@ -2755,7 +2812,15 @@ var WebGPUEngine = class WebGPUEngine {
|
|
|
2755
2812
|
}
|
|
2756
2813
|
const tokenText = this.tokenizer.decode([nextToken], true);
|
|
2757
2814
|
generatedText += tokenText;
|
|
2758
|
-
onToken
|
|
2815
|
+
if (onToken) {
|
|
2816
|
+
const elapsedMs = performance.now() - decodeStart;
|
|
2817
|
+
const tokenIndex = generatedIds.length;
|
|
2818
|
+
onToken(tokenText, {
|
|
2819
|
+
tokenIndex,
|
|
2820
|
+
elapsedMs,
|
|
2821
|
+
tps: elapsedMs > 0 ? tokenIndex / elapsedMs * 1e3 : 0
|
|
2822
|
+
});
|
|
2823
|
+
}
|
|
2759
2824
|
if (stopSequences.some((s) => generatedText.includes(s))) {
|
|
2760
2825
|
for (const s of stopSequences) {
|
|
2761
2826
|
const idx = generatedText.indexOf(s);
|
|
@@ -2812,6 +2877,116 @@ var WebGPUEngine = class WebGPUEngine {
|
|
|
2812
2877
|
};
|
|
2813
2878
|
}
|
|
2814
2879
|
/**
|
|
2880
|
+
* Inline autocomplete: continue `prefix` with a brief, single-line continuation.
|
|
2881
|
+
* Wraps `generate` with low-latency defaults (16 tokens, temp 0.3, stop at the
|
|
2882
|
+
* first newline) + a continuation system prompt, then cleans the output (strip
|
|
2883
|
+
* after newline, dequote, drop an echoed prefix, smart leading space).
|
|
2884
|
+
*
|
|
2885
|
+
* ```ts
|
|
2886
|
+
* const suggestion = await engine.autocomplete("The quick brown fox");
|
|
2887
|
+
* // " jumps over the lazy dog."
|
|
2888
|
+
* ```
|
|
2889
|
+
*/
|
|
2890
|
+
async autocomplete(prefix, opts = {}) {
|
|
2891
|
+
return normalizeContinuation((await this.generate(prefix, {
|
|
2892
|
+
systemPrompt: AUTOCOMPLETE_SYSTEM,
|
|
2893
|
+
maxTokens: opts.maxTokens ?? 16,
|
|
2894
|
+
sampling: { temperature: opts.temperature ?? .3 },
|
|
2895
|
+
stopSequences: opts.stop ?? ["\n"]
|
|
2896
|
+
})).text, prefix, opts.singleLine ?? true);
|
|
2897
|
+
}
|
|
2898
|
+
/**
|
|
2899
|
+
* Rewrite `text` in a target tone (e.g. "professional", "friendly", "concise",
|
|
2900
|
+
* "playful", "pirate") or with free-form `instructions`. Returns only the
|
|
2901
|
+
* rewritten text.
|
|
2902
|
+
*
|
|
2903
|
+
* ```ts
|
|
2904
|
+
* await engine.rewrite("hey can u send the file", { tone: "professional" });
|
|
2905
|
+
* ```
|
|
2906
|
+
*/
|
|
2907
|
+
async rewrite(text, opts = {}) {
|
|
2908
|
+
const system = opts.instructions ?? `Rewrite the user's text in a ${opts.tone ?? "professional"} tone. Output ONLY the rewritten text — no preamble, no quotes, no commentary.`;
|
|
2909
|
+
return (await this.generate(text, {
|
|
2910
|
+
systemPrompt: system,
|
|
2911
|
+
maxTokens: opts.maxTokens ?? 256,
|
|
2912
|
+
sampling: { temperature: opts.temperature ?? .7 }
|
|
2913
|
+
})).text.trim();
|
|
2914
|
+
}
|
|
2915
|
+
/**
|
|
2916
|
+
* Agentic tool-calling loop: generate, parse a `<tool_call>`, run the matching
|
|
2917
|
+
* tool's `execute`, feed the result back, and repeat up to `maxSteps` until the
|
|
2918
|
+
* model answers without calling a tool. Returns the final text + a step trace.
|
|
2919
|
+
*
|
|
2920
|
+
* ```ts
|
|
2921
|
+
* const { text, steps } = await engine.generateWithTools("Weather in Paris?", {
|
|
2922
|
+
* tools: [weatherTool],
|
|
2923
|
+
* });
|
|
2924
|
+
* ```
|
|
2925
|
+
*/
|
|
2926
|
+
async generateWithTools(prompt, opts) {
|
|
2927
|
+
const { tools, maxSteps = 5, onStep, maxTokens, sampling } = opts;
|
|
2928
|
+
const systemPrompt = formatAgentToolsPrompt(tools);
|
|
2929
|
+
const messages = typeof prompt === "string" ? [{
|
|
2930
|
+
role: "user",
|
|
2931
|
+
content: prompt
|
|
2932
|
+
}] : [...prompt];
|
|
2933
|
+
const steps = [];
|
|
2934
|
+
let finalText = "";
|
|
2935
|
+
for (let i = 0; i < maxSteps; i++) {
|
|
2936
|
+
const result = await this.generate(messages, {
|
|
2937
|
+
systemPrompt,
|
|
2938
|
+
maxTokens,
|
|
2939
|
+
sampling
|
|
2940
|
+
});
|
|
2941
|
+
const call = parseAgentToolCall(result.text);
|
|
2942
|
+
if (!call) {
|
|
2943
|
+
finalText = result.text;
|
|
2944
|
+
const answer = {
|
|
2945
|
+
kind: "answer",
|
|
2946
|
+
text: result.text
|
|
2947
|
+
};
|
|
2948
|
+
steps.push(answer);
|
|
2949
|
+
onStep?.(answer);
|
|
2950
|
+
break;
|
|
2951
|
+
}
|
|
2952
|
+
const callStep = {
|
|
2953
|
+
kind: "tool_call",
|
|
2954
|
+
tool: call.name,
|
|
2955
|
+
args: call.args
|
|
2956
|
+
};
|
|
2957
|
+
steps.push(callStep);
|
|
2958
|
+
onStep?.(callStep);
|
|
2959
|
+
const tool = tools.find((t) => t.name === call.name);
|
|
2960
|
+
let resultText;
|
|
2961
|
+
if (tool) try {
|
|
2962
|
+
resultText = String(await tool.execute(call.args));
|
|
2963
|
+
} catch (e) {
|
|
2964
|
+
resultText = `Error executing ${call.name}: ${e}`;
|
|
2965
|
+
}
|
|
2966
|
+
else resultText = `Error: unknown tool "${call.name}"`;
|
|
2967
|
+
const resultStep = {
|
|
2968
|
+
kind: "tool_result",
|
|
2969
|
+
tool: call.name,
|
|
2970
|
+
result: resultText
|
|
2971
|
+
};
|
|
2972
|
+
steps.push(resultStep);
|
|
2973
|
+
onStep?.(resultStep);
|
|
2974
|
+
messages.push({
|
|
2975
|
+
role: "assistant",
|
|
2976
|
+
content: result.text
|
|
2977
|
+
});
|
|
2978
|
+
messages.push({
|
|
2979
|
+
role: "user",
|
|
2980
|
+
content: `Tool ${call.name} returned:\n${resultText}`
|
|
2981
|
+
});
|
|
2982
|
+
finalText = resultText;
|
|
2983
|
+
}
|
|
2984
|
+
return {
|
|
2985
|
+
text: finalText,
|
|
2986
|
+
steps
|
|
2987
|
+
};
|
|
2988
|
+
}
|
|
2989
|
+
/**
|
|
2815
2990
|
* Generate a STRUCTURED object: generate text, extract the first JSON
|
|
2816
2991
|
* object/array, parse it, validate it, and RETRY until it is valid (on-device
|
|
2817
2992
|
* tokens are free, so re-rolling a malformed JSON is cheap).
|
|
@@ -3612,4 +3787,4 @@ var WebGPUEngine = class WebGPUEngine {
|
|
|
3612
3787
|
|
|
3613
3788
|
//#endregion
|
|
3614
3789
|
export { generateGemma4VisionGraph as C, dequantizeMLXProjection as S, resolveGemma4VisionInfo as T, smartResize as _, buildGemma4PosEmbeds as a, generateQwen3_5VisionGraph as b, buildMRoPECosSin as c, buildPositionIds as d, buildRotaryCosSin as f, preprocessImageGemma4 as g, preprocessImage as h, buildGemma4PoolMatrix as i, buildMRoPEPositionIds as l, mropeFreqDims as m, GEMMA4_IMAGE_PROCESSOR as n, buildGemma4RotaryCosSin as o, buildVisionPositionTensors as p, QWEN3_5_IMAGE_PROCESSOR as r, buildGemma4VisionPositionTensors as s, WebGPUEngine as t, buildPosEmbeds as u, VisionExecutor as v, patchGemma4VisionClips as w, dequantizeGemma4VisionProjection as x, KaniTTS as y };
|
|
3615
|
-
//# sourceMappingURL=gpu-
|
|
3790
|
+
//# sourceMappingURL=gpu-DFuglcEx.mjs.map
|