@tryhamster/gerbil 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/README.md +99 -34
  2. package/dist/browser/index.d.ts +12 -2
  3. package/dist/browser/index.d.ts.map +1 -1
  4. package/dist/browser/index.js +12 -5
  5. package/dist/browser/index.js.map +1 -1
  6. package/dist/cli.mjs +7 -7
  7. package/dist/cli.mjs.map +1 -1
  8. package/dist/frameworks/express.d.mts +1 -1
  9. package/dist/frameworks/express.mjs +1 -1
  10. package/dist/frameworks/fastify.d.mts +1 -1
  11. package/dist/frameworks/fastify.mjs +1 -1
  12. package/dist/frameworks/hono.d.mts +1 -1
  13. package/dist/frameworks/hono.mjs +1 -1
  14. package/dist/frameworks/next.d.mts +3 -3
  15. package/dist/frameworks/next.mjs +1 -1
  16. package/dist/frameworks/react.d.mts +1 -1
  17. package/dist/frameworks/trpc.d.mts +1 -1
  18. package/dist/frameworks/trpc.mjs +1 -1
  19. package/dist/gerbil-B-jMOrnE.mjs +4 -0
  20. package/dist/{gerbil-BT9fCydo.d.mts → gerbil-BetB5xb0.d.mts} +3 -3
  21. package/dist/{gerbil-BT9fCydo.d.mts.map → gerbil-BetB5xb0.d.mts.map} +1 -1
  22. package/dist/{gerbil-BHrJJIa4.mjs → gerbil-CV4VpF4_.mjs} +2 -2
  23. package/dist/{gerbil-BHrJJIa4.mjs.map → gerbil-CV4VpF4_.mjs.map} +1 -1
  24. package/dist/gpu/hooks.d.mts +166 -1
  25. package/dist/gpu/hooks.d.mts.map +1 -1
  26. package/dist/gpu/hooks.mjs +214 -6
  27. package/dist/gpu/hooks.mjs.map +1 -1
  28. package/dist/gpu/index.d.mts +2 -2
  29. package/dist/gpu/index.mjs +2 -2
  30. package/dist/{gpu-33qCAtHW.mjs → gpu-836grvrv.mjs} +178 -3
  31. package/dist/gpu-836grvrv.mjs.map +1 -0
  32. package/dist/index-Dgmb2kE3.d.mts.map +1 -1
  33. package/dist/{index-jEAL2s-A.d.mts → index-DukkJRMj.d.mts} +104 -12
  34. package/dist/index-DukkJRMj.d.mts.map +1 -0
  35. package/dist/index.d.mts +3 -3
  36. package/dist/index.mjs +4 -4
  37. package/dist/integrations/ai-sdk.d.mts +1 -1
  38. package/dist/integrations/ai-sdk.mjs +1 -1
  39. package/dist/integrations/langchain.d.mts +1 -1
  40. package/dist/integrations/langchain.mjs +1 -1
  41. package/dist/integrations/llamaindex.d.mts +1 -1
  42. package/dist/integrations/llamaindex.mjs +1 -1
  43. package/dist/integrations/mcp.d.mts +3 -3
  44. package/dist/integrations/mcp.mjs +4 -4
  45. package/dist/{mcp-1DaMsaBc.mjs → mcp-BVHI5vzD.mjs} +3 -3
  46. package/dist/{mcp-1DaMsaBc.mjs.map → mcp-BVHI5vzD.mjs.map} +1 -1
  47. package/dist/{moonshine-stt-v_P_Ci_m.mjs → moonshine-stt-BzQRl-BO.mjs} +51 -19
  48. package/dist/{moonshine-stt-v_P_Ci_m.mjs.map → moonshine-stt-BzQRl-BO.mjs.map} +1 -1
  49. package/dist/moonshine-stt-CIolM_SX.mjs +4 -0
  50. package/dist/{one-liner-DnQn7HJK.mjs → one-liner-Cn7IEg1G.mjs} +2 -2
  51. package/dist/{one-liner-DnQn7HJK.mjs.map → one-liner-Cn7IEg1G.mjs.map} +1 -1
  52. package/dist/{repl-jV5gcJFA.mjs → repl-CKAf2M7H.mjs} +3 -3
  53. package/dist/skills/index.d.mts +5 -5
  54. package/dist/skills/index.mjs +3 -3
  55. package/dist/{skills-DX8D59UH.mjs → skills-uuU5GONV.mjs} +2 -2
  56. package/dist/{skills-DX8D59UH.mjs.map → skills-uuU5GONV.mjs.map} +1 -1
  57. package/dist/{types-D6FiR_oh.d.mts → types-LlyYILII.d.mts} +7 -3
  58. package/dist/types-LlyYILII.d.mts.map +1 -0
  59. package/package.json +1 -1
  60. package/dist/gerbil-DomNfIr1.mjs +0 -4
  61. package/dist/gpu-33qCAtHW.mjs.map +0 -1
  62. package/dist/index-jEAL2s-A.d.mts.map +0 -1
  63. package/dist/moonshine-stt-BLyVoRpB.mjs +0 -4
  64. package/dist/types-D6FiR_oh.d.mts.map +0 -1
@@ -1,6 +1,6 @@
1
1
  import { n as resolveDefaultRepo } from "./defaults-9komdrbY.mjs";
2
2
  import { C as parseKaniConfig, E as GEMMA4_VIS_KEYS, T as DTYPE_BYTES, _ as generateNanoCodecDecoderGraph, b as kaniLayerAlpha, d as KANI_START_OF_HUMAN, f as audioTokensToCodes, g as generateKaniTtsGraph, m as computeKaniPositions, p as buildKaniLayerCosSin, u as KANI_END_OF_HUMAN, v as kaniAttentionLayerIndices, w as CANONICAL_KEYS, x as kaniSinTensor, y as kaniCosTensor } from "./architectures-C1I5V3Dt.mjs";
3
- import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-v_P_Ci_m.mjs";
3
+ import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-BzQRl-BO.mjs";
4
4
 
5
5
  //#region src/gpu/architectures/gemma4_vision.ts
6
6
  /**
@@ -2342,6 +2342,62 @@ function extractJson(text) {
2342
2342
  * console.log(result.text);
2343
2343
  * engine.destroy();
2344
2344
  */
/**
 * System prompt for inline autocomplete.
 *
 * It instructs the model to extend the user's text (same sentence or thought)
 * instead of replying to it, and to emit nothing but the continuation. The
 * individual sentences are separated by a single space.
 */
const AUTOCOMPLETE_SYSTEM =
  "You are an inline autocomplete engine. " +
  "Continue the user's text with a brief, natural continuation of the SAME sentence or thought. " +
  "Output ONLY the continuation text — no preamble, no quotes, no explanations, no assistant voice. " +
  "Do not answer questions; just continue the writing. " +
  "Example — input: \"The quick brown fox\" → continuation: \" jumps over the lazy dog.\"";
/**
 * Turn raw model output into a clean inline continuation of `typed`.
 *
 * Processing order:
 *  1. In single-line mode, keep only the text before the first line break
 *     (`\n` or `\r\n`).
 *  2. If the output begins with an echoed copy of `typed`, drop that copy.
 *  3. Strip quote characters from the end, and from the start unless an echo
 *     was just removed (what follows an echo is real continuation text, e.g.
 *     an apostrophe in "'s").
 *  4. If no echo was found yet, look again now that wrapping quotes are gone.
 *  5. Trim leading whitespace; return "" when nothing is left.
 *  6. Prefix one space unless the continuation starts with closing punctuation,
 *     or `typed` is empty or already ends with whitespace.
 *
 * @param {string} raw - Text produced by the model.
 * @param {string} typed - Text the user has typed so far.
 * @param {boolean} singleLine - When true, discard everything after the first line break.
 * @returns {string} Continuation ready to append to `typed`, or "" if empty.
 */
function normalizeContinuation(raw, typed, singleLine) {
  let s = singleLine ? raw.replace(/\r?\n[\s\S]*$/, "") : raw;

  // Detect the echo BEFORE touching quotes: stripping first would hide an echo
  // of typed text that itself starts with a quote, and stripping a trailing
  // quote can damage an echo that ends in one.
  const echoedVerbatim = typed.length > 0 && s.startsWith(typed);
  if (echoedVerbatim) s = s.slice(typed.length);

  if (!echoedVerbatim) s = s.replace(/^["'“”']+/, "");
  s = s.replace(/["'“”']+$/, "");

  // The model may have wrapped its echo in quotes; retry once they are removed.
  if (!echoedVerbatim && typed.length > 0 && s.startsWith(typed)) {
    s = s.slice(typed.length);
  }

  s = s.replace(/^\s+/, "");
  if (!s) return "";

  const startsWithPunct = /^[.,;:!?)\]}'"”’%]/.test(s);
  const typedEndsWithSpace = typed.length === 0 || /\s$/.test(typed);
  return startsWithPunct || typedEndsWithSpace ? s : ` ${s}`;
}
/**
 * Build the system prompt that advertises `tools` to the model and explains
 * the `<tool_call>` reply format.
 *
 * Each tool becomes a `## name` section listing its description (omitted when
 * the tool has none, so the literal text "undefined" never reaches the prompt)
 * and its parameters as JSON. A tool without `parameters` is shown with an
 * empty object schema.
 *
 * @param {Array<{name: string, description?: string, parameters?: object}>} tools
 * @returns {string} The complete system prompt.
 */
function formatAgentToolsPrompt(tools) {
  const emptySchema = {
    type: "object",
    properties: {}
  };
  const toolSections = tools.map((t) => {
    const lines = [`## ${t.name}`];
    if (t.description != null) lines.push(`Description: ${t.description}`);
    lines.push(`Parameters: ${JSON.stringify(t.parameters ?? emptySchema)}`);
    return lines.join("\n");
  });
  return [
    "You are a helpful assistant with access to tools.",
    "",
    "# Tools",
    "",
    toolSections.join("\n\n"),
    "",
    "## How to call a tool",
    "",
    "Reply with ONLY:",
    "<tool_call>",
    "{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}",
    "</tool_call>",
    "",
    "When you have the final answer, reply normally with no tool_call."
  ].join("\n");
}
/**
 * Return the JSON object that starts at or after `start` in `text`, found by
 * matching braces while ignoring braces inside string literals.
 *
 * @param {string} text - Text to scan.
 * @param {number} start - Index at which to begin looking for `{`.
 * @returns {string | null} The balanced `{...}` slice, or null if it never closes.
 */
function sliceBalancedJsonObject(text, start) {
  const open = text.indexOf("{", start);
  if (open === -1) return null;
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = open; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === "\"") inString = false;
      continue;
    }
    if (ch === "\"") inString = true;
    else if (ch === "{") depth++;
    else if (ch === "}") {
      depth--;
      if (depth === 0) return text.slice(open, i + 1);
    }
  }
  return null;
}

/**
 * Extract a tool call from model output.
 *
 * The body of a `<tool_call>...</tool_call>` block is tried first. If there is
 * no such block, or its body is not a usable call, the first `{"name": ...}`
 * object in the text is used instead; its end is found by brace matching so
 * that trailing prose containing `}` does not break parsing.
 *
 * @param {string} text - Raw model output.
 * @returns {{name: string, args: any} | null} The call (`args` taken from
 *   `arguments`, else `args`, else `{}`), or null when no object with a string
 *   `name` can be parsed.
 */
function parseAgentToolCall(text) {
  const toCall = (jsonText) => {
    if (!jsonText) return null;
    try {
      const parsed = JSON.parse(jsonText);
      if (typeof parsed?.name === "string") {
        return {
          name: parsed.name,
          args: parsed.arguments ?? parsed.args ?? {}
        };
      }
    } catch {
      // Deliberate best effort: malformed JSON simply means "no tool call here".
    }
    return null;
  };

  const tagged = text.match(/<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/);
  if (tagged) {
    const fromTag = toCall(tagged[1]);
    if (fromTag) return fromTag;
  }

  const bare = /\{\s*"name"\s*:/.exec(text);
  return bare ? toCall(sliceBalancedJsonObject(text, bare.index)) : null;
}
2345
2401
  var WebGPUEngine = class WebGPUEngine {
2346
2402
  ctx;
2347
2403
  executor;
@@ -2747,6 +2803,7 @@ var WebGPUEngine = class WebGPUEngine {
2747
2803
  let finishReason = "max_tokens";
2748
2804
  let generatedText = "";
2749
2805
  const eosId = this.tokenizer.config.eosTokenId;
2806
+ const decodeStart = performance.now();
2750
2807
  const consumeToken = (nextToken) => {
2751
2808
  generatedIds.push(nextToken);
2752
2809
  if (eosId !== null && nextToken === eosId) {
@@ -2755,7 +2812,15 @@ var WebGPUEngine = class WebGPUEngine {
2755
2812
  }
2756
2813
  const tokenText = this.tokenizer.decode([nextToken], true);
2757
2814
  generatedText += tokenText;
2758
- onToken?.(tokenText);
2815
+ if (onToken) {
2816
+ const elapsedMs = performance.now() - decodeStart;
2817
+ const tokenIndex = generatedIds.length;
2818
+ onToken(tokenText, {
2819
+ tokenIndex,
2820
+ elapsedMs,
2821
+ tps: elapsedMs > 0 ? tokenIndex / elapsedMs * 1e3 : 0
2822
+ });
2823
+ }
2759
2824
  if (stopSequences.some((s) => generatedText.includes(s))) {
2760
2825
  for (const s of stopSequences) {
2761
2826
  const idx = generatedText.indexOf(s);
@@ -2812,6 +2877,116 @@ var WebGPUEngine = class WebGPUEngine {
2812
2877
  };
2813
2878
  }
2814
2879
  /**
2880
+ * Inline autocomplete: continue `prefix` with a brief, single-line continuation.
2881
+ * Wraps `generate` with low-latency defaults (16 tokens, temp 0.3, stop at the
2882
+ * first newline) + a continuation system prompt, then cleans the output (strip
2883
+ * after newline, dequote, drop an echoed prefix, smart leading space).
2884
+ *
2885
+ * ```ts
2886
+ * const suggestion = await engine.autocomplete("The quick brown fox");
2887
+ * // " jumps over the lazy dog."
2888
+ * ```
2889
+ */
2890
+ async autocomplete(prefix, opts = {}) {
2891
+ return normalizeContinuation((await this.generate(prefix, {
2892
+ systemPrompt: AUTOCOMPLETE_SYSTEM,
2893
+ maxTokens: opts.maxTokens ?? 16,
2894
+ sampling: { temperature: opts.temperature ?? .3 },
2895
+ stopSequences: opts.stop ?? ["\n"]
2896
+ })).text, prefix, opts.singleLine ?? true);
2897
+ }
2898
+ /**
2899
+ * Rewrite `text` in a target tone (e.g. "professional", "friendly", "concise",
2900
+ * "playful", "pirate") or with free-form `instructions`. Returns only the
2901
+ * rewritten text.
2902
+ *
2903
+ * ```ts
2904
+ * await engine.rewrite("hey can u send the file", { tone: "professional" });
2905
+ * ```
2906
+ */
2907
+ async rewrite(text, opts = {}) {
2908
+ const system = opts.instructions ?? `Rewrite the user's text in a ${opts.tone ?? "professional"} tone. Output ONLY the rewritten text — no preamble, no quotes, no commentary.`;
2909
+ return (await this.generate(text, {
2910
+ systemPrompt: system,
2911
+ maxTokens: opts.maxTokens ?? 256,
2912
+ sampling: { temperature: opts.temperature ?? .7 }
2913
+ })).text.trim();
2914
+ }
2915
+ /**
2916
+ * Agentic tool-calling loop: generate, parse a `<tool_call>`, run the matching
2917
+ * tool's `execute`, feed the result back, and repeat up to `maxSteps` until the
2918
+ * model answers without calling a tool. Returns the final text + a step trace.
2919
+ *
2920
+ * ```ts
2921
+ * const { text, steps } = await engine.generateWithTools("Weather in Paris?", {
2922
+ * tools: [weatherTool],
2923
+ * });
2924
+ * ```
2925
+ */
2926
+ async generateWithTools(prompt, opts) {
2927
+ const { tools, maxSteps = 5, onStep, maxTokens, sampling } = opts;
2928
+ const systemPrompt = formatAgentToolsPrompt(tools);
2929
+ const messages = typeof prompt === "string" ? [{
2930
+ role: "user",
2931
+ content: prompt
2932
+ }] : [...prompt];
2933
+ const steps = [];
2934
+ let finalText = "";
2935
+ for (let i = 0; i < maxSteps; i++) {
2936
+ const result = await this.generate(messages, {
2937
+ systemPrompt,
2938
+ maxTokens,
2939
+ sampling
2940
+ });
2941
+ const call = parseAgentToolCall(result.text);
2942
+ if (!call) {
2943
+ finalText = result.text;
2944
+ const answer = {
2945
+ kind: "answer",
2946
+ text: result.text
2947
+ };
2948
+ steps.push(answer);
2949
+ onStep?.(answer);
2950
+ break;
2951
+ }
2952
+ const callStep = {
2953
+ kind: "tool_call",
2954
+ tool: call.name,
2955
+ args: call.args
2956
+ };
2957
+ steps.push(callStep);
2958
+ onStep?.(callStep);
2959
+ const tool = tools.find((t) => t.name === call.name);
2960
+ let resultText;
2961
+ if (tool) try {
2962
+ resultText = String(await tool.execute(call.args));
2963
+ } catch (e) {
2964
+ resultText = `Error executing ${call.name}: ${e}`;
2965
+ }
2966
+ else resultText = `Error: unknown tool "${call.name}"`;
2967
+ const resultStep = {
2968
+ kind: "tool_result",
2969
+ tool: call.name,
2970
+ result: resultText
2971
+ };
2972
+ steps.push(resultStep);
2973
+ onStep?.(resultStep);
2974
+ messages.push({
2975
+ role: "assistant",
2976
+ content: result.text
2977
+ });
2978
+ messages.push({
2979
+ role: "user",
2980
+ content: `Tool ${call.name} returned:\n${resultText}`
2981
+ });
2982
+ finalText = resultText;
2983
+ }
2984
+ return {
2985
+ text: finalText,
2986
+ steps
2987
+ };
2988
+ }
2989
+ /**
2815
2990
  * Generate a STRUCTURED object: generate text, extract the first JSON
2816
2991
  * object/array, parse it, validate it, and RETRY until it is valid (on-device
2817
2992
  * tokens are free, so re-rolling a malformed JSON is cheap).
@@ -3612,4 +3787,4 @@ var WebGPUEngine = class WebGPUEngine {
3612
3787
 
3613
3788
  //#endregion
3614
3789
  export { generateGemma4VisionGraph as C, dequantizeMLXProjection as S, resolveGemma4VisionInfo as T, smartResize as _, buildGemma4PosEmbeds as a, generateQwen3_5VisionGraph as b, buildMRoPECosSin as c, buildPositionIds as d, buildRotaryCosSin as f, preprocessImageGemma4 as g, preprocessImage as h, buildGemma4PoolMatrix as i, buildMRoPEPositionIds as l, mropeFreqDims as m, GEMMA4_IMAGE_PROCESSOR as n, buildGemma4RotaryCosSin as o, buildVisionPositionTensors as p, QWEN3_5_IMAGE_PROCESSOR as r, buildGemma4VisionPositionTensors as s, WebGPUEngine as t, buildPosEmbeds as u, VisionExecutor as v, patchGemma4VisionClips as w, dequantizeGemma4VisionProjection as x, KaniTTS as y };
3615
- //# sourceMappingURL=gpu-33qCAtHW.mjs.map
3790
+ //# sourceMappingURL=gpu-836grvrv.mjs.map