npm - @tryhamster/gerbil - Versions diffs - 1.0.0 → 1.0.1 - Mend

@tryhamster/gerbil 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/README.md +85 -34
package/dist/browser/index.d.ts +12 -2
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +12 -5
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +7 -7
package/dist/cli.mjs.map +1 -1
package/dist/frameworks/express.d.mts +1 -1
package/dist/frameworks/express.mjs +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.mjs +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.mjs +1 -1
package/dist/frameworks/next.d.mts +3 -3
package/dist/frameworks/next.mjs +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.mjs +1 -1
package/dist/{gerbil-BT9fCydo.d.mts → gerbil-BetB5xb0.d.mts} +3 -3
package/dist/{gerbil-BT9fCydo.d.mts.map → gerbil-BetB5xb0.d.mts.map} +1 -1
package/dist/gerbil-CTZUa8EZ.mjs +4 -0
package/dist/{gerbil-BHrJJIa4.mjs → gerbil-DNniplr4.mjs} +2 -2
package/dist/{gerbil-BHrJJIa4.mjs.map → gerbil-DNniplr4.mjs.map} +1 -1
package/dist/gpu/hooks.d.mts +122 -2
package/dist/gpu/hooks.d.mts.map +1 -1
package/dist/gpu/hooks.mjs +187 -6
package/dist/gpu/hooks.mjs.map +1 -1
package/dist/gpu/index.d.mts +2 -2
package/dist/gpu/index.mjs +2 -2
package/dist/{gpu-33qCAtHW.mjs → gpu-DFuglcEx.mjs} +178 -3
package/dist/gpu-DFuglcEx.mjs.map +1 -0
package/dist/{index-jEAL2s-A.d.mts → index-DukkJRMj.d.mts} +104 -12
package/dist/{index-jEAL2s-A.d.mts.map → index-DukkJRMj.d.mts.map} +1 -1
package/dist/index.d.mts +3 -3
package/dist/index.mjs +4 -4
package/dist/integrations/ai-sdk.d.mts +1 -1
package/dist/integrations/ai-sdk.mjs +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.mjs +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.mjs +1 -1
package/dist/integrations/mcp.d.mts +3 -3
package/dist/integrations/mcp.mjs +4 -4
package/dist/{mcp-1DaMsaBc.mjs → mcp-D2vvH1Xc.mjs} +3 -3
package/dist/{mcp-1DaMsaBc.mjs.map → mcp-D2vvH1Xc.mjs.map} +1 -1
package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
package/dist/{moonshine-stt-v_P_Ci_m.mjs → moonshine-stt-4ojLtMq7.mjs} +30 -4
package/dist/{moonshine-stt-v_P_Ci_m.mjs.map → moonshine-stt-4ojLtMq7.mjs.map} +1 -1
package/dist/{one-liner-DnQn7HJK.mjs → one-liner-JhdIPxzF.mjs} +2 -2
package/dist/{one-liner-DnQn7HJK.mjs.map → one-liner-JhdIPxzF.mjs.map} +1 -1
package/dist/{repl-jV5gcJFA.mjs → repl-BDRkwPGX.mjs} +3 -3
package/dist/skills/index.d.mts +3 -3
package/dist/skills/index.mjs +3 -3
package/dist/{skills-DX8D59UH.mjs → skills-CU694Dc8.mjs} +2 -2
package/dist/{skills-DX8D59UH.mjs.map → skills-CU694Dc8.mjs.map} +1 -1
package/dist/{types-D6FiR_oh.d.mts → types-LlyYILII.d.mts} +7 -3
package/dist/types-LlyYILII.d.mts.map +1 -0
package/package.json +1 -1
package/dist/gerbil-DomNfIr1.mjs +0 -4
package/dist/gpu-33qCAtHW.mjs.map +0 -1
package/dist/moonshine-stt-BLyVoRpB.mjs +0 -4
package/dist/types-D6FiR_oh.d.mts.map +0 -1

package/dist/{gpu-33qCAtHW.mjs → gpu-DFuglcEx.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import { n as resolveDefaultRepo } from "./defaults-9komdrbY.mjs";
 import { C as parseKaniConfig, E as GEMMA4_VIS_KEYS, T as DTYPE_BYTES, _ as generateNanoCodecDecoderGraph, b as kaniLayerAlpha, d as KANI_START_OF_HUMAN, f as audioTokensToCodes, g as generateKaniTtsGraph, m as computeKaniPositions, p as buildKaniLayerCosSin, u as KANI_END_OF_HUMAN, v as kaniAttentionLayerIndices, w as CANONICAL_KEYS, x as kaniSinTensor, y as kaniCosTensor } from "./architectures-C1I5V3Dt.mjs";
-import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-v_P_Ci_m.mjs";
+import { c as MATMUL_BIAS_F16C_SPEC, d as createStorageBuffer, f as createUniformBuffer, g as verifyGPU, h as initGPU, i as loadModel, l as clearPipelineCache, m as getOrCreatePipeline, o as Executor, p as destroyBuffers, r as loadKaniTTS, s as KERNEL_REGISTRY, u as createBindGroup } from "./moonshine-stt-4ojLtMq7.mjs";
 //#region src/gpu/architectures/gemma4_vision.ts
 /**
@@ -2342,6 +2342,62 @@ function extractJson(text) {
 *   console.log(result.text);
 *   engine.destroy();
 */
+/**
+* System prompt used for inline autocomplete. It tells the model to continue the
+* user's text instead of answering it. The five sentences below are separated by
+* single spaces and form one line.
+*/
+const AUTOCOMPLETE_SYSTEM = "You are an inline autocomplete engine. " +
+	"Continue the user's text with a brief, natural continuation of the SAME sentence or thought. " +
+	"Output ONLY the continuation text — no preamble, no quotes, no explanations, no assistant voice. " +
+	"Do not answer questions; just continue the writing. " +
+	'Example — input: "The quick brown fox" → continuation: " jumps over the lazy dog."';
+/**
+* Turn raw model output into a clean inline continuation.
+*
+* Steps, in order:
+* 1. In single-line mode, drop everything from the first line break on. Both
+*    "\r" and "\n" count, so CRLF output leaves no stray "\r".
+* 2. Strip wrapping quotes (straight or typographic, single or double), also
+*    when whitespace sits outside them.
+* 3. Drop an echoed copy of the typed text. The echo is looked for in the
+*    dequoted output first, then in the output as generated, so an echo of
+*    typed text that itself begins with a quote is still recognised.
+* 4. Remove leading whitespace, then add one leading space unless the
+*    suggestion starts with punctuation, the typed text already ends with
+*    whitespace, or the typed text is empty.
+*
+* @param {string} raw - Text produced by the model.
+* @param {string} typed - Text the user has typed so far.
+* @param {boolean} singleLine - When true, keep only the first line of `raw`.
+* @returns {string} The text to append after `typed`, or "" when nothing is left.
+*/
+function normalizeContinuation(raw, typed, singleLine) {
+	const firstLine = singleLine ? raw.replace(/[\r\n][\s\S]*$/, "") : raw;
+	// Trailing whitespace is only removed together with a closing quote; output
+	// that does not end in a quote keeps its trailing whitespace.
+	const stripClosingQuotes = (text) => text.replace(/["'“”‘’]+\s*$/, "");
+	const dequoted = stripClosingQuotes(firstLine.replace(/^\s*["'“”‘’]+/, ""));
+	let s = dequoted;
+	if (dequoted.startsWith(typed)) {
+		s = dequoted.slice(typed.length);
+	} else if (firstLine.startsWith(typed)) {
+		// The echo was only visible before dequoting (typed text starts with a
+		// quote). Leading quotes of the remainder are kept on purpose so that a
+		// continuation such as "'s tail" keeps its apostrophe.
+		s = stripClosingQuotes(firstLine.slice(typed.length));
+	}
+	s = s.replace(/^\s+/, "");
+	if (!s) return "";
+	const startsWithPunct = /^[.,;:!?)\]}'"”’%]/.test(s);
+	const typedEndsWithSpace = /\s$/.test(typed) || typed.length === 0;
+	return startsWithPunct || typedEndsWithSpace ? s : ` ${s}`;
+}
+/**
+* Build the system prompt for the tool-calling loop: a heading, one section per
+* tool (name, description, JSON-encoded parameter schema), and the instructions
+* for replying with a `<tool_call>` block.
+*
+* @param {Array<{name: *, description: *, parameters?: *}>} tools - Tools to list.
+* @returns {string} The assembled prompt text.
+*/
+function formatAgentToolsPrompt(tools) {
+	// Schema shown for a tool that declares no parameters.
+	const emptySchema = {
+		type: "object",
+		properties: {}
+	};
+	const sections = tools.map((tool) => {
+		const heading = `## ${tool.name}`;
+		const description = `Description: ${tool.description}`;
+		const schema = `Parameters: ${JSON.stringify(tool.parameters ?? emptySchema)}`;
+		return [heading, description, schema].join("\n");
+	});
+	return [
+		"You are a helpful assistant with access to tools.",
+		"# Tools",
+		sections.join("\n\n"),
+		"## How to call a tool",
+		"Reply with ONLY:",
+		"<tool_call>",
+		'{"name": "tool_name", "arguments": {"param": "value"}}',
+		"</tool_call>",
+		"When you have the final answer, reply normally with no tool_call."
+	].join("\n");
+}
+/**
+* Extract a tool call from model output.
+*
+* Two candidates are tried in order: the payload of the first
+* `<tool_call>…</tool_call>` tag, then a bare `{"name": …}` object found
+* anywhere in the text. The bare object is also tried when the tagged payload
+* is not usable JSON (for example when it is wrapped in a Markdown code fence),
+* instead of giving up after the first failure.
+*
+* @param {string} text - Raw model output.
+* @returns {{name: string, args: *}|null} The parsed call, or null when no
+*   candidate parses to an object with a string `name`. `args` comes from the
+*   payload's `arguments`, then `args`, and defaults to `{}`.
+*/
+function parseAgentToolCall(text) {
+	const candidates = [];
+	const tagged = text.match(/<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/);
+	if (tagged) candidates.push(tagged[1]);
+	const bare = text.match(/\{\s*"name"\s*:[\s\S]*\}/)?.[0];
+	if (bare) candidates.push(bare);
+	for (const json of candidates) {
+		let parsed;
+		try {
+			parsed = JSON.parse(json);
+		} catch {
+			// Not valid JSON; fall through to the next candidate.
+			continue;
+		}
+		// `parsed` may be null or a primitive (e.g. a payload of `null`).
+		if (typeof parsed?.name === "string") return {
+			name: parsed.name,
+			args: parsed.arguments ?? parsed.args ?? {}
+		};
+	}
+	return null;
+}
 var WebGPUEngine = class WebGPUEngine {
 	ctx;
 	executor;
@@ -2747,6 +2803,7 @@ var WebGPUEngine = class WebGPUEngine {
 		let finishReason = "max_tokens";
 		let generatedText = "";
 		const eosId = this.tokenizer.config.eosTokenId;
+		const decodeStart = performance.now();
 		const consumeToken = (nextToken) => {
 			generatedIds.push(nextToken);
 			if (eosId !== null && nextToken === eosId) {
@@ -2755,7 +2812,15 @@ var WebGPUEngine = class WebGPUEngine {
 			}
 			const tokenText = this.tokenizer.decode([nextToken], true);
 			generatedText += tokenText;
-			onToken?.(tokenText);
+			if (onToken) {
+				const elapsedMs = performance.now() - decodeStart;
+				const tokenIndex = generatedIds.length;
+				onToken(tokenText, {
+					tokenIndex,
+					elapsedMs,
+					tps: elapsedMs > 0 ? tokenIndex / elapsedMs * 1e3 : 0
+				});
+			}
 			if (stopSequences.some((s) => generatedText.includes(s))) {
 				for (const s of stopSequences) {
 					const idx = generatedText.indexOf(s);
@@ -2812,6 +2877,116 @@ var WebGPUEngine = class WebGPUEngine {
 		};
 	}
 	/**
+	* Inline autocomplete: continue `prefix` with a brief, single-line continuation.
+	* Wraps `generate` with low-latency defaults (16 tokens, temp 0.3, stop at the
+	* first newline) + a continuation system prompt, then cleans the output (strip
+	* after newline, dequote, drop an echoed prefix, smart leading space).
+	*
+	* ```ts
+	* const suggestion = await engine.autocomplete("The quick brown fox");
+	* // " jumps over the lazy dog."
+	* ```
+	*/
+	async autocomplete(prefix, opts = {}) {
+		const singleLine = opts.singleLine ?? true;
+		// Stop at the first newline by default only in single-line mode. With an
+		// unconditional "\n" default, `singleLine: false` on its own could never
+		// produce more than one line. An explicit `opts.stop` always wins.
+		const stopSequences = opts.stop ?? (singleLine ? ["\n"] : []);
+		const generated = await this.generate(prefix, {
+			systemPrompt: AUTOCOMPLETE_SYSTEM,
+			maxTokens: opts.maxTokens ?? 16,
+			sampling: { temperature: opts.temperature ?? .3 },
+			stopSequences
+		});
+		// Clean the raw output into a suggestion that can be appended to `prefix`.
+		return normalizeContinuation(generated.text, prefix, singleLine);
+	}
+	/**
+	* Rewrite `text` in a target tone (e.g. "professional", "friendly", "concise",
+	* "playful", "pirate") or with free-form `instructions`. Returns only the
+	* rewritten text.
+	*
+	* ```ts
+	* await engine.rewrite("hey can u send the file", { tone: "professional" });
+	* ```
+	*/
+	async rewrite(text, opts = {}) {
+		// Free-form instructions replace the tone-based system prompt entirely.
+		let systemPrompt = opts.instructions;
+		if (systemPrompt == null) {
+			const tone = opts.tone ?? "professional";
+			systemPrompt = `Rewrite the user's text in a ${tone} tone. Output ONLY the rewritten text — no preamble, no quotes, no commentary.`;
+		}
+		const generated = await this.generate(text, {
+			systemPrompt,
+			maxTokens: opts.maxTokens ?? 256,
+			sampling: { temperature: opts.temperature ?? .7 }
+		});
+		// Surrounding whitespace is removed from the generated text.
+		return generated.text.trim();
+	}
+	/**
+	* Agentic tool-calling loop: generate, parse a `<tool_call>`, run the matching
+	* tool's `execute`, feed the result back, and repeat up to `maxSteps` until the
+	* model answers without calling a tool. Returns the final text + a step trace.
+	*
+	* ```ts
+	* const { text, steps } = await engine.generateWithTools("Weather in Paris?", {
+	*   tools: [weatherTool],
+	* });
+	* ```
+	*/
+	async generateWithTools(prompt, opts) {
+		const { tools, maxSteps = 5, onStep, maxTokens, sampling } = opts;
+		const systemPrompt = formatAgentToolsPrompt(tools);
+		// A message array is copied so the caller's array is not modified.
+		const messages = typeof prompt === "string" ? [{
+			role: "user",
+			content: prompt
+		}] : [...prompt];
+		// Render a tool's return value as text for the model. Plain objects and
+		// arrays are JSON-encoded; String() would turn them into "[object Object]".
+		// Strings pass through, and primitives and Error instances keep String().
+		const describeResult = (value) => {
+			if (typeof value === "string") return value;
+			if (value === null || typeof value !== "object" || value instanceof Error) return String(value);
+			try {
+				return JSON.stringify(value) ?? String(value);
+			} catch {
+				// Not serialisable (e.g. circular reference); fall back to String().
+				return String(value);
+			}
+		};
+		const steps = [];
+		// Append a step to the trace and notify the optional observer.
+		const record = (step) => {
+			steps.push(step);
+			onStep?.(step);
+		};
+		let finalText = "";
+		for (let i = 0; i < maxSteps; i++) {
+			const result = await this.generate(messages, {
+				systemPrompt,
+				maxTokens,
+				sampling
+			});
+			const call = parseAgentToolCall(result.text);
+			if (!call) {
+				// No tool call in the reply: it is the final answer.
+				finalText = result.text;
+				record({
+					kind: "answer",
+					text: result.text
+				});
+				break;
+			}
+			record({
+				kind: "tool_call",
+				tool: call.name,
+				args: call.args
+			});
+			const tool = tools.find((t) => t.name === call.name);
+			let resultText;
+			if (tool) {
+				try {
+					resultText = describeResult(await tool.execute(call.args));
+				} catch (e) {
+					// A failing tool is reported to the model instead of aborting the loop.
+					resultText = `Error executing ${call.name}: ${e}`;
+				}
+			} else {
+				resultText = `Error: unknown tool "${call.name}"`;
+			}
+			record({
+				kind: "tool_result",
+				tool: call.name,
+				result: resultText
+			});
+			// Feed the model's own call and the tool output back for the next turn.
+			messages.push({
+				role: "assistant",
+				content: result.text
+			});
+			messages.push({
+				role: "user",
+				content: `Tool ${call.name} returned:\n${resultText}`
+			});
+			// If the step budget runs out, the last tool result is what gets returned.
+			finalText = resultText;
+		}
+		return {
+			text: finalText,
+			steps
+		};
+	}
+	/**
 	* Generate a STRUCTURED object: generate text, extract the first JSON
 	* object/array, parse it, validate it, and RETRY until it is valid (on-device
 	* tokens are free, so re-rolling a malformed JSON is cheap).
@@ -3612,4 +3787,4 @@ var WebGPUEngine = class WebGPUEngine {
 //#endregion
 export { generateGemma4VisionGraph as C, dequantizeMLXProjection as S, resolveGemma4VisionInfo as T, smartResize as _, buildGemma4PosEmbeds as a, generateQwen3_5VisionGraph as b, buildMRoPECosSin as c, buildPositionIds as d, buildRotaryCosSin as f, preprocessImageGemma4 as g, preprocessImage as h, buildGemma4PoolMatrix as i, buildMRoPEPositionIds as l, mropeFreqDims as m, GEMMA4_IMAGE_PROCESSOR as n, buildGemma4RotaryCosSin as o, buildVisionPositionTensors as p, QWEN3_5_IMAGE_PROCESSOR as r, buildGemma4VisionPositionTensors as s, WebGPUEngine as t, buildPosEmbeds as u, VisionExecutor as v, patchGemma4VisionClips as w, dequantizeGemma4VisionProjection as x, KaniTTS as y };
-//# sourceMappingURL=gpu-33qCAtHW.mjs.map
+//# sourceMappingURL=gpu-DFuglcEx.mjs.map