npm - @nevescloud/pip - Versions diffs - 3.5.1 → 3.7.0 - Mend

@nevescloud/pip 3.5.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -48,6 +48,18 @@ const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource:
 // the turn loop entirely).
 ```
+```js
+// Chrome — on-device Gemini Nano via the Prompt API (zero download for users whose Chrome 138+ install already has the weights; ~2B-effective-param quality)
+import { createRuntime }  from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/runtime.esm.js';
+import { createPip }      from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/pip-core.esm.js';
+import { chrome }         from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/providers/chrome.esm.js';
+const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
+const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource: rt.slashSource });
+// No bundle — Chrome doesn't need its own re-export of createPip + createRuntime
+// (`bundle/anthropic` already brings those, and chrome() composes alongside).
+```
 On jsdelivr the `.esm.js` suffix is required — jsdelivr serves files by raw path, not via `package.json` exports. npm-installed consumers can use the shorter `@nevescloud/pip/bundle/anthropic` (Node ESM resolver honors the exports map). `pip/bundle.esm.js` (or `pip/bundle` via npm) is an alias for `bundle/anthropic` — the default when you haven't picked a brain. Bundles are sugar over the layered files; hosts with a different brain shape (UI only, custom provider, in-browser model) import the granular files directly. See [CONSUMERS.md](../../CONSUMERS.md) for the full entry-point list.
 ## Options

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nevescloud/pip",
-  "version": "3.5.1",
+  "version": "3.7.0",
   "description": "Floating assistant bubble + panel + chat runtime. ESM, no build.",
   "type": "module",
   "main": "pip-core.esm.js",
@@ -19,7 +19,8 @@
     "./bundle/local.esm.js": "./bundle/local.esm.js",
     "./providers/anthropic.esm.js": "./providers/anthropic.esm.js",
     "./providers/openai.esm.js": "./providers/openai.esm.js",
-    "./providers/local.esm.js": "./providers/local.esm.js"
+    "./providers/local.esm.js": "./providers/local.esm.js",
+    "./providers/chrome.esm.js": "./providers/chrome.esm.js"
   },
   "files": [
     "pip-core.esm.js",

package/providers/chrome.esm.js ADDED Viewed

@@ -0,0 +1,111 @@
+// Chrome's built-in Prompt API (on-device Gemini Nano / Gemma-derived).
+// Wraps `LanguageModel.create()` + `session.promptStreaming()` into a
+// runtime-compatible provider — zero-download for users on Chrome that
+// already has the weights, reply quality in the ~2B-effective-param
+// range (well above what transformers.js practically pulls in-browser).
+//
+// Usage:
+//   import { createRuntime } from '@nevescloud/pip/runtime.esm.js';
+//   import { chrome }        from '@nevescloud/pip/providers/chrome.esm.js';
+//
+//   const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
+//
+// Surface shifted across Chrome versions: the API moved from
+// `window.ai.languageModel` (earlier flag-gated builds) to the top-level
+// `LanguageModel` constructor as the Prompt API spec settled. We try
+// the newer surface first and fall back. Chrome 138+ ships the origin
+// trial; ~Chrome 148+ runs without a flag for many origins. Non-Chrome
+// browsers throw a friendly error on first invocation.
+//
+// Limitations: no tool-use, no images. Tools registered on the runtime
+// won't be exposed — pip's turn loop still works for slash commands and
+// chat, but tool dispatch is a no-op with this provider.
+const UNAVAILABLE = // Message for the Error thrown when the Prompt API is missing or reports itself unavailable.
+  "Chrome's built-in AI isn't available here. Use Chrome 138+ or enable " +
+  "chrome://flags#prompt-api-for-gemini-nano on earlier versions.";
+function getApi() { // Resolve the Prompt API entry point from globalThis; returns null when neither surface exists.
+  // Newer spec: top-level constructor. Older: nested under window.ai. The newer surface is checked first, so it wins when both are present.
+  if (globalThis.LanguageModel) return globalThis.LanguageModel;
+  if (globalThis.ai?.languageModel) return globalThis.ai.languageModel;
+  return null; // no Prompt API surface found on this global
+}
+async function ensureAvailable(LM) { // Probe LM for availability; throws Error(UNAVAILABLE) on a negative answer, otherwise resolves with undefined.
+  // Newer: availability() → 'available' | 'downloadable' | 'downloading' | 'unavailable' (only 'unavailable' is rejected here; the download states pass through)
+  // Older: capabilities() → { available: 'readily' | 'after-download' | 'no' } (only 'no' is rejected here)
+  if (typeof LM.availability === 'function') {
+    const v = await LM.availability();
+    if (v === 'unavailable') throw new Error(UNAVAILABLE);
+    return; // the newer probe answered, so the legacy capabilities() probe is skipped
+  }
+  if (typeof LM.capabilities === 'function') {
+    const c = await LM.capabilities();
+    if (c?.available === 'no') throw new Error(UNAVAILABLE);
+  } // when LM has neither probe method, nothing is thrown
+}
+export function chrome({ systemPrompt, temperature, topK } = {}) {
+  return ({ messages, signal, system }) => (async function* () {
+    const LM = getApi();
+    if (!LM) throw new Error(UNAVAILABLE);
+    await ensureAvailable(LM);
+    // Runtime's per-call `system` wins over the factory default — same
+    // precedence anthropic/openai providers use.
+    const sys = system || systemPrompt;
+    const initialPrompts = [];
+    if (sys) initialPrompts.push({ role: 'system', content: sys });
+    // Replay prior turns. The Prompt API doesn't model tool dispatch, so
+    // skip non-string content (tool_use / tool_result turns) — they'd
+    // serialize to "[object Object]" and confuse the model.
+    for (const m of messages.slice(0, -1)) {
+      if (typeof m.content === 'string') {
+        initialPrompts.push({ role: m.role, content: m.content });
+      }
+    }
+    const tail = messages[messages.length - 1];
+    const userText = typeof tail?.content === 'string' ? tail.content : '';
+    const opts = {};
+    if (initialPrompts.length) opts.initialPrompts = initialPrompts;
+    // The Prompt API requires both topK and temperature to be set, or
+    // neither — passing one alone throws "Initializing a new session
+    // must either specify both topK and temperature, or neither". If
+    // the caller specified one, fetch the other's default from
+    // LanguageModel.params() so a half-spec doesn't reject the session.
+    const hasT = temperature != null;
+    const hasK = topK != null;
+    if (hasT || hasK) {
+      let t = temperature, k = topK;
+      if (hasT !== hasK && typeof LM.params === 'function') {
+        try {
+          const p = await LM.params();
+          if (!hasT) t = p?.defaultTemperature;
+          if (!hasK) k = p?.defaultTopK;
+        } catch {}
+      }
+      // Last-resort defaults if params() isn't available.
+      if (t == null) t = 1.0;
+      if (k == null) k = 40;
+      opts.temperature = t;
+      opts.topK = k;
+    }
+    const session = await LM.create(opts);
+    try {
+      for await (const chunk of session.promptStreaming(userText)) {
+        if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');
+        if (typeof chunk === 'string' && chunk) {
+          yield { type: 'text_delta', text: chunk };
+        }
+      }
+      yield { type: 'turn_end', stopReason: 'end_turn' };
+    } finally {
+      try { session.destroy?.(); } catch {}
+    }
+  })();
+}

package/providers/local.esm.js CHANGED Viewed

@@ -1,7 +1,7 @@
 // In-browser model via transformers.js + WebGPU. Two shapes ship:
 //
-// 1. `local({ model, dtype, maxTokens, genParams })` — runtime-compatible
-//    provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
+// 1. `local({ model, dtype, maxTokens, genParams, chatTemplate })` — runtime-
+//    compatible provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
 //    next to anthropic() and openai(). Wraps the renderer below and adapts
 //    its setReplyText-callback paint into the runtime's async-generator
 //    event protocol. Use this when local should participate in `/model`.
@@ -194,9 +194,13 @@ export function createTransformersRenderer() {
     await ensureLoaded(turnEl);
     const tf = await loadTransformers();
+    // chatTemplate spread last so consumers can override defaults (e.g.
+    // Gemma 4 needs { enable_thinking: false } to suppress channeled
+    // thought-token leaks; Qwen has its own templating knobs).
     const inputs = tokenizer.apply_chat_template(messages, {
       add_generation_prompt: true,
       return_tensors: 'pt',
+      ...(config.chatTemplate || {}),
     });
     const start = performance.now();
@@ -269,9 +273,9 @@ export function createTransformersRenderer() {
 // buffer (no new tokens emitted between calls), so the diff guards
 // against zero-length deltas. AbortSignal flows through naturally —
 // the underlying TextStreamer throws AbortError, which we surface.
-export function local({ model, dtype = 'q4', maxTokens = 256, genParams } = {}) {
+export function local({ model, dtype = 'q4', maxTokens = 256, genParams, chatTemplate } = {}) {
   const renderer = createTransformersRenderer();
-  if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams });
+  if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams, chatTemplate });
   return ({ messages, signal, turnEl, setReplyText }) => (async function* () {
     let lastFull = '';