@nevescloud/pip 3.5.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,6 +48,18 @@ const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource:
48
48
  // the turn loop entirely).
49
49
  ```
50
50
 
51
+ ```js
52
+ // Chrome — on-device Gemini Nano via the Prompt API (zero download when Chrome 138+ already has the model weights; ~2B-effective-param quality)
53
+ import { createRuntime } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/runtime.esm.js';
54
+ import { createPip } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/pip-core.esm.js';
55
+ import { chrome } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/providers/chrome.esm.js';
56
+
57
+ const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
58
+ const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource: rt.slashSource });
59
+ // No bundle — Chrome doesn't need its own re-export of createPip + createRuntime
60
+ // (`bundle/anthropic` already brings those, and chrome() composes alongside).
61
+ ```
62
+
51
63
  On jsdelivr the `.esm.js` suffix is required — jsdelivr serves files by raw path, not via `package.json` exports. npm-installed consumers can use the shorter `@nevescloud/pip/bundle/anthropic` (Node ESM resolver honors the exports map). `pip/bundle.esm.js` (or `pip/bundle` via npm) is an alias for `bundle/anthropic` — the default when you haven't picked a brain. Bundles are sugar over the layered files; hosts with a different brain shape (UI only, custom provider, in-browser model) import the granular files directly. See [CONSUMERS.md](../../CONSUMERS.md) for the full entry-point list.
52
64
 
53
65
  ## Options
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nevescloud/pip",
3
- "version": "3.5.1",
3
+ "version": "3.7.0",
4
4
  "description": "Floating assistant bubble + panel + chat runtime. ESM, no build.",
5
5
  "type": "module",
6
6
  "main": "pip-core.esm.js",
@@ -19,7 +19,8 @@
19
19
  "./bundle/local.esm.js": "./bundle/local.esm.js",
20
20
  "./providers/anthropic.esm.js": "./providers/anthropic.esm.js",
21
21
  "./providers/openai.esm.js": "./providers/openai.esm.js",
22
- "./providers/local.esm.js": "./providers/local.esm.js"
22
+ "./providers/local.esm.js": "./providers/local.esm.js",
23
+ "./providers/chrome.esm.js": "./providers/chrome.esm.js"
23
24
  },
24
25
  "files": [
25
26
  "pip-core.esm.js",
@@ -0,0 +1,111 @@
1
+ // Chrome's built-in Prompt API (on-device Gemini Nano / Gemma-derived).
2
+ // Wraps `LanguageModel.create()` + `session.promptStreaming()` into a
3
+ // runtime-compatible provider — zero-download for users on Chrome that
4
+ // already has the weights, reply quality in the ~2B-effective-param
5
+ // range (well above what transformers.js practically pulls in-browser).
6
+ //
7
+ // Usage:
8
+ // import { createRuntime } from '@nevescloud/pip/runtime.esm.js';
9
+ // import { chrome } from '@nevescloud/pip/providers/chrome.esm.js';
10
+ //
11
+ // const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
12
+ //
13
+ // Surface shifted across Chrome versions: the API moved from
14
+ // `window.ai.languageModel` (earlier flag-gated builds) to the top-level
15
+ // `LanguageModel` constructor as the Prompt API spec settled. We try
16
+ // the newer surface first and fall back. Chrome 138+ ships the origin
17
+ // trial; ~Chrome 148+ runs without a flag for many origins. Non-Chrome
18
+ // browsers throw a friendly error on first invocation.
19
+ //
20
+ // Limitations: no tool-use, no images. Tools registered on the runtime
21
+ // won't be exposed — pip's turn loop still works for slash commands and
22
+ // chat, but tool dispatch is a no-op with this provider.
23
+
24
// Message thrown whenever no usable Prompt API surface can be found or the
// browser reports the model as unavailable.
const UNAVAILABLE =
  "Chrome's built-in AI isn't available here. Use Chrome 138+ or enable " +
  "chrome://flags#prompt-api-for-gemini-nano on earlier versions.";

/**
 * Locate the Prompt API entry point.
 *
 * Checks the top-level `LanguageModel` global first, then the older nested
 * `ai.languageModel` surface.
 *
 * @returns {object|null} The API object, or `null` when neither surface exists.
 */
function getApi() {
  return globalThis.LanguageModel || globalThis.ai?.languageModel || null;
}

/**
 * Throw the UNAVAILABLE error when the API reports that the model cannot run.
 *
 * Newer surface: `availability()` resolves to a status string; only
 * `'unavailable'` is treated as fatal.
 * Older surface: `capabilities()` resolves to an object; only
 * `available === 'no'` is treated as fatal.
 * If neither method exists, nothing is checked here.
 *
 * @param {object} LM - API object returned by `getApi()`.
 * @returns {Promise<void>}
 * @throws {Error} When the API reports the model as unavailable.
 */
async function ensureAvailable(LM) {
  if (typeof LM.availability === 'function') {
    const status = await LM.availability();
    if (status === 'unavailable') throw new Error(UNAVAILABLE);
    return;
  }
  if (typeof LM.capabilities === 'function') {
    const caps = await LM.capabilities();
    if (caps?.available === 'no') throw new Error(UNAVAILABLE);
  }
}

/**
 * Build a provider backed by Chrome's built-in Prompt API.
 *
 * @param {object} [options]
 * @param {string} [options.systemPrompt] - Default system prompt, used when
 *   the per-call `system` is empty.
 * @param {number} [options.temperature] - Sampling temperature.
 * @param {number} [options.topK] - Sampling top-K.
 * @returns {function} Provider `({ messages, signal, system })` returning an
 *   async generator that yields `{ type: 'text_delta', text }` events followed
 *   by one `{ type: 'turn_end', stopReason: 'end_turn' }` event.
 *   The generator throws `Error(UNAVAILABLE)` when the API is missing or
 *   unavailable, and an `AbortError` DOMException when `signal` is aborted.
 */
export function chrome({ systemPrompt, temperature, topK } = {}) {
  return ({ messages, signal, system }) => (async function* () {
    const LM = getApi();
    if (!LM) throw new Error(UNAVAILABLE);
    await ensureAvailable(LM);

    // Per-call `system` wins over the factory default.
    const sys = system || systemPrompt;
    const initialPrompts = [];
    if (sys) initialPrompts.push({ role: 'system', content: sys });

    // Replay prior turns, skipping non-string content (tool_use / tool_result
    // turns) — it would serialize to "[object Object]" and confuse the model.
    for (const m of messages.slice(0, -1)) {
      if (typeof m.content === 'string') {
        initialPrompts.push({ role: m.role, content: m.content });
      }
    }
    const tail = messages[messages.length - 1];
    const userText = typeof tail?.content === 'string' ? tail.content : '';

    const opts = {};
    if (initialPrompts.length) opts.initialPrompts = initialPrompts;

    // The Prompt API requires both topK and temperature to be set, or
    // neither. If the caller gave only one, fetch the other's default from
    // params() so a half-spec doesn't reject the session.
    const hasT = temperature != null;
    const hasK = topK != null;
    if (hasT || hasK) {
      let t = temperature;
      let k = topK;
      if (hasT !== hasK && typeof LM.params === 'function') {
        try {
          const p = await LM.params();
          if (!hasT) t = p?.defaultTemperature;
          if (!hasK) k = p?.defaultTopK;
        } catch {
          // Best-effort: a failing params() falls through to the hard-coded
          // defaults below rather than failing the whole turn.
        }
      }
      // Last-resort defaults if params() isn't available.
      if (t == null) t = 1.0;
      if (k == null) k = 40;
      opts.temperature = t;
      opts.topK = k;
    }

    // Don't create a session (and possibly kick off a model download) for a
    // turn that has already been cancelled.
    if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');
    // Forward the signal so cancellation interrupts session creation itself.
    if (signal) opts.signal = signal;

    const session = await LM.create(opts);

    try {
      // Forward the signal so an abort stops generation immediately instead
      // of only being noticed when the next chunk happens to arrive.
      const stream = signal
        ? session.promptStreaming(userText, { signal })
        : session.promptStreaming(userText);
      for await (const chunk of stream) {
        if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');
        if (typeof chunk === 'string' && chunk) {
          yield { type: 'text_delta', text: chunk };
        }
      }
      yield { type: 'turn_end', stopReason: 'end_turn' };
    } finally {
      // Best-effort cleanup: a failing destroy() must not mask the real
      // outcome of the turn.
      try { session.destroy?.(); } catch {}
    }
  })();
}
@@ -1,7 +1,7 @@
1
1
  // In-browser model via transformers.js + WebGPU. Two shapes ship:
2
2
  //
3
- // 1. `local({ model, dtype, maxTokens, genParams })` — runtime-compatible
4
- // provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
3
+ // 1. `local({ model, dtype, maxTokens, genParams, chatTemplate })` — runtime-
4
+ // compatible provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
5
5
  // next to anthropic() and openai(). Wraps the renderer below and adapts
6
6
  // its setReplyText-callback paint into the runtime's async-generator
7
7
  // event protocol. Use this when local should participate in `/model`.
@@ -194,9 +194,13 @@ export function createTransformersRenderer() {
194
194
  await ensureLoaded(turnEl);
195
195
 
196
196
  const tf = await loadTransformers();
197
+ // chatTemplate spread last so consumers can override defaults (e.g.
198
+ // Gemma 4 needs { enable_thinking: false } to suppress channeled
199
+ // thought-token leaks; Qwen has its own templating knobs).
197
200
  const inputs = tokenizer.apply_chat_template(messages, {
198
201
  add_generation_prompt: true,
199
202
  return_tensors: 'pt',
203
+ ...(config.chatTemplate || {}),
200
204
  });
201
205
 
202
206
  const start = performance.now();
@@ -269,9 +273,9 @@ export function createTransformersRenderer() {
269
273
  // buffer (no new tokens emitted between calls), so the diff guards
270
274
  // against zero-length deltas. AbortSignal flows through naturally —
271
275
  // the underlying TextStreamer throws AbortError, which we surface.
272
- export function local({ model, dtype = 'q4', maxTokens = 256, genParams } = {}) {
276
+ export function local({ model, dtype = 'q4', maxTokens = 256, genParams, chatTemplate } = {}) {
273
277
  const renderer = createTransformersRenderer();
274
- if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams });
278
+ if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams, chatTemplate });
275
279
 
276
280
  return ({ messages, signal, turnEl, setReplyText }) => (async function* () {
277
281
  let lastFull = '';