@nevescloud/pip 3.5.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/package.json +3 -2
- package/pip-core.esm.js +7 -2
- package/providers/chrome.esm.js +111 -0
- package/providers/local.esm.js +8 -4
package/README.md
CHANGED
|
@@ -48,6 +48,18 @@ const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource:
|
|
|
48
48
|
// the turn loop entirely).
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
+
```js
|
|
52
|
+
// Chrome — on-device Gemini Nano via the Prompt API (zero download for users on Chrome 138+ that already has weights; ~2B-effective-param quality)
|
|
53
|
+
import { createRuntime } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/runtime.esm.js';
|
|
54
|
+
import { createPip } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/pip-core.esm.js';
|
|
55
|
+
import { chrome } from 'https://cdn.jsdelivr.net/npm/@nevescloud/pip@latest/providers/chrome.esm.js';
|
|
56
|
+
|
|
57
|
+
const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
|
|
58
|
+
const pip = createPip({ onSubmit: rt.onSubmit, onSlash: rt.onSlash, slashSource: rt.slashSource });
|
|
59
|
+
// No bundle — Chrome doesn't need its own re-export of createPip + createRuntime
|
|
60
|
+
// (`bundle/anthropic` already brings those, and chrome() composes alongside).
|
|
61
|
+
```
|
|
62
|
+
|
|
51
63
|
On jsdelivr the `.esm.js` suffix is required — jsdelivr serves files by raw path, not via `package.json` exports. npm-installed consumers can use the shorter `@nevescloud/pip/bundle/anthropic` (Node ESM resolver honors the exports map). `pip/bundle.esm.js` (or `pip/bundle` via npm) is an alias for `bundle/anthropic` — the default when you haven't picked a brain. Bundles are sugar over the layered files; hosts with a different brain shape (UI only, custom provider, in-browser model) import the granular files directly. See [CONSUMERS.md](../../CONSUMERS.md) for the full entry-point list.
|
|
52
64
|
|
|
53
65
|
## Options
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nevescloud/pip",
|
|
3
|
-
"version": "3.5.0",
|
|
3
|
+
"version": "3.7.0",
|
|
4
4
|
"description": "Floating assistant bubble + panel + chat runtime. ESM, no build.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "pip-core.esm.js",
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
"./bundle/local.esm.js": "./bundle/local.esm.js",
|
|
20
20
|
"./providers/anthropic.esm.js": "./providers/anthropic.esm.js",
|
|
21
21
|
"./providers/openai.esm.js": "./providers/openai.esm.js",
|
|
22
|
-
"./providers/local.esm.js": "./providers/local.esm.js"
|
|
22
|
+
"./providers/local.esm.js": "./providers/local.esm.js",
|
|
23
|
+
"./providers/chrome.esm.js": "./providers/chrome.esm.js"
|
|
23
24
|
},
|
|
24
25
|
"files": [
|
|
25
26
|
"pip-core.esm.js",
|
package/pip-core.esm.js
CHANGED
|
@@ -2085,12 +2085,17 @@ export function createPip(opts = {}) {
|
|
|
2085
2085
|
const slice = value.slice(1);
|
|
2086
2086
|
const sp = slice.indexOf(" ");
|
|
2087
2087
|
if (sp === -1) {
|
|
2088
|
-
// Command-name mode.
|
|
2088
|
+
// Command-name mode. Sort alphabetically — registration order is
|
|
2089
|
+
// arbitrary and a stable A→Z list is easier to scan than a list that
|
|
2090
|
+
// shifts shape per host. Arg mode is NOT sorted: hosts often return
|
|
2091
|
+
// suggestions in deliberate order (most-likely-next first, context-
|
|
2092
|
+
// aware ranking) and that intent shouldn't be overridden here.
|
|
2089
2093
|
const prefix = slice.toLowerCase();
|
|
2090
2094
|
const all = getSlashSource();
|
|
2091
2095
|
slashCurrent = all
|
|
2092
2096
|
.filter((s) => s.name.toLowerCase().startsWith(prefix))
|
|
2093
|
-
.map((s) => ({ name: s.name, description: s.description }));
|
|
2097
|
+
.map((s) => ({ name: s.name, description: s.description }))
|
|
2098
|
+
.sort((a, b) => a.name.localeCompare(b.name));
|
|
2094
2099
|
slashCmdContext = null;
|
|
2095
2100
|
} else {
|
|
2096
2101
|
// Argument mode — look up the selected command and ask its complete().
|
package/providers/chrome.esm.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// Chrome's built-in Prompt API (on-device Gemini Nano / Gemma-derived).
|
|
2
|
+
// Wraps `LanguageModel.create()` + `session.promptStreaming()` into a
|
|
3
|
+
// runtime-compatible provider — zero-download for users on Chrome that
|
|
4
|
+
// already has the weights, reply quality in the ~2B-effective-param
|
|
5
|
+
// range (well above what transformers.js practically pulls in-browser).
|
|
6
|
+
//
|
|
7
|
+
// Usage:
|
|
8
|
+
// import { createRuntime } from '@nevescloud/pip/runtime.esm.js';
|
|
9
|
+
// import { chrome } from '@nevescloud/pip/providers/chrome.esm.js';
|
|
10
|
+
//
|
|
11
|
+
// const rt = createRuntime({ provider: chrome({ temperature: 0.1 }) });
|
|
12
|
+
//
|
|
13
|
+
// Surface shifted across Chrome versions: the API moved from
|
|
14
|
+
// `window.ai.languageModel` (earlier flag-gated builds) to the top-level
|
|
15
|
+
// `LanguageModel` constructor as the Prompt API spec settled. We try
|
|
16
|
+
// the newer surface first and fall back. Chrome 138+ ships the origin
|
|
17
|
+
// trial; ~Chrome 148+ runs without a flag for many origins. Non-Chrome
|
|
18
|
+
// browsers throw a friendly error on first invocation.
|
|
19
|
+
//
|
|
20
|
+
// Limitations: no tool-use, no images. Tools registered on the runtime
|
|
21
|
+
// won't be exposed — pip's turn loop still works for slash commands and
|
|
22
|
+
// chat, but tool dispatch is a no-op with this provider.
|
|
23
|
+
|
|
24
|
+
const UNAVAILABLE =
|
|
25
|
+
"Chrome's built-in AI isn't available here. Use Chrome 138+ or enable " +
|
|
26
|
+
"chrome://flags#prompt-api-for-gemini-nano on earlier versions.";
|
|
27
|
+
|
|
28
|
+
function getApi() {
|
|
29
|
+
// Newer spec: top-level constructor. Older: nested under window.ai.
|
|
30
|
+
if (globalThis.LanguageModel) return globalThis.LanguageModel;
|
|
31
|
+
if (globalThis.ai?.languageModel) return globalThis.ai.languageModel;
|
|
32
|
+
return null;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
async function ensureAvailable(LM) {
|
|
36
|
+
// Newer: availability() → 'available' | 'downloadable' | 'downloading' | 'unavailable'
|
|
37
|
+
// Older: capabilities() → { available: 'readily' | 'after-download' | 'no' }
|
|
38
|
+
if (typeof LM.availability === 'function') {
|
|
39
|
+
const v = await LM.availability();
|
|
40
|
+
if (v === 'unavailable') throw new Error(UNAVAILABLE);
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
if (typeof LM.capabilities === 'function') {
|
|
44
|
+
const c = await LM.capabilities();
|
|
45
|
+
if (c?.available === 'no') throw new Error(UNAVAILABLE);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export function chrome({ systemPrompt, temperature, topK } = {}) {
|
|
50
|
+
return ({ messages, signal, system }) => (async function* () {
|
|
51
|
+
const LM = getApi();
|
|
52
|
+
if (!LM) throw new Error(UNAVAILABLE);
|
|
53
|
+
await ensureAvailable(LM);
|
|
54
|
+
|
|
55
|
+
// Runtime's per-call `system` wins over the factory default — same
|
|
56
|
+
// precedence anthropic/openai providers use.
|
|
57
|
+
const sys = system || systemPrompt;
|
|
58
|
+
const initialPrompts = [];
|
|
59
|
+
if (sys) initialPrompts.push({ role: 'system', content: sys });
|
|
60
|
+
// Replay prior turns. The Prompt API doesn't model tool dispatch, so
|
|
61
|
+
// skip non-string content (tool_use / tool_result turns) — they'd
|
|
62
|
+
// serialize to "[object Object]" and confuse the model.
|
|
63
|
+
for (const m of messages.slice(0, -1)) {
|
|
64
|
+
if (typeof m.content === 'string') {
|
|
65
|
+
initialPrompts.push({ role: m.role, content: m.content });
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
const tail = messages[messages.length - 1];
|
|
69
|
+
const userText = typeof tail?.content === 'string' ? tail.content : '';
|
|
70
|
+
|
|
71
|
+
const opts = {};
|
|
72
|
+
if (initialPrompts.length) opts.initialPrompts = initialPrompts;
|
|
73
|
+
|
|
74
|
+
// The Prompt API requires both topK and temperature to be set, or
|
|
75
|
+
// neither — passing one alone throws "Initializing a new session
|
|
76
|
+
// must either specify both topK and temperature, or neither". If
|
|
77
|
+
// the caller specified one, fetch the other's default from
|
|
78
|
+
// LanguageModel.params() so a half-spec doesn't reject the session.
|
|
79
|
+
const hasT = temperature != null;
|
|
80
|
+
const hasK = topK != null;
|
|
81
|
+
if (hasT || hasK) {
|
|
82
|
+
let t = temperature, k = topK;
|
|
83
|
+
if (hasT !== hasK && typeof LM.params === 'function') {
|
|
84
|
+
try {
|
|
85
|
+
const p = await LM.params();
|
|
86
|
+
if (!hasT) t = p?.defaultTemperature;
|
|
87
|
+
if (!hasK) k = p?.defaultTopK;
|
|
88
|
+
} catch {}
|
|
89
|
+
}
|
|
90
|
+
// Last-resort defaults if params() isn't available.
|
|
91
|
+
if (t == null) t = 1.0;
|
|
92
|
+
if (k == null) k = 40;
|
|
93
|
+
opts.temperature = t;
|
|
94
|
+
opts.topK = k;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const session = await LM.create(opts);
|
|
98
|
+
|
|
99
|
+
try {
|
|
100
|
+
for await (const chunk of session.promptStreaming(userText)) {
|
|
101
|
+
if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');
|
|
102
|
+
if (typeof chunk === 'string' && chunk) {
|
|
103
|
+
yield { type: 'text_delta', text: chunk };
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
yield { type: 'turn_end', stopReason: 'end_turn' };
|
|
107
|
+
} finally {
|
|
108
|
+
try { session.destroy?.(); } catch {}
|
|
109
|
+
}
|
|
110
|
+
})();
|
|
111
|
+
}
|
package/providers/local.esm.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// In-browser model via transformers.js + WebGPU. Two shapes ship:
|
|
2
2
|
//
|
|
3
|
-
// 1. `local({ model, dtype, maxTokens, genParams })` — runtime-
|
|
4
|
-
// provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
|
|
3
|
+
// 1. `local({ model, dtype, maxTokens, genParams, chatTemplate })` — runtime-
|
|
4
|
+
// compatible provider, slots into `createRuntime({ models: [{ provider: local(...) }] })`
|
|
5
5
|
// next to anthropic() and openai(). Wraps the renderer below and adapts
|
|
6
6
|
// its setReplyText-callback paint into the runtime's async-generator
|
|
7
7
|
// event protocol. Use this when local should participate in `/model`.
|
|
@@ -194,9 +194,13 @@ export function createTransformersRenderer() {
|
|
|
194
194
|
await ensureLoaded(turnEl);
|
|
195
195
|
|
|
196
196
|
const tf = await loadTransformers();
|
|
197
|
+
// chatTemplate spread last so consumers can override defaults (e.g.
|
|
198
|
+
// Gemma 4 needs { enable_thinking: false } to suppress channeled
|
|
199
|
+
// thought-token leaks; Qwen has its own templating knobs).
|
|
197
200
|
const inputs = tokenizer.apply_chat_template(messages, {
|
|
198
201
|
add_generation_prompt: true,
|
|
199
202
|
return_tensors: 'pt',
|
|
203
|
+
...(config.chatTemplate || {}),
|
|
200
204
|
});
|
|
201
205
|
|
|
202
206
|
const start = performance.now();
|
|
@@ -269,9 +273,9 @@ export function createTransformersRenderer() {
|
|
|
269
273
|
// buffer (no new tokens emitted between calls), so the diff guards
|
|
270
274
|
// against zero-length deltas. AbortSignal flows through naturally —
|
|
271
275
|
// the underlying TextStreamer throws AbortError, which we surface.
|
|
272
|
-
export function local({ model, dtype = 'q4', maxTokens = 256, genParams } = {}) {
|
|
276
|
+
export function local({ model, dtype = 'q4', maxTokens = 256, genParams, chatTemplate } = {}) {
|
|
273
277
|
const renderer = createTransformersRenderer();
|
|
274
|
-
if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams });
|
|
278
|
+
if (model) renderer.setModel({ id: model, dtype, maxTokens, genParams, chatTemplate });
|
|
275
279
|
|
|
276
280
|
return ({ messages, signal, turnEl, setReplyText }) => (async function* () {
|
|
277
281
|
let lastFull = '';
|