@nevescloud/pip 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ // Anthropic provider for pip-runtime. Streams /v1/messages over fetch, no SDK.
2
+ //
3
+ // Pip is a no-build, no-dependency ESM module loaded directly via jsdelivr;
4
+ // importing @anthropic-ai/sdk would inflate the bundle and force consumers to
5
+ // load it too. Raw fetch keeps the module tiny and follows the public wire
6
+ // format from the API reference.
7
+ //
8
+ // Production hosts should route through their own server-side proxy (pass
9
+ // `baseUrl`) to keep the API key out of the browser. Direct browser access
10
+ // requires the documented escape-hatch header.
11
+
12
+ const DEFAULT_BASE = 'https://api.anthropic.com'; // Default for `baseUrl`; the provider appends `/v1/messages` to it.
13
+ const DEFAULT_MODEL = 'claude-opus-4-7'; // Default for `model` when the caller passes none.
14
+ const DEFAULT_MAX_TOKENS = 4096; // Default for the request body's `max_tokens`.
15
+ const ANTHROPIC_VERSION = '2023-06-01'; // Sent on every request as the `anthropic-version` header.
16
+
17
+ export function anthropic({
18
+ apiKey,
19
+ model = DEFAULT_MODEL,
20
+ baseUrl = DEFAULT_BASE,
21
+ maxTokens = DEFAULT_MAX_TOKENS,
22
+ thinking = { type: 'adaptive' },
23
+ effort,
24
+ cacheControl,
25
+ extraHeaders = {},
26
+ } = {}) {
27
+ if (!apiKey) throw new Error('anthropic provider: apiKey is required');
28
+
29
+ return async function* anthropicProvider({ messages, tools, system, signal }) {
30
+ const body = {
31
+ model,
32
+ max_tokens: maxTokens,
33
+ stream: true,
34
+ messages,
35
+ };
36
+ if (system) body.system = system;
37
+ if (tools && tools.length) {
38
+ body.tools = tools.map((t) => ({
39
+ name: t.name,
40
+ description: t.description,
41
+ input_schema: t.schema || t.input_schema,
42
+ }));
43
+ }
44
+ if (thinking) body.thinking = thinking;
45
+ if (effort) body.output_config = { effort };
46
+ if (cacheControl) body.cache_control = { type: cacheControl };
47
+
48
+ const res = await fetch(`${baseUrl}/v1/messages`, {
49
+ method: 'POST',
50
+ headers: {
51
+ 'content-type': 'application/json',
52
+ 'x-api-key': apiKey,
53
+ 'anthropic-version': ANTHROPIC_VERSION,
54
+ 'anthropic-dangerous-direct-browser-access': 'true',
55
+ ...extraHeaders,
56
+ },
57
+ body: JSON.stringify(body),
58
+ signal,
59
+ });
60
+
61
+ if (!res.ok) {
62
+ const errText = await res.text().catch(() => res.statusText);
63
+ throw new Error(`Anthropic API ${res.status}: ${errText}`);
64
+ }
65
+
66
+ const reader = res.body.getReader();
67
+ const decoder = new TextDecoder();
68
+ let pending = '';
69
+ let pendingBlock = null;
70
+ let usage = null;
71
+
72
+ try {
73
+ while (true) {
74
+ const { done, value } = await reader.read();
75
+ if (done) break;
76
+ pending += decoder.decode(value, { stream: true });
77
+ const lines = pending.split('\n');
78
+ pending = lines.pop();
79
+ for (const line of lines) {
80
+ const trimmed = line.trim();
81
+ if (!trimmed.startsWith('data:')) continue;
82
+ const data = trimmed.slice(5).trim();
83
+ if (!data || data === '[DONE]') continue;
84
+ let event;
85
+ try { event = JSON.parse(data); } catch { continue; }
86
+
87
+ if (event.type === 'message_start') {
88
+ if (event.message?.usage) usage = { ...event.message.usage };
89
+ } else if (event.type === 'content_block_start') {
90
+ const cb = event.content_block;
91
+ if (cb && cb.type === 'tool_use') {
92
+ pendingBlock = { id: cb.id, name: cb.name, inputJson: '' };
93
+ }
94
+ } else if (event.type === 'content_block_delta') {
95
+ const d = event.delta;
96
+ if (d.type === 'text_delta') {
97
+ yield { type: 'text_delta', text: d.text };
98
+ } else if (d.type === 'input_json_delta' && pendingBlock) {
99
+ pendingBlock.inputJson += d.partial_json || '';
100
+ }
101
+ } else if (event.type === 'content_block_stop') {
102
+ if (pendingBlock) {
103
+ let input = {};
104
+ try { input = JSON.parse(pendingBlock.inputJson || '{}'); } catch {}
105
+ yield { type: 'tool_use', id: pendingBlock.id, name: pendingBlock.name, input };
106
+ pendingBlock = null;
107
+ }
108
+ } else if (event.type === 'message_delta') {
109
+ if (event.usage) usage = { ...usage, ...event.usage };
110
+ if (event.delta && event.delta.stop_reason) {
111
+ yield { type: 'turn_end', stopReason: event.delta.stop_reason, usage };
112
+ }
113
+ }
114
+ }
115
+ }
116
+ } finally {
117
+ try { reader.releaseLock(); } catch {}
118
+ }
119
+ };
120
+ }
@@ -0,0 +1,164 @@
1
+ // OpenAI-compatible provider for pip-runtime. Streams /chat/completions over
2
+ // fetch, no SDK. Covers any host that speaks the OpenAI chat-completions
3
+ // wire format — direct OpenAI, GitHub Models, Together, Groq, OpenRouter,
4
+ // LM Studio, llama.cpp's openai shim, etc. Same provider, different baseUrl
5
+ // + model + auth.
6
+ //
7
+ // Usage:
8
+ // openai({ apiKey: '...', model: 'gpt-4o-mini' }) // direct OpenAI
9
+ // openai({ apiKey: oauthToken,
10
+ // model: 'openai/gpt-4o-mini',
11
+ // baseUrl: 'https://models.github.ai/inference' }) // GitHub Models
12
+ //
13
+ // Yields the same { text_delta | tool_use | turn_end } event shape as the
14
+ // anthropic provider, so the runtime doesn't need to know the difference.
15
+
16
+ const DEFAULT_BASE = 'https://api.openai.com/v1'; // Default for `baseUrl`; the provider appends `/chat/completions` to it.
17
+ const DEFAULT_MAX_TOKENS = 4096; // Default for the request body's `max_tokens`.
18
+
19
+ export function openai({
20
+ apiKey,
21
+ model,
22
+ baseUrl = DEFAULT_BASE,
23
+ maxTokens = DEFAULT_MAX_TOKENS,
24
+ extraHeaders = {},
25
+ } = {}) {
26
+ if (!apiKey) throw new Error('openai provider: apiKey is required');
27
+ if (!model) throw new Error('openai provider: model is required');
28
+
29
+ return async function* openaiProvider({ messages, tools, system, signal }) {
30
+ // OpenAI inlines system prompt as the first message; anthropic puts it on
31
+ // the request envelope. Translate by prepending if provided.
32
+ const allMessages = system
33
+ ? [{ role: 'system', content: system }, ...messages]
34
+ : messages;
35
+
36
+ const body = {
37
+ model,
38
+ max_tokens: maxTokens,
39
+ stream: true,
40
+ messages: allMessages,
41
+ };
42
+
43
+ if (tools && tools.length) {
44
+ // Anthropic shape → OpenAI function shape. The JSON Schema body for
45
+ // parameters transfers verbatim; only the wrapper differs.
46
+ body.tools = tools.map((t) => ({
47
+ type: 'function',
48
+ function: {
49
+ name: t.name,
50
+ description: t.description,
51
+ parameters: t.schema || t.input_schema,
52
+ },
53
+ }));
54
+ }
55
+
56
+ const res = await fetch(`${baseUrl}/chat/completions`, {
57
+ method: 'POST',
58
+ headers: {
59
+ 'content-type': 'application/json',
60
+ authorization: `Bearer ${apiKey}`,
61
+ ...extraHeaders,
62
+ },
63
+ body: JSON.stringify(body),
64
+ signal,
65
+ });
66
+
67
+ if (!res.ok) {
68
+ const errText = await res.text().catch(() => res.statusText);
69
+ throw new Error(`OpenAI API ${res.status}: ${errText}`);
70
+ }
71
+
72
+ const reader = res.body.getReader();
73
+ const decoder = new TextDecoder();
74
+ let pending = '';
75
+ // Tool-call deltas arrive across multiple chunks indexed by `index`;
76
+ // each chunk supplies a fragment of the JSON-stringified arguments
77
+ // that we concat and parse on completion.
78
+ const pendingToolCalls = new Map();
79
+ let stopReason = null;
80
+ let usage = null;
81
+
82
+ function flushToolCalls() {
83
+ for (const tc of pendingToolCalls.values()) {
84
+ let input = {};
85
+ try { input = JSON.parse(tc.argsJson || '{}'); } catch { /* malformed → empty */ }
86
+ // Each yield is its own event; runtime treats them like Anthropic
87
+ // tool_use blocks.
88
+ }
89
+ // (yields happen below — split for readability since we can't yield from
90
+ // a non-generator helper without extra plumbing)
91
+ }
92
+
93
+ try {
94
+ while (true) {
95
+ const { done, value } = await reader.read();
96
+ if (done) break;
97
+ pending += decoder.decode(value, { stream: true });
98
+ const lines = pending.split('\n');
99
+ pending = lines.pop();
100
+ for (const line of lines) {
101
+ const trimmed = line.trim();
102
+ if (!trimmed.startsWith('data:')) continue;
103
+ const data = trimmed.slice(5).trim();
104
+ if (!data) continue;
105
+ if (data === '[DONE]') {
106
+ for (const tc of pendingToolCalls.values()) {
107
+ let input = {};
108
+ try { input = JSON.parse(tc.argsJson || '{}'); } catch {}
109
+ yield { type: 'tool_use', id: tc.id, name: tc.name, input };
110
+ }
111
+ pendingToolCalls.clear();
112
+ yield { type: 'turn_end', stopReason: stopReason || 'stop', usage };
113
+ continue;
114
+ }
115
+ let event;
116
+ try { event = JSON.parse(data); } catch { continue; }
117
+
118
+ if (event.usage) usage = event.usage;
119
+
120
+ const choice = event.choices?.[0];
121
+ if (!choice) continue;
122
+ const delta = choice.delta || {};
123
+
124
+ if (typeof delta.content === 'string' && delta.content) {
125
+ yield { type: 'text_delta', text: delta.content };
126
+ }
127
+
128
+ if (Array.isArray(delta.tool_calls)) {
129
+ for (const tcDelta of delta.tool_calls) {
130
+ const idx = tcDelta.index ?? 0;
131
+ if (!pendingToolCalls.has(idx)) {
132
+ pendingToolCalls.set(idx, {
133
+ id: tcDelta.id,
134
+ name: tcDelta.function?.name,
135
+ argsJson: '',
136
+ });
137
+ }
138
+ const tc = pendingToolCalls.get(idx);
139
+ if (tcDelta.id) tc.id = tcDelta.id;
140
+ if (tcDelta.function?.name) tc.name = tcDelta.function.name;
141
+ if (tcDelta.function?.arguments) tc.argsJson += tcDelta.function.arguments;
142
+ }
143
+ }
144
+
145
+ if (choice.finish_reason) stopReason = choice.finish_reason;
146
+ }
147
+ }
148
+ // Some servers (LM Studio in particular) close the stream without
149
+ // sending [DONE]. Flush any tool calls and emit turn_end so the
150
+ // runtime sees a clean termination.
151
+ if (pendingToolCalls.size) {
152
+ for (const tc of pendingToolCalls.values()) {
153
+ let input = {};
154
+ try { input = JSON.parse(tc.argsJson || '{}'); } catch {}
155
+ yield { type: 'tool_use', id: tc.id, name: tc.name, input };
156
+ }
157
+ pendingToolCalls.clear();
158
+ }
159
+ if (stopReason) yield { type: 'turn_end', stopReason, usage };
160
+ } finally {
161
+ try { reader.releaseLock(); } catch {}
162
+ }
163
+ };
164
+ }
@@ -0,0 +1,273 @@
1
+ // In-browser model renderer using transformers.js. One-shot generate
2
+ // (not part of pip-runtime's turn loop) — hands the host a tokenizer +
3
+ // model loaded over WebGPU and a streaming generate() that paints into
4
+ // a Pip turnEl. Includes the download-progress UI and a `<think>` pill
5
+ // for reasoning models, so consumers don't reinvent either.
6
+ //
7
+ // Usage:
8
+ // import { createTransformersRenderer } from
9
+ // '@nevescloud/pip/providers/transformers.esm.js';
10
+ //
11
+ // const renderer = createTransformersRenderer();
12
+ // renderer.setModel({
13
+ // id: 'LiquidAI/LFM2.5-350M-ONNX',
14
+ // dtype: 'q4',
15
+ // maxTokens: 256,
16
+ // genParams: { temperature: 0.1, top_k: 50, repetition_penalty: 1.05 },
17
+ // });
18
+ // const text = await renderer.generate({
19
+ // messages, // chat messages array
20
+ // turnEl, // pip turnEl (download progress + <think> pill render here)
21
+ // setReplyText, // pip's setReplyText
22
+ // signal, // optional AbortSignal for mid-stream cancel
23
+ // });
24
+ //
25
+ // Switching models: call setModel() again with a new config — the cached
26
+ // model+tokenizer drop and reload on the next generate(). Within a single
27
+ // model, repeat generate() calls reuse the loaded artifacts.
28
+
29
+ import { showLoading, hideLoading } from '../pip-core.esm.js';
30
+
31
+ const TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@huggingface/transformers'; // Dynamically imported by loadTransformers() and used for the modulepreload hint. NOTE(review): no version is pinned in this URL, so the CDN presumably serves whatever release is current — confirm and consider pinning.
32
+
33
// Cached transformers.js module namespace; null until the first import succeeds.
let _tf = null;

/**
 * Resolve the transformers.js module, importing it from TRANSFORMERS_URL on
 * first use and returning the cached namespace on later calls. A failed
 * import leaves the cache empty, so the next call tries again.
 */
async function loadTransformers() {
  if (_tf) return _tf;
  _tf = await import(/* @vite-ignore */ TRANSFORMERS_URL);
  return _tf;
}
38
+
39
+ // Network hints injected at construction so the first generate() doesn't pay
40
+ // the TLS handshake or the library fetch on the critical path. Bytes are
41
+ // trivial vs. model weights; preconnect is free; modulepreload pulls the
42
+ // library JS (~hundreds of KB gzipped) which the consumer was going to fetch
43
+ // anyway by importing this provider. Idempotent and SSR-safe.
44
// Set on the first call made in a document environment; later calls are no-ops.
let _hintsInjected = false;

/**
 * Append three <link> hints to document.head, once: preconnect to the
 * library CDN, preconnect to the model hub, and modulepreload for the
 * transformers.js bundle. Does nothing when there is no `document`.
 */
function injectNetworkHints() {
  if (typeof document === 'undefined' || _hintsInjected) return;
  _hintsInjected = true;

  const { head } = document;
  if (!head) return;

  const hints = [
    // Library origin: where transformers.js itself loads from.
    ['preconnect', 'https://cdn.jsdelivr.net'],
    // Model origin: HF Hub serves model index + redirects to its blob CDN.
    ['preconnect', 'https://huggingface.co'],
    // Library bytes: fetched in parallel with page load; executed only when
    // loadTransformers() runs the dynamic import.
    ['modulepreload', TRANSFORMERS_URL],
  ];

  for (const [rel, href] of hints) {
    const el = document.createElement('link');
    el.rel = rel;
    el.href = href;
    el.crossOrigin = 'anonymous';
    head.appendChild(el);
  }
}
66
+
67
// Injects the stylesheet for the <think> pill (.pip-tx-think and
// .pip-tx-think-body rules) into document.head. Runs at most once per module
// instance and does nothing when no `document` global exists.
// NOTE(review): unlike injectNetworkHints, this does not guard against a
// missing document.head before appending — confirm that is acceptable.
+ let _stylesInjected = false; // Flipped on the first call that reaches a document; later calls return early.
68
+ function injectStyles() {
69
+ if (_stylesInjected || typeof document === 'undefined') return; // Already injected, or no DOM available.
70
+ _stylesInjected = true;
71
+ const css = document.createElement('style');
72
+ css.textContent = `
73
+ .pip-tx-think {
74
+ margin: 4px 0 8px;
75
+ font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
76
+ }
77
+ .pip-tx-think summary {
78
+ font-size: 11px;
79
+ color: var(--pip-ink-muted, #8a8a8a);
80
+ cursor: pointer;
81
+ list-style: none;
82
+ display: inline-block;
83
+ padding: 2px 9px;
84
+ border: 1px solid var(--pip-border, color-mix(in srgb, currentColor 18%, transparent));
85
+ border-radius: 100px;
86
+ user-select: none;
87
+ letter-spacing: 0.04em;
88
+ text-transform: uppercase;
89
+ opacity: 0.7;
90
+ transition: opacity 0.15s ease, border-color 0.15s ease;
91
+ }
92
+ .pip-tx-think summary::-webkit-details-marker { display: none; }
93
+ .pip-tx-think summary:hover,
94
+ .pip-tx-think[open] summary { opacity: 1; }
95
+ .pip-tx-think.thinking summary {
96
+ animation: pip-tx-think-pulse 1.4s ease-in-out infinite;
97
+ }
98
+ @keyframes pip-tx-think-pulse {
99
+ 0%, 100% { opacity: 0.5; }
100
+ 50% { opacity: 1; }
101
+ }
102
+ @media (prefers-reduced-motion: reduce) {
103
+ .pip-tx-think.thinking summary { animation: none; opacity: 0.85; }
104
+ }
105
+ .pip-tx-think-body {
106
+ margin-top: 6px;
107
+ padding: 8px 10px;
108
+ font-size: 11px;
109
+ color: var(--pip-ink-muted, #8a8a8a);
110
+ background: color-mix(in srgb, currentColor 5%, transparent);
111
+ border-radius: 6px;
112
+ white-space: pre-wrap;
113
+ line-height: 1.5;
114
+ max-height: 240px;
115
+ overflow-y: auto;
116
+ }
117
+ `.trim(); // Strip the leading/trailing newlines of the template literal.
118
+ document.head.appendChild(css);
119
+ }
120
+
121
/**
 * Build the collapsible "thinking…" pill (a <details> holding a <summary>
 * and a body <div>) and insert it into `turnEl` before its `.pip-reply`
 * element, or at the end when there is no such element.
 *
 * @param {Element} turnEl - Turn container the pill is inserted into.
 * @returns {{details: Element, summary: Element, body: Element}} The created nodes.
 */
function createThinkPill(turnEl) {
  const make = (tag, className) => {
    const el = document.createElement(tag);
    if (className !== undefined) el.className = className;
    return el;
  };

  const details = make('details', 'pip-tx-think thinking');
  const summary = make('summary');
  summary.textContent = 'thinking\u2026';
  const body = make('div', 'pip-tx-think-body');
  details.append(summary, body);

  const reply = turnEl.querySelector('.pip-reply');
  const anchor = reply ? reply : null;
  turnEl.insertBefore(details, anchor);

  return { details, summary, body };
}
133
+
134
/**
 * Separate a model's `<think>…</think>` reasoning from its answer.
 *
 * @param {string} text - Raw generated text.
 * @returns {{think: (string|null), answer: (string|null)}}
 *   - closed think block: trimmed reasoning plus the trimmed text after it;
 *   - opening tag only: all text with think tags stripped as `think`, `answer` null;
 *   - no opening tag: `think` null and the trimmed text as `answer`.
 */
function splitThinking(text) {
  const closed = /<think>([\s\S]*?)<\/think>([\s\S]*)/i.exec(text);
  if (closed !== null) {
    const [, reasoning, rest] = closed;
    return { think: reasoning.trim(), answer: rest.trim() };
  }

  const hasOpeningTag = text.search(/<think>/i) !== -1;
  if (!hasOpeningTag) {
    return { think: null, answer: text.trim() };
  }

  const stripped = text.replace(/<\/?think>/gi, '');
  return { think: stripped.trim(), answer: null };
}
140
+
141
/**
 * Create an in-browser renderer backed by transformers.js.
 *
 * Injects the pill styles and network hints, then returns an object with:
 *   - `setModel(config)`: choose the model (`{ id, dtype, maxTokens, genParams }`).
 *     Changing `id` or `dtype` invalidates loaded or still-loading artifacts;
 *     any other change keeps them.
 *   - `generate({ messages, turnEl, setReplyText, signal })`: load the model if
 *     needed, stream a reply into `turnEl`, and resolve with the answer text.
 *   - `currentModelId`: id of the configured model, or null.
 *
 * @returns {{setModel: Function, generate: Function, currentModelId: (string|null)}}
 */
export function createTransformersRenderer() {
  injectStyles();
  injectNetworkHints();

  const DEFAULT_DTYPE = 'q4';
  const NO_MODEL_MESSAGE = 'setModel({id, ...}) must be called before generate.';

  let config = null;
  let tokenizer = null;
  let model = null;
  let loadingPromise = null;
  // Bumped whenever artifacts are invalidated, so a load that started under
  // an earlier model selection can tell it has been superseded.
  let loadGeneration = 0;

  // Forget loaded artifacts and orphan any in-flight load.
  function dropArtifacts() {
    tokenizer = null;
    model = null;
    loadingPromise = null;
    loadGeneration += 1;
  }

  /**
   * Select the model for later generate() calls.
   *
   * @param {object|null} c - Model config; a missing config or missing `id`
   *   clears the current model.
   */
  function setModel(c) {
    if (!c || !c.id) {
      config = c || null;
      dropArtifacts();
      return;
    }
    // Same weights (id + dtype): keep loaded artifacts, or let the running
    // load finish, and just pick up the other config tweaks.
    const sameArtifacts = config?.id === c.id
      && (config.dtype || DEFAULT_DTYPE) === (c.dtype || DEFAULT_DTYPE);
    config = c;
    if (!sameArtifacts) dropArtifacts();
  }

  // Start loading tokenizer + model for the current config and record the
  // attempt in `loadingPromise`. Throws synchronously on missing config/WebGPU.
  function startLoad(turnEl) {
    if (!config?.id) throw new Error(NO_MODEL_MESSAGE);
    if (typeof navigator === 'undefined' || !navigator.gpu) {
      throw new Error('No WebGPU on this device — local models need it.');
    }

    const generation = loadGeneration;
    const { id } = config;
    const dtype = config.dtype || DEFAULT_DTYPE;

    const attempt = (async () => {
      showLoading(turnEl, 'loading runtime\u2026', 0);
      const tf = await loadTransformers();
      const onProgress = (p) => {
        if (p?.status !== 'progress') return;
        const file = (p.file || '').split('/').pop() || '';
        const pct = Math.round(p.progress || 0);
        showLoading(turnEl, `${file} ${pct}%`, pct);
      };
      const loadedTokenizer = await tf.AutoTokenizer.from_pretrained(id, {
        progress_callback: onProgress,
      });
      const loadedModel = await tf.AutoModelForCausalLM.from_pretrained(id, {
        device: 'webgpu',
        dtype,
        progress_callback: onProgress,
      });
      // Publish only if setModel() has not switched models while we loaded;
      // otherwise a stale model would run under the new config.
      if (generation === loadGeneration) {
        tokenizer = loadedTokenizer;
        model = loadedModel;
      }
    })().finally(() => {
      hideLoading(turnEl);
      // Clear only our own slot: a newer load may already occupy it.
      if (loadingPromise === attempt) loadingPromise = null;
    });

    loadingPromise = attempt;
    return attempt;
  }

  // Wait until artifacts for the CURRENT config are loaded and return a
  // consistent snapshot. Loops because a load can finish superseded.
  async function ensureLoaded(turnEl) {
    while (!model) {
      await (loadingPromise || startLoad(turnEl));
    }
    return { config, tokenizer, model };
  }

  /**
   * Stream one reply into `turnEl`.
   *
   * @param {object} args
   * @param {Array} args.messages - Chat messages passed to the chat template.
   * @param {Element} args.turnEl - Element that receives progress, pill and reply.
   * @param {Function} args.setReplyText - Called as `(turnEl, text, true)` with
   *   the reply accumulated so far.
   * @param {AbortSignal} [args.signal] - Optional cancel signal.
   * @returns {Promise<string>} The answer without its `<think>` block, or the
   *   whole trimmed output when no answer could be separated.
   * @throws {Error} If no model is configured, WebGPU is missing, or loading fails.
   * @throws {DOMException} `AbortError` when `signal` is aborted.
   */
  async function generate({ messages, turnEl, setReplyText, signal }) {
    if (!config?.id) throw new Error(NO_MODEL_MESSAGE);
    // Snapshot so a setModel() during generation cannot swap artifacts mid-run.
    const active = await ensureLoaded(turnEl);
    if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');

    const tf = await loadTransformers();
    const inputs = active.tokenizer.apply_chat_template(messages, {
      add_generation_prompt: true,
      return_tensors: 'pt',
    });

    const start = performance.now();
    let buffer = '';
    let pill = null;
    let thinkClosedAt = null;

    const streamer = new tf.TextStreamer(active.tokenizer, {
      skip_prompt: true,
      decode_kwargs: { skip_special_tokens: true },
      callback_function: (chunk) => {
        // Throwing from the callback is how generation is interrupted.
        if (signal?.aborted) throw new DOMException('Aborted', 'AbortError');
        buffer += chunk;
        const closed = buffer.match(/<think>([\s\S]*?)<\/think>([\s\S]*)/i);
        const opening = /<think>/i.test(buffer);
        if (opening && !pill) pill = createThinkPill(turnEl);
        if (closed) {
          pill.body.textContent = closed[1].trim();
          if (thinkClosedAt == null) {
            thinkClosedAt = performance.now();
            pill.summary.textContent = `thought for ${((thinkClosedAt - start) / 1000).toFixed(1)}s`;
            pill.details.classList.remove('thinking');
          }
          setReplyText(turnEl, closed[2].trim(), true);
        } else if (opening) {
          // Still thinking: show everything after the opening tag, scrolled to the end.
          pill.body.textContent = buffer.replace(/^[\s\S]*?<think>/i, '');
          pill.body.scrollTop = pill.body.scrollHeight;
        } else {
          setReplyText(turnEl, buffer.trim(), true);
        }
      },
    });

    try {
      await active.model.generate({
        // The chat template may return either a dict or a bare input_ids tensor.
        ...(inputs.input_ids ? inputs : { input_ids: inputs }),
        max_new_tokens: active.config.maxTokens || 256,
        do_sample: true,
        ...(active.config.genParams || {}),
        streamer,
      });
    } catch (err) {
      // Normalize any failure that happens after an abort into an AbortError.
      if (signal?.aborted && err?.name !== 'AbortError') {
        throw new DOMException('Aborted', 'AbortError');
      }
      throw err;
    }

    if (pill && thinkClosedAt == null) {
      pill.summary.textContent = 'truncated mid-thought';
      pill.details.classList.remove('thinking');
    }
    return splitThinking(buffer).answer || buffer.trim();
  }

  return {
    setModel,
    generate,
    get currentModelId() { return config?.id || null; },
  };
}
272
+
273
+ export { splitThinking };