@octomil/browser 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +106 -13
- package/dist/index.cjs.map +3 -3
- package/dist/octomil.min.js +29 -27
- package/dist/octomil.min.js.map +3 -3
- package/dist/transformers-local-runtime.d.ts.map +1 -1
- package/dist/transformers-local-runtime.js +123 -13
- package/dist/transformers-local-runtime.js.map +1 -1
- package/package.json +2 -1
package/dist/index.cjs
CHANGED
|
@@ -34646,7 +34646,51 @@ function createTransformersJsLocalResponsesRuntime(options = {}) {
|
|
|
34646
34646
|
return toResponse(request, config.model, generatedText);
|
|
34647
34647
|
},
|
|
34648
34648
|
async *stream(request) {
|
|
34649
|
-
const
|
|
34649
|
+
const generator = await getGenerator(config);
|
|
34650
|
+
const messages = buildMessages(request, config.maxInputChars);
|
|
34651
|
+
const generationInput = renderGenerationInput(generator, messages);
|
|
34652
|
+
const pending = [];
|
|
34653
|
+
let wake = null;
|
|
34654
|
+
const push = (item) => {
|
|
34655
|
+
pending.push(item);
|
|
34656
|
+
if (wake) {
|
|
34657
|
+
wake();
|
|
34658
|
+
wake = null;
|
|
34659
|
+
}
|
|
34660
|
+
};
|
|
34661
|
+
const pull = async () => {
|
|
34662
|
+
while (pending.length === 0) {
|
|
34663
|
+
await new Promise((r) => {
|
|
34664
|
+
wake = r;
|
|
34665
|
+
});
|
|
34666
|
+
}
|
|
34667
|
+
return pending.shift();
|
|
34668
|
+
};
|
|
34669
|
+
const genPromise = generator(generationInput, {
|
|
34670
|
+
max_new_tokens: request.maxOutputTokens ?? config.maxNewTokens,
|
|
34671
|
+
temperature: request.temperature ?? config.temperature,
|
|
34672
|
+
top_p: request.topP ?? config.topP,
|
|
34673
|
+
repetition_penalty: config.repetitionPenalty,
|
|
34674
|
+
do_sample: (request.temperature ?? config.temperature) > 0,
|
|
34675
|
+
return_full_text: false,
|
|
34676
|
+
callback_function: (text) => {
|
|
34677
|
+
if (typeof text === "string" && text.length > 0) {
|
|
34678
|
+
push({ kind: "token", text });
|
|
34679
|
+
}
|
|
34680
|
+
}
|
|
34681
|
+
}).then(() => push({ kind: "done" })).catch((err) => push({ kind: "error", error: err }));
|
|
34682
|
+
let fullText = "";
|
|
34683
|
+
while (true) {
|
|
34684
|
+
const item = await pull();
|
|
34685
|
+
if (item.kind === "error") {
|
|
34686
|
+
throw item.error instanceof Error ? item.error : new Error(String(item.error));
|
|
34687
|
+
}
|
|
34688
|
+
if (item.kind === "done") break;
|
|
34689
|
+
fullText += item.text;
|
|
34690
|
+
yield { type: "text_delta", delta: item.text };
|
|
34691
|
+
}
|
|
34692
|
+
await genPromise;
|
|
34693
|
+
const response = toResponse(request, config.model, fullText);
|
|
34650
34694
|
const firstOutput = response.output[0];
|
|
34651
34695
|
if (firstOutput?.type === "tool_call") {
|
|
34652
34696
|
yield {
|
|
@@ -34656,8 +34700,6 @@ function createTransformersJsLocalResponsesRuntime(options = {}) {
|
|
|
34656
34700
|
name: firstOutput.toolCall?.name,
|
|
34657
34701
|
argumentsDelta: firstOutput.toolCall?.arguments
|
|
34658
34702
|
};
|
|
34659
|
-
} else if (firstOutput?.type === "text" && firstOutput.text) {
|
|
34660
|
-
yield { type: "text_delta", delta: firstOutput.text };
|
|
34661
34703
|
}
|
|
34662
34704
|
yield { type: "done", response };
|
|
34663
34705
|
}
|
|
@@ -34702,17 +34744,14 @@ async function runLocalGeneration(request, config) {
|
|
|
34702
34744
|
return extractGeneratedText(generation);
|
|
34703
34745
|
}
|
|
34704
34746
|
async function getGenerator(config) {
|
|
34705
|
-
const
|
|
34706
|
-
|
|
34707
|
-
resolveDevice(config.device),
|
|
34708
|
-
config.dtype
|
|
34709
|
-
]);
|
|
34747
|
+
const device = await resolveDevice(config.device);
|
|
34748
|
+
const key = JSON.stringify([config.runtimeModel, device, config.dtype]);
|
|
34710
34749
|
let pending = pipelineCache.get(key);
|
|
34711
34750
|
if (!pending) {
|
|
34712
34751
|
pending = (async () => {
|
|
34713
34752
|
const { pipeline: pipeline3 } = await importTransformers(config);
|
|
34714
34753
|
return pipeline3("text-generation", config.runtimeModel, {
|
|
34715
|
-
device
|
|
34754
|
+
device,
|
|
34716
34755
|
dtype: config.dtype
|
|
34717
34756
|
});
|
|
34718
34757
|
})();
|
|
@@ -34732,11 +34771,65 @@ async function importTransformers(config) {
|
|
|
34732
34771
|
}
|
|
34733
34772
|
return transformers;
|
|
34734
34773
|
}
|
|
34735
|
-
|
|
34736
|
-
|
|
34737
|
-
|
|
34774
|
+
// Outcome of the automatic device probe ("webgpu" or "wasm"); null until a probe has completed.
var resolvedDeviceCache = null;
// Probe currently in flight, shared by concurrent callers so the WebGPU probe is not run twice.
var resolvedDeviceProbe = null;

/**
 * Resolve the execution device to use for the local pipeline.
 *
 * An explicit "webgpu" or "wasm" request is returned unchanged. Any other value
 * triggers a one-time WebGPU probe whose result is cached for later calls.
 *
 * @param {string | undefined} device - Requested device.
 * @returns {Promise<string>} "webgpu" or "wasm".
 */
async function resolveDevice(device) {
  if (device === "webgpu" || device === "wasm") return device;
  if (resolvedDeviceCache) return resolvedDeviceCache;
  if (!resolvedDeviceProbe) {
    // Callers that arrive while the probe is pending reuse the same promise
    // instead of each creating (and tearing down) their own GPU device.
    resolvedDeviceProbe = probeWebGPU()
      .then((result) => {
        resolvedDeviceCache = result;
        return result;
      })
      .finally(() => {
        // Clear the in-flight marker so a rejected probe can be retried later.
        resolvedDeviceProbe = null;
      });
  }
  return resolvedDeviceProbe;
}
|
|
34782
|
+
/**
 * Check whether WebGPU is actually usable, not merely exposed on `navigator`.
 *
 * Runs a one-invocation compute shader that writes 42.0 into a storage buffer,
 * copies it to a mappable buffer and reads it back. Any missing API, missing
 * adapter, thrown error or unexpected read-back value yields "wasm".
 *
 * @returns {Promise<string>} "webgpu" when the round trip succeeds, otherwise "wasm".
 */
async function probeWebGPU() {
  // Declared outside the try so the finally block can release whatever was created.
  let device = null;
  let storageBuffer = null;
  let readBuffer = null;
  try {
    if (typeof navigator === "undefined" || !("gpu" in navigator)) return "wasm";
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) return "wasm";
    device = await adapter.requestDevice();

    const shaderModule = device.createShaderModule({
      code: `@group(0) @binding(0) var<storage, read_write> out: array<f32>;
@compute @workgroup_size(1)
fn main() { out[0] = 42.0; }`
    });
    storageBuffer = device.createBuffer({
      size: 4,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
    });
    readBuffer = device.createBuffer({
      size: 4,
      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
    });

    const bindGroupLayout = device.createBindGroupLayout({
      entries: [{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }]
    });
    const computePipeline = device.createComputePipeline({
      layout: device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout] }),
      compute: { module: shaderModule, entryPoint: "main" }
    });
    const bindGroup = device.createBindGroup({
      layout: bindGroupLayout,
      entries: [{ binding: 0, resource: { buffer: storageBuffer } }]
    });

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginComputePass();
    pass.setPipeline(computePipeline);
    pass.setBindGroup(0, bindGroup);
    pass.dispatchWorkgroups(1);
    pass.end();
    encoder.copyBufferToBuffer(storageBuffer, 0, readBuffer, 0, 4);
    device.queue.submit([encoder.finish()]);

    await readBuffer.mapAsync(GPUMapMode.READ);
    const value = new Float32Array(readBuffer.getMappedRange())[0];
    readBuffer.unmap();
    return value === 42 ? "webgpu" : "wasm";
  } catch {
    // Any failure along the way means WebGPU cannot be relied on here.
    return "wasm";
  } finally {
    // Release GPU resources on every path; previously a failure after
    // requestDevice() left the device and buffers alive.
    for (const resource of [storageBuffer, readBuffer, device]) {
      try {
        resource?.destroy();
      } catch {
        // Cleanup is best-effort and must not change the probe result.
      }
    }
  }
}
|
|
34741
34834
|
function renderGenerationInput(generator, messages) {
|
|
34742
34835
|
const applyChatTemplate = generator.tokenizer?.apply_chat_template;
|