@octomil/browser 1.2.0 → 1.4.0

This diff shows the differences between publicly released versions of this package as published to a supported public registry. The information in this diff is provided for informational purposes only and reflects the changes between package versions exactly as they appear in that registry.
package/dist/index.cjs CHANGED
@@ -34646,7 +34646,51 @@ function createTransformersJsLocalResponsesRuntime(options = {}) {
34646
34646
  return toResponse(request, config.model, generatedText);
34647
34647
  },
34648
34648
  async *stream(request) {
34649
- const response = await this.create(request);
34649
+ const generator = await getGenerator(config);
34650
+ const messages = buildMessages(request, config.maxInputChars);
34651
+ const generationInput = renderGenerationInput(generator, messages);
34652
+ const pending = [];
34653
+ let wake = null;
34654
+ const push = (item) => {
34655
+ pending.push(item);
34656
+ if (wake) {
34657
+ wake();
34658
+ wake = null;
34659
+ }
34660
+ };
34661
+ const pull = async () => {
34662
+ while (pending.length === 0) {
34663
+ await new Promise((r) => {
34664
+ wake = r;
34665
+ });
34666
+ }
34667
+ return pending.shift();
34668
+ };
34669
+ const genPromise = generator(generationInput, {
34670
+ max_new_tokens: request.maxOutputTokens ?? config.maxNewTokens,
34671
+ temperature: request.temperature ?? config.temperature,
34672
+ top_p: request.topP ?? config.topP,
34673
+ repetition_penalty: config.repetitionPenalty,
34674
+ do_sample: (request.temperature ?? config.temperature) > 0,
34675
+ return_full_text: false,
34676
+ callback_function: (text) => {
34677
+ if (typeof text === "string" && text.length > 0) {
34678
+ push({ kind: "token", text });
34679
+ }
34680
+ }
34681
+ }).then(() => push({ kind: "done" })).catch((err) => push({ kind: "error", error: err }));
34682
+ let fullText = "";
34683
+ while (true) {
34684
+ const item = await pull();
34685
+ if (item.kind === "error") {
34686
+ throw item.error instanceof Error ? item.error : new Error(String(item.error));
34687
+ }
34688
+ if (item.kind === "done") break;
34689
+ fullText += item.text;
34690
+ yield { type: "text_delta", delta: item.text };
34691
+ }
34692
+ await genPromise;
34693
+ const response = toResponse(request, config.model, fullText);
34650
34694
  const firstOutput = response.output[0];
34651
34695
  if (firstOutput?.type === "tool_call") {
34652
34696
  yield {
@@ -34656,8 +34700,6 @@ function createTransformersJsLocalResponsesRuntime(options = {}) {
34656
34700
  name: firstOutput.toolCall?.name,
34657
34701
  argumentsDelta: firstOutput.toolCall?.arguments
34658
34702
  };
34659
- } else if (firstOutput?.type === "text" && firstOutput.text) {
34660
- yield { type: "text_delta", delta: firstOutput.text };
34661
34703
  }
34662
34704
  yield { type: "done", response };
34663
34705
  }
@@ -34702,17 +34744,14 @@ async function runLocalGeneration(request, config) {
34702
34744
  return extractGeneratedText(generation);
34703
34745
  }
34704
34746
  async function getGenerator(config) {
34705
- const key = JSON.stringify([
34706
- config.runtimeModel,
34707
- resolveDevice(config.device),
34708
- config.dtype
34709
- ]);
34747
+ const device = await resolveDevice(config.device);
34748
+ const key = JSON.stringify([config.runtimeModel, device, config.dtype]);
34710
34749
  let pending = pipelineCache.get(key);
34711
34750
  if (!pending) {
34712
34751
  pending = (async () => {
34713
34752
  const { pipeline: pipeline3 } = await importTransformers(config);
34714
34753
  return pipeline3("text-generation", config.runtimeModel, {
34715
- device: resolveDevice(config.device),
34754
+ device,
34716
34755
  dtype: config.dtype
34717
34756
  });
34718
34757
  })();
@@ -34732,11 +34771,65 @@ async function importTransformers(config) {
34732
34771
  }
34733
34772
  return transformers;
34734
34773
  }
34735
- function resolveDevice(device) {
34736
- if (device === "webgpu" || device === "wasm") {
34737
- return device;
34774
+ var resolvedDeviceCache = null;
34775
+ async function resolveDevice(device) {
34776
+ if (device === "webgpu" || device === "wasm") return device;
34777
+ if (resolvedDeviceCache) return resolvedDeviceCache;
34778
+ const result = await probeWebGPU();
34779
+ resolvedDeviceCache = result;
34780
+ return result;
34781
+ }
34782
+ async function probeWebGPU() {
34783
+ try {
34784
+ if (typeof navigator === "undefined" || !("gpu" in navigator)) return "wasm";
34785
+ const gpu = navigator.gpu;
34786
+ const adapter = await gpu.requestAdapter();
34787
+ if (!adapter) return "wasm";
34788
+ const device = await adapter.requestDevice();
34789
+ const module2 = device.createShaderModule({
34790
+ code: `@group(0) @binding(0) var<storage, read_write> out: array<f32>;
34791
+ @compute @workgroup_size(1)
34792
+ fn main() { out[0] = 42.0; }`
34793
+ });
34794
+ const storageBuffer = device.createBuffer({
34795
+ size: 4,
34796
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
34797
+ });
34798
+ const readBuffer = device.createBuffer({
34799
+ size: 4,
34800
+ usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
34801
+ });
34802
+ const bindGroupLayout = device.createBindGroupLayout({
34803
+ entries: [{ binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }]
34804
+ });
34805
+ const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout] });
34806
+ const pipeline3 = device.createComputePipeline({
34807
+ layout: pipelineLayout,
34808
+ compute: { module: module2, entryPoint: "main" }
34809
+ });
34810
+ const bindGroup = device.createBindGroup({
34811
+ layout: bindGroupLayout,
34812
+ entries: [{ binding: 0, resource: { buffer: storageBuffer } }]
34813
+ });
34814
+ const encoder = device.createCommandEncoder();
34815
+ const pass = encoder.beginComputePass();
34816
+ pass.setPipeline(pipeline3);
34817
+ pass.setBindGroup(0, bindGroup);
34818
+ pass.dispatchWorkgroups(1);
34819
+ pass.end();
34820
+ encoder.copyBufferToBuffer(storageBuffer, 0, readBuffer, 0, 4);
34821
+ device.queue.submit([encoder.finish()]);
34822
+ await readBuffer.mapAsync(GPUMapMode.READ);
34823
+ const data = new Float32Array(readBuffer.getMappedRange());
34824
+ const value = data[0];
34825
+ readBuffer.unmap();
34826
+ storageBuffer.destroy();
34827
+ readBuffer.destroy();
34828
+ device.destroy();
34829
+ return value === 42 ? "webgpu" : "wasm";
34830
+ } catch {
34831
+ return "wasm";
34738
34832
  }
34739
- return typeof navigator !== "undefined" && "gpu" in navigator ? "webgpu" : "wasm";
34740
34833
  }
34741
34834
  function renderGenerationInput(generator, messages) {
34742
34835
  const applyChatTemplate = generator.tokenizer?.apply_chat_template;