@simulatte/doppler 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +25 -6
- package/package.json +25 -38
- package/src/browser/browser-converter.js +5 -0
- package/src/client/doppler-api.browser.js +6 -0
- package/src/client/doppler-api.d.ts +3 -0
- package/src/client/doppler-api.js +11 -2
- package/src/client/doppler-registry.js +3 -5
- package/src/client/doppler-registry.json +2 -2
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +13 -0
- package/src/config/kernels/kernel-ref-digests.js +23 -21
- package/src/config/kernels/moe/mixtral.paths.json +46 -0
- package/src/config/kernels/registry.json +74 -0
- package/src/config/loader.js +9 -0
- package/src/config/merge-contract-check.js +7 -0
- package/src/config/platforms/loader.js +3 -1
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-nosubgroups.json +16 -16
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +8 -8
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-small-attn.json +61 -0
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +21 -0
- package/src/config/presets/models/gemma2.json +2 -1
- package/src/config/presets/models/gemma3.json +4 -1
- package/src/config/presets/models/gemma4.json +61 -0
- package/src/config/presets/models/granite-docling.json +70 -0
- package/src/config/presets/models/lfm2.json +6 -1
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/models/qwen3_vl.json +40 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +2 -1
- package/src/config/presets/runtime/experiments/verify/lfm2-verify.json +46 -0
- package/src/config/presets/runtime/experiments/verify/translategemma-verify.json +39 -0
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/modes/trace-layers.json +1 -0
- package/src/config/presets/runtime/tiers/gemma4-16gb.json +69 -0
- package/src/config/presets/runtime/tiers/gemma4-24gb.json +66 -0
- package/src/config/presets/runtime/tiers/gemma4-32gb.json +66 -0
- package/src/config/runtime.js +3 -0
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +40 -0
- package/src/config/schema/debug.schema.js +28 -0
- package/src/config/schema/index.js +2 -0
- package/src/config/schema/inference-defaults.schema.js +1 -1
- package/src/config/schema/kernel-path.schema.d.ts +1 -0
- package/src/config/schema/manifest.schema.d.ts +1 -1
- package/src/config/schema/manifest.schema.js +1 -1
- package/src/config/schema/memory-limits.schema.js +2 -2
- package/src/config/schema/storage.schema.js +2 -2
- package/src/converter/conversion-plan.js +11 -3
- package/src/converter/core.js +19 -8
- package/src/converter/manifest-inference.js +12 -22
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +5 -1
- package/src/converter/quantizer.d.ts +5 -0
- package/src/converter/quantizer.js +34 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/distribution/shard-delivery.js +40 -1
- package/src/formats/rdrr/classification.js +32 -0
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +14 -1
- package/src/gpu/kernel-runtime.js +4 -2
- package/src/gpu/kernels/attention.js +2 -1
- package/src/gpu/kernels/dequant_f16_out.wgsl +4 -2
- package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +5 -2
- package/src/gpu/kernels/dequant_shared.wgsl +4 -2
- package/src/gpu/kernels/dequant_shared_vec4.wgsl +4 -2
- package/src/gpu/kernels/dequant_subgroup.wgsl +6 -2
- package/src/gpu/kernels/gated-short-conv.d.ts +63 -0
- package/src/gpu/kernels/gated-short-conv.js +284 -0
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/linear-attention-core.js +37 -17
- package/src/gpu/kernels/matmul-selection.js +48 -4
- package/src/gpu/kernels/matmul.d.ts +5 -0
- package/src/gpu/kernels/matmul.js +71 -2
- package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +77 -79
- package/src/gpu/kernels/rmsnorm.js +9 -2
- package/src/gpu/kernels/sample.js +1 -3
- package/src/gpu/kernels/sample.wgsl +39 -9
- package/src/gpu/kernels/sample_f16.wgsl +38 -8
- package/src/gpu/kernels/shader-cache.js +9 -4
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/inference/browser-harness.d.ts +2 -0
- package/src/inference/browser-harness.js +20 -1
- package/src/inference/kv-cache/base.js +3 -10
- package/src/inference/pipelines/diffusion/helpers.js +3 -0
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +10 -3
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +13 -1
- package/src/inference/pipelines/text/attention/projections.js +54 -13
- package/src/inference/pipelines/text/attention/record.js +16 -6
- package/src/inference/pipelines/text/attention/run.js +59 -6
- package/src/inference/pipelines/text/config.d.ts +1 -0
- package/src/inference/pipelines/text/config.js +46 -4
- package/src/inference/pipelines/text/embed.js +26 -7
- package/src/inference/pipelines/text/execution-plan.js +5 -4
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +10 -3
- package/src/inference/pipelines/text/execution-v0.js +12 -1
- package/src/inference/pipelines/text/generator-helpers.js +1 -0
- package/src/inference/pipelines/text/generator-runtime.js +19 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +15 -0
- package/src/inference/pipelines/text/generator-steps.js +71 -26
- package/src/inference/pipelines/text/generator.d.ts +5 -0
- package/src/inference/pipelines/text/generator.js +353 -166
- package/src/inference/pipelines/text/init.d.ts +15 -0
- package/src/inference/pipelines/text/init.js +35 -10
- package/src/inference/pipelines/text/layer.js +38 -8
- package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
- package/src/inference/pipelines/text/linear-attention.js +33 -3
- package/src/inference/pipelines/text/logits/gpu.js +2 -2
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +3 -1
- package/src/inference/pipelines/text/model-load.js +3 -0
- package/src/inference/pipelines/text/moe-gpu.js +21 -3
- package/src/inference/pipelines/text/moe-shape-validator.d.ts +9 -0
- package/src/inference/pipelines/text/moe-shape-validator.js +31 -11
- package/src/inference/pipelines/text/ops.js +123 -53
- package/src/inference/pipelines/text/probes.js +1 -0
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/state.js +2 -0
- package/src/inference/pipelines/text.d.ts +5 -0
- package/src/inference/pipelines/text.js +59 -1
- package/src/inference/pipelines/vision/encoder.js +386 -0
- package/src/inference/pipelines/vision/image-preprocess.js +151 -0
- package/src/inference/pipelines/vision/index.js +173 -0
- package/src/inference/pipelines/vision/ops.js +78 -0
- package/src/inference/pipelines/vision/patch-embed.js +151 -0
- package/src/inference/test-harness.js +11 -9
- package/src/loader/doppler-loader.d.ts +3 -0
- package/src/loader/doppler-loader.js +20 -3
- package/src/loader/experts/expert-cache.js +6 -2
- package/src/loader/experts/expert-loader.js +6 -2
- package/src/loader/final-weights-loader.js +2 -0
- package/src/loader/layer-loader.js +42 -3
- package/src/loader/manifest-config.js +3 -1
- package/src/loader/shard-cache.js +3 -2
- package/src/loader/tensors/tensor-loader.d.ts +3 -0
- package/src/loader/tensors/tensor-loader.js +130 -4
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +2 -2
- package/src/rules/kernels/moe.rules.mixtral.json +75 -0
- package/src/rules/kernels/softmax.rules.json +2 -0
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.d.ts +1 -0
- package/src/rules/rule-registry.js +4 -0
- package/src/storage/downloader.js +2 -1
- package/src/storage/quickstart-downloader.d.ts +3 -0
- package/src/storage/quickstart-downloader.js +27 -30
- package/src/storage/shard-manager.js +4 -3
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/node-converter.js +28 -7
- package/src/tooling/node-source-runtime.js +65 -5
- package/src/tooling/node-webgpu.js +24 -7
- package/src/types/model.d.ts +5 -0
- package/src/utils/hf-resolve-url.d.ts +16 -0
- package/src/utils/hf-resolve-url.js +17 -0
- package/src/version.js +1 -1
- package/tools/doppler-cli.js +6 -1
- package/src/tooling/node-convert.d.ts +0 -54
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
import { createReadStream } from 'node:fs';
|
|
1
2
|
import fs from 'node:fs/promises';
|
|
3
|
+
import { createHash } from 'node:crypto';
|
|
2
4
|
import path from 'node:path';
|
|
3
5
|
import {
|
|
4
6
|
HEADER_READ_SIZE,
|
|
@@ -16,7 +18,6 @@ import { parseTransformerModel } from '../converter/parsers/transformer.js';
|
|
|
16
18
|
import { parseGGUFHeader } from '../formats/gguf/types.js';
|
|
17
19
|
import { parseSafetensorsHeader } from '../formats/safetensors/types.js';
|
|
18
20
|
import { log } from '../debug/index.js';
|
|
19
|
-
import { computeHash } from '../storage/shard-manager.js';
|
|
20
21
|
import {
|
|
21
22
|
buildSourceRuntimeBundle,
|
|
22
23
|
createSourceStorageContext,
|
|
@@ -137,7 +138,12 @@ async function readRange(filePath, offset, length) {
|
|
|
137
138
|
return new ArrayBuffer(0);
|
|
138
139
|
}
|
|
139
140
|
const out = Buffer.allocUnsafe(end - start);
|
|
140
|
-
|
|
141
|
+
let pos = 0;
|
|
142
|
+
while (pos < out.length) {
|
|
143
|
+
const { bytesRead } = await handle.read(out, pos, out.length - pos, start + pos);
|
|
144
|
+
if (bytesRead === 0) break;
|
|
145
|
+
pos += bytesRead;
|
|
146
|
+
}
|
|
141
147
|
return out.buffer.slice(out.byteOffset, out.byteOffset + out.byteLength);
|
|
142
148
|
} finally {
|
|
143
149
|
await handle.close();
|
|
@@ -411,23 +417,74 @@ function buildNodeFileReaders() {
|
|
|
411
417
|
};
|
|
412
418
|
}
|
|
413
419
|
|
|
420
|
+
// Source dtype → compute precision mapping for source-runtime inference.
|
|
421
|
+
// BF16/F32 sources require f32 compute (BF16 has no native WebGPU support).
|
|
422
|
+
// Quantized formats require f32 compute for dequantization accuracy.
|
|
423
|
+
// F16 sources can use f16 compute directly.
|
|
424
|
+
const SOURCE_QUANT_COMPUTE_MAP = {
|
|
425
|
+
'F16': 'f16',
|
|
426
|
+
'BF16': 'f32',
|
|
427
|
+
'F32': 'f32',
|
|
428
|
+
'Q4_K': 'f32',
|
|
429
|
+
'Q4_K_M': 'f32',
|
|
430
|
+
'Q6_K': 'f32',
|
|
431
|
+
};
|
|
432
|
+
const SOURCE_COMPUTE_DEFAULT = 'f16';
|
|
433
|
+
|
|
434
|
+
function resolveSourceRuntimeComputePrecision(tensors, sourceQuantization) {
|
|
435
|
+
const dtypes = new Set();
|
|
436
|
+
for (const tensor of Array.isArray(tensors) ? tensors : []) {
|
|
437
|
+
const dtype = String(tensor?.dtype || '').trim().toUpperCase();
|
|
438
|
+
if (dtype) {
|
|
439
|
+
dtypes.add(dtype);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
// If any tensor requires f32 compute, use f32 for all.
|
|
443
|
+
for (const dtype of dtypes) {
|
|
444
|
+
if (SOURCE_QUANT_COMPUTE_MAP[dtype] === 'f32') {
|
|
445
|
+
return 'f32';
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const normalized = String(sourceQuantization || '').trim().toUpperCase();
|
|
450
|
+
return SOURCE_QUANT_COMPUTE_MAP[normalized] ?? SOURCE_COMPUTE_DEFAULT;
|
|
451
|
+
}
|
|
452
|
+
|
|
414
453
|
async function addHashesToFileEntries(entries, hashAlgorithm) {
|
|
415
454
|
const normalized = [];
|
|
416
455
|
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
417
456
|
const filePath = normalizePath(entry?.path);
|
|
418
457
|
if (!filePath) continue;
|
|
419
|
-
const
|
|
458
|
+
const stats = await getPathStats(filePath, `source asset (${filePath})`);
|
|
420
459
|
normalized.push({
|
|
421
460
|
...entry,
|
|
422
461
|
path: filePath,
|
|
423
|
-
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) :
|
|
424
|
-
hash: await
|
|
462
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : Number(stats.size),
|
|
463
|
+
hash: await computeFileHash(filePath, hashAlgorithm),
|
|
425
464
|
hashAlgorithm,
|
|
426
465
|
});
|
|
427
466
|
}
|
|
428
467
|
return normalized;
|
|
429
468
|
}
|
|
430
469
|
|
|
470
|
+
async function computeFileHash(filePath, hashAlgorithm) {
|
|
471
|
+
return new Promise((resolve, reject) => {
|
|
472
|
+
const hash = createHash(hashAlgorithm);
|
|
473
|
+
const stream = createReadStream(filePath);
|
|
474
|
+
|
|
475
|
+
stream.on('data', (chunk) => {
|
|
476
|
+
hash.update(chunk);
|
|
477
|
+
});
|
|
478
|
+
stream.on('end', () => {
|
|
479
|
+
resolve(hash.digest('hex'));
|
|
480
|
+
});
|
|
481
|
+
stream.on('error', (error) => {
|
|
482
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
483
|
+
reject(new Error(`Failed to stream source asset "${filePath}" for hashing: ${message}`));
|
|
484
|
+
});
|
|
485
|
+
});
|
|
486
|
+
}
|
|
487
|
+
|
|
431
488
|
export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
432
489
|
const inputPath = normalizePath(options.inputPath);
|
|
433
490
|
if (!inputPath) {
|
|
@@ -473,6 +530,9 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
473
530
|
assertSupportedSourceDtypes(parsed.tensors, parsed.sourceKind);
|
|
474
531
|
|
|
475
532
|
const converterConfig = createConverterConfig({
|
|
533
|
+
quantization: {
|
|
534
|
+
computePrecision: resolveSourceRuntimeComputePrecision(parsed.tensors, parsed.sourceQuantization),
|
|
535
|
+
},
|
|
476
536
|
output: {
|
|
477
537
|
modelBaseId: options.modelId || null,
|
|
478
538
|
},
|
|
@@ -51,7 +51,7 @@ function resolveCandidateModuleSpecifier(candidate) {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
function resolveDefaultWebgpuModuleSpecifiers() {
|
|
54
|
-
return ['
|
|
54
|
+
return ['webgpu', '@simulatte/webgpu'];
|
|
55
55
|
}
|
|
56
56
|
|
|
57
57
|
function resolveExplicitWebgpuModuleSpecifier() {
|
|
@@ -189,18 +189,35 @@ function resolveGpuFromModule(mod) {
|
|
|
189
189
|
return fromModule;
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
192
|
+
const tryCreateFactory = (factory) => {
|
|
193
|
+
if (typeof factory !== 'function') {
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
195
196
|
try {
|
|
196
|
-
|
|
197
|
+
return factory([]);
|
|
197
198
|
} catch {
|
|
198
199
|
try {
|
|
199
|
-
|
|
200
|
+
return factory();
|
|
200
201
|
} catch {
|
|
201
|
-
|
|
202
|
+
return null;
|
|
202
203
|
}
|
|
203
204
|
}
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
const instanceFactory = mod.createInstance || mod.default?.createInstance;
|
|
208
|
+
const createdFromInstanceFactory = tryCreateFactory(instanceFactory);
|
|
209
|
+
if (createdFromInstanceFactory) {
|
|
210
|
+
if (typeof createdFromInstanceFactory.requestAdapter === 'function') {
|
|
211
|
+
return createdFromInstanceFactory;
|
|
212
|
+
}
|
|
213
|
+
if (createdFromInstanceFactory.gpu && typeof createdFromInstanceFactory.gpu.requestAdapter === 'function') {
|
|
214
|
+
return createdFromInstanceFactory.gpu;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
const factory = mod.create || mod.default?.create;
|
|
219
|
+
if (typeof factory === 'function') {
|
|
220
|
+
const created = tryCreateFactory(factory);
|
|
204
221
|
if (created) {
|
|
205
222
|
if (typeof created.requestAdapter === 'function') {
|
|
206
223
|
return created;
|
package/src/types/model.d.ts
CHANGED
|
@@ -9,7 +9,11 @@ export type ModelArchitecture =
|
|
|
9
9
|
| 'gemma'
|
|
10
10
|
| 'gemma2'
|
|
11
11
|
| 'gemma3'
|
|
12
|
+
| 'embeddinggemma'
|
|
12
13
|
| 'functiongemma'
|
|
14
|
+
| 'janus_text'
|
|
15
|
+
| 'lfm2'
|
|
16
|
+
| 'modernbert'
|
|
13
17
|
| 'qwen2'
|
|
14
18
|
| 'qwen3'
|
|
15
19
|
| 'phi3'
|
|
@@ -19,6 +23,7 @@ export type ModelArchitecture =
|
|
|
19
23
|
| 'deepseek'
|
|
20
24
|
| 'mamba'
|
|
21
25
|
| 'kimi_k2'
|
|
26
|
+
| 'translategemma'
|
|
22
27
|
| 'transformer';
|
|
23
28
|
|
|
24
29
|
/** Attention type variants */
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export interface HfResolveConfig {
|
|
2
|
+
repoId: string;
|
|
3
|
+
revision?: string | null;
|
|
4
|
+
path: string;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
export interface HfResolveUrlOptions {
|
|
8
|
+
cdnBasePath?: string;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export declare const DEFAULT_HF_CDN_BASE_URL: string;
|
|
12
|
+
|
|
13
|
+
export declare function buildHfResolveBaseUrl(
|
|
14
|
+
hfConfig: HfResolveConfig | null | undefined,
|
|
15
|
+
options?: HfResolveUrlOptions
|
|
16
|
+
): string;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export const DEFAULT_HF_CDN_BASE_URL = 'https://huggingface.co';
|
|
2
|
+
|
|
3
|
+
export function buildHfResolveBaseUrl(hfConfig, options = {}) {
|
|
4
|
+
const repoId = typeof hfConfig?.repoId === 'string' ? hfConfig.repoId.trim() : '';
|
|
5
|
+
const repoPath = typeof hfConfig?.path === 'string' ? hfConfig.path.trim().replace(/^\/+/, '') : '';
|
|
6
|
+
if (!repoId || !repoPath) {
|
|
7
|
+
throw new Error('Hosted Hugging Face source requires repoId and path.');
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
const revision = typeof hfConfig?.revision === 'string' && hfConfig.revision.trim().length > 0
|
|
11
|
+
? hfConfig.revision.trim()
|
|
12
|
+
: 'main';
|
|
13
|
+
const cdnBasePath = typeof options?.cdnBasePath === 'string' && options.cdnBasePath.trim().length > 0
|
|
14
|
+
? options.cdnBasePath.trim()
|
|
15
|
+
: DEFAULT_HF_CDN_BASE_URL;
|
|
16
|
+
return `${cdnBasePath.replace(/\/$/, '')}/${repoId}/resolve/${revision}/${repoPath}`;
|
|
17
|
+
}
|
package/src/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const DOPPLER_VERSION = '0.1.
|
|
1
|
+
export const DOPPLER_VERSION = '0.1.9';
|
|
2
2
|
export const DOPPLER_PROVIDER_VERSION = DOPPLER_VERSION;
|
package/tools/doppler-cli.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
import { existsSync } from 'node:fs';
|
|
3
4
|
import fs from 'node:fs/promises';
|
|
4
5
|
import path from 'node:path';
|
|
5
6
|
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
@@ -13,7 +14,8 @@ import { createToolingErrorEnvelope } from '../src/tooling/command-envelope.js';
|
|
|
13
14
|
|
|
14
15
|
const NODE_WEBGPU_INCOMPLETE_MESSAGE = 'node command: WebGPU runtime is incomplete in Node';
|
|
15
16
|
const CLI_POLICY_PATH = fileURLToPath(new URL('./configs/cli/doppler-cli-policy.json', import.meta.url));
|
|
16
|
-
const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT
|
|
17
|
+
const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT
|
|
18
|
+
|| (existsSync('/Volumes/models') ? '/Volumes/models' : '/media/x/models');
|
|
17
19
|
const DEFAULT_EXTERNAL_RDRR_ROOT = path.join(DEFAULT_EXTERNAL_MODELS_ROOT, 'rdrr');
|
|
18
20
|
const DEFAULT_CLI_POLICY = {
|
|
19
21
|
defaults: {
|
|
@@ -1260,6 +1262,9 @@ function printMetricsSummary(result) {
|
|
|
1260
1262
|
`prefill=${formatNumber(metrics.prefillTokensPerSec)} ` +
|
|
1261
1263
|
`decode=${formatNumber(metrics.decodeTokensPerSec)}`
|
|
1262
1264
|
);
|
|
1265
|
+
if (typeof result.output === 'string' && result.output.length > 0) {
|
|
1266
|
+
console.log(`[output] ${quoteOneLine(result.output)}`);
|
|
1267
|
+
}
|
|
1263
1268
|
printExecutionContractSummary(result);
|
|
1264
1269
|
printExecutionV0GraphSummary(metrics.executionV0GraphContractArtifact);
|
|
1265
1270
|
return;
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import type { ConverterConfigSchema } from '../config/schema/converter.schema.js';
|
|
2
|
-
import type { ExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
3
|
-
import type { ExecutionV0GraphContractArtifact } from '../config/execution-v0-graph-contract-check.js';
|
|
4
|
-
import type { ManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
|
|
5
|
-
import type { SavedReportInfo } from '../storage/reports.js';
|
|
6
|
-
|
|
7
|
-
export interface NodeConvertProgress {
|
|
8
|
-
stage: string | null;
|
|
9
|
-
current: number | null;
|
|
10
|
-
total: number | null;
|
|
11
|
-
message: string | null;
|
|
12
|
-
tensorName?: string | null;
|
|
13
|
-
tensorBytesCurrent?: number | null;
|
|
14
|
-
tensorBytesTotal?: number | null;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export interface NodeConvertExecutionConfig {
|
|
18
|
-
workers?: number | null;
|
|
19
|
-
workerCountPolicy?: 'cap' | 'error' | null;
|
|
20
|
-
maxInFlightJobs?: number | null;
|
|
21
|
-
rowChunkRows?: number | null;
|
|
22
|
-
rowChunkMinTensorBytes?: number | null;
|
|
23
|
-
useGpuCast?: boolean | null;
|
|
24
|
-
gpuCastMinTensorBytes?: number | null;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export interface ConvertSafetensorsDirectoryOptions {
|
|
28
|
-
/** Directory with safetensors/diffusion assets, or a direct .gguf file path. */
|
|
29
|
-
inputDir: string;
|
|
30
|
-
outputDir?: string | null;
|
|
31
|
-
modelId?: string | null;
|
|
32
|
-
converterConfig?: Partial<ConverterConfigSchema> | null;
|
|
33
|
-
execution?: NodeConvertExecutionConfig | null;
|
|
34
|
-
onProgress?: (progress: NodeConvertProgress) => void;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
export interface ConvertSafetensorsDirectoryResult {
|
|
38
|
-
manifest: Record<string, unknown>;
|
|
39
|
-
shardCount: number;
|
|
40
|
-
tensorCount: number;
|
|
41
|
-
executionContractArtifact: ExecutionContractArtifact | null;
|
|
42
|
-
executionV0GraphContractArtifact: ExecutionV0GraphContractArtifact | null;
|
|
43
|
-
layerPatternContractArtifact: Record<string, unknown> | null;
|
|
44
|
-
requiredInferenceFieldsArtifact: ManifestRequiredInferenceFieldsArtifact | null;
|
|
45
|
-
report: Record<string, unknown>;
|
|
46
|
-
reportInfo: SavedReportInfo;
|
|
47
|
-
presetId: string;
|
|
48
|
-
modelType: string;
|
|
49
|
-
outputDir: string;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export declare function convertSafetensorsDirectory(
|
|
53
|
-
options: ConvertSafetensorsDirectoryOptions
|
|
54
|
-
): Promise<ConvertSafetensorsDirectoryResult>;
|