@simulatte/doppler 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -1
- package/README.md +25 -6
- package/package.json +5 -3
- package/src/client/doppler-api.browser.js +6 -0
- package/src/client/doppler-api.d.ts +3 -0
- package/src/client/doppler-api.js +11 -2
- package/src/client/doppler-registry.js +3 -5
- package/src/client/doppler-registry.json +16 -0
- package/src/config/kernels/kernel-ref-digests.js +23 -21
- package/src/config/kernels/moe/mixtral.paths.json +46 -0
- package/src/config/loader.js +6 -0
- package/src/config/platforms/loader.js +3 -1
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-nosubgroups.json +16 -16
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-online.json +8 -8
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32a-small-attn.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +7 -0
- package/src/config/presets/models/gemma3.json +2 -1
- package/src/config/presets/models/gemma4.json +61 -0
- package/src/config/presets/models/granite-docling.json +70 -0
- package/src/config/presets/models/lfm2.json +6 -1
- package/src/config/presets/models/qwen3_vl.json +40 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +2 -1
- package/src/config/presets/runtime/experiments/verify/lfm2-verify.json +46 -0
- package/src/config/presets/runtime/experiments/verify/translategemma-verify.json +39 -0
- package/src/config/presets/runtime/modes/trace-layers.json +1 -0
- package/src/config/presets/runtime/tiers/gemma4-16gb.json +69 -0
- package/src/config/presets/runtime/tiers/gemma4-24gb.json +66 -0
- package/src/config/presets/runtime/tiers/gemma4-32gb.json +66 -0
- package/src/config/runtime.js +3 -0
- package/src/config/schema/debug.schema.d.ts +40 -0
- package/src/config/schema/debug.schema.js +28 -0
- package/src/config/schema/index.js +2 -0
- package/src/config/schema/inference-defaults.schema.js +1 -1
- package/src/config/schema/kernel-path.schema.d.ts +1 -0
- package/src/config/schema/memory-limits.schema.js +2 -2
- package/src/config/schema/storage.schema.js +1 -1
- package/src/converter/conversion-plan.js +1 -1
- package/src/converter/core.js +17 -8
- package/src/converter/quantizer.d.ts +5 -0
- package/src/converter/quantizer.js +15 -0
- package/src/distribution/shard-delivery.js +34 -0
- package/src/formats/rdrr/classification.js +32 -0
- package/src/gpu/kernel-runtime.js +4 -2
- package/src/gpu/kernels/attention.js +2 -1
- package/src/gpu/kernels/dequant_f16_out.wgsl +4 -2
- package/src/gpu/kernels/dequant_f16_out_vec4.wgsl +5 -2
- package/src/gpu/kernels/dequant_shared.wgsl +4 -2
- package/src/gpu/kernels/dequant_shared_vec4.wgsl +4 -2
- package/src/gpu/kernels/dequant_subgroup.wgsl +6 -2
- package/src/gpu/kernels/gated-short-conv.d.ts +63 -0
- package/src/gpu/kernels/gated-short-conv.js +284 -0
- package/src/gpu/kernels/linear-attention-core.js +37 -17
- package/src/gpu/kernels/matmul-selection.js +1 -0
- package/src/gpu/kernels/matmul.d.ts +3 -0
- package/src/gpu/kernels/matmul.js +70 -1
- package/src/gpu/kernels/matmul_gemv_subgroup.wgsl +77 -79
- package/src/gpu/kernels/sample.js +1 -3
- package/src/gpu/kernels/sample.wgsl +39 -9
- package/src/gpu/kernels/sample_f16.wgsl +38 -8
- package/src/gpu/kernels/shader-cache.js +9 -4
- package/src/inference/kv-cache/base.js +3 -10
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +2 -1
- package/src/inference/pipelines/text/attention/projections.d.ts +3 -0
- package/src/inference/pipelines/text/attention/projections.js +13 -2
- package/src/inference/pipelines/text/attention/record.js +1 -0
- package/src/inference/pipelines/text/attention/run.js +9 -0
- package/src/inference/pipelines/text/config.d.ts +1 -0
- package/src/inference/pipelines/text/config.js +32 -4
- package/src/inference/pipelines/text/embed.js +26 -7
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +10 -3
- package/src/inference/pipelines/text/execution-v0.js +12 -1
- package/src/inference/pipelines/text/generator-helpers.js +1 -0
- package/src/inference/pipelines/text/generator-runtime.js +14 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +9 -0
- package/src/inference/pipelines/text/generator-steps.js +46 -29
- package/src/inference/pipelines/text/generator.d.ts +5 -0
- package/src/inference/pipelines/text/generator.js +320 -166
- package/src/inference/pipelines/text/init.d.ts +2 -0
- package/src/inference/pipelines/text/init.js +19 -5
- package/src/inference/pipelines/text/layer.js +37 -8
- package/src/inference/pipelines/text/moe-gpu.js +21 -3
- package/src/inference/pipelines/text/moe-shape-validator.d.ts +9 -0
- package/src/inference/pipelines/text/moe-shape-validator.js +31 -11
- package/src/inference/pipelines/text/ops.js +123 -53
- package/src/inference/pipelines/text/probes.js +1 -0
- package/src/inference/pipelines/text/state.js +2 -0
- package/src/inference/pipelines/text.d.ts +5 -0
- package/src/inference/pipelines/text.js +59 -1
- package/src/inference/pipelines/vision/encoder.js +386 -0
- package/src/inference/pipelines/vision/image-preprocess.js +151 -0
- package/src/inference/pipelines/vision/index.js +173 -0
- package/src/inference/pipelines/vision/ops.js +78 -0
- package/src/inference/pipelines/vision/patch-embed.js +151 -0
- package/src/inference/test-harness.js +9 -7
- package/src/loader/doppler-loader.d.ts +3 -0
- package/src/loader/doppler-loader.js +20 -3
- package/src/loader/experts/expert-cache.js +6 -2
- package/src/loader/experts/expert-loader.js +6 -2
- package/src/loader/layer-loader.js +42 -3
- package/src/loader/manifest-config.js +3 -1
- package/src/loader/tensors/tensor-loader.d.ts +3 -0
- package/src/loader/tensors/tensor-loader.js +124 -3
- package/src/rules/kernels/moe.rules.mixtral.json +75 -0
- package/src/rules/kernels/softmax.rules.json +2 -0
- package/src/rules/rule-registry.d.ts +1 -0
- package/src/rules/rule-registry.js +2 -0
- package/src/storage/quickstart-downloader.d.ts +3 -0
- package/src/storage/quickstart-downloader.js +27 -30
- package/src/tooling/node-converter.js +25 -7
- package/src/tooling/node-source-runtime.js +29 -5
- package/src/tooling/node-webgpu.js +24 -7
- package/src/utils/hf-resolve-url.d.ts +16 -0
- package/src/utils/hf-resolve-url.js +17 -0
- package/src/version.js +1 -1
- package/src/tooling/node-convert.d.ts +0 -54
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
{
|
|
2
|
+
"vendorQuirkProfile": [
|
|
3
|
+
{
|
|
4
|
+
"match": {
|
|
5
|
+
"vendor": {
|
|
6
|
+
"contains": ["intel", "amd"]
|
|
7
|
+
}
|
|
8
|
+
},
|
|
9
|
+
"value": {
|
|
10
|
+
"preferVec4Dequant": false,
|
|
11
|
+
"dequantTileShape": "scalar",
|
|
12
|
+
"routerWorkgroupSize": 128,
|
|
13
|
+
"maxTokensPerExpertScale": 0.85
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"match": {
|
|
18
|
+
"vendor": {
|
|
19
|
+
"contains": ["nvidia", "apple", "qualcomm"]
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"value": {
|
|
23
|
+
"preferVec4Dequant": false,
|
|
24
|
+
"dequantTileShape": "scalar",
|
|
25
|
+
"routerWorkgroupSize": 256,
|
|
26
|
+
"maxTokensPerExpertScale": 1.0
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"match": {},
|
|
31
|
+
"value": {
|
|
32
|
+
"preferVec4Dequant": false,
|
|
33
|
+
"dequantTileShape": "scalar",
|
|
34
|
+
"routerWorkgroupSize": 128,
|
|
35
|
+
"maxTokensPerExpertScale": 1.0
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
],
|
|
39
|
+
"routerTopKVariant": [
|
|
40
|
+
{
|
|
41
|
+
"match": { "modelType": "mixtral", "hasF16": true, "hasSubgroups": true, "routerDtype": "f32" },
|
|
42
|
+
"value": "softmax_topk_f32_subgroup"
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"match": { "modelType": "mixtral", "routerDtype": "f32" },
|
|
46
|
+
"value": "softmax_topk_f32"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"match": { "modelType": "mixtral" },
|
|
50
|
+
"value": "softmax_topk_f32"
|
|
51
|
+
}
|
|
52
|
+
],
|
|
53
|
+
"dequantVariant": [
|
|
54
|
+
{
|
|
55
|
+
"match": { "modelType": "mixtral", "weightsDtype": "q4k", "hasF16": true, "hasSubgroups": true, "outputDtype": "f32" },
|
|
56
|
+
"value": "q4k_expert_dequant_f32_subgroup"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"match": { "modelType": "mixtral", "weightsDtype": "q4k", "outputDtype": "f16", "hasF16": true },
|
|
60
|
+
"value": "q4k_expert_dequant_f16"
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"match": { "modelType": "mixtral", "weightsDtype": "q4k" },
|
|
64
|
+
"value": "q4k_expert_dequant_f32"
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"match": { "modelType": "mixtral", "weightsDtype": "f16", "outputDtype": "f16", "hasF16": true },
|
|
68
|
+
"value": "f16_expert_passthrough"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"match": { "modelType": "mixtral" },
|
|
72
|
+
"value": "f16_expert_upcast_f32"
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
},
|
|
17
17
|
"value": "gptoss_router_topk"
|
|
18
18
|
},
|
|
19
|
+
{ "match": { "modelType": "mixtral", "inputDtype": "f16", "weightsDtype": "f16" }, "value": "fused_f16_w16" },
|
|
20
|
+
{ "match": { "modelType": "mixtral" }, "value": "fused" },
|
|
19
21
|
{ "match": { "inputDtype": "f16", "weightsDtype": "f16" }, "value": "fused_f16_w16" },
|
|
20
22
|
{ "match": { "inputDtype": "f16" }, "value": "fused_f16" },
|
|
21
23
|
{ "match": {}, "value": "fused" }
|
|
@@ -38,6 +38,7 @@ const layernormRules = await loadJson('./kernels/layernorm.rules.json', import.m
|
|
|
38
38
|
const matmulRules = await loadJson('./kernels/matmul.rules.json', import.meta.url, 'Failed to load rules');
|
|
39
39
|
const kernelMoeRules = await loadJson('./kernels/moe.rules.json', import.meta.url, 'Failed to load rules');
|
|
40
40
|
const kernelMoeGptOssRules = await loadJson('./kernels/moe.rules.gptoss.json', import.meta.url, 'Failed to load rules');
|
|
41
|
+
const kernelMoeMixtralRules = await loadJson('./kernels/moe.rules.mixtral.json', import.meta.url, 'Failed to load rules');
|
|
41
42
|
const modulateRules = await loadJson('./kernels/modulate.rules.json', import.meta.url, 'Failed to load rules');
|
|
42
43
|
const pixelShuffleRules = await loadJson('./kernels/pixel_shuffle.rules.json', import.meta.url, 'Failed to load rules');
|
|
43
44
|
const repeatChannelsRules = await loadJson('./kernels/repeat-channels.rules.json', import.meta.url, 'Failed to load rules');
|
|
@@ -113,6 +114,7 @@ const RULE_SETS = {
|
|
|
113
114
|
matmul: matmulRules,
|
|
114
115
|
moe: kernelMoeRules,
|
|
115
116
|
moeGptoss: kernelMoeGptOssRules,
|
|
117
|
+
moeMixtral: kernelMoeMixtralRules,
|
|
116
118
|
modulate: modulateRules,
|
|
117
119
|
pixel_shuffle: pixelShuffleRules,
|
|
118
120
|
repeatChannels: repeatChannelsRules,
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
import type { DownloadProgress } from './downloader.js';
|
|
15
15
|
import type { PreflightResult, ModelRequirements } from './preflight.js';
|
|
16
|
+
import type { HfResolveConfig } from '../utils/hf-resolve-url.js';
|
|
16
17
|
|
|
17
18
|
/**
|
|
18
19
|
* Remote model configuration
|
|
@@ -24,6 +25,8 @@ export interface RemoteModelConfig {
|
|
|
24
25
|
displayName: string;
|
|
25
26
|
/** Base URL for shards (any static CDN) */
|
|
26
27
|
baseUrl?: string | null;
|
|
28
|
+
/** Hosted Hugging Face source used when baseUrl is omitted */
|
|
29
|
+
hf?: HfResolveConfig | null;
|
|
27
30
|
/** Model requirements for pre-flight checks */
|
|
28
31
|
requirements: ModelRequirements;
|
|
29
32
|
}
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
} from './preflight.js';
|
|
8
8
|
import { formatBytes } from './quota.js';
|
|
9
9
|
import { getCdnBasePath } from './download-types.js';
|
|
10
|
+
import { buildHfResolveBaseUrl, DEFAULT_HF_CDN_BASE_URL } from '../utils/hf-resolve-url.js';
|
|
10
11
|
|
|
11
12
|
// ============================================================================
|
|
12
13
|
// Model Registry
|
|
@@ -15,40 +16,14 @@ import { getCdnBasePath } from './download-types.js';
|
|
|
15
16
|
|
|
16
17
|
let cdnBaseOverride = null;
|
|
17
18
|
|
|
18
|
-
|
|
19
|
-
function getEffectiveCDNBaseUrl() {
|
|
20
|
-
const runtimeBase = getCdnBasePath();
|
|
21
|
-
const base = cdnBaseOverride ?? runtimeBase ?? '';
|
|
22
|
-
if (base) return base;
|
|
23
|
-
|
|
24
|
-
// Auto-detect: use same origin for Firebase Hosting or local dev
|
|
25
|
-
if (typeof globalThis.location !== 'undefined') {
|
|
26
|
-
const path = globalThis.location.pathname || '';
|
|
27
|
-
if (
|
|
28
|
-
path === '/d' ||
|
|
29
|
-
path.startsWith('/d/') ||
|
|
30
|
-
path === '/doppler' ||
|
|
31
|
-
path.startsWith('/doppler/') ||
|
|
32
|
-
path === '/dr' ||
|
|
33
|
-
path.startsWith('/dr/') ||
|
|
34
|
-
globalThis.location.host.includes('replo')
|
|
35
|
-
) {
|
|
36
|
-
return `${globalThis.location.origin}/doppler/models`;
|
|
37
|
-
}
|
|
38
|
-
return `${globalThis.location.origin}/models`;
|
|
39
|
-
}
|
|
40
|
-
// Fallback for non-browser-global contexts
|
|
41
|
-
return '/models';
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
|
|
45
19
|
export function setCDNBaseUrl(url) {
|
|
46
|
-
|
|
20
|
+
const normalized = typeof url === 'string' ? url.trim().replace(/\/$/, '') : '';
|
|
21
|
+
cdnBaseOverride = normalized || null;
|
|
47
22
|
}
|
|
48
23
|
|
|
49
24
|
|
|
50
25
|
export function getCDNBaseUrl() {
|
|
51
|
-
return
|
|
26
|
+
return cdnBaseOverride ?? getCdnBasePath() ?? DEFAULT_HF_CDN_BASE_URL;
|
|
52
27
|
}
|
|
53
28
|
|
|
54
29
|
|
|
@@ -57,12 +32,22 @@ export const QUICKSTART_MODELS = {
|
|
|
57
32
|
modelId: 'gemma-3-270m-it-q4k-ehf16-af32',
|
|
58
33
|
displayName: 'Gemma 3 270M IT (Q4K)',
|
|
59
34
|
baseUrl: null,
|
|
35
|
+
hf: {
|
|
36
|
+
repoId: 'Clocksmith/rdrr',
|
|
37
|
+
revision: 'ca6f0dbdf3882d3893a65cf48f2bb6f1520df162',
|
|
38
|
+
path: 'models/gemma-3-270m-it-q4k-ehf16-af32',
|
|
39
|
+
},
|
|
60
40
|
requirements: MODEL_REQUIREMENTS['gemma-3-270m-it-q4k-ehf16-af32'],
|
|
61
41
|
},
|
|
62
42
|
'google-embeddinggemma-300m-q4k-ehf16-af32': {
|
|
63
43
|
modelId: 'google-embeddinggemma-300m-q4k-ehf16-af32',
|
|
64
44
|
displayName: 'EmbeddingGemma 300M (Q4K)',
|
|
65
45
|
baseUrl: null,
|
|
46
|
+
hf: {
|
|
47
|
+
repoId: 'Clocksmith/rdrr',
|
|
48
|
+
revision: '7e79c466d54455bd370c81685956ea9abae0fd30',
|
|
49
|
+
path: 'models/google-embeddinggemma-300m-q4k-ehf16-af32',
|
|
50
|
+
},
|
|
66
51
|
requirements: MODEL_REQUIREMENTS['google-embeddinggemma-300m-q4k-ehf16-af32'],
|
|
67
52
|
},
|
|
68
53
|
};
|
|
@@ -82,6 +67,18 @@ export function registerQuickStartModel(config) {
|
|
|
82
67
|
QUICKSTART_MODELS[config.modelId] = config;
|
|
83
68
|
}
|
|
84
69
|
|
|
70
|
+
/**
 * Pick the download base URL for a quickstart model entry.
 *
 * An explicit, non-blank `baseUrl` wins (returned trimmed, with one trailing
 * slash removed). Otherwise the entry's `hf` source is turned into a resolve
 * URL rooted at the currently configured CDN base.
 *
 * @param config Quickstart model entry (`modelId`, `baseUrl`, `hf`).
 * @returns Base URL to pass to the downloader.
 * @throws {Error} When the entry has neither a usable `baseUrl` nor an `hf` source.
 */
function resolveQuickStartModelBaseUrl(config) {
  const explicitBase = typeof config?.baseUrl === 'string' ? config.baseUrl.trim() : '';
  if (explicitBase.length > 0) {
    return explicitBase.replace(/\/$/, '');
  }

  if (!config?.hf) {
    const label = config?.modelId ?? 'unknown';
    throw new Error(
      `Quickstart model "${label}" is missing an explicit baseUrl or hosted Hugging Face source.`
    );
  }

  return buildHfResolveBaseUrl(config.hf, { cdnBasePath: getCDNBaseUrl() });
}
|
|
81
|
+
|
|
85
82
|
// ============================================================================
|
|
86
83
|
// Download Functions
|
|
87
84
|
// ============================================================================
|
|
@@ -190,7 +187,7 @@ export async function downloadQuickStartModel(
|
|
|
190
187
|
signal,
|
|
191
188
|
};
|
|
192
189
|
|
|
193
|
-
const baseUrl = config
|
|
190
|
+
const baseUrl = resolveQuickStartModelBaseUrl(config);
|
|
194
191
|
const success = await downloadModel(
|
|
195
192
|
baseUrl,
|
|
196
193
|
onProgress,
|
|
@@ -541,18 +541,24 @@ async function listRelativeFiles(rootDir, relDir = '', out = []) {
|
|
|
541
541
|
return out;
|
|
542
542
|
}
|
|
543
543
|
|
|
544
|
-
async function
|
|
544
|
+
async function clearExistingConversionOutputs(outputDir) {
|
|
545
545
|
let entries;
|
|
546
546
|
try {
|
|
547
547
|
entries = await fs.readdir(outputDir, { withFileTypes: true });
|
|
548
548
|
} catch {
|
|
549
549
|
return;
|
|
550
550
|
}
|
|
551
|
-
const
|
|
552
|
-
.filter((entry) =>
|
|
551
|
+
const artifactFiles = entries
|
|
552
|
+
.filter((entry) => (
|
|
553
|
+
entry.isFile()
|
|
554
|
+
&& (
|
|
555
|
+
/^shard_\d{5}\.bin$/i.test(entry.name)
|
|
556
|
+
|| entry.name === 'manifest.json'
|
|
557
|
+
)
|
|
558
|
+
))
|
|
553
559
|
.map((entry) => path.join(outputDir, entry.name));
|
|
554
|
-
if (
|
|
555
|
-
await Promise.all(
|
|
560
|
+
if (artifactFiles.length === 0) return;
|
|
561
|
+
await Promise.all(artifactFiles.map((filePath) => fs.unlink(filePath)));
|
|
556
562
|
}
|
|
557
563
|
|
|
558
564
|
function createNodeConvertIO(outputDir, options) {
|
|
@@ -1153,7 +1159,7 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1153
1159
|
const outputDir = resolveOutputDir(outputDirOverride, converterConfig, modelId);
|
|
1154
1160
|
|
|
1155
1161
|
await fs.mkdir(outputDir, { recursive: true });
|
|
1156
|
-
await
|
|
1162
|
+
await clearExistingConversionOutputs(outputDir);
|
|
1157
1163
|
|
|
1158
1164
|
const model = {
|
|
1159
1165
|
name: path.basename(inputDir),
|
|
@@ -1180,6 +1186,15 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1180
1186
|
computeHash,
|
|
1181
1187
|
readRange: fileRangeReader.readRange,
|
|
1182
1188
|
});
|
|
1189
|
+
const deferredManifestState = {
|
|
1190
|
+
manifest: null,
|
|
1191
|
+
};
|
|
1192
|
+
const convertIo = {
|
|
1193
|
+
...io,
|
|
1194
|
+
async writeManifest(manifest) {
|
|
1195
|
+
deferredManifestState.manifest = manifest;
|
|
1196
|
+
},
|
|
1197
|
+
};
|
|
1183
1198
|
const manifestArchitecture = modelKind === 'diffusion' ? 'diffusion' : architecture;
|
|
1184
1199
|
let workerPool = null;
|
|
1185
1200
|
let workerTensorTransformer = null;
|
|
@@ -1244,7 +1259,7 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1244
1259
|
}));
|
|
1245
1260
|
|
|
1246
1261
|
const convertTimer = createStageTimer('Convert tensors');
|
|
1247
|
-
result = await convertModel(model,
|
|
1262
|
+
result = await convertModel(model, convertIo, {
|
|
1248
1263
|
modelId,
|
|
1249
1264
|
modelType: resolvedModelType,
|
|
1250
1265
|
quantization: targetQuantization,
|
|
@@ -1282,6 +1297,9 @@ export async function convertSafetensorsDirectory(options) {
|
|
|
1282
1297
|
}
|
|
1283
1298
|
|
|
1284
1299
|
normalizeTokenizerManifest(result.manifest);
|
|
1300
|
+
if (!deferredManifestState.manifest) {
|
|
1301
|
+
throw new Error('node convert: convert core did not produce a manifest.');
|
|
1302
|
+
}
|
|
1285
1303
|
await io.writeManifest(result.manifest);
|
|
1286
1304
|
|
|
1287
1305
|
const report = buildConvertReport(result, {
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
import { createReadStream } from 'node:fs';
|
|
1
2
|
import fs from 'node:fs/promises';
|
|
3
|
+
import { createHash } from 'node:crypto';
|
|
2
4
|
import path from 'node:path';
|
|
3
5
|
import {
|
|
4
6
|
HEADER_READ_SIZE,
|
|
@@ -16,7 +18,6 @@ import { parseTransformerModel } from '../converter/parsers/transformer.js';
|
|
|
16
18
|
import { parseGGUFHeader } from '../formats/gguf/types.js';
|
|
17
19
|
import { parseSafetensorsHeader } from '../formats/safetensors/types.js';
|
|
18
20
|
import { log } from '../debug/index.js';
|
|
19
|
-
import { computeHash } from '../storage/shard-manager.js';
|
|
20
21
|
import {
|
|
21
22
|
buildSourceRuntimeBundle,
|
|
22
23
|
createSourceStorageContext,
|
|
@@ -137,7 +138,12 @@ async function readRange(filePath, offset, length) {
|
|
|
137
138
|
return new ArrayBuffer(0);
|
|
138
139
|
}
|
|
139
140
|
const out = Buffer.allocUnsafe(end - start);
|
|
140
|
-
|
|
141
|
+
let pos = 0;
|
|
142
|
+
while (pos < out.length) {
|
|
143
|
+
const { bytesRead } = await handle.read(out, pos, out.length - pos, start + pos);
|
|
144
|
+
if (bytesRead === 0) break;
|
|
145
|
+
pos += bytesRead;
|
|
146
|
+
}
|
|
141
147
|
return out.buffer.slice(out.byteOffset, out.byteOffset + out.byteLength);
|
|
142
148
|
} finally {
|
|
143
149
|
await handle.close();
|
|
@@ -449,18 +455,36 @@ async function addHashesToFileEntries(entries, hashAlgorithm) {
|
|
|
449
455
|
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
450
456
|
const filePath = normalizePath(entry?.path);
|
|
451
457
|
if (!filePath) continue;
|
|
452
|
-
const
|
|
458
|
+
const stats = await getPathStats(filePath, `source asset (${filePath})`);
|
|
453
459
|
normalized.push({
|
|
454
460
|
...entry,
|
|
455
461
|
path: filePath,
|
|
456
|
-
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) :
|
|
457
|
-
hash: await
|
|
462
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : Number(stats.size),
|
|
463
|
+
hash: await computeFileHash(filePath, hashAlgorithm),
|
|
458
464
|
hashAlgorithm,
|
|
459
465
|
});
|
|
460
466
|
}
|
|
461
467
|
return normalized;
|
|
462
468
|
}
|
|
463
469
|
|
|
470
|
+
/**
 * Hash a file's contents by streaming it through a Node crypto digest, so the
 * whole file is never held in memory at once.
 *
 * @param filePath Path of the file to hash.
 * @param hashAlgorithm Algorithm name handed to `crypto.createHash`.
 * @returns Promise resolving to the lowercase hex digest.
 * @throws {Error} When reading the file fails; the message names the file and
 *   carries the underlying error text.
 */
async function computeFileHash(filePath, hashAlgorithm) {
  // Created outside the try block so an unsupported algorithm or invalid path
  // surfaces with its own error rather than the "failed to stream" wrapper.
  const digest = createHash(hashAlgorithm);
  const source = createReadStream(filePath);

  try {
    for await (const chunk of source) {
      digest.update(chunk);
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to stream source asset "${filePath}" for hashing: ${message}`);
  }

  return digest.digest('hex');
}
|
|
487
|
+
|
|
464
488
|
export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
465
489
|
const inputPath = normalizePath(options.inputPath);
|
|
466
490
|
if (!inputPath) {
|
|
@@ -51,7 +51,7 @@ function resolveCandidateModuleSpecifier(candidate) {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
function resolveDefaultWebgpuModuleSpecifiers() {
|
|
54
|
-
return ['
|
|
54
|
+
return ['webgpu', '@simulatte/webgpu'];
|
|
55
55
|
}
|
|
56
56
|
|
|
57
57
|
function resolveExplicitWebgpuModuleSpecifier() {
|
|
@@ -189,18 +189,35 @@ function resolveGpuFromModule(mod) {
|
|
|
189
189
|
return fromModule;
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
192
|
+
const tryCreateFactory = (factory) => {
|
|
193
|
+
if (typeof factory !== 'function') {
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
195
196
|
try {
|
|
196
|
-
|
|
197
|
+
return factory([]);
|
|
197
198
|
} catch {
|
|
198
199
|
try {
|
|
199
|
-
|
|
200
|
+
return factory();
|
|
200
201
|
} catch {
|
|
201
|
-
|
|
202
|
+
return null;
|
|
202
203
|
}
|
|
203
204
|
}
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
const instanceFactory = mod.createInstance || mod.default?.createInstance;
|
|
208
|
+
const createdFromInstanceFactory = tryCreateFactory(instanceFactory);
|
|
209
|
+
if (createdFromInstanceFactory) {
|
|
210
|
+
if (typeof createdFromInstanceFactory.requestAdapter === 'function') {
|
|
211
|
+
return createdFromInstanceFactory;
|
|
212
|
+
}
|
|
213
|
+
if (createdFromInstanceFactory.gpu && typeof createdFromInstanceFactory.gpu.requestAdapter === 'function') {
|
|
214
|
+
return createdFromInstanceFactory.gpu;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
const factory = mod.create || mod.default?.create;
|
|
219
|
+
if (typeof factory === 'function') {
|
|
220
|
+
const created = tryCreateFactory(factory);
|
|
204
221
|
if (created) {
|
|
205
222
|
if (typeof created.requestAdapter === 'function') {
|
|
206
223
|
return created;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Location of a hosted model directory inside a Hugging Face repository. */
export interface HfResolveConfig {
  /** Repository identifier, e.g. `Clocksmith/rdrr`. Required and must be non-blank. */
  repoId: string;
  /** Revision to resolve against; `main` is used when omitted, null, or blank. */
  revision?: string | null;
  /** Path of the model directory inside the repository. Required; leading slashes are ignored. */
  path: string;
}

/** Options accepted by {@link buildHfResolveBaseUrl}. */
export interface HfResolveUrlOptions {
  /** Host base to build the URL on; falls back to `DEFAULT_HF_CDN_BASE_URL` when omitted or blank. */
  cdnBasePath?: string;
}

/** Default host base used when no CDN base path is supplied. */
export declare const DEFAULT_HF_CDN_BASE_URL: string;

/**
 * Build the `<base>/<repoId>/resolve/<revision>/<path>` URL for a hosted
 * Hugging Face source.
 *
 * @throws {Error} When `repoId` or `path` is missing or blank.
 */
export declare function buildHfResolveBaseUrl(
  hfConfig: HfResolveConfig | null | undefined,
  options?: HfResolveUrlOptions
): string;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Host base used when the caller does not supply a CDN base path. */
export const DEFAULT_HF_CDN_BASE_URL = 'https://huggingface.co';

/**
 * Read `source[key]` as a trimmed string; anything that is not a string
 * (including a missing source object) yields the empty string.
 */
function readTrimmedString(source, key) {
  const value = source?.[key];
  return typeof value === 'string' ? value.trim() : '';
}

/**
 * Build the base URL of a model directory hosted in a Hugging Face repository:
 * `<cdnBase>/<repoId>/resolve/<revision>/<path>`.
 *
 * The result never ends with a slash, matching how explicit quickstart base
 * URLs are normalized, so callers can append `/<file>` without producing `//`.
 *
 * @param hfConfig Hosted source: `repoId` and `path` are required, `revision`
 *   falls back to `main` when omitted, null, or blank.
 * @param options Optional `cdnBasePath`; falls back to
 *   `DEFAULT_HF_CDN_BASE_URL` when omitted or blank.
 * @returns The resolve base URL.
 * @throws {Error} When `repoId` or `path` is missing or blank.
 */
export function buildHfResolveBaseUrl(hfConfig, options = {}) {
  const repoId = readTrimmedString(hfConfig, 'repoId');
  // Strip slashes on both ends: a trailing slash would otherwise leak into the
  // returned base URL.
  const repoPath = readTrimmedString(hfConfig, 'path').replace(/^\/+|\/+$/g, '');
  if (!repoId || !repoPath) {
    throw new Error('Hosted Hugging Face source requires repoId and path.');
  }

  const revision = readTrimmedString(hfConfig, 'revision') || 'main';
  const cdnBasePath = readTrimmedString(options, 'cdnBasePath') || DEFAULT_HF_CDN_BASE_URL;

  // A revision occupies exactly one URL segment, so slashes inside it
  // (e.g. `refs/pr/1`) must be escaped. Only `/` is rewritten so an
  // already-escaped revision is not double-encoded.
  const revisionSegment = revision.replace(/\//g, '%2F');

  return `${cdnBasePath.replace(/\/+$/, '')}/${repoId}/resolve/${revisionSegment}/${repoPath}`;
}
|
package/src/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const DOPPLER_VERSION = '0.1.8';
|
|
1
|
+
export const DOPPLER_VERSION = '0.1.9';
|
|
2
2
|
export const DOPPLER_PROVIDER_VERSION = DOPPLER_VERSION;
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import type { ConverterConfigSchema } from '../config/schema/converter.schema.js';
|
|
2
|
-
import type { ExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
3
|
-
import type { ExecutionV0GraphContractArtifact } from '../config/execution-v0-graph-contract-check.js';
|
|
4
|
-
import type { ManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
|
|
5
|
-
import type { SavedReportInfo } from '../storage/reports.js';
|
|
6
|
-
|
|
7
|
-
export interface NodeConvertProgress {
|
|
8
|
-
stage: string | null;
|
|
9
|
-
current: number | null;
|
|
10
|
-
total: number | null;
|
|
11
|
-
message: string | null;
|
|
12
|
-
tensorName?: string | null;
|
|
13
|
-
tensorBytesCurrent?: number | null;
|
|
14
|
-
tensorBytesTotal?: number | null;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export interface NodeConvertExecutionConfig {
|
|
18
|
-
workers?: number | null;
|
|
19
|
-
workerCountPolicy?: 'cap' | 'error' | null;
|
|
20
|
-
maxInFlightJobs?: number | null;
|
|
21
|
-
rowChunkRows?: number | null;
|
|
22
|
-
rowChunkMinTensorBytes?: number | null;
|
|
23
|
-
useGpuCast?: boolean | null;
|
|
24
|
-
gpuCastMinTensorBytes?: number | null;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export interface ConvertSafetensorsDirectoryOptions {
|
|
28
|
-
/** Directory with safetensors/diffusion assets, or a direct .gguf file path. */
|
|
29
|
-
inputDir: string;
|
|
30
|
-
outputDir?: string | null;
|
|
31
|
-
modelId?: string | null;
|
|
32
|
-
converterConfig?: Partial<ConverterConfigSchema> | null;
|
|
33
|
-
execution?: NodeConvertExecutionConfig | null;
|
|
34
|
-
onProgress?: (progress: NodeConvertProgress) => void;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
export interface ConvertSafetensorsDirectoryResult {
|
|
38
|
-
manifest: Record<string, unknown>;
|
|
39
|
-
shardCount: number;
|
|
40
|
-
tensorCount: number;
|
|
41
|
-
executionContractArtifact: ExecutionContractArtifact | null;
|
|
42
|
-
executionV0GraphContractArtifact: ExecutionV0GraphContractArtifact | null;
|
|
43
|
-
layerPatternContractArtifact: Record<string, unknown> | null;
|
|
44
|
-
requiredInferenceFieldsArtifact: ManifestRequiredInferenceFieldsArtifact | null;
|
|
45
|
-
report: Record<string, unknown>;
|
|
46
|
-
reportInfo: SavedReportInfo;
|
|
47
|
-
presetId: string;
|
|
48
|
-
modelType: string;
|
|
49
|
-
outputDir: string;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export declare function convertSafetensorsDirectory(
|
|
53
|
-
options: ConvertSafetensorsDirectoryOptions
|
|
54
|
-
): Promise<ConvertSafetensorsDirectoryResult>;
|