@emilshirokikh/slyos-sdk 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,7 +15,7 @@ const modelMap = {
     minRAM_MB: { q4: 2048, q8: 3072, fp16: 5120, fp32: 8192 },
   },
   'quantum-3b': {
-    hfModel: 'meta-llama/Llama-3.2-3B-Instruct',
+    hfModel: 'Qwen/Qwen2.5-3B-Instruct',
     task: 'text-generation',
     category: 'llm',
     sizesMB: { q4: 1600, q8: 3200, fp16: 6400, fp32: 12800 },
@@ -29,7 +29,7 @@ const modelMap = {
     minRAM_MB: { q4: 3072, q8: 5120, fp16: 8192, fp32: 16384 },
   },
   'quantum-8b': {
-    hfModel: 'meta-llama/Llama-3.1-8B-Instruct',
+    hfModel: 'Qwen/Qwen2.5-7B-Instruct',
     task: 'text-generation',
     category: 'llm',
     sizesMB: { q4: 4200, q8: 8400, fp16: 16800, fp32: 33600 },
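
Both hunks repoint the SDK's 'quantum-3b' and 'quantum-8b' aliases from Meta's Llama repos to Qwen2.5 repos on Hugging Face while leaving every other field untouched (note the sizesMB values stay the same even though the 8b alias now maps to a 7B-parameter model); one plausible motivation is that the meta-llama repos are gated behind a license acceptance while the Qwen2.5 Instruct repos are not. A minimal TypeScript sketch of what alias resolution looks like after the change — resolveHfRepo is a hypothetical helper for illustration, not an SDK export:

    // Hypothetical sketch: mapping a SlyOS alias to its Hugging Face repo id.
    // Entry shape mirrors the diff; resolveHfRepo is illustrative only.
    const modelMap: Record<string, { hfModel: string }> = {
      'quantum-3b': { hfModel: 'Qwen/Qwen2.5-3B-Instruct' }, // was meta-llama/Llama-3.2-3B-Instruct
      'quantum-8b': { hfModel: 'Qwen/Qwen2.5-7B-Instruct' }, // was meta-llama/Llama-3.1-8B-Instruct
    };

    function resolveHfRepo(modelId: string): string | undefined {
      return modelMap[modelId]?.hfModel;
    }

    console.log(resolveHfRepo('quantum-8b')); // 'Qwen/Qwen2.5-7B-Instruct'

Callers keep requesting the same alias but now download different weights, so model outputs and tokenizer behavior change across 1.2.0 → 1.2.2 without any caller code changing.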
@@ -67,7 +67,13 @@ function selectQuantization(memoryMB, modelId) {
   const info = modelMap[modelId];
   if (!info)
     return 'q4';
-  // Try from best quality down: fp16 → q8 → q4
+  // ONNX/WASM has protobuf size limits; fp16 files >2GB crash on many systems.
+  // For LLMs, cap at q4 via WASM. FP16/Q8 need native backends (llama.cpp).
+  // STT models are small enough for q8/fp16.
+  if (info.category === 'llm') {
+    return 'q4'; // safest for ONNX/WASM across all platforms
+  }
+  // STT models: try from best quality down
   const quants = ['fp16', 'q8', 'q4'];
   for (const q of quants) {
     if (memoryMB >= info.minRAM_MB[q])
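
The practical effect, using quantum-3b's minRAM_MB values from the context lines above ({ q4: 3072, q8: 5120, fp16: 8192, fp32: 16384 }): in 1.2.0 a machine reporting 8 GB+ of RAM got fp16, while in 1.2.2 any model with category 'llm' is pinned to q4 regardless of available memory. A sketch re-implementing the rule for illustration (not the SDK export):

    // Illustrative re-implementation of the 1.2.2 selection rule.
    type Quant = 'q4' | 'q8' | 'fp16' | 'fp32';

    const info = {
      category: 'llm',
      // quantum-3b values taken from the diff's context lines
      minRAM_MB: { q4: 3072, q8: 5120, fp16: 8192, fp32: 16384 } as Record<Quant, number>,
    };

    function selectQuantization(memoryMB: number): Quant {
      if (info.category === 'llm') return 'q4';           // 1.2.2: hard cap for LLMs
      for (const q of ['fp16', 'q8', 'q4'] as Quant[]) {  // 1.2.0 path, now STT-only
        if (memoryMB >= info.minRAM_MB[q]) return q;
      }
      return 'q4';
    }

    console.log(selectQuantization(16384)); // 'q4' (1.2.0 would have returned 'fp16')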
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@emilshirokikh/slyos-sdk",
-  "version": "1.2.0",
+  "version": "1.2.2",
   "description": "SlyOS - On-Device AI SDK for Web and Node.js",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -75,7 +75,7 @@ const modelMap: Record<string, ModelInfo> = {
     minRAM_MB: { q4: 2048, q8: 3072, fp16: 5120, fp32: 8192 },
   },
   'quantum-3b': {
-    hfModel: 'meta-llama/Llama-3.2-3B-Instruct',
+    hfModel: 'Qwen/Qwen2.5-3B-Instruct',
     task: 'text-generation',
     category: 'llm',
     sizesMB: { q4: 1600, q8: 3200, fp16: 6400, fp32: 12800 },
@@ -89,7 +89,7 @@ const modelMap: Record<string, ModelInfo> = {
     minRAM_MB: { q4: 3072, q8: 5120, fp16: 8192, fp32: 16384 },
   },
   'quantum-8b': {
-    hfModel: 'meta-llama/Llama-3.1-8B-Instruct',
+    hfModel: 'Qwen/Qwen2.5-7B-Instruct',
     task: 'text-generation',
     category: 'llm',
     sizesMB: { q4: 4200, q8: 8400, fp16: 16800, fp32: 33600 },
@@ -128,7 +128,14 @@ function selectQuantization(memoryMB: number, modelId: string): QuantizationLevel {
   const info = modelMap[modelId];
   if (!info) return 'q4';
 
-  // Try from best quality down: fp16 → q8 → q4
+  // ONNX/WASM has protobuf size limits; fp16 files >2GB crash on many systems.
+  // For LLMs, cap at q4 via WASM. FP16/Q8 need native backends (llama.cpp).
+  // STT models are small enough for q8/fp16.
+  if (info.category === 'llm') {
+    return 'q4'; // safest for ONNX/WASM across all platforms
+  }
+
+  // STT models: try from best quality down
   const quants: QuantizationLevel[] = ['fp16', 'q8', 'q4'];
   for (const q of quants) {
     if (memoryMB >= info.minRAM_MB[q]) return q;
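
The TypeScript hunks imply the shapes behind Record<string, ModelInfo> and QuantizationLevel. A reconstruction inferred from the fields visible in this diff — the real declarations in src/index.ts may differ or carry extra members, and the 'stt' literal is an assumption based on the STT comments:

    // Inferred from the diff; not copied from src/index.ts.
    type QuantizationLevel = 'q4' | 'q8' | 'fp16' | 'fp32';

    interface ModelInfo {
      hfModel: string;                              // Hugging Face repo id
      task: string;                                 // e.g. 'text-generation'
      category: 'llm' | 'stt';                      // 'stt' assumed from the comments
      sizesMB: Record<QuantizationLevel, number>;   // download size per quantization
      minRAM_MB: Record<QuantizationLevel, number>; // minimum RAM to run each level
    }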