expo-ai-kit 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/android/build.gradle +2 -2
- package/android/src/main/java/expo/modules/aikit/GemmaInferenceClient.kt +61 -47
- package/build/models.d.ts +4 -5
- package/build/models.d.ts.map +1 -1
- package/build/models.js +12 -12
- package/build/models.js.map +1 -1
- package/package.json +1 -1
- package/src/models.ts +16 -17
package/README.md
CHANGED
|
@@ -14,6 +14,15 @@ On-device AI for Expo apps. Run language models locally—no API keys, no cloud,
|
|
|
14
14
|
| iOS 26+ | [Apple Foundation Models](https://developer.apple.com/documentation/FoundationModels) |
|
|
15
15
|
| Android (supported devices) | [ML Kit Prompt API](https://developers.google.com/ml-kit/genai#prompt-device) |
|
|
16
16
|
|
|
17
|
+
### Downloadable Models (Gemma 4)
|
|
18
|
+
|
|
19
|
+
| Platform | Status |
|
|
20
|
+
|----------|--------|
|
|
21
|
+
| Android | Gemma 4 E2B (2.3B) and E4B (4.5B) via [LiteRT-LM](https://ai.google.dev/edge/litert-lm) |
|
|
22
|
+
| iOS | Coming soon — waiting for LiteRT-LM Swift APIs from Google |
|
|
23
|
+
|
|
24
|
+
> **Note:** iOS downloadable model support (Gemma 4 E2B/E4B) is planned for a future release. We are waiting for Google to ship native Swift APIs for LiteRT-LM. Built-in Apple Foundation Models work on iOS 26+ today.
|
|
25
|
+
|
|
17
26
|
### Unsupported
|
|
18
27
|
|
|
19
28
|
| Platform | Fallback Behavior |
|
package/android/build.gradle
CHANGED
|
@@ -43,6 +43,6 @@ android {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
dependencies {
|
|
46
|
-
implementation "com.google.mlkit:genai-prompt:1.0.0-
|
|
47
|
-
implementation "com.google.
|
|
46
|
+
implementation "com.google.mlkit:genai-prompt:1.0.0-beta2"
|
|
47
|
+
implementation "com.google.ai.edge.litertlm:litertlm-android:+"
|
|
48
48
|
}
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
package expo.modules.aikit
|
|
2
2
|
|
|
3
3
|
import android.content.Context
|
|
4
|
-
import com.google.
|
|
5
|
-
import
|
|
4
|
+
import com.google.ai.edge.litertlm.Engine
|
|
5
|
+
import com.google.ai.edge.litertlm.EngineConfig
|
|
6
|
+
import com.google.ai.edge.litertlm.Conversation
|
|
7
|
+
import com.google.ai.edge.litertlm.Backend
|
|
6
8
|
import kotlinx.coroutines.Dispatchers
|
|
7
9
|
import kotlinx.coroutines.sync.Mutex
|
|
8
10
|
import kotlinx.coroutines.sync.withLock
|
|
@@ -16,7 +18,7 @@ import java.net.URL
|
|
|
16
18
|
import java.security.MessageDigest
|
|
17
19
|
|
|
18
20
|
/**
|
|
19
|
-
* Wrapper around
|
|
21
|
+
* Wrapper around LiteRT-LM Engine for Gemma 4 models.
|
|
20
22
|
*
|
|
21
23
|
* Concurrency model:
|
|
22
24
|
* - A Mutex guards all state transitions (load, unload, inference).
|
|
@@ -28,7 +30,8 @@ import java.security.MessageDigest
|
|
|
28
30
|
class GemmaInferenceClient(private val context: Context) {
|
|
29
31
|
|
|
30
32
|
private val mutex = Mutex()
|
|
31
|
-
private var
|
|
33
|
+
private var engine: Engine? = null
|
|
34
|
+
private var conversation: Conversation? = null
|
|
32
35
|
private var loadedModelId: String? = null
|
|
33
36
|
|
|
34
37
|
@Volatile
|
|
@@ -39,33 +42,50 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
39
42
|
// -------------------------------------------------------------------------
|
|
40
43
|
|
|
41
44
|
/**
|
|
42
|
-
* Load a model into memory
|
|
45
|
+
* Load a model into memory using LiteRT-LM Engine.
|
|
46
|
+
* Unloads a previously loaded model first if it is a different model; returns immediately if the requested model is already loaded.
|
|
43
47
|
* Caller is responsible for emitting onModelStateChange events.
|
|
44
48
|
*/
|
|
45
49
|
suspend fun loadModel(modelId: String, modelPath: String) = mutex.withLock {
|
|
46
50
|
// Unload previous model if different
|
|
47
51
|
if (loadedModelId != null && loadedModelId != modelId) {
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
conversation?.close()
|
|
53
|
+
engine?.close()
|
|
54
|
+
conversation = null
|
|
55
|
+
engine = null
|
|
50
56
|
loadedModelId = null
|
|
51
57
|
}
|
|
52
58
|
|
|
53
|
-
if (loadedModelId == modelId &&
|
|
59
|
+
if (loadedModelId == modelId && engine != null) {
|
|
54
60
|
return@withLock // Already loaded
|
|
55
61
|
}
|
|
56
62
|
|
|
57
63
|
try {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
64
|
+
withContext(Dispatchers.IO) {
|
|
65
|
+
val engineConfig = EngineConfig(
|
|
66
|
+
modelPath = modelPath,
|
|
67
|
+
backend = Backend.GPU()
|
|
68
|
+
)
|
|
69
|
+
val newEngine = Engine(engineConfig)
|
|
70
|
+
newEngine.initialize()
|
|
71
|
+
val newConversation = newEngine.createConversation()
|
|
72
|
+
|
|
73
|
+
engine = newEngine
|
|
74
|
+
conversation = newConversation
|
|
75
|
+
loadedModelId = modelId
|
|
76
|
+
}
|
|
63
77
|
} catch (e: OutOfMemoryError) {
|
|
64
|
-
|
|
78
|
+
conversation?.close()
|
|
79
|
+
engine?.close()
|
|
80
|
+
conversation = null
|
|
81
|
+
engine = null
|
|
65
82
|
loadedModelId = null
|
|
66
83
|
throw RuntimeException("INFERENCE_OOM:$modelId:Device does not have enough memory to load model")
|
|
67
84
|
} catch (e: Exception) {
|
|
68
|
-
|
|
85
|
+
conversation?.close()
|
|
86
|
+
engine?.close()
|
|
87
|
+
conversation = null
|
|
88
|
+
engine = null
|
|
69
89
|
loadedModelId = null
|
|
70
90
|
throw RuntimeException("MODEL_LOAD_FAILED:$modelId:${e.message}")
|
|
71
91
|
}
|
|
@@ -75,14 +95,16 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
75
95
|
* Unload the current model from memory.
|
|
76
96
|
*/
|
|
77
97
|
suspend fun unloadModel() = mutex.withLock {
|
|
78
|
-
|
|
79
|
-
|
|
98
|
+
conversation?.close()
|
|
99
|
+
engine?.close()
|
|
100
|
+
conversation = null
|
|
101
|
+
engine = null
|
|
80
102
|
loadedModelId = null
|
|
81
103
|
}
|
|
82
104
|
|
|
83
105
|
fun getLoadedModelId(): String? = loadedModelId
|
|
84
106
|
|
|
85
|
-
fun isModelLoaded(): Boolean =
|
|
107
|
+
fun isModelLoaded(): Boolean = engine != null
|
|
86
108
|
|
|
87
109
|
// -------------------------------------------------------------------------
|
|
88
110
|
// Inference
|
|
@@ -93,14 +115,14 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
93
115
|
* The mutex ensures this cannot run concurrently with load/unload.
|
|
94
116
|
*/
|
|
95
117
|
suspend fun generateText(prompt: String, systemPrompt: String): String = mutex.withLock {
|
|
96
|
-
val
|
|
118
|
+
val conv = conversation
|
|
97
119
|
?: throw RuntimeException("MODEL_NOT_DOWNLOADED:${loadedModelId ?: "unknown"}:No model loaded")
|
|
98
120
|
|
|
99
121
|
val fullPrompt = buildFullPrompt(prompt, systemPrompt)
|
|
100
122
|
|
|
101
123
|
try {
|
|
102
124
|
withContext(Dispatchers.IO) {
|
|
103
|
-
|
|
125
|
+
conv.sendMessage(contents = fullPrompt).toString()
|
|
104
126
|
}
|
|
105
127
|
} catch (e: OutOfMemoryError) {
|
|
106
128
|
throw RuntimeException("INFERENCE_OOM:${loadedModelId ?: "unknown"}:Out of memory during inference")
|
|
@@ -113,47 +135,37 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
113
135
|
* Generate a streaming response. The onChunk callback receives
|
|
114
136
|
* (token=delta, accumulatedText=full, isDone) matching the PromptApiClient contract.
|
|
115
137
|
*
|
|
116
|
-
*
|
|
117
|
-
*
|
|
118
|
-
*
|
|
119
|
-
* We use a CompletableDeferred to keep the mutex held until streaming completes,
|
|
120
|
-
* preventing concurrent load/unload during active inference.
|
|
138
|
+
* LiteRT-LM's sendMessageAsync() returns a Flow<Message>. Each emission
|
|
139
|
+
* contains accumulated text, so we diff against previousText to extract
|
|
140
|
+
* the delta token.
|
|
121
141
|
*/
|
|
122
142
|
suspend fun generateTextStream(
|
|
123
143
|
prompt: String,
|
|
124
144
|
systemPrompt: String,
|
|
125
145
|
onChunk: (token: String, accumulatedText: String, isDone: Boolean) -> Unit
|
|
126
146
|
) = mutex.withLock {
|
|
127
|
-
val
|
|
147
|
+
val conv = conversation
|
|
128
148
|
?: throw RuntimeException("MODEL_NOT_DOWNLOADED:${loadedModelId ?: "unknown"}:No model loaded")
|
|
129
149
|
|
|
130
150
|
val fullPrompt = buildFullPrompt(prompt, systemPrompt)
|
|
131
151
|
|
|
132
152
|
try {
|
|
133
153
|
withContext(Dispatchers.IO) {
|
|
134
|
-
val completion = CompletableDeferred<String>()
|
|
135
154
|
var previousText = ""
|
|
136
155
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
// (token=delta, accumulatedText=full, isDone) contract.
|
|
140
|
-
inference.generateResponseAsync(fullPrompt) { partialResult, done ->
|
|
141
|
-
val accumulated = partialResult ?: ""
|
|
156
|
+
conv.sendMessageAsync(contents = fullPrompt).collect { message ->
|
|
157
|
+
val accumulated = message.toString()
|
|
142
158
|
val token = if (accumulated.length > previousText.length) {
|
|
143
159
|
accumulated.substring(previousText.length)
|
|
144
160
|
} else {
|
|
145
161
|
""
|
|
146
162
|
}
|
|
147
163
|
previousText = accumulated
|
|
148
|
-
onChunk(token, accumulated,
|
|
149
|
-
|
|
150
|
-
if (done) {
|
|
151
|
-
completion.complete(accumulated)
|
|
152
|
-
}
|
|
164
|
+
onChunk(token, accumulated, false)
|
|
153
165
|
}
|
|
154
166
|
|
|
155
|
-
//
|
|
156
|
-
|
|
167
|
+
// Final done event for consistency with PromptApiClient
|
|
168
|
+
onChunk("", previousText, true)
|
|
157
169
|
}
|
|
158
170
|
} catch (e: OutOfMemoryError) {
|
|
159
171
|
throw RuntimeException("INFERENCE_OOM:${loadedModelId ?: "unknown"}:Out of memory during inference")
|
|
@@ -189,8 +201,8 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
189
201
|
val modelsDir = File(context.filesDir, "models")
|
|
190
202
|
modelsDir.mkdirs()
|
|
191
203
|
|
|
192
|
-
val targetFile = File(modelsDir, "$modelId.
|
|
193
|
-
val tempFile = File(modelsDir, "$modelId.
|
|
204
|
+
val targetFile = File(modelsDir, "$modelId.litertlm")
|
|
205
|
+
val tempFile = File(modelsDir, "$modelId.litertlm.tmp")
|
|
194
206
|
|
|
195
207
|
try {
|
|
196
208
|
val connection = URL(url).openConnection() as HttpURLConnection
|
|
@@ -256,17 +268,19 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
256
268
|
suspend fun deleteModelFile(modelId: String) = mutex.withLock {
|
|
257
269
|
// Unload if this model is currently loaded
|
|
258
270
|
if (loadedModelId == modelId) {
|
|
259
|
-
|
|
260
|
-
|
|
271
|
+
conversation?.close()
|
|
272
|
+
engine?.close()
|
|
273
|
+
conversation = null
|
|
274
|
+
engine = null
|
|
261
275
|
loadedModelId = null
|
|
262
276
|
}
|
|
263
277
|
|
|
264
|
-
val modelFile = File(context.filesDir, "models/$modelId.
|
|
278
|
+
val modelFile = File(context.filesDir, "models/$modelId.litertlm")
|
|
265
279
|
if (modelFile.exists()) {
|
|
266
280
|
modelFile.delete()
|
|
267
281
|
}
|
|
268
282
|
// Also clean up any partial downloads
|
|
269
|
-
val tempFile = File(context.filesDir, "models/$modelId.
|
|
283
|
+
val tempFile = File(context.filesDir, "models/$modelId.litertlm.tmp")
|
|
270
284
|
if (tempFile.exists()) {
|
|
271
285
|
tempFile.delete()
|
|
272
286
|
}
|
|
@@ -276,14 +290,14 @@ class GemmaInferenceClient(private val context: Context) {
|
|
|
276
290
|
* Check if a model file exists on disk.
|
|
277
291
|
*/
|
|
278
292
|
fun isModelFileDownloaded(modelId: String): Boolean {
|
|
279
|
-
return File(context.filesDir, "models/$modelId.
|
|
293
|
+
return File(context.filesDir, "models/$modelId.litertlm").exists()
|
|
280
294
|
}
|
|
281
295
|
|
|
282
296
|
/**
|
|
283
297
|
* Get the file path for a downloaded model.
|
|
284
298
|
*/
|
|
285
299
|
fun getModelFilePath(modelId: String): String {
|
|
286
|
-
return File(context.filesDir, "models/$modelId.
|
|
300
|
+
return File(context.filesDir, "models/$modelId.litertlm").absolutePath
|
|
287
301
|
}
|
|
288
302
|
|
|
289
303
|
// -------------------------------------------------------------------------
|
package/build/models.d.ts
CHANGED
|
@@ -14,7 +14,7 @@ export type ModelRegistryEntry = {
|
|
|
14
14
|
parameterCount: string;
|
|
15
15
|
/** Quantization variant */
|
|
16
16
|
quantization: string;
|
|
17
|
-
/** URL to download the
|
|
17
|
+
/** URL to download the LiteRT-LM model file */
|
|
18
18
|
downloadUrl: string;
|
|
19
19
|
/** SHA256 hash for integrity verification after download */
|
|
20
20
|
sha256: string;
|
|
@@ -23,10 +23,9 @@ export type ModelRegistryEntry = {
|
|
|
23
23
|
/**
|
|
24
24
|
* Practical context window (max tokens) for this model on constrained devices.
|
|
25
25
|
*
|
|
26
|
-
* These are conservative defaults, NOT the base model's theoretical max
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* adjusted during Phase 2 testing with real devices.
|
|
26
|
+
* These are conservative defaults, NOT the base model's theoretical max.
|
|
27
|
+
* These values should be benchmarked and adjusted during testing with
|
|
28
|
+
* real devices.
|
|
30
29
|
*/
|
|
31
30
|
contextWindow: number;
|
|
32
31
|
/** Minimum device RAM in bytes required to run this model */
|
package/build/models.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,MAAM,kBAAkB,GAAG;IAC/B,6DAA6D;IAC7D,EAAE,EAAE,MAAM,CAAC;IACX,gCAAgC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,MAAM,kBAAkB,GAAG;IAC/B,6DAA6D;IAC7D,EAAE,EAAE,MAAM,CAAC;IACX,gCAAgC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,+CAA+C;IAC/C,WAAW,EAAE,MAAM,CAAC;IACpB,4DAA4D;IAC5D,MAAM,EAAE,MAAM,CAAC;IACf,kCAAkC;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;;OAMG;IACH,aAAa,EAAE,MAAM,CAAC;IACtB,6DAA6D;IAC7D,WAAW,EAAE,MAAM,CAAC;IACpB,sCAAsC;IACtC,kBAAkB,EAAE,CAAC,KAAK,GAAG,SAAS,CAAC,EAAE,CAAC;CAC3C,CAAC;AAEF,eAAO,MAAM,cAAc,EAAE,kBAAkB,EA+B9C,CAAC;AAEF;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAEhF"}
|
package/build/models.js
CHANGED
|
@@ -10,29 +10,29 @@ export const MODEL_REGISTRY = [
|
|
|
10
10
|
id: 'gemma-e2b',
|
|
11
11
|
name: 'Gemma 4 E2B',
|
|
12
12
|
parameterCount: '2.3B',
|
|
13
|
-
quantization: '
|
|
14
|
-
downloadUrl: 'https://huggingface.co/
|
|
13
|
+
quantization: 'mixed-2/4/8-bit',
|
|
14
|
+
downloadUrl: 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it.litertlm',
|
|
15
15
|
sha256: '', // TODO: Fill with actual hash once model file is verified
|
|
16
|
-
sizeBytes:
|
|
17
|
-
// Conservative limit for 4GB RAM devices.
|
|
18
|
-
//
|
|
16
|
+
sizeBytes: 2_580_000_000, // ~2.58GB
|
|
17
|
+
// Conservative limit for 4GB RAM devices.
|
|
18
|
+
// TODO: Benchmark during Phase 2 testing.
|
|
19
19
|
contextWindow: 8_000,
|
|
20
20
|
minRamBytes: 4_000_000_000, // 4GB
|
|
21
|
-
supportedPlatforms: ['
|
|
21
|
+
supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs
|
|
22
22
|
},
|
|
23
23
|
{
|
|
24
24
|
id: 'gemma-e4b',
|
|
25
25
|
name: 'Gemma 4 E4B',
|
|
26
26
|
parameterCount: '4.5B',
|
|
27
|
-
quantization: '
|
|
28
|
-
downloadUrl: 'https://huggingface.co/
|
|
27
|
+
quantization: 'mixed-4/8-bit',
|
|
28
|
+
downloadUrl: 'https://huggingface.co/litert-community/gemma-4-E4B-it-litert-lm/resolve/main/gemma-4-E4B-it.litertlm',
|
|
29
29
|
sha256: '', // TODO: Fill with actual hash once model file is verified
|
|
30
|
-
sizeBytes:
|
|
31
|
-
// Conservative limit for 6GB RAM devices.
|
|
32
|
-
//
|
|
30
|
+
sizeBytes: 3_650_000_000, // ~3.65GB
|
|
31
|
+
// Conservative limit for 6GB RAM devices.
|
|
32
|
+
// TODO: Benchmark during Phase 2 testing.
|
|
33
33
|
contextWindow: 16_000,
|
|
34
34
|
minRamBytes: 6_000_000_000, // 6GB
|
|
35
|
-
supportedPlatforms: ['
|
|
35
|
+
supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs
|
|
36
36
|
},
|
|
37
37
|
];
|
|
38
38
|
/**
|
package/build/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA+BH,MAAM,CAAC,MAAM,cAAc,GAAyB;IAClD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,aAAa;QACnB,cAAc,EAAE,MAAM;QACtB,YAAY,EAAE,iBAAiB;QAC/B,WAAW,EACT,uGAAuG;QACzG,MAAM,EAAE,EAAE,EAAE,0DAA0D;QACtE,SAAS,EAAE,aAAa,EAAE,UAAU;QACpC,0CAA0C;QAC1C,0CAA0C;QAC1C,aAAa,EAAE,KAAK;QACpB,WAAW,EAAE,aAAa,EAAE,MAAM;QAClC,kBAAkB,EAAE,CAAC,SAAS,CAAC,EAAE,uCAAuC;KACzE;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,aAAa;QACnB,cAAc,EAAE,MAAM;QACtB,YAAY,EAAE,eAAe;QAC7B,WAAW,EACT,uGAAuG;QACzG,MAAM,EAAE,EAAE,EAAE,0DAA0D;QACtE,SAAS,EAAE,aAAa,EAAE,UAAU;QACpC,0CAA0C;QAC1C,0CAA0C;QAC1C,aAAa,EAAE,MAAM;QACrB,WAAW,EAAE,aAAa,EAAE,MAAM;QAClC,kBAAkB,EAAE,CAAC,SAAS,CAAC,EAAE,uCAAuC;KACzE;CACF,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe;IAC9C,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,CAAC;AACtD,CAAC","sourcesContent":["/**\n * Model Registry\n *\n * Defines all downloadable models known to expo-ai-kit.\n * getDownloadableModels() reads from this registry and enriches\n * each entry with on-device status from the native layer.\n */\n\nexport type ModelRegistryEntry = {\n /** Unique model identifier used in setModel/downloadModel */\n id: string;\n /** Human-readable model name */\n name: string;\n /** Parameter count label */\n parameterCount: string;\n /** Quantization variant */\n quantization: string;\n /** URL to download the LiteRT-LM model file */\n downloadUrl: string;\n /** SHA256 hash for integrity verification after download */\n sha256: string;\n /** Download file size in bytes */\n sizeBytes: number;\n /**\n * Practical context window (max tokens) for this model on constrained devices.\n *\n * These are conservative defaults, NOT the base model's theoretical max.\n * These values should be benchmarked and adjusted during testing with\n * real devices.\n */\n contextWindow: number;\n /** Minimum device RAM in bytes required to run this model */\n 
minRamBytes: number;\n /** Platforms this model can run on */\n supportedPlatforms: ('ios' | 'android')[];\n};\n\nexport const MODEL_REGISTRY: ModelRegistryEntry[] = [\n {\n id: 'gemma-e2b',\n name: 'Gemma 4 E2B',\n parameterCount: '2.3B',\n quantization: 'mixed-2/4/8-bit',\n downloadUrl:\n 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it.litertlm',\n sha256: '', // TODO: Fill with actual hash once model file is verified\n sizeBytes: 2_580_000_000, // ~2.58GB\n // Conservative limit for 4GB RAM devices.\n // TODO: Benchmark during Phase 2 testing.\n contextWindow: 8_000,\n minRamBytes: 4_000_000_000, // 4GB\n supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs\n },\n {\n id: 'gemma-e4b',\n name: 'Gemma 4 E4B',\n parameterCount: '4.5B',\n quantization: 'mixed-4/8-bit',\n downloadUrl:\n 'https://huggingface.co/litert-community/gemma-4-E4B-it-litert-lm/resolve/main/gemma-4-E4B-it.litertlm',\n sha256: '', // TODO: Fill with actual hash once model file is verified\n sizeBytes: 3_650_000_000, // ~3.65GB\n // Conservative limit for 6GB RAM devices.\n // TODO: Benchmark during Phase 2 testing.\n contextWindow: 16_000,\n minRamBytes: 6_000_000_000, // 6GB\n supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs\n },\n];\n\n/**\n * Look up a model registry entry by ID.\n * Returns undefined if not found.\n */\nexport function getRegistryEntry(modelId: string): ModelRegistryEntry | undefined {\n return MODEL_REGISTRY.find((m) => m.id === modelId);\n}\n"]}
|
package/package.json
CHANGED
package/src/models.ts
CHANGED
|
@@ -15,7 +15,7 @@ export type ModelRegistryEntry = {
|
|
|
15
15
|
parameterCount: string;
|
|
16
16
|
/** Quantization variant */
|
|
17
17
|
quantization: string;
|
|
18
|
-
/** URL to download the
|
|
18
|
+
/** URL to download the LiteRT-LM model file */
|
|
19
19
|
downloadUrl: string;
|
|
20
20
|
/** SHA256 hash for integrity verification after download */
|
|
21
21
|
sha256: string;
|
|
@@ -24,10 +24,9 @@ export type ModelRegistryEntry = {
|
|
|
24
24
|
/**
|
|
25
25
|
* Practical context window (max tokens) for this model on constrained devices.
|
|
26
26
|
*
|
|
27
|
-
* These are conservative defaults, NOT the base model's theoretical max
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
* adjusted during Phase 2 testing with real devices.
|
|
27
|
+
* These are conservative defaults, NOT the base model's theoretical max.
|
|
28
|
+
* These values should be benchmarked and adjusted during testing with
|
|
29
|
+
* real devices.
|
|
31
30
|
*/
|
|
32
31
|
contextWindow: number;
|
|
33
32
|
/** Minimum device RAM in bytes required to run this model */
|
|
@@ -41,31 +40,31 @@ export const MODEL_REGISTRY: ModelRegistryEntry[] = [
|
|
|
41
40
|
id: 'gemma-e2b',
|
|
42
41
|
name: 'Gemma 4 E2B',
|
|
43
42
|
parameterCount: '2.3B',
|
|
44
|
-
quantization: '
|
|
43
|
+
quantization: 'mixed-2/4/8-bit',
|
|
45
44
|
downloadUrl:
|
|
46
|
-
'https://huggingface.co/
|
|
45
|
+
'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it.litertlm',
|
|
47
46
|
sha256: '', // TODO: Fill with actual hash once model file is verified
|
|
48
|
-
sizeBytes:
|
|
49
|
-
// Conservative limit for 4GB RAM devices.
|
|
50
|
-
//
|
|
47
|
+
sizeBytes: 2_580_000_000, // ~2.58GB
|
|
48
|
+
// Conservative limit for 4GB RAM devices.
|
|
49
|
+
// TODO: Benchmark during Phase 2 testing.
|
|
51
50
|
contextWindow: 8_000,
|
|
52
51
|
minRamBytes: 4_000_000_000, // 4GB
|
|
53
|
-
supportedPlatforms: ['
|
|
52
|
+
supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs
|
|
54
53
|
},
|
|
55
54
|
{
|
|
56
55
|
id: 'gemma-e4b',
|
|
57
56
|
name: 'Gemma 4 E4B',
|
|
58
57
|
parameterCount: '4.5B',
|
|
59
|
-
quantization: '
|
|
58
|
+
quantization: 'mixed-4/8-bit',
|
|
60
59
|
downloadUrl:
|
|
61
|
-
'https://huggingface.co/
|
|
60
|
+
'https://huggingface.co/litert-community/gemma-4-E4B-it-litert-lm/resolve/main/gemma-4-E4B-it.litertlm',
|
|
62
61
|
sha256: '', // TODO: Fill with actual hash once model file is verified
|
|
63
|
-
sizeBytes:
|
|
64
|
-
// Conservative limit for 6GB RAM devices.
|
|
65
|
-
//
|
|
62
|
+
sizeBytes: 3_650_000_000, // ~3.65GB
|
|
63
|
+
// Conservative limit for 6GB RAM devices.
|
|
64
|
+
// TODO: Benchmark during Phase 2 testing.
|
|
66
65
|
contextWindow: 16_000,
|
|
67
66
|
minRamBytes: 6_000_000_000, // 6GB
|
|
68
|
-
supportedPlatforms: ['
|
|
67
|
+
supportedPlatforms: ['android'], // iOS waiting for LiteRT-LM Swift APIs
|
|
69
68
|
},
|
|
70
69
|
];
|
|
71
70
|
|